syntax_tree_optimize_m.F90 Source File


Source Code

! Fortran Regular Expression (Forgex)
!
! MIT License
!
! (C) Amasaki Shinobu, 2023-2025
!     A regular expression engine for Fortran.
!     forgex_syntax_tree_optimize_m module is a part of Forgex.
!
#ifdef IMPURE
#define pure
#endif
module forgex_syntax_tree_optimize_m
   use :: forgex_syntax_tree_node_m, only: tree_node_t
   use :: forgex_syntax_tree_graph_m, only: tree_t
   use :: forgex_utf8_m, only: char_utf8, reverse_utf8
   use :: forgex_parameters_m, only: INVALID_INDEX, INVALID_CHAR_INDEX
   use :: forgex_segment_m, only: width_of_segment
   use :: forgex_enums_m
   use :: forgex_cube_m, only: cube_t, assignment(=)
   implicit none
   private

   !> This type is wrapper to make a allocatable character array with variable length.
   type character_array_element_t
      character(:), allocatable :: c
   end type character_array_element_t

   !> This type contains a character variables that represents each literal:
   !> all, pref, suff, and fact.
   type literal_t
      type(character_array_element_t) :: all, pref, suff, fact
      logical :: flag_closure = .false.
      logical :: flag_class = .false.
   end type literal_t

   character(0), parameter :: theta = ''

   public :: extract_literal

contains

   !> This is the public procedure of this module to obtain each literal from AST.
   pure subroutine extract_literal(tree, all, prefix, suffix, factor)
      implicit none
      type(tree_t), intent(in) :: tree
      character(:), allocatable, intent(inout) :: all, prefix, suffix, factor

      type(literal_t) :: literal

      literal = get_literal(tree)

      all = literal%all%c
      prefix = literal%pref%c
      suffix = literal%suff%c
      factor = literal%fact%c
   end subroutine extract_literal


   !> Wrapping function to retrieve literals: all, prefix, suffix, factor.
   pure function get_literal(tree) result(literal)
      implicit none
      type(tree_t), intent(in) :: tree
      type(literal_t) :: literal

      ! Recursive procedure calls start here.
      call best_factor(tree%nodes, tree%top, literal)

   end function get_literal


   !> This is recursive procedure to tour a given syntax tree.
   pure recursive subroutine best_factor(nodes, idx, lit)
      implicit none
      type(tree_node_t), intent(in) :: nodes(:)
      integer, intent(in) :: idx
      type(literal_t), intent(inout) :: lit

      type(literal_t) :: lit_l, lit_r
      type(tree_node_t) :: curr
      integer :: i

      curr = nodes(idx)
      curr%c = nodes(idx)%c
      lit%all%c = theta
      lit%pref%c = theta
      lit%suff%c = theta
      lit%fact%c = theta

      if (curr%op == op_union .or. curr%op == op_concat) then
         call best_factor(nodes, curr%left_i, lit_l)
         call best_factor(nodes, curr%right_i, lit_r)
      end if

      select case (curr%op)

      case(op_union)
         lit%pref%c = same_part_of_prefix(lit_l%pref%c, lit_r%pref%c)
         lit%suff%c = same_part_of_suffix(lit_l%suff%c, lit_r%suff%c)
         lit%flag_closure = .true.

      case(op_concat)
! #ifdef IMPURE
! write(0,*) "L103: ", lit_l%flag_class, lit_r%flag_class, lit_l%flag_closure, lit_r%flag_closure
! #endif
         lit%flag_class = lit_l%flag_class .or. lit_r%flag_class
         lit%flag_closure = lit_l%flag_closure .or. lit_r%flag_closure

         select case (return_class_closure(lit_l%flag_class, lit_r%flag_class, lit_l%flag_closure, lit_r%flag_closure))
         case (lt_N_class_N_closure)
            lit%all%c = lit_l%all%c//lit_r%all%c
            lit%pref%c = best(lit_l%pref%c, lit_l%all%c//lit_r%pref%c)
            lit%suff%c = best(lit_r%suff%c, lit_l%suff%c//lit_r%all%c)

         case (lt_N_class_R_closure)
            lit%pref%c = lit_l%all%c//lit_r%pref%c
            lit%suff%c = lit_r%suff%c           

         case (lt_N_class_L_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_l%suff%c//lit_r%all%c

         case (lt_N_class_LR_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c

         ! following 12 cases are not tested enough.
         !===========================================================================!
         case (lt_R_class_N_closure)
            lit%pref%c = best(lit_l%pref%c, lit_l%all%c//lit_r%pref%c)
            lit%suff%c = lit_r%suff%c

         case (lt_R_class_R_closure)
            lit%pref%c = best(lit_l%pref%c, lit_l%all%c//lit_r%pref%c)
            lit%suff%c = lit_r%suff%c

         case (lt_R_class_L_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c
            ! lit%fact%c = best(lit_r%suff%c, lit_l%suff%c//lit_r%all%c)

         case (lt_R_class_LR_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c

         !===========================================================================!
         case (lt_L_class_N_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = best(lit_r%suff%c, lit_l%suff%c//lit_r%all%c)

         case (lt_L_class_R_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c
            continue
         case (lt_L_class_L_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = best(lit_r%suff%c, lit_l%suff%c//lit_r%all%c)
            continue
         case (lt_L_class_LR_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c
            continue
         !===========================================================================!
         case (lt_LR_class_N_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c
            continue
         case (lt_LR_class_R_closure)
            lit%pref%c = lit_l%pref%c
            lit%pref%c = lit_l%pref%c
            continue
         case (lt_LR_class_L_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c
            continue
         case (lt_LR_class_LR_closure)
            lit%pref%c = lit_l%pref%c
            lit%suff%c = lit_r%suff%c
            continue
         end select

         !== Intermediate literals (factors) are not implemented and tested yet.
         ! lit%fact%c = best(best(lit_l%fact%c, lit_r%fact%c), lit_l%suff%c//lit_r%pref%c)

      case (op_closure)
         lit%flag_closure = .true.

      case (op_char)

         if (curr%c%single_flag) then
            lit%all%c = char_utf8(curr%c%first())
            lit%pref%c = char_utf8(curr%c%first())
            lit%suff%c =  char_utf8(curr%c%first())
            lit%fact%c =  char_utf8(curr%c%first())
         else
            lit%flag_class = .true.
         end if

      case (op_repeat)
         block
            type(tree_node_t) :: next_l

            ! Is the flag_class of the next node flagged? 
            call best_factor(nodes, curr%left_i, lit_l)
            lit%flag_class = lit_l%flag_class

            do i = 1, curr%min_repeat       
               call best_factor(nodes, curr%left_i, lit_l)
               lit%all%c = lit%all%c//lit_l%all%c
               lit%pref%c = lit%pref%c//lit_l%pref%c
               lit%suff%c = lit%suff%c//lit_l%suff%c
               lit%fact%c = lit%fact%c//lit_l%fact%c
               lit%flag_class = lit%flag_class .or. lit_l%flag_class
               if (lit_l%flag_closure) exit
            end do

            if (curr%min_repeat /= curr%max_repeat) lit%all%c = ''

            lit%flag_closure = curr%min_repeat /= curr%max_repeat
            lit%flag_closure = lit%flag_closure .or. lit_l%flag_closure
         end block

      case default
         lit%flag_closure = .true.
      end select
   end subroutine best_factor


   pure function best(c1, c2) result(res)
      implicit none
      character(*), intent(in) :: c1, c2
      character(:), allocatable :: res

      integer :: max_len
      res = theta
      if (len_trim(c1) > len_trim(c2)) then
         res = trim(adjustl(c1))
      else
         res = trim(adjustl(c2))
      end if
   end function best


   pure function same_part_of_prefix (c1, c2) result(res)
      use :: forgex_utf8_m
      implicit none
      character(*), intent(in) :: c1, c2

      character(:), allocatable :: res, part1, part2
      logical :: flag_return
      integer :: i
      res = ''

      i = 1
      flag_return = .false.
      do while(.not. flag_return)

         part1 = c1(i:idxutf8(c1, i))
         part2 = c2(i:idxutf8(c2, i))
         flag_return = next_idxutf8(c1, i) == INVALID_CHAR_INDEX .or. next_idxutf8(c2,i) == INVALID_CHAR_INDEX

         if (flag_return) return

         if (part1 == part2) then
            res = res//part1
         else
            exit
         end if

         i = next_idxutf8(c1, i)
      end do

   end function same_part_of_prefix


   pure function same_part_of_suffix(c1,c2) result(retval)
      character(*), intent(in) :: c1, c2
      character(:), allocatable :: retval

      character(:), allocatable :: rc1, rc2
      integer :: n, i
      n = max(len_trim(c1), len_trim(c2))

      allocate(character(n):: rc1, rc2)

      rc1 = reverse_utf8(c1)   
      rc2 = reverse_utf8(c2)
      retval = same_part_of_prefix(rc1, rc2)
      retval = reverse_utf8(retval)

   end function same_part_of_suffix


   pure function return_class_closure(f_L_class, f_R_class, f_L_closure, f_R_closure) result(retval)
      implicit none
      logical, intent(in) :: f_L_class, f_r_class, f_l_closure, f_r_closure
      integer :: retval

      if (.not. f_L_class) then
         if (.not. f_r_class) then
            if (.not. f_l_closure) then
               if (.not. f_r_closure) then
                  retval = lt_N_class_N_closure
               else
                  retval = lt_N_class_R_closure
               end if
            else
               if (.not. f_r_closure) then
                  retval = lt_N_class_L_closure
               else
                  retval = lt_N_class_LR_closure
               end if
            end if
         else
            if (.not. f_l_closure) then
               if (.not. f_r_closure) then
                  retval = lt_R_class_N_closure
               else
                  retval = lt_R_class_R_closure
               endif
            else
               if (.not. f_r_closure) then
                  retval = lt_R_class_L_closure
               else
                  retval = lt_R_class_LR_closure
               end if
            end if
         end if
      else
         if (.not. f_r_class) then
            if (.not. f_l_closure) then
               if (.not. f_r_closure) then
                  retval = lt_L_class_N_closure
               else
                  retval = lt_L_class_R_closure
               end if
            else
               if (.not. f_r_closure) then
                  retval = lt_L_class_L_closure
               else
                  retval = lt_L_class_LR_closure
               end if
            end if
         else
            if (.not. f_l_closure) then
               if (.not. f_r_closure) then
                  retval = lt_LR_class_N_closure
               else
                  retval = lt_LR_class_R_closure
               endif
            else
               if (.not. f_r_closure) then
                  retval = lt_LR_class_L_closure
               else
                  retval = lt_LR_class_LR_closure
               end if
            end if
         end if   
      end if
   end function return_class_closure

end module forgex_syntax_tree_optimize_m