syntax_tree_optimize_m.F90 Source File


Source Code

#ifdef IMPURE
#define pure
#endif
module forgex_syntax_tree_optimize_m
   use, intrinsic :: iso_fortran_env, only: int32
   use :: forgex_syntax_tree_node_m, only: tree_node_t
   use :: forgex_syntax_tree_graph_m, only: tree_t
   use :: forgex_utf8_m
   use :: forgex_enums_m
   implicit none
   private

   public :: get_prefix_literal
   public :: get_suffix_literal
   public :: get_entire_literal

contains

   pure function get_prefix_literal(tree) result(chara)
      implicit none
      type(tree_t), intent(in) :: tree
      character(:), allocatable :: chara
      logical :: each_res

      chara = ''

      call get_prefix_literal_internal(tree%nodes, tree%top, chara, each_res)

   end function get_prefix_literal

   
   pure function get_suffix_literal(tree) result(chara)
      implicit none
      type(tree_t), intent(in) :: tree
      character(:), allocatable :: chara
      logical :: has_or, has_closure

      chara = ''
      has_or = .false.
      has_closure = .false.

      call get_suffix_literal_internal(tree%nodes, tree%top, chara, has_or, has_closure)

   end function get_suffix_literal


   pure function get_entire_literal(tree) result(chara)
      implicit none
      type(tree_t), intent(in) :: tree 
      character(:),allocatable :: chara
      logical :: each_res
      chara = ''

      call get_entire_literal_internal(tree%nodes, tree%top, chara, each_res)
   end function get_entire_literal

 
   pure function is_literal_tree_node(node) result(res)
      implicit none
      type(tree_node_t), intent(in) :: node
      logical :: res

      res = .false.

      if (.not. allocated(node%c)) return

      if (node%op == op_char .and. size(node%c) == 1) then
         if (node%c(1)%min == node%c(1)%max) then
            res = .true.
         end if
      end if
   end function is_literal_tree_node


   pure function is_char_class_tree_node(node) result(res)
      implicit none
      type(tree_node_t), intent(in) :: node
      logical :: res

      res = .false.
      if (node%op == op_char) res = .true.

   end function is_char_class_tree_node


   pure recursive subroutine get_entire_literal_internal(tree, idx, literal, res)
      use :: forgex_syntax_tree_node_m
      implicit none
      type(tree_node_t), intent(in) :: tree(:)
      integer(int32), intent(in) :: idx
      character(:), allocatable, intent(inout) :: literal
      logical, intent(inout) :: res

      type(tree_node_t) :: node
      integer :: i
      node = tree(idx)

      if (node%op == op_concat) then
         call get_entire_literal_internal(tree, node%left_i, literal, res)
         if (literal == '') return
         if (res) then
            call get_entire_literal_internal(tree, node%right_i, literal, res)
         else
            literal = ''
         end if
         if (literal == '') return

      else if (node%op == op_repeat) then
         if (node%max_repeat == node%min_repeat) then
            do i = 1, node%min_repeat
               call get_entire_literal_internal(tree, node%left_i, literal, res)
            end do
         else
            res = .false.
            literal = ''
         end if

      else if (is_literal_tree_node(node)) then

         ! This size function is safe because is_literal_function returns false
         ! if the node%c is not allocated. 
         if (size(node%c, dim=1) == 1) then

            if (node%c(1)%min == node%c(1)%max) then
               literal = literal//char_utf8(node%c(1)%min)
               res = .true.
               return
            end if
         end if
         res = .false.
         literal = ''

      else
         res = .false.
         literal = ''

      end if

   end subroutine get_entire_literal_internal
   

   pure recursive subroutine get_prefix_literal_internal(tree, idx, prefix, res)
      use :: forgex_parameters_m
      implicit none
      type(tree_node_t), intent(in) :: tree(:)
      integer(int32), intent(in) :: idx
      character(:), allocatable, intent(inout) :: prefix
      logical, intent(inout) :: res

      logical :: res_left, res_right, unused
      type(tree_node_t) :: node
      character(:), allocatable :: candidate1, candidate2
      integer :: j, n
      
      if (idx < 1) return
      node = tree(idx)
      res_left = .false.
      res_right = .false.
      candidate1 = ''
      candidate2 = ''

      select case (node%op)
      case (op_concat)
         call get_prefix_literal_internal(tree, node%left_i, candidate1, res_left)

         if (res_left) then
            call get_prefix_literal_internal(tree, node%right_i, candidate2, res_right)
         end if

         prefix = prefix//candidate1//candidate2

         res = res_left .and. res_right

      case(op_union)
         call get_prefix_literal_internal(tree, node%left_i, candidate1, unused)
         call get_prefix_literal_internal(tree, node%right_i, candidate2, unused)
         prefix = extract_same_part_prefix(candidate1, candidate2)
         res = .false.
      case(op_repeat)
         n = node%min_repeat
         do j = 1, n
            call get_prefix_literal_internal(tree, node%left_i, prefix, res_left)
         end do
         res = res_left
      case (op_char)
         if (is_literal_tree_node(node)) then
            if (node%c(1)%min == node%c(1)%max) then
               prefix = prefix//adjustl_multi_byte(char_utf8(node%c(1)%min))
               res = .true.
               return
            end if
         end if
         res = .false.
      case default
         res = .false.
      end select
   end subroutine get_prefix_literal_internal


   pure recursive subroutine get_suffix_literal_internal(tree, idx, suffix,  has_or, has_closure)
      implicit none
      type(tree_node_t), intent(in) :: tree(:)
      integer(int32), intent(in) :: idx
      character(:), allocatable, intent(inout) :: suffix
      logical, intent(inout) :: has_or, has_closure
      
      logical :: or_r, or_l, closure_r, closure_l
      type(tree_node_t) :: node, parent
      character(:), allocatable :: candidate1, candidate2
      integer :: n, j

      if (idx < 1) return
      node = tree(idx)
      candidate1 = ''
      candidate2 = ''
      or_l = .false.
      or_r = .false.
      closure_l = .false.
      closure_r = .false.

      if (idx < 1) return

      select case (node%op)
      case (op_concat)
         call get_suffix_literal_internal(tree, node%right_i, suffix, or_r, closure_r)

         if(.not. or_r) call get_suffix_literal_internal(tree, node%left_i, candidate1, or_l, closure_l)

         has_or = or_l .or. or_r
         has_closure =  closure_r
         if (or_r .and. or_l) then
            return
         else if (or_r) then
            return
         else if (closure_l) then
            return
         else if (closure_r) then
            suffix = suffix
         else
            suffix = candidate1//suffix
            return
         end if
   
      case (op_union) !OR
         call get_suffix_literal_internal(tree, node%left_i, candidate1, or_l, has_closure)
         call get_suffix_literal_internal(tree, node%right_i, candidate2, or_r, has_closure)

         suffix = extract_same_part_suffix(candidate1, candidate2)

         has_or = .true.

      case(op_repeat)
         n = node%min_repeat
         do j = 1, n
            call get_suffix_literal_internal(tree, node%left_i, suffix, or_l, has_closure)
            has_or = or_l .or. has_or
         end do

         if (node%min_repeat /= node%max_repeat) has_closure = .true.

      case(op_closure)
         has_closure = .true.
         if(node%parent_i == 0) return
         parent = tree(node%parent_i)

         ! Processing the + operator
         ! Get the left of the parent node, and if it has the same suffix as the current node, return it.
         if (parent%own_i /= 0) then
            if (parent%op == op_concat) then
               if (parent%right_i == node%own_i) then
                  call get_suffix_literal_internal(tree, parent%left_i, candidate1, or_l, closure_l)
                  call get_suffix_literal_internal(tree, node%left_i, candidate2, or_r, closure_r)
                  if (candidate1 == candidate2) then
                     suffix = candidate1
                  end if
               end if
            end if
         end if
         has_or = or_l .or. or_r

      case default
         if (is_literal_tree_node(node)) then
            suffix = char_utf8(node%c(1)%min)//suffix
         else if (is_char_class_tree_node(node)) then
            has_or = .true.
         end if
      end select
   end subroutine get_suffix_literal_internal


!=====================================================================!


   pure function extract_same_part_prefix (a, b) result(res)
      use :: forgex_utf8_m
      implicit none
      character(*), intent(in) :: a, b
      character(:), allocatable :: res

      character(:), allocatable :: buf
      integer :: i, ie, n
      res = ''
      buf = ''
      n = min(len(a), len(b))
      do i = 1, n
         if (a(i:i) == b(i:i)) then
            buf = buf//a(i:i)
         else
            exit
         end if
      end do

      ! Handling UTF8 fragment bytes
      n = len(buf)
      i = 1
      do while (i <= n)
         ie = idxutf8(buf, i)
         if (n < ie) exit

         if (is_valid_multiple_byte_character(buf(i:ie))) then
            res = res//adjustl_multi_byte(buf(i:ie))
         end if
         i = ie + 1
      end do

   end function extract_same_part_prefix


   pure function extract_same_part_suffix (a, b) result(res)
      use :: forgex_utf8_m
      implicit none
      character(*), intent(in) :: a, b
      character(:), allocatable :: res
      
      character(:), allocatable :: buf
      integer :: i, ii, n, diff, ie
      character(:), allocatable :: short_s, long_s
      
      res = ''
      buf = ''

      if (len(a) < len(b)) then
         short_s = a 
         long_s = b
      else
         short_s = b
         long_s = a
      end if
 
      n = min(len(a), len(b))
      diff = max(len(a), len(b)) - n

      do i = n, 1, -1
         ii = i + diff
         if (short_s(i:i) == long_s(ii:ii)) then
            buf = a(i:i)//buf
         else
            exit
         end if
      end do

      n = len(buf)
      i= 1
      do while (i <= n)
         ie = idxutf8(buf, i)
         if (n < ie) exit
         
         if (is_valid_multiple_byte_character(buf(i:ie))) then
            res = res//adjustl_multi_byte(buf(i:ie))
         end if
         i = ie + 1
      end do
   end function extract_same_part_suffix


   pure function extract_same_part_middle(left_middle, right_middle) result(middle)
      use :: forgex_utf8_m
      implicit none
      character(*), intent(in) :: left_middle, right_middle
      character(:), allocatable :: middle
      
      integer :: i, j, max_len, len_left, len_right, len_tmp
      character(:), allocatable :: tmp_middle

      len_left = len(left_middle)
      len_right = len(right_middle)
      max_len = 0
      middle = ''

      ! Compare all substring
      do i = 1, len_left
         do j = 1, len_right
            if (left_middle(i:i) == right_middle(j:j)) then
               tmp_middle = ''
               len_tmp = 0

               ! Check whether match strings or not.
               do while (i+len_tmp <= len_left .and. j+len_tmp <= len_right)
                  if (left_middle(i:i+len_tmp) == right_middle(j:j+len_tmp)) then
                     tmp_middle = left_middle(i:i+len_tmp)
                     len_tmp = len(tmp_middle)
                  else
                     exit
                  end if
               end do

               ! Store the longest common part.
               if (len_tmp > max_len) then
                  max_len = len(tmp_middle)
                  middle = tmp_middle
               end if
            end if
         end do
      end do
   end function extract_same_part_middle
      
end module forgex_syntax_tree_optimize_m