do_matching_including Subroutine

public pure subroutine do_matching_including(automaton, string, from, to, prefix, suffix, runs_engine)

This procedure reads a text, performs regular expression matching using an automaton, and stores the string index in the argument if it contains a match.

Arguments

Type IntentOptional Attributes Name
type(automaton_t), intent(inout) :: automaton
character(len=*), intent(in) :: string
integer, intent(inout) :: from
integer, intent(inout) :: to
character(len=*), intent(in) :: prefix
character(len=*), intent(in) :: suffix
logical, intent(inout) :: runs_engine

Source Code

   pure subroutine do_matching_including (automaton, string, from, to, prefix, suffix, runs_engine)
      use :: forgex_utility_m, only: get_index_list_forward
      use :: forgex_parameters_m, only: INVALID_CHAR_INDEX, ACCEPTED_EMPTY
      implicit none
      type(automaton_t), intent(inout) :: automaton
      character(*),      intent(in)    :: string
      integer,           intent(inout) :: from, to
      character(*),      intent(in)    :: prefix, suffix
      logical,           intent(inout) :: runs_engine

      integer :: cur_i, dst_i ! current and destination index of DFA nodes
      integer :: ci           ! character index
      integer :: next_ci      ! next character index
      integer :: max_match    ! maximum value of match attempts
      integer :: start        ! starting character index
      integer :: i
      integer :: suf_idx      ! right-most suffix index
      character(:), allocatable :: str
      integer, allocatable :: index_list(:)
      logical :: do_brute_force

      do_brute_force = .false.
      runs_engine = .false.

      str = char(0)//string//char(0)

      from = 0
      to = 0
      do_brute_force = prefix == ''
      suf_idx = INVALID_CHAR_INDEX

      cur_i = automaton%initial_index

      if (cur_i == DFA_NOT_INIT) then
         error stop "DFA have not been initialized."
      end if

      if (len(string) <= 1 .and. string == '') then
         if (automaton%dfa%nodes(cur_i)%accepted) then
            from = ACCEPTED_EMPTY
            to = ACCEPTED_EMPTY
         end if
         return
      end if

      if (.not. do_brute_force) then
         call get_index_list_forward(str, prefix, suffix, index_list)
         if (.not. allocated(index_list)) return
         if (index_list(1) == INVALID_CHAR_INDEX) then
            do_brute_force = .true.
         end if
      end if

      loop_init: block
         if (do_brute_force) then
            i = 1
            start = i
         else
            ! If the first in the index list is 2, set start=1, i=0
            ! to take into account the leading NULL character. 
            if (index_list(1) == 2) then
               start = 1
               i = 0
            else
               i = 1
               start = index_list(i)
            end if

            if (suffix /= '') then
               suf_idx = index(string, suffix, back=.true.)
               if (suf_idx == 0) return
            end if

         end if
      end block loop_init


      do while (start < len(str))
         max_match = 0
         ci = start
         cur_i = automaton%initial_index
         runs_engine = .true.

         if (suf_idx /= INVALID_CHAR_INDEX) then
            if (suf_idx < ci) exit
         end if

         ! Traverse the DFA with the input string from the current starting position of ``cur_i`.
         do while (cur_i /= DFA_INVALID_INDEX)

            if (automaton%dfa%nodes(cur_i)%accepted .and. ci /= start) then
               max_match = ci
            end if

            if (ci > len(str)) exit

            next_ci = idxutf8(str, ci) + 1

            call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))

            cur_i = dst_i
            ci = next_ci
         end do

         ! Update match position if a match is found.
         if (max_match > 0) then
            from = start-1
            if (from == 0) from = 1 ! handle leading NULL character.
            if (max_match >= len(str)) then
               to = len(string)
            else
               to = max_match-2
            end if
            return
         end if

         if (do_brute_force) then
            start = idxutf8(str, start) + 1 ! Bruteforce searching
            cycle
         endif

         i = i + 1
         if (i <= size(index_list)) then
            start = index_list(i)
            if (start == INVALID_CHAR_INDEX) return
         else
            return
         end if
      end do


   end subroutine do_matching_including