This procedure reads a text, runs regular-expression matching against it using an automaton, and, if the text contains a match, stores the start and end indices of that match in the `from` and `to` arguments.
| Type | Intent | Optional | Attributes | Name |
|---|---|---|---|---|
| type(automaton_t) | intent(inout) | | | automaton |
| character(len=*) | intent(in) | | | string |
| integer | intent(inout) | | | from |
| integer | intent(inout) | | | to |
!> Search `string` for the leftmost match of the regular expression held by
!> `automaton`, keeping the longest match found at that starting position,
!> and report the matched range through `from` and `to`.
!>
!> Results:
!>
!> - match found: `from` and `to` are the 1-based first and last indices of
!>   the match within `string`;
!> - `string` has zero length and the initial DFA node is accepting:
!>   `from` and `to` are both set to `ACCEPTED_EMPTY`;
!> - otherwise: `from` and `to` are both 0.
!>
!> The program is terminated with `error stop` if the automaton's initial
!> index is still `DFA_NOT_INIT`.
subroutine do_matching_including_no_literal_opts (automaton, string, from, to)
   use :: forgex_utility_m
   implicit none
   type(automaton_t), intent(inout) :: automaton   ! passed on to `automaton%construct`
   character(*),      intent(in)    :: string      ! text to search
   integer,           intent(inout) :: from, to    ! matched range (see header)

   integer :: cur_i, dst_i   ! current and destination index of DFA nodes
   integer :: ci             ! index in `str` of the character about to be consumed
   integer :: next_ci        ! index in `str` of the character after that one
   integer :: max_match      ! `ci` seen at the most recent accepting node; 0 = none
   integer :: start          ! index in `str` where the current attempt begins
   integer :: padded_len     ! len(str), i.e. len(string) + 2
   character(:), allocatable :: str

   from = 0
   to = 0

   cur_i = automaton%initial_index
   if (cur_i == DFA_NOT_INIT) then
      error stop "DFA have not been initialized."
   end if

   ! Only a genuinely zero-length text takes the empty-input shortcut.
   ! Fortran blank-pads the shorter operand of a character comparison, so a
   ! test such as `string == ''` is also true for a text consisting of a
   ! blank, which would then never be run through the DFA.
   if (len(string) == 0) then
      if (automaton%dfa%nodes(cur_i)%accepted) then
         from = ACCEPTED_EMPTY
         to = ACCEPTED_EMPTY
      end if
      return
   end if

   ! Wrap the text in NUL characters; index k of `str` is index k-1 of
   ! `string`. NOTE(review): the sentinels presumably let the automaton see
   ! the beginning/end of the text -- confirm against the DFA construction.
   str = char(0)//string//char(0)
   padded_len = len(str)

   start = 1
   scan_starts: do while (start < padded_len)
      max_match = 0
      ci = start
      cur_i = automaton%initial_index

      ! Traverse the DFA over `str`, beginning at position `start`.
      walk_dfa: do while (cur_i /= DFA_INVALID_INDEX)

         ! Remember the latest accepting position. Positions equal to
         ! `start` are skipped, so a zero-width match is never recorded.
         if (automaton%dfa%nodes(cur_i)%accepted .and. ci /= start) then
            max_match = ci
         end if

         if (ci > padded_len) exit walk_dfa

         ! NOTE(review): `idxutf8` is assumed to return the index of the last
         ! byte of the UTF-8 character that begins at `ci` -- confirm.
         next_ci = idxutf8(str, ci) + 1

         ! Obtain in `dst_i` the node reached from `cur_i` on this character.
         call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))

         cur_i = dst_i
         ci = next_ci
      end do walk_dfa

      ! Update match position if a match is found.
      if (max_match > 0) then
         from = start - 1
         if (from == 0) from = 1   ! attempt began on the leading NUL character
         if (max_match >= padded_len) then
            to = len(string)       ! match reached the trailing NUL: clamp to the text
         else
            to = max_match - 2     ! -1: last consumed index in `str`; -1: leading NUL
         end if
         return
      end if

      ! No match from this position: retry from the next character
      ! (brute-force search).
      start = idxutf8(str, start) + 1
   end do scan_starts
end subroutine do_matching_including_no_literal_opts