subroutine__regex Subroutine

private pure subroutine subroutine__regex(pattern, text, res, length, from, to, status, err_msg)

The function implemented for the regex subroutine.

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: pattern
character(len=*), intent(in) :: text
character(len=:), intent(inout), allocatable :: res
integer, intent(inout), optional :: length
integer, intent(inout), optional :: from
integer, intent(inout), optional :: to
integer, intent(inout), optional :: status
character(len=*), intent(inout), optional :: err_msg

Source Code

   pure subroutine subroutine__regex(pattern, text, res, length, from, to, status, err_msg)
      use :: forgex_parameters_m, only: ACCEPTED_EMPTY, INVALID_CHAR_INDEX
      use :: forgex_syntax_tree_error_m, only: get_error_message
      implicit none
      character(*),              intent(in)    :: pattern, text
      character(:), allocatable, intent(inout) :: res
      integer, optional,         intent(inout) :: length, from, to, status
      character(*), optional,    intent(inout) :: err_msg

      character(:),      allocatable :: buff
      type(tree_t)                   :: tree
      type(automaton_t)              :: automaton
      integer                        :: from_l, to_l

      character(:), allocatable :: prefix, suffix, entirely_fixed_string
      logical :: unused

      ! Initialize
      prefix = ''
      suffix = ''
      entirely_fixed_string = ''
      from_l = INVALID_CHAR_INDEX
      to_l = INVALID_CHAR_INDEX

      buff = trim(pattern)

      ! Build tree from regex pattern in the buff variable.
      call tree%build(buff)

      ! Assigns invalid value of arguments if the given pattern is invalid.
      if (.not. tree%is_valid_pattern) then
         res = ""
         if (present(length)) length = 0
         if (present(from))   from = INVALID_CHAR_INDEX
         if (present(to))     to = INVALID_CHAR_INDEX
         if (present(err_msg)) err_msg = get_error_message(tree%code)
         if (present(status)) status = tree%code
         return
      end if

      ! If the whole pattern is a fixed string, get it.
      entirely_fixed_string = get_entire_literal(tree)

      ! If the pattern consists only of fixed character string,
      if (entirely_fixed_string /= '') then

         ! from_l stores the position of the literal in input text.
         from_l = index(text, entirely_fixed_string)

         ! If the literal is included, 
         if (from_l > 0) then
            ! to_l stores the position of the end of the literal in the text.
            to_l = from_l + len(entirely_fixed_string) -1
         end if

         ! If the pattern is contained,
         if (from_l > 0 .and. to_l > 0) then
            ! assign the value of the local variable to the argument,
            if (present(from)) from = from_l
            if (present(to)) to = to_l
            if (present(length)) length = len(entirely_fixed_string)
            res = text(from_l:to_l)
         else
            ! otherwise return the empty string.
            if (present(from)) from = 0
            if (present(to)) to = 0
            if (present(length)) length = 0
            res = ''
         end if
         return
      end if

   != From here on, we will deal with cases where an entire pattern is not a fixed string.

      ! Extract a prefix and a suffix from the tree.
      prefix = get_prefix_literal(tree)
      suffix = get_suffix_literal(tree)

      ! Initialize automaton
      call automaton%preprocess(tree)
      call automaton%init()

      ! Perform a match for whether a pattern is included. 
      call do_matching_including(automaton, text, from_l, to_l, prefix, suffix, unused)

      ! Handle when it matches an empty string.
      if (from_l == ACCEPTED_EMPTY .and. to_l == ACCEPTED_EMPTY) then
         res = ''
         if (present(from)) from = 0
         if (present(to)) to = 0
         if (present(length)) length = 0
         return
      end if

      ! Process if the pattern is contained in text,
      if (from_l > 0 .and. to_l > 0) then
         res = text(from_l:to_l)
         if (present(length)) length = to_l - from_l + 1
         if (present(from)) from = from_l
         if (present(to)) to = to_l
      else
         ! otherwise return an empty string and 0. 
         res = ''
         if (present(length)) length = 0
         if (present(from)) from = 0
         if (present(to)) to = 0
      end if

      ! Free the automaton instance.
      call automaton%free()
   end subroutine subroutine__regex