forgex.F90 Source File

This file includes the API module of Forgex.



Source Code

! Fortran Regular Expression (Forgex)
!
! MIT License
!
! (C) Amasaki Shinobu, 2023-2024
!     A regular expression engine for Fortran.
!     forgex module is a part of Forgex.
!
!! This file includes the API module of Forgex.
#ifdef IMPURE
#define elemental
#define pure
#endif
module forgex
   use :: forgex_syntax_tree_graph_m, only: tree_t
   use :: forgex_syntax_tree_optimize_m, only: get_prefix_literal, get_suffix_literal, get_entire_literal
   use :: forgex_automaton_m, only: automaton_t
   use :: forgex_api_internal_m, only: do_matching_exactly, do_matching_including
   use :: forgex_utility_m, only: is_there_caret_at_the_top, is_there_dollar_at_the_end
   implicit none
   private

   public :: is_valid_regex
   public :: operator(.in.)
   public :: operator(.match.)
   public :: regex
   public :: regex_f

   interface is_valid_regex
      !! The generic name for the `is_valid_regex` function implemented as `is_valid_regex_pattern`.
      module procedure :: is_valid_regex_pattern
   end interface


   interface operator(.in.)
      !! Interface for user-defined operator of `.in.`
      module procedure :: operator__in
   end interface

   interface operator(.match.)
   !! Interface for user-defined operator of `.match.`
      module procedure :: operator__match
   end interface

   interface regex
      !! The generic name for the `regex` subroutine implemented as `procedure__regex`.
      module procedure :: subroutine__regex
   end interface

   interface regex_f
      !! The generic name for the `regex_f` function implemented as `function__regex`.
      module procedure :: function__regex
   end interface regex_f

contains

   pure elemental function is_valid_regex_pattern (pattern) result(res)
      !! The function validating a given regex patten.
      implicit none
      character(*), intent(in)  :: pattern
      logical                   :: res

      character(:), allocatable :: buff
      type(tree_t)              :: tree

      buff = trim(pattern)
      call tree%build(buff)

      res = tree%is_valid_pattern
   end function is_valid_regex_pattern


   pure elemental function operator__in(pattern, str) result(res)
      use :: forgex_parameters_m, only: ACCEPTED_EMPTY, INVALID_CHAR_INDEX
      !! The function implemented for the `.in.` operator.
      implicit none
      character(*), intent(in)  :: pattern, str
      logical                   :: res

      character(:), allocatable :: buff
      type(tree_t)              :: tree
      type(automaton_t)         :: automaton
      integer                   :: from, to

      character(:), allocatable :: prefix, suffix, entirely_fixed_string
      logical :: unused

      prefix = ''
      suffix = ''
      entirely_fixed_string = ''
      from = INVALID_CHAR_INDEX
      to = INVALID_CHAR_INDEX

      buff = trim(pattern)

      ! Build a syntax tree from buff, and store the result in tree and root.
      call tree%build(buff)

      ! Reterns .false. if the given pattern is invalid.
      if (.not. tree%is_valid_pattern) then
         res = .false.
         return
      end if

      ! If the whole pattern is a fixed string, get it.
      entirely_fixed_string = get_entire_literal(tree)
      
      ! If the pattern consists only of fixed character string,
      if (entirely_fixed_string /= '') then

         ! from_l stores the position of the literal in input text.
         from = index(str, entirely_fixed_string)

          ! If the literal is included,
         if (from > 0) then
            ! to_l stores the position of the end of the literal in the text.
            to = from + len(entirely_fixed_string) -1
         end if

         ! If the pattern is contained, it returns true, otherwise it returns false.
         if (from > 0 .and. to > 0) then
            res = .true.
         else
            res = .false.
         end if
   
         return
      end if

    != From here on, we will deal with cases where an entire pattern is not a fixed string.

      ! Extract a prefix and a suffix from the tree.
      prefix = get_prefix_literal(tree)
      suffix = get_suffix_literal(tree)

      ! Initialize automaton with tree.
      call automaton%preprocess(tree)
      call automaton%init()

      ! Call the internal procedure to match string, and store the result in logical `res`.
      call do_matching_including(automaton, str, from, to, prefix, suffix, unused)
   
       ! Handle when it matches an empty string.
      if (from == ACCEPTED_EMPTY .and. to == ACCEPTED_EMPTY) then
         res = .true.
         return
      end if

      ! If the pattern is contained in the text, it returns true, otherwise it returns false.
      if (from > 0 .and. to > 0) then
         res = .true.
      else
         res = .false.
      end if

      ! Free the automaton instance.
      call automaton%free()
   end function operator__in


   pure elemental function operator__match(pattern, str) result(res)
      !! The function implemented for the `.match.` operator.
      implicit none
      character(*), intent(in)  :: pattern, str
      logical                   :: res

      character(:), allocatable :: buff
      type(tree_t)              :: tree
      type(automaton_t)         :: automaton
      character(:), allocatable :: prefix, suffix, entirely_fixed_string
      logical :: unused

      ! Initalize
      prefix = ''
      suffix = ''
      entirely_fixed_string = ''

      ! If the pattern begins with a caret character and ends with
      ! a doller character, they are removed and assigned to the string buffer.
      if (is_there_caret_at_the_top(pattern)) then
         buff = pattern(2:len(pattern))
      else
         buff = pattern(1:len(pattern))
      end if

      if (is_there_dollar_at_the_end(pattern)) then
         buff = buff(1:len_trim(pattern)-1)
      end if

      ! Build a syntax tree from buff, and store the result in tree and root.
      ! call build_syntax_tree(buff, tape, tree, root)
      call tree%build(buff)

      ! Reterns .false. if the given pattern is invalid.
      if (.not. tree%is_valid_pattern) then
         res = .false.
         return
      end if

      ! If the whole pattern is a fixed string, get it.
      entirely_fixed_string = get_entire_literal(tree)

      ! If the pattern consists only of fixed character string,
      if (entirely_fixed_string /= '') then
         if (len(str) == len(entirely_fixed_string)) then
            ! return true if the lengths are equal and the strings are equal.
            res = str == entirely_fixed_string
            return
         end if
      end if

   != From here on, we will deal with cases where an entire pattern is not a fixed string.
      
      ! Get the prefix contained in the AST.
      prefix = get_prefix_literal(tree)
      suffix = get_suffix_literal(tree)

      ! Initialize automaton with tree and root.
      call automaton%preprocess(tree)
      call automaton%init()

      ! Call the internal procedure to match string, and store the result in logical `res`.
      call do_matching_exactly(automaton, str, res, prefix, suffix, unused)

      ! Free the automaton instance.
      call automaton%free()

   end function operator__match


   !> The function implemented for the `regex` subroutine.
   pure subroutine subroutine__regex(pattern, text, res, length, from, to, status, err_msg)
      use :: forgex_parameters_m, only: ACCEPTED_EMPTY, INVALID_CHAR_INDEX
      use :: forgex_syntax_tree_error_m, only: get_error_message
      implicit none
      character(*),              intent(in)    :: pattern, text
      character(:), allocatable, intent(inout) :: res
      integer, optional,         intent(inout) :: length, from, to, status
      character(*), optional,    intent(inout) :: err_msg

      character(:),      allocatable :: buff
      type(tree_t)                   :: tree
      type(automaton_t)              :: automaton
      integer                        :: from_l, to_l

      character(:), allocatable :: prefix, suffix, entirely_fixed_string
      logical :: unused

      ! Initialize
      prefix = ''
      suffix = ''
      entirely_fixed_string = ''
      from_l = INVALID_CHAR_INDEX
      to_l = INVALID_CHAR_INDEX

      buff = trim(pattern)

      ! Build tree from regex pattern in the buff variable.
      call tree%build(buff)

      ! Assigns invalid value of arguments if the given pattern is invalid.
      if (.not. tree%is_valid_pattern) then
         res = ""
         if (present(length)) length = 0
         if (present(from))   from = INVALID_CHAR_INDEX
         if (present(to))     to = INVALID_CHAR_INDEX
         if (present(err_msg)) err_msg = get_error_message(tree%code)
         if (present(status)) status = tree%code
         return
      end if

      ! If the whole pattern is a fixed string, get it.
      entirely_fixed_string = get_entire_literal(tree)

      ! If the pattern consists only of fixed character string,
      if (entirely_fixed_string /= '') then

         ! from_l stores the position of the literal in input text.
         from_l = index(text, entirely_fixed_string)

         ! If the literal is included, 
         if (from_l > 0) then
            ! to_l stores the position of the end of the literal in the text.
            to_l = from_l + len(entirely_fixed_string) -1
         end if

         ! If the pattern is contained,
         if (from_l > 0 .and. to_l > 0) then
            ! assign the value of the local variable to the argument,
            if (present(from)) from = from_l
            if (present(to)) to = to_l
            if (present(length)) length = len(entirely_fixed_string)
            res = text(from_l:to_l)
         else
            ! otherwise return the empty string.
            if (present(from)) from = 0
            if (present(to)) to = 0
            if (present(length)) length = 0
            res = ''
         end if
         return
      end if

   != From here on, we will deal with cases where an entire pattern is not a fixed string.

      ! Extract a prefix and a suffix from the tree.
      prefix = get_prefix_literal(tree)
      suffix = get_suffix_literal(tree)

      ! Initialize automaton
      call automaton%preprocess(tree)
      call automaton%init()

      ! Perform a match for whether a pattern is included. 
      call do_matching_including(automaton, text, from_l, to_l, prefix, suffix, unused)

      ! Handle when it matches an empty string.
      if (from_l == ACCEPTED_EMPTY .and. to_l == ACCEPTED_EMPTY) then
         res = ''
         if (present(from)) from = 0
         if (present(to)) to = 0
         if (present(length)) length = 0
         return
      end if

      ! Process if the pattern is contained in text,
      if (from_l > 0 .and. to_l > 0) then
         res = text(from_l:to_l)
         if (present(length)) length = to_l - from_l + 1
         if (present(from)) from = from_l
         if (present(to)) to = to_l
      else
         ! otherwise return an empty string and 0. 
         res = ''
         if (present(length)) length = 0
         if (present(from)) from = 0
         if (present(to)) to = 0
      end if

      ! Free the automaton instance.
      call automaton%free()
   end subroutine subroutine__regex


   !> The function implemented for the `regex_f` function.
   pure function function__regex(pattern, text) result(res)
      implicit none
      character(*), intent(in)  :: pattern, text
      character(:), allocatable :: res

      call subroutine__regex(pattern, text, res)

   end function function__regex

end module forgex