automaton_m.F90 Source File

This file contains the definition of automaton_t class and its type-bound procedures.



Source Code

! Fortran Regular Expression (Forgex)
!
! MIT License
!
! (C) Amasaki Shinobu, 2023-2024
!     A regular expression engine for Fortran.
!     forgex_automaton_m module is a part of Forgex.
!
!! This file contains the definition of `automaton_t` class and its type-bound procedures.
!
!> The `forgex_automaton_m` module contains `automaton_t` definition and its type-bound procedures.
!>
#ifdef IMPURE
#define pure
#endif
module forgex_automaton_m
   use, intrinsic :: iso_fortran_env, only: int32, stderr=>error_unit
   use :: forgex_parameters_m, only: DFA_NOT_INIT, TREE_NODE_BASE, TREE_NODE_LIMIT, &
         NFA_STATE_BASE, NFA_NULL_TRANSITION, DFA_INVALID_INDEX, DFA_TRANSITION_UNIT, DFA_INITIAL_INDEX
   use :: forgex_segment_m
   use :: forgex_nfa_state_set_m
   use :: forgex_nfa_graph_m
   use :: forgex_lazy_dfa_graph_m
   use :: forgex_syntax_tree_graph_m, only: tree_t
   implicit none
   private

   type, public :: automaton_t
      !! This type contains an NFA graph, and the DFA graph that are derived from it.
      type(tree_t)                 :: tree
      type(nfa_graph_t)            :: nfa
      type(dfa_graph_t)            :: dfa
      type(nfa_state_set_t)        :: entry_set
      type(segment_t), allocatable :: all_segments(:)
      integer(int32)               :: nfa_entry, nfa_exit
      integer(int32)               :: initial_index = DFA_NOT_INIT
   contains
      procedure :: preprocess      => automaton__build_nfa
      procedure :: init            => automaton__initialize
      procedure :: epsilon_closure => automaton__epsilon_closure
      procedure :: register_state  => automaton__register_state
      procedure :: construct       => automaton__construct_dfa
      procedure :: get_reachable   => automaton__compute_reachable_state
      procedure :: move            => automaton__move
      procedure :: destination     => automaton__destination
      procedure :: free            => automaton__deallocate
      procedure :: print           => automaton__print_info
      procedure :: print_dfa       => automaton__print_dfa
   end type automaton_t

contains

   pure subroutine automaton__build_nfa(self, tree)
      use :: forgex_syntax_tree_graph_m, only: tree_t
      implicit none
      class(automaton_t), intent(inout) :: self
      type(tree_t), intent(in) :: tree

      self%tree = tree
      !-- NFA building
      call self%nfa%build(tree, self%nfa_entry, self%nfa_exit, self%all_segments)
   end subroutine automaton__build_nfa

   !> This subroutine reads `tree` and `tree_top` variable, constructs the NFA graph,
   !> and then initializes the DFA graph.
   pure subroutine automaton__initialize(self)
      implicit none
      class(automaton_t), intent(inout) :: self

      type(nfa_state_set_t) :: initial_closure
      integer(int32) :: new_index

      !-- DFA initialize
      ! Invokes DFA preprocessing.
      call self%dfa%preprocess()

      ! Check if it has been initialized.
      if (self%dfa%dfa_top /= DFA_INITIAL_INDEX) then
         error stop "DFA graph initialization is failed."
      end if

      call init_state_set(self%entry_set, self%nfa%nfa_top)
      ! Constructing a DFA initial state from the NFA initial state.
      call add_nfa_state(self%entry_set, self%nfa_entry)

      call init_state_set(initial_closure, self%nfa%nfa_top)
      initial_closure = self%entry_set
      ! Add an NFA node reachable by epsilon transitions to the entrance state set within DFA.
      call self%epsilon_closure(initial_closure, self%nfa_entry)

      ! Assign the computed initial closure into self%entry_set
      self%entry_set = initial_closure

      ! Register `entry_set` as a new DFA state in the graph.
      call self%register_state(self%entry_set, new_index)

      ! Assign the returned index to the `initial_index` of the graph.
      self%initial_index = new_index

   end subroutine automaton__initialize


   pure subroutine automaton__deallocate(self)
      implicit none
      class(automaton_t), intent(inout) :: self

      call self%dfa%free()
      call self%nfa%free()

      if (allocated(self%dfa%nodes)) deallocate(self%dfa%nodes)
      if (allocated(self%nfa%nodes)) deallocate(self%nfa%nodes)
      if (allocated(self%all_segments)) deallocate(self%all_segments)

   end subroutine automaton__deallocate


   !> Compute the ε-closure for a set of NFA states.
   !>
   !> The ε-closure is the set of NFA states reachable from a given set of NFA states via ε-transition.
   !> This subroutine calculates the ε-closure and stores it in the `closure` parameter.
   pure recursive subroutine automaton__epsilon_closure(self, closure, n_index)
      use :: forgex_nfa_node_m
      implicit none
      class(automaton_t), intent(inout) :: self
      type(nfa_state_set_t), intent(inout) :: closure
      integer, intent(in) :: n_index

      type(nfa_state_node_t) :: n_node
      type(nfa_transition_t) :: n_tra
      integer :: j

      call add_nfa_state(closure, n_index)

      n_node = self%nfa%nodes(n_index)

      if (.not. allocated(n_node%forward)) return

       ! すべての順方向の遷移をスキャンする
      do j = 1, n_node%forward_top
         ! 一時変数にコピー
         n_tra = n_node%forward(j)

         if (.not. allocated(n_tra%c)) cycle

         if (any(n_tra%c == SEG_EPSILON) .and. .not. check_nfa_state(closure, n_tra%dst)) then
            if (n_tra%dst /= NFA_NULL_TRANSITION) call self%epsilon_closure(closure, n_tra%dst)
         end if

      end do

   end subroutine automaton__epsilon_closure


   !> This subroutine takes a `nfa_state_set_t` type argument as input and register
   !> the set as a DFA state node in the DFA graph.
   pure subroutine automaton__register_state(self, state_set, res)
      implicit none
      class(automaton_t),    intent(inout) :: self
      type(nfa_state_set_t), intent(in)    :: state_set
      integer(int32),        intent(inout) :: res       ! resulting the new dfa index

      integer(int32) :: i

      ! If the set is already registered, returns the index of the corresponding DFA state.
      i = self%dfa%registered(state_set)
      if ( i /= DFA_INVALID_INDEX) then
         res = i
         return
      end if

      ! Execute an error stop statement if the counter exceeds a limit.
      if (self%dfa%dfa_top >= self%dfa%dfa_limit) then
         ! Reallocate
         call self%dfa%reallocate()
      end if

      !> @note The processing here should reflect the semantic change of `dfa_top`.
      i = self%dfa%dfa_top
      self%dfa%dfa_top = i + 1 ! increment dfa_top


      self%dfa%nodes(i)%nfa_set    = state_set
      self%dfa%nodes(i)%accepted   = check_nfa_state(state_set, self%nfa_exit)
      self%dfa%nodes(i)%registered = .true.

      call self%dfa%nodes(i)%increment_tra_top() ! Somehow this is necessary!

      res = i

   end subroutine automaton__register_state



   !> This function calculates a set of possible NFA states from the current DFA state by the input
   !> character `symbol`.
   !>
   !> It scans through the NFA states and finds the set of reachable states by the given input `symbol`,
   !> excluding ε-transitions.
   pure function automaton__compute_reachable_state(self, curr_i, symbol) result(state_set)
      use :: forgex_segment_m, only: operator(.in.), operator(/=)
      use :: forgex_nfa_node_m, only: nfa_state_node_t, nfa_transition_t
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t), intent(in) :: self
      integer(int32),     intent(in) :: curr_i      ! current index of dfa
      character(*),       intent(in) :: symbol

      type(nfa_state_set_t)  :: state_set    ! RESULT variable
      type(nfa_state_set_t)  :: current_set
      integer                :: i, j, k

      ! temporary variables ... to increase the cache hit rate
      type(nfa_state_node_t)       :: n_node       ! This variable simulates a pointer.
      type(segment_t), allocatable :: segs(:)
      type(nfa_transition_t)       :: n_tra


      call init_state_set(state_set, self%nfa%nfa_top)

      current_set = self%dfa%nodes(curr_i)%nfa_set

      ! Scan the entire NFA states.
      outer: do i = 1, self%nfa%nfa_top

         ! If the i-th element of current state set is true, process the i-th NFA node.
         if (check_nfa_state(current_set, i)) then

            ! Copy to a temporary variable.
            n_node = self%nfa%nodes(i)

            if (.not. allocated(n_node%forward)) cycle

            ! Scan the all transitions belong to the NFA state node.
            middle: do j = 1, n_node%forward_top

               ! Copy to a temporary variable of type(nfa_transition_t)
               n_tra = n_node%forward(j)


               ! If it has a destination,
               if (n_tra%dst /= NFA_NULL_TRANSITION) then

                  ! Investigate the all of segments which transition has.
                  inner: do k = 1, n_tra%c_top

                     ! Copy to a temporary variable fo type(segment_t).
                     ! Note the implicit reallocation.
                     segs = n_tra%c

                     ! If the symbol is in the segment list `segs` or if the segment is epsilon,
                     if ( symbol_to_segment(symbol) .in. segs) then

                        ! Add the index of the NFA state node to `state_set` of type(nfa_state_set_t).
                        call add_nfa_state(state_set, n_node%forward(j)%dst)

                     end if

                  end do inner

               end if

            end do middle

         end if
      end do outer

   end function automaton__compute_reachable_state


   !> This subroutine gets the next DFA nodes index from current index and symbol,
   !> and stores the result in `next` and `next_set`.
   pure subroutine automaton__destination(self, curr, symbol, next, next_set)
      implicit none
      class(automaton_t),    intent(in) :: self
      integer(int32),        intent(in) :: curr
      character(*),          intent(in) :: symbol
      integer(int32),        intent(inout) :: next
      type(nfa_state_set_t), intent(inout) :: next_set

      integer :: i

      ! Get a set of NFAs for which current state can transition, excluding epsilon-transitions.
      next_set = self%get_reachable(curr, symbol)

      ! Initialize the next value
      next = DFA_INVALID_INDEX

      ! Scan the entire DFA nodes.
      do i = 1, self%dfa%dfa_top-1

         ! If there is an existing node corresponding to the NFA state set,
         ! return the index of that node.
         if (equivalent_nfa_state_set(next_set, self%dfa%nodes(i)%nfa_set)) then
            next = i
            return
         end if

      end do
   end subroutine automaton__destination


   !> This function returns the dfa transition object, that contains the destination index
   !> and the corresponding set of transitionable NFA state.
   pure function automaton__move(self, curr, symbol) result(res)
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t), intent(in) :: self
      integer(int32),     intent(in) :: curr    ! current index
      character(*),       intent(in) :: symbol  ! input symbol
      type(dfa_transition_t)         :: res

      type(nfa_state_set_t) :: set
      integer(int32)        :: next


      call self%destination(curr, symbol, next, set)

      ! Set the value of each component of the returned object.
      res%dst = next                      ! valid index of DFA node or DFA_INVALID_INDEX
      res%nfa_set = set

      ! res%c = symbol_to_segment(symbol) ! this component would not be used.
      ! res%own_j = DFA_INITIAL_INDEX     ! this component would not be used.
   end function automaton__move


   !> This subroutine gets the destination index of DFA nodes from the current index with given symbol,
   !> adding a DFA node if necessary.
   !>
   !> It calculates the set of NFA states that can be reached from the `current` node for the given `symbol`,
   !> excluding epsilon transitions, and then registers the new DFA state node if it has not already been registered.
   !> Finally, it adds the transition from the `current` node to the `destination` node in the DFA graph.
   pure subroutine automaton__construct_dfa (self, curr_i, dst_i, symbol)
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t), intent(inout) :: self
      integer(int32),     intent(in)    :: curr_i
      integer(int32),     intent(inout) :: dst_i
      character(*),       intent(in)    :: symbol

      type(dfa_transition_t) :: d_tra
      integer(int32) :: prev_i

      dst_i = DFA_INVALID_INDEX
      prev_i = curr_i

      ! ε遷移を除いた行き先のstate_setを取得する。
      ! Get the state set for the destination excluding epsilon-transition.
      d_tra = self%move(prev_i, symbol)

      ! この実装ではリストのリダクションを計算する必要がない。
      !! In this implementation with array approach, array reduction is done in the reachable procedure.

      ! ε遷移との和集合を取り、d_tra%nfa_setに格納する。
      ! Combine the state set with epsilon-transitions and store in `d_tra%nfa_set`.
      call self%nfa%collect_epsilon_transition(d_tra%nfa_set)

      ! 空のNFA状態集合の登録を禁止する
      if (.not. any(d_tra%nfa_set%vec)) then
         dst_i = DFA_INVALID_INDEX
         return
      end if

      dst_i = self%dfa%registered(d_tra%nfa_set)

      ! まだDFA状態が登録されていない場合は、新しく登録する。
      ! If the destination index is DFA_INVALID_INDEX, register a new DFA node.
      if (dst_i == DFA_INVALID_INDEX) then
         call self%register_state(d_tra%nfa_set, dst_i)
      end if

      ! If the destination index is DFA_INVALID_INDEX, the registration is failed.
      if (dst_i == DFA_INVALID_INDEX) error stop "DFA registration failed."

      if (self%dfa%nodes(prev_i)%is_registered_tra(dst_i, symbol)) return

      ! 遷移を追加する
      ! Add a DFA transition from `prev` to `next` for the given `symbol`.
      call self%dfa%add_transition(d_tra%nfa_set, prev_i, dst_i,  &
             which_segment_symbol_belong(self%all_segments, symbol))
   end subroutine automaton__construct_dfa


!=====================================================================!


   !> This subroutine provides the automata' summarized information.
   subroutine automaton__print_info(self)
      use :: iso_fortran_env, only: stderr => error_unit
      implicit none
      class(automaton_t), intent(in) :: self

      write(stderr, *) "--- AUTOMATON INFO ---"
      write(stderr, *) "entry_set: ", self%entry_set%vec(NFA_STATE_BASE+1:self%nfa%nfa_top)
      write(stderr, *) "allocated(all_segments):", allocated(self%all_segments)
      write(stderr, *) "nfa_entry:     ", self%nfa_entry
      write(stderr, *) "nfa_exit:      ", self%nfa_exit
      write(stderr, *) "initial_index: ", self%initial_index

   end subroutine automaton__print_info


   !> This subroutine prints DFA states and transitions to a given unit number.
   subroutine automaton__print_dfa(self, uni)
      use :: forgex_nfa_state_set_m, only: print_nfa_state_set
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t), intent(in) :: self
      integer(int32), intent(in) :: uni

      type(dfa_transition_t) :: p
      integer(int32) :: i, j

      do i = 1, self%dfa%dfa_top -1

         if (self%dfa%nodes(i)%accepted) then
            write(uni, '(i4,a, a)', advance='no') i, 'A', ": "
         else
            write(uni, '(i4,a, a)', advance='no') i, ' ', ": "
         end if

         do j = 1, self%dfa%nodes(i)%get_tra_top()
            p = self%dfa%nodes(i)%transition(j)
            write(uni, '(a, a, i0, 1x)', advance='no') p%c%print(), '=>', p%dst

         end do
         write(uni, *) ""
      end do

      do i = 1, self%dfa%dfa_top - 1
         if (self%dfa%nodes(i)%accepted) then
            write(uni, '(a, i4, a)', advance='no') "state ", i, 'A = ( '
         else
            write(uni, '(a, i4, a)', advance='no') "state ", i, '  = ( '
         end if

         call print_nfa_state_set(self%dfa%nodes(i)%nfa_set, self%nfa%nfa_top, uni)

         write(uni,'(a)') ")"
      end do
   end subroutine automaton__print_dfa


end module forgex_automaton_m