automaton_m.F90 Source File

This file contains the definition of automaton_t class and its type-bound procedures.



Source Code

! Fortran Regular Expression (Forgex)
!
! MIT License
!
! (C) Amasaki Shinobu, 2023-2025
!     A regular expression engine for Fortran.
!     forgex_automaton_m module is a part of Forgex.
!
!! This file contains the definition of `automaton_t` class and its type-bound procedures.
!
!> The `forgex_automaton_m` module contains `automaton_t` definition and its type-bound procedures.
!>
#ifdef IMPURE
#define pure
#endif
module forgex_automaton_m
   use, intrinsic :: iso_fortran_env, only: int32, stderr=>error_unit
   use :: forgex_parameters_m, only: DFA_NOT_INIT, TREE_NODE_BASE, TREE_NODE_LIMIT, &
         NFA_STATE_BASE, NFA_NULL_TRANSITION, DFA_INVALID_INDEX, DFA_TRANSITION_UNIT, DFA_INITIAL_INDEX
   use :: forgex_segment_m
   use :: forgex_nfa_state_set_m
   use :: forgex_nfa_graph_m
   use :: forgex_lazy_dfa_graph_m
   use :: forgex_syntax_tree_graph_m, only: tree_t
   use :: forgex_cube_m, only: cube_t
   implicit none
   private

   type, public :: automaton_t
      !! This type contains an NFA graph, and the DFA graph that are derived from it.
      type(tree_t)                 :: tree
      type(nfa_graph_t)            :: nfa
      type(dfa_graph_t)            :: dfa
      type(nfa_state_set_t)        :: entry_set
      type(cube_t)                 :: cube
      integer(int32)               :: nfa_entry, nfa_exit
      integer(int32)               :: initial_index = DFA_NOT_INIT
   contains
      procedure :: preprocess      => automaton__build_nfa
      procedure :: init            => automaton__initialize
      procedure :: epsilon_closure => automaton__epsilon_closure
      procedure :: register_state  => automaton__register_state
      procedure :: construct       => automaton__construct_dfa
      procedure :: get_reachable   => automaton__compute_reachable_state
      procedure :: destination     => automaton__destination
      procedure :: print           => automaton__print_info
      procedure :: print_dfa       => automaton__print_dfa
   end type automaton_t

contains

   pure subroutine automaton__build_nfa(self, tree)
      use :: forgex_syntax_tree_graph_m, only: tree_t
      implicit none
      class(automaton_t), intent(inout) :: self
      type(tree_t), intent(in) :: tree

      self%tree = tree
      !-- NFA building
      call self%nfa%build(tree, self%nfa_entry, self%nfa_exit, self%cube)

   end subroutine automaton__build_nfa

   !> This subroutine reads `tree` and `tree_top` variable, constructs the NFA graph,
   !> and then initializes the DFA graph.
   pure subroutine automaton__initialize(self)
      implicit none
      class(automaton_t), intent(inout) :: self

      type(nfa_state_set_t) :: initial_closure
      integer(int32) :: new_index

      !-- DFA initialize
      ! Invokes DFA preprocessing.
      call self%dfa%preprocess()

      ! Check if it has been initialized.
      if (self%dfa%dfa_top /= DFA_INITIAL_INDEX) then
         error stop "DFA graph initialization is failed."
      end if

      call init_state_set(self%entry_set, self%nfa%top)

      ! Constructing a DFA initial state from the NFA initial state.
      call add_nfa_state(self%entry_set, self%nfa_entry)

      call init_state_set(initial_closure, self%nfa%top)
      initial_closure = self%entry_set
      ! Add an NFA node reachable by epsilon transitions to the entrance state set within DFA.
      call self%epsilon_closure(initial_closure, self%nfa_entry)

      ! Assign the computed initial closure into self%entry_set
      self%entry_set = initial_closure

      ! Register `entry_set` as a new DFA state in the graph.
      call self%register_state(self%entry_set, new_index)

      ! Assign the returned index to the `initial_index` of the graph.
      self%initial_index = new_index

   end subroutine automaton__initialize


   !> Compute the ε-closure for a set of NFA states.
   !>
   !> The ε-closure is the set of NFA states reachable from a given set of NFA states via ε-transition.
   !> This subroutine calculates the ε-closure and stores it in the `closure` parameter.
   pure recursive subroutine automaton__epsilon_closure(self, closure, n_index)
      use :: forgex_nfa_node_m
      implicit none
      class(automaton_t), intent(inout) :: self
      type(nfa_state_set_t), intent(inout) :: closure
      integer, intent(in) :: n_index

      type(nfa_state_node_t) :: n_node
      type(nfa_transition_t) :: n_tra
      integer :: dst
      integer :: j

      call add_nfa_state(closure, n_index)

      ! n_node = self%nfa%graph(n_index)

      if (.not. allocated(self%nfa%graph(n_index)%forward)) return

       ! すべての順方向の遷移をスキャンする
      do j = 1, self%nfa%graph(n_index)%forward_top
         ! 一時変数にコピー
         ! n_tra = self%nfa%graph(n_index)%forward(j)

         ! if (.not. allocated(self%nfa%graph(n_index)%forward(j)%c%sps)) cycle

         dst = self%nfa%graph(n_index)%forward(j)%dst

         if (dst == NFA_NULL_TRANSITION) cycle

         ! if (any(self%nfa%graph(n_index)%forward(j)%c%sps == SEG_EPSILON) &
         if (self%nfa%graph(n_index)%forward(j)%c%is_flagged_epsilon() &
         .and. .not. check_nfa_state(closure, dst)) then

            if (dst /= NFA_NULL_TRANSITION) then
               call self%epsilon_closure(closure, dst)
            end if
         end if

      end do

   end subroutine automaton__epsilon_closure


   !> This subroutine takes a `nfa_state_set_t` type argument as input and register
   !> the set as a DFA state node in the DFA graph.
   pure subroutine automaton__register_state(self, state_set, res)
      implicit none
      class(automaton_t),    intent(inout) :: self
      type(nfa_state_set_t), intent(in)    :: state_set
      integer(int32),        intent(inout) :: res       ! resulting the new dfa index

      integer(int32) :: i

      ! If the set is already registered, returns the index of the corresponding DFA state.
      i = self%dfa%registered(state_set)
      if ( i /= DFA_INVALID_INDEX) then
         res = i
         return
      end if

      ! Execute an error stop statement if the counter exceeds a limit.
      if (self%dfa%dfa_top >= self%dfa%dfa_limit) then
         ! Reallocate
         call self%dfa%reallocate()
      end if

      !> @note The processing here should reflect the semantic change of `dfa_top`.
      i = self%dfa%dfa_top
      self%dfa%dfa_top = i + 1 ! increment dfa_top


      self%dfa%nodes(i)%nfa_set    = state_set
      self%dfa%nodes(i)%accepted   = check_nfa_state(state_set, self%nfa_exit)
      self%dfa%nodes(i)%registered = .true.

      call self%dfa%nodes(i)%increment_tra_top() ! Somehow this is necessary!

      res = i

   end subroutine automaton__register_state



   !> This function calculates a set of possible NFA states from the current DFA state by the input
   !> character `symbol`.
   !>
   !> It scans through the NFA states and finds the set of reachable states by the given input `symbol`,
   !> excluding ε-transitions.
   pure function automaton__compute_reachable_state(self, curr_i, symbol) result(state_set)
      use :: forgex_segment_m, only: operator(.in.), operator(/=)
      use :: forgex_nfa_node_m, only: nfa_state_node_t, nfa_transition_t
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      use :: forgex_cube_m, only: cube_t, operator(.in.)
      implicit none
      class(automaton_t), intent(in) :: self
      integer(int32),     intent(in) :: curr_i      ! current index of dfa
      character(*),       intent(in) :: symbol

      type(nfa_state_set_t)  :: state_set    ! RESULT variable
      type(nfa_state_set_t)  :: current_set
      integer                :: i, j, k

      ! temporary variables ... to increase the cache hit rate
      ! type(nfa_state_node_t) :: n_node       ! This variable simulates a pointer.
      ! type(segment_t), allocatable :: segs(:)
      ! type(nfa_transition_t)       :: n_tra
      ! type(cube_t) :: cube
      integer :: dst

      call init_state_set(state_set, self%nfa%top)

      current_set = self%dfa%nodes(curr_i)%nfa_set

      ! Scan the entire NFA states.
      outer: do i = 1, self%nfa%top

         ! If the i-th element of current state set is true, process the i-th NFA node.
         if (check_nfa_state(current_set, i)) then

            if (.not. allocated(self%nfa%graph(i)%forward)) cycle

            ! Scan the all transitions belong to the NFA state node.
            middle: do j = 1, self%nfa%graph(i)%forward_top

               dst = self%nfa%graph(i)%forward(j)%dst

               ! If it has a destination,
               if (dst /= NFA_NULL_TRANSITION) then

                     ! If the symbol is in the cube on the transition forward(j).
                     if (symbol .in. self%nfa%graph(i)%forward(j)%c) then
      
                        ! Add the index of the NFA state node to `state_set` of type(nfa_state_set_t).
                        call add_nfa_state(state_set, dst)

                     end if

               end if

            end do middle

         end if
      end do outer

   end function automaton__compute_reachable_state


   !> This function returns the dfa transition object, that contains the destination index
   !> and the corresponding set of transitionable NFA state.
   pure function automaton__destination(self, curr, symbol) result(ret)
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t),    intent(in) :: self
      integer(int32),        intent(in) :: curr
      character(*),          intent(in) :: symbol

      type(dfa_transition_t) :: ret

      
      integer :: i

      ! Get a set of NFAs for which current state can transition, excluding epsilon-transitions.
      ret%nfa_set = self%get_reachable(curr, symbol)

      ! Initialize the next value
      ret%dst = DFA_INVALID_INDEX

      ! Scan the entire DFA nodes.
      do concurrent (i = 1:self%dfa%dfa_top-1)
         ! If there is an existing node corresponding to the NFA state set,
         ! return the index of that node.
         if (equivalent_nfa_state_set(ret%nfa_set, self%dfa%nodes(i)%nfa_set)) then
            ret%dst = i
         end if
      end do

   end function automaton__destination


   !> This subroutine gets the destination index of DFA nodes from the current index with given symbol,
   !> adding a DFA node if necessary.
   !>
   !> It calculates the set of NFA states that can be reached from the `current` node for the given `symbol`,
   !> excluding epsilon transitions, and then registers the new DFA state node if it has not already been registered.
   !> Finally, it adds the transition from the `current` node to the `destination` node in the DFA graph.
   pure subroutine automaton__construct_dfa (self, curr_i, dst_i, symbol, d_tra)
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t), intent(inout) :: self
      integer(int32),     intent(in)    :: curr_i
      integer(int32),     intent(inout) :: dst_i
      character(*),       intent(in)    :: symbol
      type(dfa_transition_t), intent(in) :: d_tra
      type(nfa_state_set_t) :: nfa_set

      integer(int32) :: prev_i

      dst_i = DFA_INVALID_INDEX
      prev_i = curr_i

      nfa_set = d_tra%nfa_set

      ! ε遷移との和集合を取り、d_tra%nfa_setに格納する。
      ! Combine the state set with epsilon-transitions and store in `d_tra%nfa_set`.
      call self%nfa%collect_epsilon_transition(nfa_set)

      ! 空のNFA状態集合の登録を禁止する
      if (.not. any(nfa_set%vec)) then
         dst_i = DFA_INVALID_INDEX
         return
      end if

      dst_i = self%dfa%registered(nfa_set)

      ! まだDFA状態が登録されていない場合は、新しく登録する。
      ! If the destination index is DFA_INVALID_INDEX, register a new DFA node.
      if (dst_i == DFA_INVALID_INDEX) then
         call self%register_state(nfa_set, dst_i)
      end if

      ! If the destination index is DFA_INVALID_INDEX, the registration is failed.
      if (dst_i == DFA_INVALID_INDEX) error stop "DFA registration failed."

      if (self%dfa%nodes(prev_i)%is_registered_tra(dst_i, symbol)) return

      ! 遷移を追加する
      ! Add a DFA transition from `prev` to `next` for the given `symbol`.
      ! call self%dfa%add_transition(d_tra%nfa_set, prev_i, dst_i,  &
      !        which_segment_symbol_belong(self%all_segments, symbol))
      call self%dfa%add_transition(nfa_set, prev_i, dst_i, symbol)
   end subroutine automaton__construct_dfa


!=====================================================================!


   !> This subroutine provides the automata' summarized information.
   subroutine automaton__print_info(self)
      use :: iso_fortran_env, only: stderr => error_unit
      implicit none
      class(automaton_t), intent(in) :: self

      write(stderr, *) "--- AUTOMATON INFO ---"
      write(stderr, *) "entry_set: ", self%entry_set%vec(NFA_STATE_BASE+1:self%nfa%top)
      write(stderr, *) "allocated(all_segments):", allocated(self%cube%sps)
      write(stderr, *) "nfa_entry:     ", self%nfa_entry
      write(stderr, *) "nfa_exit:      ", self%nfa_exit
      write(stderr, *) "initial_index: ", self%initial_index

   end subroutine automaton__print_info


   !> This subroutine prints DFA states and transitions to a given unit number.
   subroutine automaton__print_dfa(self, uni)
      use :: forgex_nfa_state_set_m, only: print_nfa_state_set
      use :: forgex_lazy_dfa_node_m, only: dfa_transition_t
      implicit none
      class(automaton_t), intent(in) :: self
      integer(int32), intent(in) :: uni

      type(segment_t), allocatable :: segments(:)
      type(dfa_transition_t) :: p
      integer(int32) :: i, j, k

      do i = 1, self%dfa%dfa_top -1

         if (self%dfa%nodes(i)%accepted) then
            write(uni, '(i4,a, a)', advance='no') i, 'A', ": "
         else
            write(uni, '(i4,a, a)', advance='no') i, ' ', ": "
         end if

         do j = 1, self%dfa%nodes(i)%get_tra_top()

            if (allocated(segments)) deallocate(segments)
            call self%dfa%nodes(i)%transition(j)%c%cube2seg(segments)

            if (allocated(segments)) then
               do k = 1, size(segments, dim=1)
                  write(uni, '(a, a, i0, 1x)', advance='no') segments(k)%print(), '=>', self%dfa%nodes(i)%transition(j)%dst
               end do
            end if

         end do
         write(uni, *) ""
      end do

      do i = 1, self%dfa%dfa_top - 1
         if (self%dfa%nodes(i)%accepted) then
            write(uni, '(a, i4, a)', advance='no') "state ", i, 'A = ( '
         else
            write(uni, '(a, i4, a)', advance='no') "state ", i, '  = ( '
         end if

         call print_nfa_state_set(self%dfa%nodes(i)%nfa_set, self%nfa%top, uni)

         write(uni,'(a)') ")"
      end do
   end subroutine automaton__print_dfa


end module forgex_automaton_m