This file contains nfa_t
class and its type-bound procedures.
! Fortran Regular Expression (Forgex) ! ! MIT License ! ! (C) Amasaki Shinobu, 2023-2024 ! A regular expression engine for Fortran. ! forgex_nfa_m module is a part of Forgex. ! !! This file contains `nfa_t` class and its type-bound procedures. !> The `forgex_nfa_m` module defines the data structure of NFA. !> The `nfa_t` is defined as a class representing NFA. #ifdef IMPURE #define pure #endif module forgex_nfa_node_m use, intrinsic :: iso_fortran_env, only: stderr=>error_unit, int32 use :: forgex_parameters_m, only: TREE_NODE_BASE, TREE_NODE_LIMIT, ALLOC_COUNT_INITTIAL, & NFA_NULL_TRANSITION, NFA_STATE_BASE, NFA_TRANSITION_UNIT, NFA_STATE_UNIT, NFA_STATE_LIMIT, & NFA_C_SIZE, INFINITE use :: forgex_segment_m, only: segment_t, SEG_INIT, SEG_EPSILON, operator(/=), operator(==), & seg__merge_segments=>merge_segments, seg__sort_segments=>sort_segment_by_min use :: forgex_syntax_tree_graph_m, only: tree_t implicit none private public :: build_nfa_graph public :: disjoin_nfa public :: nfa_deallocate public :: make_nfa_node public :: generate_nfa type, public :: nfa_transition_t type(segment_t), allocatable :: c(:) integer(int32) :: c_top = 0 integer(int32) :: dst = NFA_NULL_TRANSITION integer(int32) :: own_j = NFA_NULL_TRANSITION logical :: is_registered = .false. end type type, public :: nfa_state_node_t integer(int32) :: own_i type(nfa_transition_t), allocatable :: forward(:) type(nfa_transition_t), allocatable :: backward(:) integer(int32) :: forward_top = 0 integer(int32) :: backward_top = 0 integer(int32) :: alloc_count_f = ALLOC_COUNT_INITTIAL integer(int32) :: alloc_count_b = ALLOC_COUNT_INITTIAL ! type(segment_t), allocatable :: all_segments(:) contains procedure :: add_transition => nfa__add_transition procedure :: realloc_f => nfa__reallocate_transition_forward procedure :: realloc_b => nfa__reallocate_transition_backward procedure :: merge_segments => nfa__merge_segments_of_transition end type contains pure subroutine build_nfa_graph (tree, nfa, nfa_entry, nfa_exit, nfa_top, all_segments) use :: forgex_parameters_m, only: NFA_TRANSITION_UNIT implicit none type(tree_t), intent(in) :: tree type(nfa_state_node_t), intent(inout), allocatable :: nfa(:) integer(int32), intent(inout) :: nfa_entry integer(int32), intent(inout) :: nfa_exit integer(int32), intent(inout) :: nfa_top type(segment_t), intent(inout), allocatable :: all_segments(:) integer(int32) :: i, i_begin, i_end ! index for states array i_begin = NFA_STATE_BASE i_end = NFA_STATE_UNIT ! initialize nfa_top = 0 allocate(nfa(i_begin:i_end)) ! Initialize nfa(i_begin:i_end)%own_i = [(i, i =i_begin, i_end)] nfa(:)%alloc_count_f = 0 nfa(:)%alloc_count_b = 0 nfa(:)%forward_top = 1 nfa(:)%backward_top = 1 call make_nfa_node(nfa_top) nfa_entry = nfa_top call make_nfa_node(nfa_top) nfa_exit = nfa_top call generate_nfa(tree, tree%top, nfa, nfa_top, nfa_entry, nfa_exit) do i = 1, nfa_top call nfa(i)%merge_segments() end do call disjoin_nfa(nfa, nfa_top, all_segments) end subroutine build_nfa_graph pure subroutine nfa_deallocate(nfa) implicit none type(nfa_state_node_t), allocatable, intent(inout) :: nfa(:) integer :: i if (.not. allocated(nfa)) return do i = NFA_STATE_BASE, ubound(nfa, dim=1) if (allocated(nfa(i)%forward)) deallocate(nfa(i)%forward) if (allocated(nfa(i)%backward)) deallocate(nfa(i)%backward) end do deallocate(nfa) end subroutine nfa_deallocate pure subroutine make_nfa_node(nfa_top) implicit none integer(int32), intent(inout) :: nfa_top nfa_top = nfa_top + 1 end subroutine make_nfa_node pure function is_exceeded (nfa_top, nfa_graph) result(res) implicit none integer(int32), intent(in) :: nfa_top type(nfa_state_node_t), intent(in) :: nfa_graph(:) logical :: res res = ubound(nfa_graph, dim=1) < nfa_top end function is_exceeded pure subroutine reallocate_nfa(nfa_graph) implicit none type(nfa_state_node_t), allocatable, intent(inout) :: nfa_graph(:) type(nfa_state_node_t), allocatable :: tmp(:) integer :: siz siz = ubound(nfa_graph, dim=1) call move_alloc(nfa_graph, tmp) allocate(nfa_graph(NFA_STATE_BASE:siz*2)) nfa_graph(NFA_STATE_BASE:siz) = tmp(NFA_STATE_BASE:siz) nfa_graph(siz+1:siz*2)%forward_top = 1 nfa_graph(siz+1:siz*2)%backward_top = 1 end subroutine pure recursive subroutine generate_nfa(tree, idx, nfa_graph, nfa_top, entry, exit) use :: forgex_enums_m use :: forgex_parameters_m implicit none type(tree_t), intent(in) :: tree type(nfa_state_node_t), allocatable, intent(inout) :: nfa_graph(:) integer(int32), intent(in) :: idx integer(int32), intent(inout) :: nfa_top integer(int32), intent(in) :: entry integer(int32), intent(in) :: exit integer :: i integer :: k integer :: node1 integer :: node2 integer :: entry_local if (idx == INVALID_INDEX) return i = idx entry_local = entry select case(tree%nodes(i)%op) case (op_char) if (.not. allocated(tree%nodes(i)%c)) then error stop "ERROR: Character node of the AST do not have actual character list." end if ! Handle character operations by adding transition for each character. do k = 1, size(tree%nodes(i)%c, dim=1) call nfa_graph(entry)%add_transition(nfa_graph, entry, exit, tree%nodes(i)%c(k)) end do case (op_empty) ! Handle empty opration by adding an epsilon transition call nfa_graph(entry)%add_transition(nfa_graph, entry, exit, SEG_EPSILON) case (op_union) ! Handle union operation by recursively generating NFA for left and right subtrees. call generate_nfa(tree, tree%nodes(i)%left_i, nfa_graph, nfa_top, entry, exit) call generate_nfa(tree, tree%nodes(i)%right_i, nfa_graph, nfa_top, entry, exit) case (op_closure) ! Handle closure (Kleene star) operations by creating new node and adding appropriate transition call generate_nfa_closure(tree, idx, nfa_graph, nfa_top, entry, exit) case (op_concat) ! Handle concatenation operations by recursively generating NFA for left and right subtrees. call generate_nfa_concatenate(tree, idx, nfa_graph, nfa_top, entry, exit) case (op_repeat) block integer(int32) :: min_repeat, max_repeat, j integer(int32) :: num_1st_repeat, num_2nd_repeat min_repeat = tree%nodes(i)%min_repeat max_repeat = tree%nodes(i)%max_repeat num_1st_repeat = min_repeat-1 if (max_repeat == INFINITE) then num_1st_repeat = num_1st_repeat +1 end if do j = 1, num_1st_repeat call make_nfa_node(nfa_top) if (is_exceeded(nfa_top, nfa_graph)) call reallocate_nfa(nfa_graph) node1 = nfa_top call generate_nfa(tree, tree%nodes(i)%left_i, nfa_graph, nfa_top, entry_local, node1) entry_local = node1 end do if (min_repeat == 0) then num_2nd_repeat = max_repeat - 1 else num_2nd_repeat = max_repeat - min_repeat end if do j = 1, num_2nd_repeat call make_nfa_node(nfa_top) if (is_exceeded(nfa_top, nfa_graph)) call reallocate_nfa(nfa_graph) node2 = nfa_top call generate_nfa(tree, tree%nodes(i)%left_i, nfa_graph, nfa_top, entry_local, node2) call nfa_graph(node2)%add_transition(nfa_graph, node2, exit, SEG_EPSILON) entry_local = node2 end do if (min_repeat == 0) then call nfa_graph(entry)%add_transition(nfa_graph, entry, exit, SEG_EPSILON) end if if (max_repeat == INFINITE) then call generate_nfa_closure(tree, idx, nfa_graph, nfa_top, entry_local, exit) else call generate_nfa(tree, tree%nodes(i)%left_i, nfa_graph, nfa_top, entry_local, exit) end if end block case default ! for case (op_not_init) ! Handle unexpected cases. error stop "This will not heppen in 'generate_nfa'." end select end subroutine generate_nfa pure recursive subroutine generate_nfa_concatenate(tree, idx, nfa_graph, nfa_top, entry, exit) implicit none type(tree_t), intent(in) :: tree type(nfa_state_node_t), allocatable, intent(inout) :: nfa_graph(:) integer(int32), intent(in) :: idx integer(int32), intent(inout) :: nfa_top integer(int32), intent(in) :: entry integer(int32), intent(in) :: exit integer(int32) :: node1 call make_nfa_node(nfa_top) if (is_exceeded(nfa_top, nfa_graph)) then call reallocate_nfa(nfa_graph) end if node1 = nfa_top call generate_nfa(tree, tree%nodes(idx)%left_i, nfa_graph, nfa_top, entry, node1) call generate_nfa(tree, tree%nodes(idx)%right_i, nfa_graph, nfa_top, node1, exit) end subroutine generate_nfa_concatenate pure recursive subroutine generate_nfa_closure(tree, idx, nfa_graph, nfa_top, entry, exit) implicit none type(tree_t), intent(in) :: tree type(nfa_state_node_t), allocatable, intent(inout) :: nfa_graph(:) integer(int32), intent(in) :: idx integer(int32), intent(inout) :: nfa_top integer(int32), intent(in) :: entry integer(int32), intent(in) :: exit integer(int32) :: node1, node2 call make_nfa_node(nfa_top) if (is_exceeded(nfa_top, nfa_graph)) then call reallocate_nfa(nfa_graph) end if node1 = nfa_top call make_nfa_node(nfa_top) if (is_exceeded(nfa_top, nfa_graph)) then call reallocate_nfa(nfa_graph) end if node2 = nfa_top call nfa_graph(entry)%add_transition(nfa_graph, entry, node1, SEG_EPSILON) call generate_nfa(tree, tree%nodes(idx)%left_i, nfa_graph, nfa_top, node1, node2) call nfa_graph(node2)%add_transition(nfa_graph, node2, node1, SEG_EPSILON) call nfa_graph(node1)%add_transition(nfa_graph, node1, exit, SEG_EPSILON) end subroutine generate_nfa_closure pure subroutine nfa__add_transition(self,nfa_graph, src, dst, c) use :: forgex_parameters_m, only: NFA_TRANSITION_UNIT implicit none class(nfa_state_node_t), intent(inout) :: self type(nfa_state_node_t), intent(inout) :: nfa_graph(:) integer(int32), intent(in) :: src, dst type(segment_t) ,intent(in) :: c integer(int32) :: j, jj, k !== Forward transition process j = NFA_NULL_TRANSITION if (allocated(self%forward) .and. c /= SEG_EPSILON) then ! ε遷移でない場合、同じ行き先の遷移があるかどうか検索する do jj = 1, self%forward_top if ( dst == self%forward(jj)%dst .and. self%forward(jj)%c_top < NFA_C_SIZE) then ! セグメント配列のサイズを超える場合には新しい遷移にセグメントを追加する j = jj end if end do end if if (j == NFA_NULL_TRANSITION) then j = self%forward_top end if !> @note Note that the return value of the size function on an unallocated array is undefined. if (.not. allocated(self%forward)) then ! Reallocate the forward array component. call self%realloc_f() endif if (j >= size(self%forward, dim=1)) then ! Reallocate the forward array component. call self%realloc_f() endif if (.not. allocated(self%forward(j)%c)) then allocate(self%forward(j)%c(1:NFA_C_SIZE)) end if self%forward(j)%c_top = self%forward(j)%c_top + 1 ! Increment k = self%forward(j)%c_top self%forward(j)%c(k) = c self%forward(j)%dst = dst self%forward(j)%is_registered = .true. if (j == self%forward_top) self%forward_top = self%forward_top + 1 !== Backward transition process j = NFA_NULL_TRANSITION if (allocated(nfa_graph(dst)%backward) .and. c /= SEG_EPSILON) then do jj = 1, nfa_graph(dst)%backward_top if (src == nfa_graph(dst)%backward(jj)%dst .and. nfa_graph(dst)%backward(jj)%c_top < NFA_C_SIZE) j = jj ! セグメント配列のサイズを超える場合には新しい遷移にセグメントを追加する end do end if if (j == NFA_NULL_TRANSITION) then j = nfa_graph(dst)%backward_top end if if (.not. allocated(nfa_graph(dst)%backward)) then ! Reallocate backward array component. call nfa_graph(dst)%realloc_b end if if (j >= size(nfa_graph(dst)%backward, dim=1)) then ! Reallocate backward array component. call nfa_graph(dst)%realloc_b endif if (.not. allocated(nfa_graph(dst)%backward(j)%c)) allocate(nfa_graph(dst)%backward(j)%c(NFA_C_SIZE)) nfa_graph(dst)%backward(j)%c_top = nfa_graph(dst)%backward(j)%c_top + 1 k = nfa_graph(dst)%backward(j)%c_top nfa_graph(dst)%backward(j)%c(k) = c nfa_graph(dst)%backward(j)%dst = src nfa_graph(dst)%backward(j)%is_registered = .true. if(j == nfa_graph(dst)%backward_top) nfa_graph(dst)%backward_top = nfa_graph(dst)%backward_top + 1 end subroutine nfa__add_transition pure subroutine disjoin_nfa(graph, nfa_top, seg_list) use :: forgex_priority_queue_m use :: forgex_segment_m use :: forgex_segment_disjoin_m implicit none type(nfa_state_node_t), intent(inout) :: graph(:) integer, intent(in) :: nfa_top type(segment_t), allocatable, intent(inout) :: seg_list(:) type(priority_queue_t) :: queue_f type(nfa_transition_t) :: ptr integer :: i, j, k, num_f ! Enqueue ! Traverse through all states and enqueue their segments into a priority queue. block do i = NFA_STATE_BASE, nfa_top ! Do not subtract 1 from nfa_top. do j = 1, graph(i)%forward_top -1 ptr = graph(i)%forward(j) if (ptr%dst /= NFA_NULL_TRANSITION) then do k = 1, graph(i)%forward(j)%c_top if (ptr%c(k) /= SEG_INIT) then call queue_f%enqueue(ptr%c(k)) end if end do end if end do end do end block ! Dequeue ! Allocate memory for the segment list and dequeue all segments for the priority queue. block integer :: m type(segment_t) :: cache num_f = queue_f%number allocate(seg_list(num_f)) m = 0 do j = 1, num_f if (j == 1) then m = m + 1 call queue_f%dequeue(seg_list(j)) cycle end if call queue_f%dequeue(cache) if (seg_list(m) /= cache) then m = m + 1 seg_list(m) = cache end if end do !-- The seg_list arrays are now sorted. seg_list = seg_list(:m) ! reallocation implicitly end block !== At this point, seg_list is always allocated. ==! ! Disjoin the segment lists to ensure no over laps call disjoin(seg_list) if (.not. allocated(seg_list)) then error stop "ERROR: Array that should have been disjoined is not allocated." end if ! Apply disjoining to all transitions over the NFA graph. ! do concurrent (i = NFA_STATE_BASE:nfa_top) ! do concurrent (j = 1:graph(1)%forward_top) do i = NFA_STATE_BASE, nfa_top if (allocated(graph(i)%forward)) then do j = 1, graph(i)%forward_top call disjoin_nfa_each_transition(graph(i)%forward(j), seg_list) end do end if if (allocated(graph(i)%backward)) then do j = 1, graph(i)%backward_top call disjoin_nfa_each_transition(graph(i)%backward(j), seg_list) end do end if end do ! deallocate the used priority queue. call queue_f%clear() end subroutine disjoin_nfa !> This subroutine updates the NFA state transitions by disjoining the segments. !> !> It breaks down overlapping segments into non-overlapping segments, !> and creates new transitions accordingly. pure subroutine disjoin_nfa_each_transition(transition, seg_list) use :: forgex_segment_disjoin_m implicit none type(nfa_transition_t), intent(inout) :: transition type(segment_t), intent(in) :: seg_list(:) type(segment_t), allocatable :: tmp(:) integer :: k, m, n, siz if (.not. allocated(transition%c)) return siz = size(seg_list, dim=1) allocate(tmp(siz)) block logical :: flag(siz) n = 0 ! to count valid disjoined segments. do k = 1, transition%c_top flag(:) = is_overlap_to_seg_list(transition%c(k), seg_list, siz) do m = 1, siz if (flag(m)) then n = n + 1 tmp(n) = seg_list(m) end if end do end do end block if (size(transition%c, dim=1) < n) then deallocate(transition%c) allocate(transition%c(n)) end if ! Deep copy the result into the arguemnt's component do k = 1, n transition%c(k) = tmp(k) end do call update_c_top(transition) deallocate(tmp) end subroutine disjoin_nfa_each_transition !> Update c_top, which has become outdated by disjoin, to new information. pure subroutine update_c_top(transition) implicit none type(nfa_transition_t), intent(inout) :: transition integer :: k if (.not. allocated(transition%c)) return k = 0 do while(k+1 <= size(transition%c, dim=1)) k = k + 1 if (transition%c(k) == SEG_INIT) exit end do transition%c_top = k end subroutine update_c_top ! pure subroutine transition_to_seg_list(transition_list, top_idx, segment_list) ! implicit none ! type(nfa_transition_t), intent(in) :: transition_list(:) ! integer(int32), intent(in) :: top_idx ! type(segment_t), allocatable, intent(inout) :: segment_list(:) ! integer :: j, k ! allocate(segment_list(top_idx)) ! do j = 1, top_idx ! do k = 1, size(transition_list(j)%c, dim=1) ! segment_list(j) = transition_list(j)%c(k) ! end do ! end do ! end subroutine transition_to_seg_list pure subroutine nfa__reallocate_transition_forward (self) implicit none class(nfa_state_node_t), intent(inout) :: self type(nfa_transition_t), allocatable :: tmp(:) integer :: siz, j integer :: prev_count, new_part_begin, new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if (allocated(self%forward)) then siz = size(self%forward, dim=1) call move_alloc(self%forward, tmp) else siz = 0 end if prev_count = self%alloc_count_f self%alloc_count_f = prev_count + 1 new_part_begin = (siz) + 1 new_part_end = NFA_TRANSITION_UNIT * 2**self%alloc_count_f allocate(self%forward(1:new_part_end)) if (allocated(tmp)) then do j = 1, siz self%forward(j) = tmp(j) end do end if self%forward(1:new_part_end)%own_j = & [(j, j= 1, new_part_end)] end subroutine nfa__reallocate_transition_forward pure subroutine nfa__reallocate_transition_backward (self) implicit none class(nfa_state_node_t), intent(inout) :: self type(nfa_transition_t), allocatable :: tmp(:) integer :: siz, jj integer :: prev_count, new_part_begin, new_part_end siz = 0 prev_count = 0 new_part_begin = 0 new_part_end = 0 if (allocated(self%backward)) then siz = size(self%backward, dim=1) call move_alloc(self%backward, tmp) else siz = 0 end if prev_count = self%alloc_count_b self%alloc_count_b = prev_count + 1 new_part_begin = (siz) + 1 new_part_end = NFA_TRANSITION_UNIT * 2**self%alloc_count_b allocate(self%backward(1:new_part_end)) if(allocated(tmp)) self%backward(1:siz) = tmp(1:siz) self%backward(new_part_begin:new_part_end)%own_j = & [(jj, jj= new_part_begin, new_part_end)] end subroutine nfa__reallocate_transition_backward pure elemental subroutine nfa__merge_segments_of_transition(self) implicit none class(nfa_state_node_t), intent(inout) :: self integer :: j if(allocated(self%forward)) then do j = 1, self%forward_top if (allocated(self%forward(j)%c)) then call seg__sort_segments(self%forward(j)%c) call seg__merge_segments(self%forward(j)%c) self%forward(j)%c_top = size(self%forward(j)%c, dim=1) end if end do end if if (allocated(self%backward)) then do j = 1, self%backward_top if (allocated(self%backward(j)%c)) then call seg__sort_segments(self%backward(j)%c) call seg__merge_segments(self%backward(j)%c) self%backward(j)%c_top = size(self%backward(j)%c, dim=1) end if end do end if end subroutine nfa__merge_segments_of_transition end module forgex_nfa_node_m