segment_disjoin_m.F90 Source File

This file contains



Source Code

! Fortran Regular Expression (Forgex)
!
! MIT License
!
! (C) Amasaki Shinobu, 2023-2024
!     A regular expression engine for Fortran.
!     forgex_segment_disjoin_m module is a part of Forgex.
!
!! This file contains
#ifdef IMPURE
#define pure
#endif
module forgex_segment_disjoin_m
   use :: forgex_segment_m, only: segment_t, SEG_UPPER, SEG_INIT, operator(.in.), operator(/=)
   use :: forgex_priority_queue_m, only: priority_queue_t
   implicit none
   private

   public :: disjoin
   public :: is_prime_semgment
   public :: is_overlap_to_seg_list

   interface disjoin
      !! Interface for the procedure `disjoin_kernel`.
      module procedure :: disjoin_kernel
   end interface

contains

   !> Disjoins overlapping segments and creates a new list of non-overlapping segments.
   !>
   !> This subroutine takes a list of segments, disjoins any overlapping segments,
   !> and creates a new list of non-overlapping segments. It uses a priority queue
   !> to sort the segments and processes them to ensure they are disjoined.
   pure subroutine disjoin_kernel(list)
      use, intrinsic :: iso_fortran_env, only: int32
      implicit none
      type(segment_t), intent(inout), allocatable :: list(:)

      type(segment_t), allocatable :: old_list(:)
      type(priority_queue_t)       :: pqueue
      type(segment_t), allocatable :: buff(:)
      type(segment_t), allocatable :: cache(:)
      type(segment_t)              :: new
      integer(int32), allocatable  :: index_list(:)

      integer(int32) :: i, j, k, count, siz, top, bottom, real_size, m
      logical        :: flag

      ! If list is not allocated, it returns immediately.
      if (.not. allocated(list)) return

      siz = size(list, dim=1)
      if (siz <= 0) then
         return
      end if

      ! Move the currnet list to `old_list`
      call move_alloc(list, old_list)

      ! Sort segments using a priority queue (heap sort)
      block
         allocate(buff(siz))

         do j = 1, siz
            call pqueue%enqueue(old_list(j))
         end do

         do j = 1, siz
            call pqueue%dequeue(buff(j))  ! The `buff` is sorted array.
         end do
      end block

      ! Determine the bottom and top value from the segment array.
      block
         bottom = buff(1)%min
         top = 0
         do j = 1, siz
            top = max(top, buff(j)%max)
         end do
      end block

      allocate(list(siz*2))

      ! Generate a list of unique indices from the `old_list`.
      call index_list_from_segment_list(index_list, old_list)

      !==  At this point, index_list is allocated by index_list_from_segment_list procedure.

      ! Initialize
      new = SEG_UPPER   ! segment_t(2**21, 2**21)
      k = 1
      m = 1

      ! NOTE: this is a complex loop with multiple counters, so HANDLE WITH CARE.
      !
      do while(m <= size(index_list))
         i = index_list(m)    ! Get the current value of `index_list`.

         ! NOTE: the `index_list` is in ASCENDING order.

         ! Check if `i` is within any of the segments.
         !
         ! This loop iterates over each value in the `index_list` and checks if the current
         ! value `i` is present in any of the segments stored in the `buff` array.
         ! If it is present and less than the current minimum value of the new segment, it
         ! updates the new segment's minimum value.
         if (i .in. buff(1:siz)) then
            if (i < new%min) new%min = i
         else
            ! Otherwise, advance the index in `index_list` and move to the next cycle.
            m = m + 1
            cycle
         end if

         ! Check if `i+1` is the start of any segment.
         !
         ! This section checks if the value `i+1` is the starting point (`min`) of any segment
         ! in the `buff` array. If it is, then it sets the new segment's `max` value to `i` and
         ! registers the new segment.
         flag = .false.
         do j = 1, siz
            if (i+1 == buff(j)%min) flag = flag .or. .true.
               ! This `if` statement is redundant and should be fixed.
         end do
         if (flag) then
            new%max = i
            call register_seg_list(new, list, k)
            m = m + 1
            cycle
         end if

         ! Check for multiple segments starting at `i`.
         !
         ! This part counts how many segments start at the current value `i`. If more than
         ! one segment starts at `i`, it sets the new segment's max value to `i` and register
         ! the new segment.
         count = 0
         do j = 1, siz
            if (buff(j)%min == i) count = count + 1
         end do
         if (count > 1) then
            new%max = i
            call register_seg_list(new, list, k)
         end if

         ! Check for any segments ending at `i`.
         !
         ! This part counts how many segments end at the current value `i`.
         ! If any segment ends at `i`, it sets the new segment's max value to `i`
         ! and registers the new segment.
         count = 0
         do j = 1, siz
            if (buff(j)%max == i) count = count + 1
         end do
         if (count >0) then
            new%max = i
            call register_seg_list(new, list, k)
         end if

         m = m + 1
      end do

      ! Determine the real size of the new list.
      ! This loop calculates the actual number of non-empty segments in the new `list`.
      real_size = 0 
      do i = 1, size(list)
         if (list(i) /= SEG_INIT) real_size = real_size + 1
      end do

      ! Move `list` to `cache` and reallocate `list` to the real size.
      call move_alloc(list, cache)  ! list is now deallocated.
      allocate(list(real_size))
      list(:) = cache(1:real_size)

      ! Deallocate used arrays and clear the priority queue
      call pqueue%clear()
      deallocate(buff)
      deallocate(cache)
      deallocate(index_list)
   end subroutine disjoin_kernel


   !> Registers a new segment into a list if it is valid.
   !>
   !> This subroutine adds a new segment to a given list if the segment is valid.
   !> After registering, it sets the new segment to a predefined upper limit segment.
   pure subroutine register_seg_list(new, list, k)
      use, intrinsic :: iso_fortran_env, only: int32
      implicit none
      type(segment_t), intent(inout) :: new, list(:)
      integer(int32),  intent(inout) :: k

      ! If the `new` segment is valid, add it to the list and incremetn the count.
      !! @note This implementation is badly behaved and should be fixed as soon as possible.
      if (new%validate()) then
         list(k) = new
         k = k + 1
      end if
      new = SEG_UPPER
   end subroutine register_seg_list


   !> Checks if a segment is a prime segment within a disjoined list.
   !>
   !> This function determines whether the given segment `seg` is a prime
   !> segment, meaning it does not overlap with any segment in the `disjoined_list`.
   !
   !  この関数は、指定されたセグメント`seg`が、`disjoined_list`内の任意のセグメントと交差せずに
   !  独立しているかどうかを判定する。`disjoined_list`内のいずれかのセグメントについて、`seg`がその範囲内に
   !  完全に収まっているかどうかをチェックし、その結果を論理値`res`に格納して返す。
   pure function is_prime_semgment(seg, disjoined_list) result(res)
      implicit none
      type(segment_t), intent(in) :: seg, disjoined_list(:)

      logical :: res
      integer :: j

      ! Initialize the result.
      res = .false.

      ! リストのうちのいずれかと一致すれば、交差していない。
      ! Check if any segment in `disjoined_list` contains `seg`.
      do j = 1, size(disjoined_list)
         res = res .or. ( disjoined_list(j)%min <= seg%min .and. seg%max <= disjoined_list(j)%max)
      end do
   end function is_prime_semgment


   !> Checks if a segment overlaps with any segments in a list.
   !>
   !> This function determines whether the given segment `seg` overlaps with
   !> any of the segments in the provided `list`. It returns a logical array
   !> indicating the overlap status for each segment in the `list`.
   pure function is_overlap_to_seg_list(seg, list, len) result(res)
      use, intrinsic :: iso_fortran_env, only: int32
      implicit none
      integer(int32),  intent(in) :: len
      type(segment_t), intent(in) :: seg, list(:)
      logical :: res(len)

      integer :: i

      ! Initialize the result array.
      res(:) = .false.

      do i = 1, len
         res(i) = list(i) .in. seg  ! Check if each segment overlaps.
      end do
   end function is_overlap_to_seg_list


   !> Extracts a sorted list of unique indices from a list of segments.
   !>
   !> This subroutine takes a list of segments and generates a sorted list of
   !> unique indices from the `min` and `max` values of each segment, including
   !> values just before and after the `min` and `max`.
   pure subroutine index_list_from_segment_list(index_list, seg_list)
      use, intrinsic :: iso_fortran_env, only: int32
      use :: forgex_sort_m, only: insertion_sort
      implicit none
      type(segment_t), intent(in) :: seg_list(:)
      integer(int32), intent(out), allocatable :: index_list(:)
      integer(int32), allocatable :: cache(:)

      integer :: siz, i, k

      siz = size(seg_list, dim=1)   ! Get the size of the list.

      allocate(index_list(6*siz))   ! Allocate an `index_list` of the required size
      allocate(cache(6*siz))        ! Allocate an array for cache.

      do i = 1, siz
         ! Add the `min` and `max` values of each segment, as well as the values
         ! before and after them, to the index list.
         index_list(6*i-5) = seg_list(i)%min - 1
         index_list(6*i-4) = seg_list(i)%min
         index_list(6*i-3) = seg_list(i)%min + 1
         index_list(6*i-2) = seg_list(i)%max - 1
         index_list(6*i-1) = seg_list(i)%max
         index_list(6*i)   = seg_list(i)%max + 1
      end do

      call insertion_sort(index_list)  ! Sort the `index_list` in ascending order.

      ! Initialize
      cache(1) = index_list(1)
      k = 1
      ! Scan the entire `index_list`.
      do i = 2, siz*6
         if (index_list(i-1) /= index_list(i)) then
            ! Add only unique values to the `cache`.
            ! At the same time, count unique values.
            k = k + 1
            cache(k) = index_list(i)
         end if
      end do


      deallocate(index_list)     ! Deallocate the old `index_list`.
      allocate(index_list(k))    ! Allocate a new `index_list` based on the number of unique indices.
      index_list(:) = cache(1:k) ! Copy the data of `cahce(1:k)` into the `index_list(:)`.
   end subroutine index_list_from_segment_list

end module forgex_segment_disjoin_m