interpret_class_string Subroutine

public pure subroutine interpret_class_string(str, cube, is_valid, ierr)

This subroutine parses a pattern string and outputs a list of segment_t type.

Arguments

Type IntentOptional Attributes Name
character(len=*), intent(in) :: str
type(cube_t), intent(inout) :: cube
logical, intent(inout) :: is_valid
integer, intent(inout) :: ierr

Source Code

   pure subroutine interpret_class_string(str, cube, is_valid, ierr)
      use :: forgex_utf8_m, only: idxutf8, next_idxutf8, len_utf8, ichar_utf8
      use :: forgex_parameters_m
      use :: forgex_segment_m, register => register_segment_to_list
      use :: forgex_character_array_m
      use :: forgex_cube_m, only: cube_t
      implicit none

      character(*), intent(in) :: str
      type(cube_t), intent(inout) :: cube
      logical, intent(inout) :: is_valid
      integer, intent(inout) :: ierr

      integer :: i, j, k
      integer :: jerr

      type(segment_t) :: prev_seg, curr_seg
      type(segment_t), allocatable :: list(:), cache(:)

      logical :: backslashed
      logical :: prev_hyphenated, curr_hyphenated
      type(character_array_t), allocatable :: ca(:) ! character array
      integer :: siz ! total number of segment of `ca` array
      character(:), allocatable :: c ! Temporary variable stores a character of interest.

      ! Initialize
      is_valid = .true.
      backslashed = .false.
      prev_hyphenated = .false.
      curr_hyphenated = .false.
      prev_seg = segment_t()
      curr_seg = segment_t()
      
      if (len(str) >= 2) then
         if (str(1:2) == '--') then
            ierr = SYNTAX_ERR_MISPLACED_SUBTRACTION_OPERATOR
            is_valid = .false.
         end if
      end if

      ! Convert to an array from a pattern string.
      call character_string_to_array(str, ca)
      if (.not. allocated(ca)) then
         ierr = SYNTAX_ERR_EMPTY_CHARACTER_CLASS
         is_valid = .false.
         return
      end if

      ! Remove backslash and hyphen, and raise respective flag for each component.
      call parse_backslash_and_hyphen_in_char_array(ca, ierr)
      if (ierr == SYNTAX_ERR_MISPLACED_SUBTRACTION_OPERATOR) then
         is_valid = .false.
         return
      end if

      ! for escape sequences such as \x, \x{...}, \p{...}.
      call parse_escape_sequence_with_argument(ca, ierr)
      if (ierr /= SYNTAX_VALID) then
         is_valid = .false.
         return
      end if

#ifdef IMPURE
      call dump_character_array_t_list(ca)
#endif

      ! Each ca(:)%seg_size will be set by this procedure calling.
      call parse_segment_width_in_char_array(ca)

      ! If each of the array element is hyphenated,
      ! check that the range is not 1 and return invalid.
      siz = 0
      check: do i = 1, size(ca, dim=1)

         ! If the former hypenated range is invalid, throw an error.
         if (ca(i)%is_hyphenated .and. ca(i)%seg_size /= 1) then
            ierr = SYNTAX_ERR_RANGE_WITH_ESCAPE_SEQUENCES
            is_valid = .false.
            return
         end if

         ! If the range following hyphenataed is invalid, throw an error.
         if (i>1) then
            if (ca(i-1)%is_hyphenated .and. ca(i)%seg_size /= 1) then
               ierr = SYNTAX_ERR_RANGE_WITH_ESCAPE_SEQUENCES
               is_valid = .false.
               return
            end if
         end if

         ! If a subtraction flag appear, throw an error at the moment.
         if (ca(i)%is_subtract) then
            ierr = SYNTAX_ERR_CHAR_CLASS_SUBTRANCTION_NOT_IMPLEMENTED
            is_valid = .false.
            return
         end if

         ! If the loop reaches the end of `ca` array, cancel the hyphenated flag, and
         ! then add a literal hyphen to the end.
         if (i> 1 .and. i == size(ca, dim=1)) then
            if (ca(i)%is_hyphenated) then
               ca(i)%is_hyphenated = .false.

               ca = [ca(1:size(ca)), &
                    character_array_t(SYMBOL_HYPN, .false., .false., ca(size(ca))%is_subtract, 1)]
               siz = siz + 1
               exit check
            end if
         end if
         siz = siz + ca(i)%seg_size
      end do check

      if (siz < 1) then
         ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN
         is_valid = .false.
         return
      end if
      allocate(list(siz))

      ! Initialize cache and counter variable.
      j = 0 ! Couter of actual list size for `seglist`.
      c = EMPTY_CHAR
      i = 1
      outer: do while(i <= size(ca, dim=1))
         c = ca(i)%c
         backslashed = ca(i)%is_escaped  ! cache `is_escaped` flag
         curr_hyphenated = ca(i)%is_hyphenated
         if (i > 1) prev_hyphenated = ca(i-1)%is_hyphenated 

         ! For escape sequences that take arguments.
         if (backslashed .and. c == ESCAPE_X) then
            i = i + 1
            if (i> size(ca, dim=1)) then
               ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN
               is_valid = .false.
               return
            end if
            c = ca(i)%c
            backslashed = ca(i)%is_escaped
            call hex2seg(c, curr_seg, ierr)
            if (ierr /= SYNTAX_VALID) then
               is_valid = .false.
               return
            end if
         else if (backslashed .and. c == ESCAPE_P) then
            ierr = SYNTAX_ERR_UNICODE_PROPERTY_NOT_IMPLEMENTED
            is_valid = .false.
            return
         else
            curr_seg = segment_t(ichar_utf8(c), ichar_utf8(c))
         end if

         ! For escape sequences that do not take arguments
         if (backslashed) then

            call convert_escaped_character_into_segments(c, cache)
            if (cache(1) == SEG_ERROR) then
               ierr = SYNTAX_ERR_ESCAPED_SYMBOL_INVALID
               is_valid = .false.
               return
            end if

            ! If the number of segemnts is greater than 1, register them to the `list`.
            if (size(cache, dim=1) > 1) then
               do k = 1, size(cache)
                  call register(list, cache(k), j, ierr)
               end do
               deallocate(cache)
               prev_seg = segment_t()
               i = i + 1
               cycle outer
            end if 

            curr_seg = cache(1)
         end if


         if (prev_hyphenated) then
            curr_seg = join_two_segments(prev_seg, curr_seg)
            if (curr_seg == SEG_ERROR) then
               ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN
               is_valid = .false.
               return
            end if
         end if
      
         if (.not. curr_hyphenated) then
            call register(list, curr_seg, j, jerr)
            if (jerr == SEGMENT_REJECTED) then
               ierr = SYNTAX_ERR_INVALID_CHARACTER_RANGE
               is_valid = .false.
               return
            end if
         end if

         prev_seg = curr_seg
         i = i + 1
      end do outer

      if (j < 1) then
         ! pattern '[+--]' causes this error for now.
         ! ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN
         ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN
         is_valid = .false.
         return
      end if

      call cube%add(list(1:j)) ! copy local array into the argument array.

   end subroutine interpret_class_string