This subroutine parses a pattern string and outputs a list of segment_t
type.
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
character(len=*), | intent(in) | :: | str | |||
type(cube_t), | intent(inout) | :: | cube | |||
logical, | intent(inout) | :: | is_valid | |||
integer, | intent(inout) | :: | ierr |
pure subroutine interpret_class_string(str, cube, is_valid, ierr) use :: forgex_utf8_m, only: idxutf8, next_idxutf8, len_utf8, ichar_utf8 use :: forgex_parameters_m use :: forgex_segment_m, register => register_segment_to_list use :: forgex_character_array_m use :: forgex_cube_m, only: cube_t implicit none character(*), intent(in) :: str type(cube_t), intent(inout) :: cube logical, intent(inout) :: is_valid integer, intent(inout) :: ierr integer :: i, j, k integer :: jerr type(segment_t) :: prev_seg, curr_seg type(segment_t), allocatable :: list(:), cache(:) logical :: backslashed logical :: prev_hyphenated, curr_hyphenated type(character_array_t), allocatable :: ca(:) ! character array integer :: siz ! total number of segment of `ca` array character(:), allocatable :: c ! Temporary variable stores a character of interest. ! Initialize is_valid = .true. backslashed = .false. prev_hyphenated = .false. curr_hyphenated = .false. prev_seg = segment_t() curr_seg = segment_t() if (len(str) >= 2) then if (str(1:2) == '--') then ierr = SYNTAX_ERR_MISPLACED_SUBTRACTION_OPERATOR is_valid = .false. end if end if ! Convert to an array from a pattern string. call character_string_to_array(str, ca) if (.not. allocated(ca)) then ierr = SYNTAX_ERR_EMPTY_CHARACTER_CLASS is_valid = .false. return end if ! Remove backslash and hyphen, and raise respective flag for each component. call parse_backslash_and_hyphen_in_char_array(ca, ierr) if (ierr == SYNTAX_ERR_MISPLACED_SUBTRACTION_OPERATOR) then is_valid = .false. return end if ! for escape sequences such as \x, \x{...}, \p{...}. call parse_escape_sequence_with_argument(ca, ierr) if (ierr /= SYNTAX_VALID) then is_valid = .false. return end if #ifdef IMPURE call dump_character_array_t_list(ca) #endif ! Each ca(:)%seg_size will be set by this procedure calling. call parse_segment_width_in_char_array(ca) ! If each of the array element is hyphenated, ! check that the range is not 1 and return invalid. siz = 0 check: do i = 1, size(ca, dim=1) ! If the former hypenated range is invalid, throw an error. if (ca(i)%is_hyphenated .and. ca(i)%seg_size /= 1) then ierr = SYNTAX_ERR_RANGE_WITH_ESCAPE_SEQUENCES is_valid = .false. return end if ! If the range following hyphenataed is invalid, throw an error. if (i>1) then if (ca(i-1)%is_hyphenated .and. ca(i)%seg_size /= 1) then ierr = SYNTAX_ERR_RANGE_WITH_ESCAPE_SEQUENCES is_valid = .false. return end if end if ! If a subtraction flag appear, throw an error at the moment. if (ca(i)%is_subtract) then ierr = SYNTAX_ERR_CHAR_CLASS_SUBTRANCTION_NOT_IMPLEMENTED is_valid = .false. return end if ! If the loop reaches the end of `ca` array, cancel the hyphenated flag, and ! then add a literal hyphen to the end. if (i> 1 .and. i == size(ca, dim=1)) then if (ca(i)%is_hyphenated) then ca(i)%is_hyphenated = .false. ca = [ca(1:size(ca)), & character_array_t(SYMBOL_HYPN, .false., .false., ca(size(ca))%is_subtract, 1)] siz = siz + 1 exit check end if end if siz = siz + ca(i)%seg_size end do check if (siz < 1) then ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN is_valid = .false. return end if allocate(list(siz)) ! Initialize cache and counter variable. j = 0 ! Couter of actual list size for `seglist`. c = EMPTY_CHAR i = 1 outer: do while(i <= size(ca, dim=1)) c = ca(i)%c backslashed = ca(i)%is_escaped ! cache `is_escaped` flag curr_hyphenated = ca(i)%is_hyphenated if (i > 1) prev_hyphenated = ca(i-1)%is_hyphenated ! For escape sequences that take arguments. if (backslashed .and. c == ESCAPE_X) then i = i + 1 if (i> size(ca, dim=1)) then ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN is_valid = .false. return end if c = ca(i)%c backslashed = ca(i)%is_escaped call hex2seg(c, curr_seg, ierr) if (ierr /= SYNTAX_VALID) then is_valid = .false. return end if else if (backslashed .and. c == ESCAPE_P) then ierr = SYNTAX_ERR_UNICODE_PROPERTY_NOT_IMPLEMENTED is_valid = .false. return else curr_seg = segment_t(ichar_utf8(c), ichar_utf8(c)) end if ! For escape sequences that do not take arguments if (backslashed) then call convert_escaped_character_into_segments(c, cache) if (cache(1) == SEG_ERROR) then ierr = SYNTAX_ERR_ESCAPED_SYMBOL_INVALID is_valid = .false. return end if ! If the number of segemnts is greater than 1, register them to the `list`. if (size(cache, dim=1) > 1) then do k = 1, size(cache) call register(list, cache(k), j, ierr) end do deallocate(cache) prev_seg = segment_t() i = i + 1 cycle outer end if curr_seg = cache(1) end if if (prev_hyphenated) then curr_seg = join_two_segments(prev_seg, curr_seg) if (curr_seg == SEG_ERROR) then ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN is_valid = .false. return end if end if if (.not. curr_hyphenated) then call register(list, curr_seg, j, jerr) if (jerr == SEGMENT_REJECTED) then ierr = SYNTAX_ERR_INVALID_CHARACTER_RANGE is_valid = .false. return end if end if prev_seg = curr_seg i = i + 1 end do outer if (j < 1) then ! pattern '[+--]' causes this error for now. ! ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN ierr = SYNTAX_ERR_THIS_SHOULD_NOT_HAPPEN is_valid = .false. return end if call cube%add(list(1:j)) ! copy local array into the argument array. end subroutine interpret_class_string