This function returns the index of the end of the (multibyte) character, given the string str and the current index curr.
| Type | Intent | Optional | Attributes | | Name | |
|---|---|---|---|---|---|---|
| character(len=*), | intent(in) | | | :: | str | |
| integer(kind=int32), | intent(in) | | | :: | curr | |
pure function idxutf8 (str, curr) result(tail)
   !! Return the index of the last byte of the UTF-8 character located at
   !! byte position `curr` of `str`.
   !!
   !! * If `str(curr:curr)` is a lead byte, the character length is decoded
   !!   from its high bits (0xxxxxxx: 1 byte, 110xxxxx: 2, 1110xxxx: 3,
   !!   11110xxx: 4) and `curr + length - 1` is returned, clamped to
   !!   `len(str)` so that a truncated sequence never produces an index
   !!   beyond the end of the string.
   !! * If it is a continuation byte (10xxxxxx) or an invalid lead byte
   !!   (11111xxx), up to three following bytes are scanned; the result is
   !!   the index just before the next byte that starts a character, or
   !!   `len(str)` when the string ends first.
   !! * If no boundary is found within those four bytes, or `curr` lies
   !!   outside `1..len(str)`, `curr` itself is returned.
   use, intrinsic :: iso_fortran_env, only: int32
   implicit none
   character(*),   intent(in) :: str    ! Input string, expected to hold UTF-8 encoded text.
   integer(int32), intent(in) :: curr   ! Byte index at which to look.
   integer(int32)             :: tail   ! Resulting index of the last byte of the character.

   ! High-bit patterns that identify each kind of UTF-8 byte.
   integer(int32), parameter :: tag_ascii = 0    ! 0_2     after shifting right by 7
   integer(int32), parameter :: tag_cont  = 2    ! 10_2    after shifting right by 6
   integer(int32), parameter :: tag_lead2 = 6    ! 110_2   after shifting right by 5
   integer(int32), parameter :: tag_lead3 = 14   ! 1110_2  after shifting right by 4
   integer(int32), parameter :: tag_lead4 = 30   ! 11110_2 after shifting right by 3
   integer(int32), parameter :: byte_mask = 255

   integer(int32) :: i      ! Offset from `curr` of the byte being examined.
   integer(int32) :: pos    ! Absolute index of the byte being examined.
   integer(int32) :: last   ! Length of `str`, i.e. the largest valid index.
   integer(int32) :: byte   ! Value (0..255) of the byte being examined.

   tail = curr
   last = len(str, kind=int32)

   ! Nothing can be inspected when the starting index is outside the string.
   if (curr < 1 .or. curr > last) return

   do i = 0, 3
      pos = curr + i

      ! Running off the end of the string terminates the current character.
      ! This is only reachable with i > 0, so `last` equals `curr + i - 1`.
      if (pos > last) then
         tail = last
         return
      end if

      ! Hold the byte in a 32-bit integer: values 128..255 are not
      ! representable in int8. The mask is purely defensive.
      byte = iand(ichar(str(pos:pos), kind=int32), byte_mask)

      ! Continuation bytes never start a character; keep scanning.
      if (ishft(byte, -6) == tag_cont) cycle

      if (i == 0) then
         ! The first byte is a lead byte: its high bits give the length.
         if (ishft(byte, -3) == tag_lead4) then
            tail = min(curr + 3, last)
            return
         else if (ishft(byte, -4) == tag_lead3) then
            tail = min(curr + 2, last)
            return
         else if (ishft(byte, -5) == tag_lead2) then
            tail = min(curr + 1, last)
            return
         else if (ishft(byte, -7) == tag_ascii) then
            tail = curr
            return
         end if
         ! Invalid lead byte (11111xxx): fall through and keep scanning.
      else
         ! A byte that starts a new character marks the end of the current one.
         if (ishft(byte, -3) == tag_lead4 .or. ishft(byte, -4) == tag_lead3 .or. &
             ishft(byte, -5) == tag_lead2 .or. ishft(byte, -7) == tag_ascii) then
            tail = pos - 1
            return
         end if
      end if
   end do

end function idxutf8