char_utf8 Function

public pure function char_utf8(code) result(str)

The char_utf8 function takes a code point as integer in Unicode character set, and returns the corresponding character as UTF-8 binary string.

This function is like an extension of char() for the UTF-8 codeset.

Arguments

Type IntentOptional Attributes Name
integer(kind=int32), intent(in) :: code

Return Value character(len=:), allocatable


Source Code

   pure function char_utf8 (code) result(str)
      use, intrinsic :: iso_fortran_env
      implicit none
      integer(int32), intent(in) :: code        ! Input Unicode code point.

      character(:), allocatable  :: str         ! Resulting one UTF-8 character.
      character(32), allocatable :: bin         ! A 32-digit number expressed in character format for masking.
      integer(int32)             :: buf, mask   ! Buffer and mask for bit operations.
      integer(int8)              :: byte(4)     ! Array to hold up 4 bytes of the UTF-8 character.

      str = ''    ! Initialize result string.
      buf = code  ! Initialize buffer with input `code` point.

      bin = '0000000000000000000000000111111' ! Lower 6-bit mask
      read(bin, '(b32.32)') mask              ! Read the `mask` from the `bin` character string.

      byte(1) = int(iand(ishft(buf, -18), mask),kind(byte))    ! First byte

      buf = code
      byte(2) = int(iand(ishft(buf, -12), mask), kind(byte))   ! Second byte

      buf = code
      byte(3) = int(iand(ishft(buf, -6), mask), kind(byte))    ! Third byte

      buf = code
      byte(4) = int(iand(buf, mask), kind(byte))               ! Fourth byte

      if (code > 2**7-1) then    ! Check if the `code` point is greater than 127 (non-ASCII character).

         if (2**16 -1 < code) then     ! 4-byte character
            byte(1) = ibset(byte(1),7)
            byte(1) = ibset(byte(1),6)
            byte(1) = ibset(byte(1),5)
            byte(1) = ibset(byte(1),4)
            byte(1) = ibclr(byte(1),3)
            byte(2) = set_continuation_byte(byte(2))  ! Set continuation bytes.
            byte(3) = set_continuation_byte(byte(3))
            byte(4) = set_continuation_byte(byte(4))

         else if (2**11 - 1 < code) then  ! 3-byte character
            byte(1) = 32
            byte(2) = ibset(byte(2), 7)
            byte(2) = ibset(byte(2), 6)
            byte(2) = ibset(byte(2), 5)
            byte(2) = ibclr(byte(2), 4)
            byte(3) = set_continuation_byte(byte(3))
            byte(4) = set_continuation_byte(byte(4))

         else if (2**7 -1 < code) then    ! 2-byte character
            byte(1) = 32
            byte(2) = 32
            byte(3) = ibset(byte(3), 7)
            byte(3) = ibset(byte(3), 6)
            byte(3) = ibclr(byte(3), 5)
            byte(4) = set_continuation_byte(byte(4))
         end if

         str = char(byte(1)) //char(byte(2)) //char(byte(3)) //char(byte(4))  ! Concatenate bytes into a string.
         str = trim(adjustl(str))   ! Trim leading and tailing space.

      else
         str = char(code)  ! For ASCII characters.
      end if

   end function char_utf8