Skip to main content

GetCodepointsFromUtf8String

Short summary

This function decodes a UTF-8 string into a sequence of Unicode code points and writes them to the given codepoint buffer.

Example:

(* Declaration part: source string, destination array and result counter. *)
myString :String := 'Test';
myCodepoints :Array[0..10] OF UnicodeCodepoint;
codepointCount :UDINT;
------------------
(* Implementation part: decode myString into myCodepoints. *)
(* bufferSize is the byte size of the whole destination array; *)
(* codepointCount receives the number of decoded code points. *)
GetCodepointsFromUtf8String(
utf8StringAddress := ADR(myString),
utf8StringByteCount := GetUtf8StringLength(myString),
codePointBuffer := ADR(myCodepoints),
bufferSize := SIZEOF(myCodepoints),
codePointsCount => codepointCount
);
  • Return type: UDINT

Parameters

| Name | Type | Comment | Kind |
| --- | --- | --- | --- |
| utf8StringAddress | POINTER TO BYTE | pointer to utf-8 string buffer | input |
| utf8StringByteCount | UDINT | length of the utf-8 string in bytes | input |
| codePointBuffer | POINTER TO UnicodeCodePoint | buffer where the codepoint sequence is stored | input |
| bufferSize | UDINT | size of the codepoint buffer in bytes, e.g. SIZEOF of the array that codePointBuffer points to | input |
| codePointsCount | UDINT | number of decoded codepoints | output |

Code

Declaration

(* Decodes the UTF-8 byte sequence at utf8StringAddress into code points, *)
(* stores them in codePointBuffer and reports how many were decoded in codePointsCount. *)
(* Decoding stops at the first 16#00 byte, after utf8StringByteCount bytes, *)
(* when the buffer is full, or when a sequence is truncated. *)
(* An invalid lead byte resets codePointsCount to 0. *)
FUNCTION GetCodepointsFromUtf8String
VAR_INPUT
    (* pointer to utf-8 string buffer *)
    utf8StringAddress :POINTER TO BYTE;
    (* length of the utf-8 string in bytes *)
    utf8StringByteCount :UDINT;
    (* buffer where the codepoint sequence is stored *)
    codePointBuffer :POINTER TO UnicodeCodePoint;
    (* size of the codepoint buffer in bytes, e.g. SIZEOF of the array that codePointBuffer points to; *)
    (* note that SIZEOF applied to the pointer itself would only yield the pointer size *)
    bufferSize :UDINT;
END_VAR
VAR_OUTPUT
    (* number of decoded codepoints *)
    codePointsCount :UDINT := 0;
END_VAR
VAR CONSTANT
    (* terminator byte that ends decoding before utf8StringByteCount is reached *)
    END_OF_STRING :BYTE := 16#00;
END_VAR
VAR
    (* offset of the next undecoded byte in the input *)
    byteIndex :UDINT;
    (* length of the current UTF-8 sequence in bytes; 0 is the value the *)
    (* implementation tests for to detect an invalid lead byte, so it must be inside the subrange *)
    octetCount :BYTE(0..4);
END_VAR

Implementation

(* Leave immediately when either pointer is null: nothing can be read or written. *)
RETURN((utf8StringAddress = 0) OR_ELSE (codePointBuffer = 0));

(* Walk the input one UTF-8 sequence at a time until the byte count is exhausted *)
(* or a terminating 16#00 byte is reached. AND_THEN keeps the byte access inside the given length. *)
WHILE (byteIndex < utf8StringByteCount AND_THEN utf8StringAddress[byteIndex] <> END_OF_STRING) DO
    octetCount := GetOctetCountByFirstOctet(utf8StringAddress[byteIndex]);

    (* A length of 0 marks a byte that cannot start a sequence: discard the partial result. *)
    IF (octetCount = 0) THEN
        codePointsCount := 0;
        RETURN;
    END_IF

    (* Decode only if the whole sequence lies inside the input AND a complete slot is still free. *)
    (* The slot test divides instead of multiplying: "codePointsCount*4 < bufferSize" accepted a *)
    (* partially available trailing slot when bufferSize is not a multiple of 4 (write past the *)
    (* buffer end) and could wrap around for very large counts. *)
    (* NOTE(review): 4 is the slot size used by the original code; presumably SIZEOF(UnicodeCodePoint) - confirm. *)
    IF (octetCount <= utf8StringByteCount - byteIndex)
        AND_THEN (codePointsCount < bufferSize / 4)
    THEN
        codePointBuffer[codePointsCount] := DecodeCodepointFromOctets(ADR(utf8StringAddress[byteIndex]), octetCount);

        byteIndex := byteIndex + octetCount;
        codePointsCount := codePointsCount + 1;
    ELSE
        (* Truncated sequence or buffer full: keep what has been decoded so far. *)
        RETURN;
    END_IF
END_WHILE