Skip to main content

GetUtf16StringFromCodepoints

Short summary

This function creates a UTF-16 string from a sequence of code points.

Attention: The buffer size must be sufficient, otherwise the string will be truncated according to the buffer size!

Example:

myString :WString(10);
codepoints :Array[0..1] OF UnicodeCodepoint := [16#0041, 16#0308];
stringLength :UDINT;

------------------
GetUtf16StringFromCodepoints(
addressOfCodePoints := ADR(codepoints),
codePointCount := 2,
stringBuffer := ADR(myString),
bufferSize := SIZEOF(myString),
utf16StringWordCount => stringLength
);
  • Return type: BOOL

Parameters

| Name | Type | Comment | Kind |
|---|---|---|---|
| addressOfCodePoints | POINTER TO UnicodeCodePoint | pointer to the code point sequence | input |
| codePointCount | UDINT | number of code points | input |
| stringBuffer | POINTER TO WORD | pointer to the string buffer where the UTF-16 string will be stored | input |
| bufferSize | UDINT | size of the string buffer in bytes | input |
| utf16StringWordCount | UDINT | actual length of the string in words | output |

Code

Declaration

(* Builds a UTF-16 string from a sequence of Unicode code points.
   The caller provides the target buffer; the number of 16-bit words written
   is reported through utf16StringWordCount. *)
FUNCTION GetUtf16StringFromCodepoints : BOOL
VAR_INPUT
    (* pointer to the first element of the code point sequence *)
    addressOfCodePoints : POINTER TO UnicodeCodePoint;
    (* number of code points in the sequence *)
    codePointCount : UDINT;
    (* pointer to the string buffer where the UTF-16 string will be stored *)
    stringBuffer : POINTER TO WORD;
    (* size of the string buffer in bytes *)
    bufferSize : UDINT;
END_VAR
VAR_OUTPUT
    (* actual length of the string in 16-bit words *)
    utf16StringWordCount : UDINT := 0;
END_VAR
VAR CONSTANT
    (* UTF-16 null terminator; typed WORD because it is stored into the WORD buffer *)
    END_OF_STRING : WORD := 16#0000;
END_VAR
VAR
    (* the two halves of a surrogate pair for code points above 16#FFFF *)
    highSurrogate, lowSurrogate : UnicodeCodePoint;
    (* index of the code point currently being converted *)
    codePointIndex : UDINT;
END_VAR

Implementation

(* Converts the code points at addressOfCodePoints into UTF-16 words in stringBuffer.
   Conversion stops at the end of the sequence, at the first code point equal to 0,
   or when the next character would no longer fit in front of the terminating null word.
   utf16StringWordCount receives the number of words written, terminator excluded.
   NOTE(review): the BOOL function result is never assigned in this body, so it keeps
   its initial value - confirm the intended meaning before relying on it. *)

// Nothing to do without a source, a target or any input. A buffer smaller than one
// word (2 bytes) cannot even hold the terminator; rejecting it here also keeps the
// UDINT expression (bufferSize/2)-1 used below from wrapping around.
IF ((addressOfCodePoints = 0) OR_ELSE (stringBuffer = 0) OR_ELSE (codePointCount = 0) OR_ELSE (bufferSize < 2)) THEN
    RETURN;
END_IF

// Clear the whole buffer so that everything behind the converted text reads as zero.
Tc2_System.MEMSET(stringBuffer, 0, bufferSize);
codePointIndex := 0;

// (bufferSize/2)-1 is the number of words available for characters;
// the last word of the buffer is reserved for the null terminator.
WHILE ((codePointIndex < codePointCount) AND_THEN (addressOfCodePoints[codePointIndex] > 0) AND_THEN (utf16StringWordCount < (bufferSize/2)-1)) DO
    CASE addressOfCodePoints[codePointIndex] OF
        16#0000_0000..16#0000_FFFF: // code point is in the BMP, one word is enough
            stringBuffer[utf16StringWordCount] := TO_WORD(addressOfCodePoints[codePointIndex]);
            utf16StringWordCount := utf16StringWordCount + 1;

        16#0001_0000..16#0010_FFFF: // code point is not in the BMP but valid, use a surrogate pair
            // A pair needs two words. If only one is left, stop here instead of
            // storing a lone high surrogate and overwriting the terminator slot.
            IF (utf16StringWordCount + 2 > (bufferSize/2)-1) THEN
                EXIT;
            END_IF
            highSurrogate := SHR(addressOfCodePoints[codePointIndex] - 16#10000, 10) + 16#D800;
            lowSurrogate := ((addressOfCodePoints[codePointIndex] - 16#10000) AND 16#3FF) + 16#DC00;
            stringBuffer[utf16StringWordCount] := TO_WORD(highSurrogate);
            utf16StringWordCount := utf16StringWordCount + 1;
            stringBuffer[utf16StringWordCount] := TO_WORD(lowSurrogate);
            utf16StringWordCount := utf16StringWordCount + 1;

    ELSE // code point is above 16#10FFFF and therefore not valid
        stringBuffer[utf16StringWordCount] := 16#FFFD; // replacement character, usually used to mark non displayable characters
        utf16StringWordCount := utf16StringWordCount + 1;
    END_CASE
    codePointIndex := codePointIndex + 1;
END_WHILE

(* add null terminator *)
// utf16StringWordCount never exceeds (bufferSize/2)-1 here, so this index is inside the buffer.
stringBuffer[utf16StringWordCount] := END_OF_STRING;