GetUtf16StringFromCodepoints
Short summary
This function creates a UTF-16 string from a sequence of code points.
Attention: The buffer size must be sufficient, otherwise the string will be truncated according to the buffer size!
Example:
myString :WString(10);
codepoints :Array[0..1] OF UnicodeCodepoint := [16#0041, 16#0308];
stringLength :UDINT;
------------------
GetUtf16StringFromCodepoints(
addressOfCodePoints := ADR(codepoints),
codePointCount := 2,
stringBuffer := ADR(myString),
bufferSize := SIZEOF(myString),
utf16StringWordCount => stringLength
);
- Return type:
BOOL
Parameters
| Name | Type | Comment | Kind |
|---|---|---|---|
| addressOfCodePoints | POINTER TO UnicodeCodePoint | pointer to codepoints sequence | input |
| codePointCount | UDINT | number of codepoints | input |
| stringBuffer | POINTER TO WORD | pointer to the stringbuffer, where the utf-16 string will be stored | input |
| bufferSize | UDINT | size of the stringbuffer in bytes | input |
| utf16StringWordCount | UDINT | actual length of the string in words | output |
Code
Declaration
(*
    Encodes a sequence of Unicode code points as a null-terminated UTF-16 string.

    Conversion stops at the first of: codePointCount code points consumed, a code point
    with value 0 in the input, or no room left in stringBuffer. A supplementary-plane
    code point is only written if BOTH of its surrogates fit in front of the terminator,
    so a truncated result never ends in a lone high surrogate.

    Returns TRUE when the arguments were usable and a terminated string was written
    (possibly truncated). Returns FALSE, leaving stringBuffer untouched, when a pointer
    is 0, codePointCount is 0, or bufferSize cannot hold even the terminator word.

    NOTE(review): code points 16#D800..16#DFFF fall into the BMP branch and are copied
    unchanged - confirm whether they should be replaced by 16#FFFD instead.
*)
FUNCTION GetUtf16StringFromCodepoints : BOOL
VAR_INPUT
    (* pointer to codepoints sequence *)
    addressOfCodePoints :POINTER TO UnicodeCodePoint;
    (* number of codepoints *)
    codePointCount :UDINT;
    (* pointer to the stringbuffer, where the utf-16 string will be stored *)
    stringBuffer :POINTER TO WORD;
    (* size of the stringbuffer in bytes *)
    bufferSize :UDINT;
END_VAR
VAR_OUTPUT
    (* actual length of the string in words, not counting the null terminator *)
    utf16StringWordCount :UDINT:=0;
END_VAR
VAR CONSTANT
    (* terminator word; WORD so that it matches the element type of stringBuffer *)
    END_OF_STRING :WORD := 16#0000;
    (* U+FFFD, written in place of code points above 16#10FFFF *)
    REPLACEMENT_CHARACTER :WORD := 16#FFFD;
    (* size of one UTF-16 code unit in bytes *)
    BYTES_PER_WORD :UDINT := 2;
END_VAR
VAR
    highSurrogate, lowSurrogate :UnicodeCodePoint;
    codePointIndex :UDINT;
    (* number of words that may be written while still leaving room for the terminator *)
    maxWordCount :UDINT;
END_VAR
Implementation
(* A buffer smaller than one word cannot even hold the terminator. Rejecting it here also
   keeps the subtraction below from wrapping around in UDINT. *)
IF ((addressOfCodePoints = 0) OR_ELSE (stringBuffer = 0) OR_ELSE (codePointCount = 0) OR_ELSE (bufferSize < BYTES_PER_WORD)) THEN
    RETURN;
END_IF

Tc2_System.MEMSET(stringBuffer, 0, bufferSize);

maxWordCount := (bufferSize / BYTES_PER_WORD) - 1; // last word is reserved for the terminator
codePointIndex := 0;

WHILE ((codePointIndex < codePointCount)
        AND_THEN (addressOfCodePoints[codePointIndex] > 0)
        AND_THEN (utf16StringWordCount < maxWordCount)) DO

    CASE addressOfCodePoints[codePointIndex] OF
        16#0000_0000..16#0000_FFFF: // codepoint is in bmp, one word
            stringBuffer[utf16StringWordCount] := TO_WORD(addressOfCodePoints[codePointIndex]);
            utf16StringWordCount := utf16StringWordCount + 1;

        16#0001_0000..16#0010_FFFF: // code point is not in bmp but valid, use surrogate pair
            // The loop condition only guarantees one free word; a pair needs two.
            // Stop here instead of emitting half a pair.
            IF ((maxWordCount - utf16StringWordCount) < 2) THEN
                EXIT;
            END_IF
            highSurrogate := SHR(addressOfCodePoints[codePointIndex] - 16#10000,10) + 16#D800;
            lowSurrogate := ((addressOfCodePoints[codePointIndex] - 16#10000) AND 16#3FF) + 16#DC00;
            stringBuffer[utf16StringWordCount] := TO_WORD(highSurrogate);
            stringBuffer[utf16StringWordCount + 1] := TO_WORD(lowSurrogate);
            utf16StringWordCount := utf16StringWordCount + 2;

    ELSE // codepoint was not valid
        stringBuffer[utf16StringWordCount] := REPLACEMENT_CHARACTER; // usually used to mark non displayable characters
        utf16StringWordCount := utf16StringWordCount + 1;
    END_CASE

    codePointIndex := codePointIndex + 1;
END_WHILE

(* add null terminator; utf16StringWordCount <= maxWordCount, so this index is inside the buffer *)
stringBuffer[utf16StringWordCount] := END_OF_STRING;

GetUtf16StringFromCodepoints := TRUE;