GetUtf16StringLength
Short summary
This function returns the length of a UTF-16 string in 16-bit words and the number of Unicode characters it contains (both without the null terminator).
Attention: The string is handled as a null-terminated stream of 16-bit words; scanning stops at the first word with the value 16#0000.
Example:
myString :WString(10) := "Test";
stringLengthWords, charCount :UDINT;
------------------
GetUtf16StringLength(
utf16StringAddress := ADR(myString),
wordCount => stringLengthWords,
characterCount => charCount
);
- Return type:
UDINT
Parameters
| Name | Type | Comment | Kind |
|---|---|---|---|
| utf16StringAddress | POINTER TO WORD | pointer to the utf-16 string | input |
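| wordCount | UDINT | length of the string in 16-bit words (without null terminator); 0 on error | output |
| characterCount | UDINT | number of Unicode characters (a surrogate pair counts as one); 0 on error | output |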
Code
Declaration
(* Determines the length of a null-terminated UTF-16 string.
   Return value: number of 16-bit words in front of the terminator (same value as wordCount).
   The return value and both outputs are 0 for a null pointer, an empty string,
   or a string that contains an invalid or unpaired surrogate. *)
FUNCTION GetUtf16StringLength :UDINT
VAR_INPUT
    (* pointer to the utf-16 string *)
    utf16StringAddress :POINTER TO WORD;
END_VAR
VAR CONSTANT
    (* the end of string marker; declared as WORD because it is compared against 16-bit string elements *)
    END_OF_STRING :WORD := 16#0000;
END_VAR
VAR
    (* index of the word currently inspected while counting characters *)
    wordIndex :UDINT := 0;
END_VAR
VAR_OUTPUT
    (* number of 16-bit words in the string, without the null terminator; 0 on error *)
    wordCount :UDINT := 0;
    (* number of Unicode characters; a valid surrogate pair counts as one character; 0 on error *)
    characterCount :UDINT := 0;
END_VAR
Implementation
// Default result: stays 0 for a null pointer, an empty string or invalid UTF-16 data.
GetUtf16StringLength := 0;

// Guard: without a string address there is nothing to measure.
IF utf16StringAddress = 0 THEN
    RETURN;
END_IF

// Pass 1: count the 16-bit words in front of the end-of-string marker.
WHILE utf16StringAddress[wordCount] <> END_OF_STRING DO
    wordCount := wordCount + 1;
END_WHILE

// Guard: empty string, all results remain 0.
IF wordCount = 0 THEN
    RETURN;
END_IF

// Pass 2: walk over the words again, validate surrogates and count characters.
wordIndex := 0;
WHILE wordIndex < wordCount DO
    IF (utf16StringAddress[wordIndex] >= 16#D800) AND (utf16StringAddress[wordIndex] <= 16#DBFF) THEN
        // High surrogate: the following word must exist inside the string
        // and must be a low surrogate (16#DC00..16#DFFF).
        IF ((wordIndex + 1 >= wordCount)
            OR_ELSE (utf16StringAddress[wordIndex + 1] < 16#DC00)
            OR_ELSE (utf16StringAddress[wordIndex + 1] > 16#DFFF))
        THEN
            // No valid surrogate pair: reset all results, log the error and stop.
            GetUtf16StringLength := 0;
            wordCount := 0;
            characterCount := 0;
            Tc2_System.ADSLOGSTR(
                msgCtrlMask := Tc2_System.Global_Variables.ADSLOG_MSGTYPE_ERROR,
                msgFmtStr := 'CNM_UnicodeUtilities: Invalid UTF-16 surrogate pair found!',
                strArg := ''
            );
            RETURN;
        END_IF

        // Valid pair: one character made of two words, so skip the low surrogate as well.
        characterCount := characterCount + 1;
        wordIndex := wordIndex + 2;
    ELSIF (utf16StringAddress[wordIndex] >= 16#DC00) AND (utf16StringAddress[wordIndex] <= 16#DFFF) THEN
        // Low surrogate without a preceding high surrogate: reset all results,
        // log the offending word as hex and stop.
        GetUtf16StringLength := 0;
        wordCount := 0;
        characterCount := 0;
        Tc2_System.ADSLOGSTR(
            msgCtrlMask := Tc2_System.Global_Variables.ADSLOG_MSGTYPE_ERROR,
            msgFmtStr := 'CNM_UnicodeUtilities: Unpaired low surrogate found: %s!',
            strArg := Tc2_Utilities.WORD_TO_HEXSTR( in := utf16StringAddress[wordIndex], iPrecision := 4)
        );
        RETURN;
    ELSE
        // Any other word is a single code point of the basic multilingual plane (U+0000 to U+FFFF).
        characterCount := characterCount + 1;
        wordIndex := wordIndex + 1;
    END_IF
END_WHILE

// Success: the function result is the string length in words.
GetUtf16StringLength := wordCount;