Skip to main content

GetUtf16StringLength

Short summary

This function returns the length of a UTF-16 string in 16-bit words and the number of Unicode characters it contains (both excluding the null terminator).

Attention: All strings are handled as null-terminated streams of 16-bit words, i.e. the string ends at the first word with the value 16#0000.

Example:

myString :WString(10) := "Test";
stringLengthWords, charCount :UDINT;

------------------
GetUtf16StringLength(
utf16StringAddress := ADR(myString),
wordCount => stringLengthWords,
characterCount => charCount
);
  • Return type: UDINT

Parameters

Name | Type | Comment | Kind
utf16StringAddress | POINTER TO WORD | pointer to the UTF-16 string | input
wordCount | UDINT | - | output
characterCount | UDINT | - | output

Code

Declaration

(* Determines the length of a null-terminated UTF-16 string.
   Return value: number of 16-bit words in front of the terminator (same value as wordCount).
   0 is returned for a null address, for an empty string and when an invalid
   surrogate sequence is found. *)
FUNCTION GetUtf16StringLength :UDINT
VAR_INPUT
    (* pointer to the utf-16 string *)
    utf16StringAddress :POINTER TO WORD;
END_VAR
VAR CONSTANT
    (* the end of string marker; typed as WORD because it is compared against
       the 16-bit code units of the string *)
    END_OF_STRING :WORD := 16#0000;
END_VAR
VAR
    (* position of the code unit currently examined while counting characters *)
    wordIndex :UDINT := 0;
END_VAR
VAR_OUTPUT
    (* number of 16-bit words in front of the terminator; 0 on invalid input *)
    wordCount :UDINT := 0;
    (* number of Unicode characters, a valid surrogate pair counts as one; 0 on invalid input *)
    characterCount :UDINT := 0;
END_VAR

Implementation

// Default result. It is also what the caller sees for a null address,
// an empty string or a malformed surrogate sequence.
GetUtf16StringLength := 0;
RETURN(utf16StringAddress = 0);

// Pass 1: count the 16-bit words in front of the end-of-string marker.
WHILE utf16StringAddress[wordCount] <> END_OF_STRING DO
    wordCount := wordCount + 1;
END_WHILE

// Nothing to count in an empty string.
RETURN(wordCount = 0);

// Pass 2: walk the words again and count characters; a valid surrogate
// pair occupies two words but is counted as a single character.
wordIndex := 0;
WHILE wordIndex < wordCount DO
    IF (utf16StringAddress[wordIndex] >= 16#D800) AND (utf16StringAddress[wordIndex] <= 16#DBFF) THEN
        // High surrogate: it must be followed, inside the string, by a low surrogate.
        IF (wordIndex + 1 >= wordCount)
            OR_ELSE (utf16StringAddress[wordIndex + 1] < 16#DC00)
            OR_ELSE (utf16StringAddress[wordIndex + 1] > 16#DFFF)
        THEN
            // No valid surrogate pair: reset all results, log the error and stop.
            GetUtf16StringLength := 0;
            wordCount := 0;
            characterCount := 0;
            Tc2_System.ADSLOGSTR(
                msgCtrlMask := Tc2_System.Global_Variables.ADSLOG_MSGTYPE_ERROR,
                msgFmtStr := 'CNM_UnicodeUtilities: Invalid UTF-16 surrogate pair found!',
                strArg := ''
            );
            RETURN;
        END_IF

        characterCount := characterCount + 1;
        wordIndex := wordIndex + 2; // step over the low surrogate as well
    ELSIF (utf16StringAddress[wordIndex] >= 16#DC00) AND (utf16StringAddress[wordIndex] <= 16#DFFF) THEN
        // Low surrogate without a preceding high surrogate: reset all results,
        // log the offending word as hex and stop.
        GetUtf16StringLength := 0;
        wordCount := 0;
        characterCount := 0;
        Tc2_System.ADSLOGSTR(
            msgCtrlMask := Tc2_System.Global_Variables.ADSLOG_MSGTYPE_ERROR,
            msgFmtStr := 'CNM_UnicodeUtilities: Unpaired low surrogate found: %s!',
            strArg := Tc2_Utilities.WORD_TO_HEXSTR( in := utf16StringAddress[wordIndex], iPrecision := 4)
        );
        RETURN;
    ELSE
        // Any other word is a complete code point of its own (basic multilingual plane).
        characterCount := characterCount + 1;
        wordIndex := wordIndex + 1;
    END_IF
END_WHILE

// Success: the function result mirrors the wordCount output.
GetUtf16StringLength := wordCount;