Skip to main content

LeftOfUtf8String

Short summary

This function truncates a UTF-8 string in place so that only the leftmost `length` characters remain. If `length` is greater than the number of characters in the string, the string is left unchanged.

It returns the new size of the string in bytes.

Attention: All strings are handled as null-terminated byte streams

Example:

// Declaration part: the string to shorten and a variable that receives its new byte length
myString 			:String(10) := 'Test';
lengthOfMyString :UDINT := 4;
------------------
// Implementation part: keep only the 3 leftmost characters of myString
LeftOfUtf8String(
	stringAddress := ADR(myString),
	length := 3,
	newStringLength => lengthOfMyString
); // myString will be 'Tes' with length 3
  • Return type: UDINT

Parameters

| Name | Type | Comment | Kind |
| stringAddress | POINTER TO BYTE | pointer to the utf-8 string | input |
| length | UDINT | count of characters in the new substring | input |
| normalizeString | BOOL | normalize the string before creating the substring | input |
| newStringLength | UDINT | new length of the string in bytes | output |

Code

Declaration

(* Shortens the null-terminated UTF-8 string at stringAddress in place so that only its
   leftmost `length` characters remain. The return value is the resulting size of the
   string in bytes; the same value is also delivered through newStringLength. *)
FUNCTION LeftOfUtf8String :UDINT
VAR_INPUT
(* pointer to the null-terminated UTF-8 string; the string is modified in place *)
stringAddress :POINTER TO BYTE;
(* count of characters to keep in the new substring *)
length :UDINT;
(* normalize the string (via NormalizeCodepointsFormC) before creating the substring; enabled by default *)
normalizeString :BOOL := TRUE;
END_VAR
VAR_OUTPUT
(* new length of the string in bytes *)
newStringLength :UDINT;
END_VAR
VAR
(* byte count of the string as reported by GetUtf8StringLength *)
bytesOfString :UDINT;
(* dynamically allocated buffer holding the code points of the string *)
codepoints :POINTER TO UnicodeCodePoint;
(* dynamically allocated buffer holding the code points of the string after normalization *)
normalizedCodepoints :POINTER TO UnicodeCodePoint;
(* code point count of the string *)
codepointCountOfString :UDINT;
(* code point count of the normalized string *)
normalizedCodepointCountOfString :UDINT;
END_VAR
VAR CONSTANT
(* fill byte used to write the string terminator *)
NULL :USINT := 0;
END_VAR

Implementation

// Null pointer: there is no string to edit. Both results keep their initial value 0.
IF stringAddress = 0 THEN
	RETURN;
END_IF

bytesOfString := GetUtf8StringLength(stringAddress);

// If the string is empty or no characters are requested, the result is the empty string:
// write a single terminator byte at the start of the buffer.
IF bytesOfString = 0 OR length = 0 THEN
	Tc2_System.MEMSET(
		destAddr := stringAddress,
		fillByte := NULL,
		n := SIZEOF(BYTE)
	);
	LeftOfUtf8String := 0;
	newStringLength := 0;
	RETURN;
END_IF

// Until the buffer is actually rewritten it still holds bytesOfString bytes. Presetting both
// results means every early exit below (allocation failure, nothing to cut) reports the real
// size of the untouched string instead of 0.
newStringLength := bytesOfString;
LeftOfUtf8String := bytesOfString;

// One slot per byte plus one: a string of n bytes cannot contain more than n code points.
codepoints := __NEW(UnicodeCodePoint, (bytesOfString + 1));
IF (codepoints = 0) THEN
	// allocation failed: the string is left unchanged
	RETURN;
END_IF

GetCodepointsFromUtf8String(
	utf8StringAddress := stringAddress,
	utf8StringByteCount := bytesOfString,
	codePointBuffer := codepoints,
	bufferSize := (bytesOfString + 1) * SIZEOF(UnicodeCodepoint),
	codePointsCount => codepointCountOfString
);

IF (normalizeString) THEN
	// The normalized sequence gets a buffer of four slots per source byte.
	normalizedCodepoints := __NEW(UnicodeCodePoint, (bytesOfString*4));
	IF (normalizedCodepoints = 0) THEN
		// allocation failed: release the first buffer, the string is left unchanged
		__DELETE(codepoints);
		RETURN;
	END_IF

	NormalizeCodepointsFormC(
		codePoints := codepoints,
		codePointsCount := codepointCountOfString,
		normalizedCodepoints := normalizedCodepoints,
		bufferSize := (bytesOfString*4) * SIZEOF(UnicodeCodePoint),
		normalizedCodepointsCount => normalizedCodepointCountOfString
	);

	// From here on work on the normalized code points only; the raw buffer is no longer needed.
	__DELETE(codepoints);
	codepoints := normalizedCodepoints;
	codepointCountOfString := normalizedCodepointCountOfString;
END_IF

// More characters requested than available: keep the string exactly as it is.
IF (length > codepointCountOfString) THEN
	__DELETE(codepoints);
	RETURN;
END_IF

// Re-encode the first `length` code points over the original string. The buffer size is
// limited to the bytes the original string occupied plus its terminator.
GetUtf8StringFromCodepoints(
	addressOfCodePoints := codepoints,
	codePointCount := length,
	utf8StringBuffer := stringAddress,
	bufferSize := bytesOfString + 1,
	utf8StringByteCount => newStringLength
);

LeftOfUtf8String := newStringLength;
__DELETE(codepoints);