Skip to main content

NormalizeUtf16String

Short summary

This function transforms a given UTF-16 string into the selected normalization form and writes the normalized WSTRING into the provided buffer. To learn more about the normalization forms of Unicode, check this.

Attention: the provided normalizedStringBuffer must have an appropriate size, otherwise the normalized string will be cut!

Example:


// Source string; "ä" is a precomposed character that NFD splits into "a" + U+0308.
myString :WString(10) := "Täst";
// Destination buffer; sized larger than the source so the normalized text is not cut.
myNormalizedString :WString(30);
// Receives the length of the normalized string (without the NULL terminator).
normalizedLength :UDINT;
------------------
// bufferSize is given in bytes, hence SIZEOF of the destination variable.
NormalizeUtf16String(
utf16StringBuffer := ADR(myString),
normalizedStringBuffer := ADR(myNormalizedString),
bufferSize := SIZEOF(myNormalizedString),
normalForm := NormalizationForm.NFD,
normalizedStringLength => normalizedLength
);
  • Return type: UDINT

Parameters

| Name | Type | Comment | Kind |
| --- | --- | --- | --- |
| utf16StringBuffer | PVOID | address of the UTF-16 encoded string or word array (must also be null terminated!) | input |
| normalizedStringBuffer | PVOID | address of the buffer in which the normalized string is stored, e.g. ADR(myNormalizedString) | input |
| bufferSize | UDINT | size of the normalized string buffer in bytes, e.g. SIZEOF(myNormalizedWString) | input |
| normalForm | NormalizationForm | normalization form | input |
| normalizedStringLength | UDINT | length of the normalized string without NULL terminator. If the given bufferSize is smaller, the normalized string will be cut! | output |

Code

Declaration

(* Normalizes the UTF-16 string at utf16StringBuffer into the form selected by normalForm
and stores the result as UTF-16 in normalizedStringBuffer. *)
FUNCTION NormalizeUtf16String
VAR_INPUT
(* address of the UTF-16 encoded string or word array (must also be null terminated!) *)
utf16StringBuffer :PVOID;
(* address of the buffer in which the normalized string is stored,
e.g. ADR(myNormalizedString) *)
normalizedStringBuffer :PVOID;
(* size of the normalized string buffer in bytes, e.g. SIZEOF(myNormalizedWString) *)
bufferSize :UDINT;
(* normalization form to produce; defaults to NFC when the caller does not assign it *)
normalForm :NormalizationForm := NormalizationForm.NFC;
END_VAR
VAR_OUTPUT
(* length of the normalized string without NULL terminator. If the given bufferSize is smaller, the normalized string will be cut! *)
normalizedStringLength :UDINT := 0;
END_VAR
VAR
(* stringLength: value returned by GetUtf16StringLength for the input, used as its UTF-16 word count;
codePointCount: number of code points decoded from the input;
normalizedCodePointCount: number of code points produced by the normalizer *)
stringLength, codePointCount, normalizedCodePointCount :UDINT;
(* dynamically allocated buffer holding the decoded input code points *)
codePoints :POINTER TO UnicodeCodePoint;
(* dynamically allocated buffer holding the normalized code points *)
normalizedCodepoints :POINTER TO UnicodeCodePoint;
END_VAR

Implementation

// Decodes the input into code points, normalizes them according to normalForm and
// encodes the result as UTF-16 into normalizedStringBuffer.
// normalizedStringLength keeps its initial value 0 on every early exit below.

// Validate both addresses BEFORE the source buffer is handed to GetUtf16StringLength,
// so a null address is never passed on to the helper.
RETURN((utf16StringBuffer = 0) OR_ELSE (normalizedStringBuffer = 0));

stringLength := GetUtf16StringLength(utf16StringBuffer);
RETURN(stringLength = 0); // empty input: nothing to normalize

// One code point slot per UTF-16 word is always enough, because every code point
// occupies at least one word.
codePoints := __NEW(UnicodeCodepoint, stringLength);
RETURN(codePoints = 0); // allocation failed

GetCodepointsFromUtf16String(
utf16StringAddress := utf16StringBuffer,
utf16StringWordCount := stringLength,
codepointBuffer := codePoints,
bufferSize := stringLength*SIZEOF(UnicodeCodepoint),
codePointCount => codePointCount
);

IF (codePointCount = 0) THEN // string was not utf16
__DELETE(codePoints);
RETURN;
END_IF

// Reserve four output slots per input code point so decomposition has room to expand.
// NOTE(review): 4x matches the documented worst case for NFD/NFC; the compatibility
// forms (NFKD/NFKC) can expand further and would need a larger factor - confirm before adding them.
normalizedCodepoints := __NEW(UnicodeCodepoint, (codePointCount*4));
IF (normalizedCodepoints = 0) THEN
// release the first buffer so a failed second allocation does not leak it
__DELETE(codePoints);
RETURN;
END_IF


CASE normalForm OF
NormalizationForm.NFD:
NormalizeCodepointsFormD(
codePoints := codePoints,
codePointsCount := codePointCount,
normalizedCodepoints := normalizedCodepoints,
bufferSize := codePointCount*SIZEOF(UnicodeCodepoint)*4,
normalizedCodepointsCount => normalizedCodePointCount
);
NormalizationForm.NFC:
NormalizeCodepointsFormC(
codePoints := codePoints,
codePointsCount := codePointCount,
normalizedCodepoints := normalizedCodepoints,
bufferSize := codePointCount*SIZEOF(UnicodeCodepoint)*4,
normalizedCodepointsCount => normalizedCodePointCount
);
ELSE
// No normalizer is called for any other form: state explicitly that zero
// normalized code points are passed on to the encoder below.
normalizedCodePointCount := 0;
END_CASE

// Encode the normalized code points back to UTF-16 in the caller's buffer; the
// caller-supplied bufferSize (in bytes) limits how much is written.
GetUtf16StringFromCodepoints(
addressOfCodePoints := normalizedCodepoints,
codePointCount := normalizedCodePointCount,
stringBuffer := normalizedStringBuffer,
bufferSize := bufferSize,
utf16StringWordCount => normalizedStringLength
);

// Both temporary buffers were allocated with __NEW and must be released here.
__DELETE(codePoints);
__DELETE(normalizedCodepoints);