NormalizeUtf16String
Short summary
This function transforms a given UTF-16 string into the selected normalization form and stores the normalized WSTRING in the provided buffer. To learn more about Unicode normalization forms, see Unicode Standard Annex #15 (Unicode Normalization Forms).
Attention: the provided normalizedStringBuffer must have an appropriate size, otherwise the normalized string will be cut!
Example:
// Declarations: the source string, a larger destination buffer (the result is
// cut if the destination is too small) and a variable for the reported length.
myString :WString(10) := "Täst";
myNormalizedString :WString(30);
normalizedLength :UDINT;
------------------
// Call: both strings are passed by address; bufferSize is the size of the
// destination in bytes. normalizedLength receives the length of the normalized
// string without the NULL terminator.
NormalizeUtf16String(
utf16StringBuffer := ADR(myString),
normalizedStringBuffer := ADR(myNormalizedString),
bufferSize := SIZEOF(myNormalizedString),
normalForm := NormalizationForm.NFD,
normalizedStringLength => normalizedLength
);
- Return type: UDINT
Parameters
| Name | Type | Comment | Kind |
|---|---|---|---|
| utf16StringBuffer | PVOID | address of the UTF-16 encoded string or word array (must be null terminated!) | input |
| normalizedStringBuffer | PVOID | address of the buffer in which the normalized string is stored. e.g. ADR(myNormalizedString) | input |
| bufferSize | UDINT | size of the normalized string buffer in bytes, e.g. SIZEOF(myNormalizedString) | input |
| normalForm | NormalizationForm | normalization form to apply (defaults to NormalizationForm.NFC) | input |
| normalizedStringLength | UDINT | length of the normalized string without NULL terminator. If the given bufferSize is smaller, the normalized string will be cut! | output |
Code
Declaration
FUNCTION NormalizeUtf16String
VAR_INPUT
    (* Address of the UTF-16 encoded source: a string or word array.
       The source must be null terminated. *)
    utf16StringBuffer : PVOID;
    (* Address of the destination buffer that receives the normalized string,
       e.g. ADR(myNormalizedString). *)
    normalizedStringBuffer : PVOID;
    (* Size of the destination buffer in bytes, e.g. SIZEOF(myNormalizedString). *)
    bufferSize : UDINT;
    (* Normalization form to apply; NFC when the caller does not specify one. *)
    normalForm : NormalizationForm := NormalizationForm.NFC;
END_VAR
VAR_OUTPUT
    (* Length of the normalized string without the NULL terminator.
       If bufferSize is too small, the normalized string is cut. *)
    normalizedStringLength : UDINT := 0;
END_VAR
VAR
    (* Value returned by GetUtf16StringLength for the source. *)
    stringLength : UDINT;
    (* Number of code points decoded from the source. *)
    codePointCount : UDINT;
    (* Number of code points produced by the normalization step. *)
    normalizedCodePointCount : UDINT;
    (* Dynamically allocated buffer holding the decoded source code points. *)
    codePoints : POINTER TO UnicodeCodePoint;
    (* Dynamically allocated buffer holding the normalized code points. *)
    normalizedCodepoints : POINTER TO UnicodeCodePoint;
END_VAR
Implementation
// Normalizes the null-terminated UTF-16 text at utf16StringBuffer and writes the
// result to normalizedStringBuffer (at most bufferSize bytes).
// Steps: decode UTF-16 -> code points, normalize (NFD or NFC), encode -> UTF-16.
// On every early exit normalizedStringLength keeps its initial value 0 and the
// destination buffer is left untouched.

// Check both addresses BEFORE the source is handed to GetUtf16StringLength, so a
// null source address is never passed on to the helper.
RETURN((utf16StringBuffer = 0) OR_ELSE (normalizedStringBuffer = 0));

stringLength := GetUtf16StringLength(utf16StringBuffer);
RETURN(stringLength = 0); // empty source: nothing to normalize

// Decode buffer: one element per unit of stringLength (which is also passed
// below as the UTF-16 word count of the source).
codePoints := __NEW(UnicodeCodepoint, stringLength);
RETURN(codePoints = 0); // allocation failed

GetCodepointsFromUtf16String(
    utf16StringAddress := utf16StringBuffer,
    utf16StringWordCount := stringLength,
    codepointBuffer := codePoints,
    bufferSize := stringLength*SIZEOF(UnicodeCodepoint),
    codePointCount => codePointCount
);

IF (codePointCount = 0) THEN // string was not utf16
    __DELETE(codePoints);
    RETURN;
END_IF

// Buffer for the normalized code points: four elements per decoded code point.
// NOTE(review): the factor 4 is presumably the assumed worst-case growth of a
// code point during normalization - confirm against the normalization helpers.
normalizedCodepoints := __NEW(UnicodeCodepoint, (codePointCount*4));
IF (normalizedCodepoints = 0) THEN
    // allocation failed: release the decode buffer before leaving
    __DELETE(codePoints);
    RETURN;
END_IF

CASE normalForm OF
    NormalizationForm.NFD:
        NormalizeCodepointsFormD(
            codePoints := codePoints,
            codePointsCount := codePointCount,
            normalizedCodepoints := normalizedCodepoints,
            bufferSize := codePointCount*SIZEOF(UnicodeCodepoint)*4,
            normalizedCodepointsCount => normalizedCodePointCount
        );

    NormalizationForm.NFC:
        NormalizeCodepointsFormC(
            codePoints := codePoints,
            codepointsCount := codePointCount,
            normalizedCodepoints := normalizedCodepoints,
            bufferSize := codePointCount*SIZEOF(UnicodeCodepoint)*4,
            normalizedCodepointsCount => normalizedCodePointCount
        );
ELSE
    // Any other form: no normalization is performed, so normalizedCodePointCount
    // is not written here and the encode step below runs with whatever value it
    // holds (presumably its initial 0 - TODO confirm).
    ;
END_CASE

// Encode the normalized code points back to UTF-16 in the caller's buffer; the
// resulting word count is reported through normalizedStringLength.
GetUtf16StringFromCodepoints(
    addressOfCodePoints := normalizedCodepoints,
    codePointCount := normalizedCodePointCount,
    stringBuffer := normalizedStringBuffer,
    bufferSize := bufferSize,
    utf16StringWordCount => normalizedStringLength
);

// Release both temporary buffers.
__DELETE(codePoints);
__DELETE(normalizedCodepoints);