NormalizeUtf8String
Short summary
This function transforms a given UTF-8 string into the selected Unicode normalization form and writes the normalized string into the provided buffer. To learn more about Unicode normalization forms, see Unicode Standard Annex #15 ("Unicode Normalization Forms").
Attention: the provided normalizedStringBuffer must have an appropriate size, otherwise the normalized string will be cut!
Example:
myString :String(10) := 'Täst';
myNormalizedString :String(30);
normalizedLength :UDINT;
------------------
NormalizeUtf8String(
utf8StringBuffer := ADR(myString),
normalizedStringBuffer := ADR(myNormalizedString),
bufferSize := SIZEOF(myNormalizedString),
normalForm := NormalizationForm.NFD,
normalizedStringLength => normalizedLength
);
- Return type: UDINT
Parameters
| Name | Type | Comment | Kind |
|---|---|---|---|
| utf8StringBuffer | PVOID | address of the UTF-8 encoded string or byte array (must also be null-terminated!) | input |
| normalizedStringBuffer | PVOID | address of the buffer in which the normalized string is stored. e.g. ADR(myNormalizedString) | input |
| bufferSize | UDINT | size of the normalized string buffer, e.g. SIZEOF(myNormalizedString) | input |
| normalForm | NormalizationForm | normalization form to apply (defaults to NormalizationForm.NFC) | input |
| normalizedStringLength | UDINT | length of the normalized string. If the given bufferSize is too small to hold the normalized string, the normalized string will be cut! | output |
Code
Declaration
(* Converts a null-terminated UTF-8 string into the selected normalization form and
   stores the result in a caller-provided buffer. *)
FUNCTION NormalizeUtf8String
VAR_INPUT
(* address of the UTF-8 encoded string or byte array (must also be null-terminated!);
   the function returns immediately if this address is 0 *)
utf8StringBuffer :PVOID;
(* address of the buffer in which the normalized string is stored,
   e.g. ADR(myNormalizedString); the function returns immediately if this address is 0 *)
normalizedStringBuffer :PVOID;
(* size of the normalized string buffer, e.g. SIZEOF(myNormalizedString) *)
bufferSize :UDINT;
(* normalization form to apply; defaults to NFC. The implementation handles NFD and NFC explicitly. *)
normalForm :NormalizationForm := NormalizationForm.NFC;
END_VAR
VAR_OUTPUT
(* length of the normalized string. If the given bufferSize is too small to hold the
   normalized string, the normalized string will be cut! *)
normalizedStringLength :UDINT;
END_VAR
VAR
(* stringLength: byte count of the input string as reported by GetUtf8StringLength.
   codePointCount: number of code points decoded from the input string.
   normalizedCodePointCount: number of code points produced by the normalization step. *)
stringLength, codePointCount, normalizedCodePointCount :UDINT;
(* dynamically allocated buffer (stringLength elements) receiving the decoded input code points *)
codePoints :POINTER TO UnicodeCodePoint;
(* dynamically allocated buffer (stringLength * 4 elements) receiving the normalized code points *)
normalizedCodepoints :POINTER TO UnicodeCodePoint;
END_VAR
Implementation
// Guard: both the source address and the destination address must be set.
IF (utf8StringBuffer = 0) OR_ELSE (normalizedStringBuffer = 0) THEN
    RETURN;
END_IF

// Determine the byte length of the source string; an empty string needs no work.
GetUtf8StringLength(utf8StringBuffer, byteCount => stringLength);
IF stringLength = 0 THEN
    RETURN;
END_IF

// Scratch buffer for the decoded code points, sized with one element per source byte.
codePoints := __NEW(UnicodeCodepoint, stringLength);
IF codePoints = 0 THEN
    RETURN; // allocation failed
END_IF

// Decode the UTF-8 bytes into code points.
GetCodepointsFromUtf8String(
    utf8StringAddress   := utf8StringBuffer,
    utf8StringByteCount := stringLength,
    codePointBuffer     := codePoints,
    bufferSize          := stringLength * SIZEOF(UnicodeCodepoint),
    codePointsCount     => codePointCount
);

// No code points decoded: the input was not UTF-8 encoded. Release the scratch buffer and stop.
IF codePointCount = 0 THEN
    __DELETE(codePoints);
    RETURN;
END_IF

// Second scratch buffer for the normalized code points (four elements per source byte).
normalizedCodepoints := __NEW(UnicodeCodepoint, (stringLength*4));
IF normalizedCodepoints = 0 THEN
    // allocation failed: do not leak the first buffer
    __DELETE(codePoints);
    RETURN;
END_IF

// Run the normalization that matches the requested form; any other form is left unprocessed.
IF normalForm = NormalizationForm.NFD THEN
    NormalizeCodepointsFormD(
        codePoints                := codePoints,
        codePointsCount           := codePointCount,
        normalizedCodepoints      := normalizedCodepoints,
        bufferSize                := SIZEOF(UnicodeCodepoint)*stringLength*4,
        normalizedCodepointsCount => normalizedCodePointCount
    );
ELSIF normalForm = NormalizationForm.NFC THEN
    NormalizeCodepointsFormC(
        codePoints                := codePoints,
        codePointsCount           := codePointCount,
        normalizedCodepoints      := normalizedCodepoints,
        bufferSize                := SIZEOF(UnicodeCodepoint)*stringLength*4,
        normalizedCodepointsCount => normalizedCodePointCount
    );
END_IF

// Encode the normalized code points back to UTF-8 in the caller's buffer, limited by bufferSize.
GetUtf8StringFromCodepoints(
    addressOfCodePoints := normalizedCodepoints,
    codePointCount      := normalizedCodePointCount,
    utf8StringBuffer    := normalizedStringBuffer,
    bufferSize          := bufferSize,
    utf8StringByteCount => normalizedStringLength
);

// Release both scratch buffers.
__DELETE(codePoints);
__DELETE(normalizedCodepoints);