NormalizeUtf8String

Short summary

This function transforms a given UTF-8 string into the selected normalization form and writes the normalized string into the provided buffer. To learn more about the Unicode normalization forms, see Unicode Standard Annex #15 (Unicode Normalization Forms).

Attention: the provided normalizedStringBuffer must have an appropriate size, otherwise the normalized string will be cut!

Example:

// Declaration part: the source string, a larger buffer that receives the
// normalized text, and a variable for the reported length.
// NOTE(review): the literal is assumed to be stored UTF-8 encoded - confirm the
// project's string encoding settings.
myString :String(10) := 'Täst';
myNormalizedString :String(30);
normalizedLength :UDINT;
------------------
// Implementation part: normalize myString to form NFD into myNormalizedString.
NormalizeUtf8String(
	utf8StringBuffer := ADR(myString),
	normalizedStringBuffer := ADR(myNormalizedString),
	bufferSize := SIZEOF(myNormalizedString),
	normalForm := NormalizationForm.NFD, 
	normalizedStringLength => normalizedLength
);

Return type: UDINT

Parameters

Name	Type	Comment	Kind
utf8StringBuffer	PVOID	address of the UTF-8 encoded string or byte array (must also be null-terminated!)	input
normalizedStringBuffer	PVOID	address of the buffer in which the normalized string is stored. e.g. ADR(myNormalizedString)	input
bufferSize	UDINT	size of the normalized string buffer, e.g. SIZEOF(myNormalizedString)	input
normalForm	NormalizationForm	normalized form	input
normalizedStringLength	UDINT	length of the normalized string. If the given bufferSize is smaller, the normalized string will be cut!	output

Code

`Declaration`

(* Normalizes a null-terminated UTF-8 string into the selected Unicode
   normalization form and stores the result in a caller-provided buffer.
   The implementation decodes the input into code points, normalizes them
   (NFD or NFC) and encodes the result back to UTF-8. *)
FUNCTION NormalizeUtf8String 
VAR_INPUT
	(* address of the UTF-8 encoded string or byte array (must also be null-terminated!).
       If this address is 0 the function returns immediately. *)
	utf8StringBuffer :PVOID;
	(* address of the buffer in which the normalized string is stored,
       e.g. ADR(myNormalizedString). If this address is 0 the function returns immediately. *)
	normalizedStringBuffer :PVOID;
	(* size of the normalized string buffer, e.g. SIZEOF(myNormalizedString) *)
	bufferSize :UDINT;
	(* normalization form to apply; defaults to NFC. The implementation handles
       NFD and NFC, any other value skips the normalization call. *)
	normalForm :NormalizationForm := NormalizationForm.NFC;
END_VAR
VAR_OUTPUT
	(* length of the normalized string as reported by GetUtf8StringFromCodepoints
       (its utf8StringByteCount output). If the given bufferSize is smaller, the
       normalized string will be cut! Not assigned when the function returns early. *)
	normalizedStringLength :UDINT;
END_VAR
VAR
	(* stringLength: byte count of the input string;
       codePointCount: number of code points decoded from the input;
       normalizedCodePointCount: number of code points after normalization *)
	stringLength, codePointCount, normalizedCodePointCount :UDINT;
	(* dynamically allocated buffer holding the decoded input code points *)
	codePoints :POINTER TO UnicodeCodePoint;
	(* dynamically allocated buffer holding the normalized code points *)
	normalizedCodepoints :POINTER TO UnicodeCodePoint;
END_VAR

`Implementation`

// Both buffer addresses are required. Nothing has been allocated at this point,
// so leaving right away needs no clean-up.
RETURN((utf8StringBuffer = 0) OR_ELSE (normalizedStringBuffer = 0));

// Determine the byte length of the input string.
GetUtf8StringLength(utf8StringBuffer, byteCount => stringLength);
RETURN(stringLength = 0); // string is empty

// Reserve one code point slot per input byte for the decoded string.
codePoints := __NEW(UnicodeCodepoint, stringLength);
RETURN(codePoints = 0); // no memory obtained

// Step 1: decode the UTF-8 bytes into code points.
GetCodepointsFromUtf8String(
	utf8StringAddress := utf8StringBuffer,
	utf8StringByteCount := stringLength,
	codePointBuffer := codePoints,
	bufferSize := stringLength * SIZEOF(UnicodeCodepoint),
	codePointsCount => codePointCount
);

// string was not utf8 encoded: release the decode buffer and give up
IF (codePointCount = 0) THEN
	__DELETE(codePoints);
	RETURN;
END_IF

// Reserve four code point slots per input byte for the normalized result.
normalizedCodepoints := __NEW(UnicodeCodepoint, (stringLength*4));

// No memory for the result buffer: the decode buffer must still be released.
IF (normalizedCodepoints = 0) THEN
	__DELETE(codePoints);
	RETURN;
END_IF

// Step 2: run the normalization that matches the requested form.
// Any other form leaves normalizedCodePointCount untouched.
IF (normalForm = NormalizationForm.NFD) THEN
	NormalizeCodepointsFormD(
		codePoints := codePoints,
		codePointsCount := codePointCount,
		normalizedCodepoints := normalizedCodepoints,
		bufferSize := SIZEOF(UnicodeCodepoint)*stringLength*4,
		normalizedCodepointsCount => normalizedCodePointCount
	);
ELSIF (normalForm = NormalizationForm.NFC) THEN
	NormalizeCodepointsFormC(
		codePoints := codePoints,
		codePointsCount := codePointCount,
		normalizedCodepoints := normalizedCodepoints,
		bufferSize := SIZEOF(UnicodeCodepoint)*stringLength*4,
		normalizedCodepointsCount => normalizedCodePointCount
	);
END_IF

// Step 3: encode the normalized code points back to UTF-8 in the caller's buffer.
GetUtf8StringFromCodepoints(
	addressOfCodePoints := normalizedCodepoints,
	codePointCount := normalizedCodePointCount,
	utf8StringBuffer := normalizedStringBuffer,
	bufferSize := bufferSize,
	utf8StringByteCount => normalizedStringLength
);

// Release both temporary code point buffers.
__DELETE(normalizedCodepoints);
__DELETE(codePoints);

NormalizeUtf8String

Short summary​

Parameters​

Code​

Declaration​

Implementation​

Short summary

Parameters

Code

`Declaration`

`Implementation`