FindInUtf8String
Short summary
This function finds the 1-based character position of the beginning of the first occurrence of stringToFind in stringToCheck in UTF-8 strings. If no occurrence of stringToFind is found, then the result is 0.
Attention: All strings are handled as null-terminated byte streams
Example:
myString :String(10) := 'Test';
toFind :String(10) := 'est';
result :UDINT;
------------------
FindInUtf8String(
stringToCheckAdress := ADR(myString),
stringToFindAdress := ADR(toFind),
result => result
); // result is 2 here
- Return type:
UDINT
Parameters
| Name | Type | Comment | Kind |
|---|---|---|---|
| stringToCheckAdress | POINTER TO BYTE | Address of the string to search in | input |
| stringToFindAdress | POINTER TO BYTE | Address of the substring to search for | input |
| normalizeStrings | BOOL | normalize both strings for check | input |
| result | UDINT | the result of the function call | output |
Code
Declaration
(* Searches the UTF-8 string at ``stringToCheckAdress`` for the first occurrence of the
   UTF-8 string at ``stringToFindAdress``.
   The comparison is done on code points; the 1-based index of the first matching code
   point is written to both the return value and ``result``.
   Both stay 0 when nothing is found, when either pointer is 0, when either string has
   length 0, when an allocation fails, or when decoding yields no code points. *)
FUNCTION FindInUtf8String : UDINT
VAR_INPUT
(* Address of the string to search in *)
stringToCheckAdress :POINTER TO BYTE;
(* Address of the substring to search for *)
stringToFindAdress :POINTER TO BYTE;
(* normalize both strings (via NormalizeCodepointsFormD) before comparing; defaults to TRUE *)
normalizeStrings :BOOL := TRUE;
END_VAR
VAR_OUTPUT
(* 1-based code point position of the first match, 0 if there is none; mirrors the return value *)
result :UDINT := 0;
END_VAR
VAR
(* byte count of ``stringToCheck`` as reported by GetUtf8StringLength *)
bytesOfStringToCheck :UDINT;
(* byte count of ``stringToFind`` as reported by GetUtf8StringLength *)
bytesOfStringToFind :UDINT;
(* holds codepoints of ``stringToCheck``; points at the normalized buffer once normalization ran *)
codepointsToCheck :POINTER TO UnicodeCodePoint;
(* holds codepoints of ``stringToFind``; points at the normalized buffer once normalization ran *)
codepointsToFind :POINTER TO UnicodeCodePoint;
(* codepoint count of ``stringToCheck``; replaced by the normalized count once normalization ran *)
codepointCountOfStringToCheck :UDINT;
(* codepoint count of ``stringToFind``; replaced by the normalized count once normalization ran *)
codepointCountOfStringToFind :UDINT;
(* buffer receiving ``stringToCheck`` in normalized form *)
normalizedCodePointsToCheck :POINTER TO UnicodeCodePoint;
(* buffer receiving ``stringToFind`` in normalized form *)
normalizedCodePointsToFind :POINTER TO UnicodeCodePoint;
(* codepoint count of ``stringToCheck`` in normalized form *)
countOfNormalizedCodePointsToCheck :UDINT;
(* codepoint count of ``stringToFind`` in normalized form *)
countOfNormalizedCodePointsToFind :UDINT;
(* loop var: 0-based start index of the window currently compared *)
curCodePoint :UDINT := 0;
END_VAR
Implementation
// Implementation of FindInUtf8String.
// Decodes both UTF-8 strings into code point arrays, optionally normalizes them, and
// then looks for the first window of the haystack that is byte-identical to the needle.
// The 1-based index of that window is written to the return value and to ``result``;
// every failure path leaves both at their initial value of 0.

// Nothing to search when a pointer is null.
RETURN( stringToCheckAdress = 0);
RETURN( stringToFindAdress = 0);

bytesOfStringToCheck := GetUtf8StringLength(stringToCheckAdress);
bytesOfStringToFind := GetUtf8StringLength(stringToFindAdress);

// Nothing to search when either string is empty.
RETURN( bytesOfStringToCheck = 0);
RETURN( bytesOfStringToFind = 0);

// A string never has more code points than bytes, so one element per byte (+1) suffices.
codepointsToCheck := __NEW(UnicodeCodePoint, (bytesOfStringToCheck + 1));
IF (codepointsToCheck = 0) THEN
    RETURN;
END_IF

codepointsToFind := __NEW(UnicodeCodePoint, (bytesOfStringToFind + 1));
IF (codepointsToFind = 0) THEN
    __DELETE(codepointsToCheck);
    RETURN;
END_IF

GetCodepointsFromUtf8String(
    utf8StringAddress := stringToCheckAdress,
    utf8StringByteCount := bytesOfStringToCheck,
    codePointBuffer := codepointsToCheck,
    bufferSize := bytesOfStringToCheck * SIZEOF(UnicodeCodepoint),
    codePointsCount => codepointCountOfStringToCheck
);
GetCodepointsFromUtf8String(
    utf8StringAddress := stringToFindAdress,
    utf8StringByteCount := bytesOfStringToFind,
    codePointBuffer := codepointsToFind,
    bufferSize := bytesOfStringToFind * SIZEOF(UnicodeCodepoint),
    codePointsCount => codepointCountOfStringToFind
);

IF (codepointCountOfStringToCheck = 0 OR codepointCountOfStringToFind = 0) THEN // string was not utf8 encoded
    __DELETE(codepointsToCheck);
    __DELETE(codepointsToFind);
    RETURN;
END_IF

IF (normalizeStrings) THEN
    // The normalized buffers are sized at 4 elements per input byte
    // (presumably to leave room for expansion during decomposition - TODO confirm).
    normalizedCodePointsToCheck := __NEW(UnicodeCodePoint, (bytesOfStringToCheck*4));
    IF (normalizedCodePointsToCheck = 0) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        RETURN;
    END_IF

    normalizedCodePointsToFind := __NEW(UnicodeCodePoint, (bytesOfStringToFind*4));
    IF (normalizedCodePointsToFind = 0) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        __DELETE(normalizedCodePointsToCheck);
        RETURN;
    END_IF

    NormalizeCodepointsFormD(
        codePoints := codepointsToCheck,
        codePointsCount := codepointCountOfStringToCheck,
        normalizedCodepoints := normalizedCodePointsToCheck,
        bufferSize := (bytesOfStringToCheck*4) * SIZEOF(UnicodeCodePoint),
        normalizedCodepointsCount => countOfNormalizedCodePointsToCheck
    );
    // From here on the haystack variables refer to the normalized data,
    // so the raw buffer can be released right away.
    __DELETE(codepointsToCheck);
    codepointsToCheck := normalizedCodePointsToCheck;
    codepointCountOfStringToCheck := countOfNormalizedCodePointsToCheck;

    NormalizeCodepointsFormD(
        codePoints := codepointsToFind,
        codePointsCount := codepointCountOfStringToFind,
        normalizedCodepoints := normalizedCodePointsToFind,
        bufferSize := (bytesOfStringToFind*4) * SIZEOF(UnicodeCodePoint),
        normalizedCodepointsCount => countOfNormalizedCodePointsToFind
    );
    // Same hand-over for the needle.
    __DELETE(codepointsToFind);
    codepointsToFind := normalizedCodePointsToFind;
    codepointCountOfStringToFind := countOfNormalizedCodePointsToFind;

    // Guard against normalization reporting no code points: a needle of length 0 would
    // make MEMCMP compare 0 bytes, which always "matches" and would report position 1.
    IF (codepointCountOfStringToCheck = 0 OR codepointCountOfStringToFind = 0) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        RETURN;
    END_IF
END_IF

// A needle longer than the haystack cannot match. This check also keeps the unsigned
// subtraction in the loop condition below from wrapping around.
IF( codepointCountOfStringToFind > codepointCountOfStringToCheck ) THEN
    __DELETE(codepointsToCheck);
    __DELETE(codepointsToFind);
    RETURN;
END_IF

// Slide a window of needle length over the haystack and compare the raw memory.
// NOTE(review): with normalizeStrings = TRUE the reported index refers to the normalized
// code point sequence, which may differ from the position in the original string when
// normalization changes the number of code points before the match - confirm intended.
WHILE (curCodePoint <= (codepointCountOfStringToCheck - codepointCountOfStringToFind)) DO
    IF (
        Tc2_System.MEMCMP(
            ADR(codepointsToCheck[curCodePoint]),
            codepointsToFind,
            codepointCountOfStringToFind * SIZEOF(UnicodeCodePoint)
        ) = 0
    ) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        // Positions are reported 1-based; 0 is reserved for "not found".
        result := curCodePoint + 1;
        FindInUtf8String := curCodePoint + 1;
        RETURN;
    END_IF;
    curCodePoint := curCodePoint + 1;
END_WHILE

// No match: release the buffers and leave result / return value at 0.
__DELETE(codepointsToCheck);
__DELETE(codepointsToFind);