Skip to main content

FindInUtf8String

Short summary

This function finds the character position of the beginning of the first occurrence of stringToFind in stringToCheck in UTF-8 strings. If no occurrence of stringToFind is found, the result is 0.

Attention: All strings are handled as null-terminated byte streams

Example:

myString :String(10) := 'Test';
toFind :String(10) := 'est';
result :UDINT;
------------------
FindInUtf8String(
stringToCheckAdress := ADR(myString),
stringToFindAdress := ADR(toFind),
result => result
); // result is 2 here
  • Return type: UDINT

Parameters

| Name | Type | Comment | Kind |
| --- | --- | --- | --- |
| stringToCheckAdress | POINTER TO BYTE | Address of the string to search in | input |
| stringToFindAdress | POINTER TO BYTE | Address of the substring to search for | input |
| normalizeStrings | BOOL | normalize both strings for check | input |
| result | UDINT | the result of the function call | output |

Code

Declaration

(*
Searches one UTF-8 string for the first occurrence of another.
On a match, ``result`` and the function's return value are both set to the
1-based index of the first matching code point. When ``normalizeStrings`` is
TRUE that index refers to the normalized code point sequence.
If nothing is found, or the function leaves early, ``result`` keeps its
initial value 0.
*)
FUNCTION FindInUtf8String : UDINT
VAR_INPUT
(* Address of the string to search in *)
stringToCheckAdress :POINTER TO BYTE;
(* Address of the substring to search for *)
stringToFindAdress :POINTER TO BYTE;
(* normalize both strings before comparing; defaults to TRUE *)
normalizeStrings :BOOL := TRUE;
END_VAR
VAR_OUTPUT
(* 1-based position of the first match; 0 if there is none *)
result :UDINT := 0;
END_VAR
VAR
(* byte count of ``stringToCheck`` *)
bytesOfStringToCheck :UDINT;
(* byte count of ``stringToFind`` *)
bytesOfStringToFind :UDINT;
(* holds codepoints of ``stringToCheck``; points to the normalized buffer after normalization *)
codepointsToCheck :POINTER TO UnicodeCodePoint;
(* holds codepoints of ``stringToFind``; points to the normalized buffer after normalization *)
codepointsToFind :POINTER TO UnicodeCodePoint;
(* codepoint count of ``stringToCheck`` *)
codepointCountOfStringToCheck :UDINT;
(* codepoint count of ``stringToFind`` *)
codepointCountOfStringToFind :UDINT;
(* contains ``stringToCheck`` in normalized form *)
normalizedCodePointsToCheck :POINTER TO UnicodeCodePoint;
(* contains ``stringToFind`` in normalized form *)
normalizedCodePointsToFind :POINTER TO UnicodeCodePoint;
(* count of ``stringToCheck`` in normalized form *)
countOfNormalizedCodePointsToCheck :UDINT;
(* count of ``stringToFind`` in normalized form *)
countOfNormalizedCodePointsToFind :UDINT;
(* loop var: 0-based start index of the window currently compared in ``codepointsToCheck`` *)
curCodePoint :UDINT := 0;
END_VAR

Implementation

(*
Implementation outline:
  1. Reject null pointers and empty strings (result stays 0).
  2. Decode both UTF-8 byte streams into dynamically allocated code point buffers.
  3. Optionally replace both buffers by their normalized form (NormalizeCodepointsFormD).
  4. Slide a window over the code points to check and compare it byte-wise with the
     code points to find; the first hit is reported as a 1-based index.
Every exit path after an allocation releases all buffers that are still owned here.
*)

// Nothing to search without valid addresses
RETURN( stringToCheckAdress = 0);
RETURN( stringToFindAdress = 0);

bytesOfStringToCheck := GetUtf8StringLength(stringToCheckAdress);
bytesOfStringToFind := GetUtf8StringLength(stringToFindAdress);
// An empty string on either side never yields a match
RETURN( bytesOfStringToCheck = 0);
RETURN( bytesOfStringToFind = 0);

// One slot per byte (+1) is reserved, so the buffer can hold one code point per input byte
codepointsToCheck := __NEW(UnicodeCodePoint, (bytesOfStringToCheck + 1));
IF (codepointsToCheck = 0) THEN
    RETURN;
END_IF
codepointsToFind := __NEW(UnicodeCodePoint, (bytesOfStringToFind + 1));
IF (codepointsToFind = 0) THEN
    __DELETE(codepointsToCheck);
    RETURN;
END_IF

GetCodepointsFromUtf8String(
    utf8StringAddress := stringToCheckAdress,
    utf8StringByteCount := bytesOfStringToCheck,
    codePointBuffer := codepointsToCheck,
    bufferSize := bytesOfStringToCheck * SIZEOF(UnicodeCodepoint),
    codePointsCount => codepointCountOfStringToCheck
);
GetCodepointsFromUtf8String(
    utf8StringAddress := stringToFindAdress,
    utf8StringByteCount := bytesOfStringToFind,
    codePointBuffer := codepointsToFind,
    bufferSize := bytesOfStringToFind * SIZEOF(UnicodeCodepoint),
    codePointsCount => codepointCountOfStringToFind
);

IF (codepointCountOfStringToCheck = 0 OR codepointCountOfStringToFind = 0) THEN // string was not utf8 encoded
    __DELETE(codepointsToCheck);
    __DELETE(codepointsToFind);
    RETURN;
END_IF

IF (normalizeStrings) THEN
    // The normalized buffers are sized at four code points per input byte
    normalizedCodePointsToCheck := __NEW(UnicodeCodePoint, (bytesOfStringToCheck*4));
    IF (normalizedCodePointsToCheck = 0) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        RETURN;
    END_IF
    normalizedCodePointsToFind := __NEW(UnicodeCodePoint, (bytesOfStringToFind*4));
    IF (normalizedCodePointsToFind = 0) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        __DELETE(normalizedCodePointsToCheck);
        RETURN;
    END_IF

    NormalizeCodepointsFormD(
        codePoints := codepointsToCheck,
        codePointsCount := codepointCountOfStringToCheck,
        normalizedCodepoints := normalizedCodePointsToCheck,
        bufferSize := (bytesOfStringToCheck*4) * SIZEOF(UnicodeCodePoint),
        normalizedCodepointsCount => countOfNormalizedCodePointsToCheck
    );
    // From here on the normalized buffer replaces the raw one
    __DELETE(codepointsToCheck);
    codepointsToCheck := normalizedCodePointsToCheck;
    codepointCountOfStringToCheck := countOfNormalizedCodePointsToCheck;

    NormalizeCodepointsFormD(
        codePoints := codepointsToFind,
        codePointsCount := codepointCountOfStringToFind,
        normalizedCodepoints := normalizedCodePointsToFind,
        bufferSize := (bytesOfStringToFind*4) * SIZEOF(UnicodeCodePoint),
        normalizedCodepointsCount => countOfNormalizedCodePointsToFind
    );
    __DELETE(codepointsToFind);
    codepointsToFind := normalizedCodePointsToFind;
    codepointCountOfStringToFind := countOfNormalizedCodePointsToFind;

    // Same guard as after decoding: if normalization reported no code points for either
    // string, stop here. Otherwise the comparison below would run with a length of 0
    // and would presumably report a match at position 1.
    // NOTE(review): assumes NormalizeCodepointsFormD signals failure with a count of 0 - confirm.
    IF ((codepointCountOfStringToCheck = 0) OR (codepointCountOfStringToFind = 0)) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        RETURN;
    END_IF
END_IF

// A longer needle cannot be contained; this also keeps the unsigned subtraction below from wrapping
IF( codepointCountOfStringToFind > codepointCountOfStringToCheck ) THEN
    __DELETE(codepointsToCheck);
    __DELETE(codepointsToFind);
    RETURN;
END_IF

// Slide the window over every start index that still leaves room for the whole needle
WHILE (curCodePoint <= (codepointCountOfStringToCheck - codepointCountOfStringToFind)) DO
    IF (
        Tc2_System.MEMCMP(
            ADR(codepointsToCheck[curCodePoint]),
            codepointsToFind,
            codepointCountOfStringToFind * SIZEOF(UnicodeCodePoint)
        ) = 0
    ) THEN
        __DELETE(codepointsToCheck);
        __DELETE(codepointsToFind);
        // Positions are reported 1-based; 0 is reserved for "not found"
        result := curCodePoint + 1;
        FindInUtf8String := curCodePoint + 1;
        RETURN;
    END_IF;

    curCodePoint := curCodePoint + 1;
END_WHILE

// No match: release the buffers, result keeps its initial value 0
__DELETE(codepointsToCheck);
__DELETE(codepointsToFind);