FindInUtf8String

Short summary

This function finds the character position of the beginning of the first occurrence of stringToFind in stringToCheck in UTF-8 strings. If no occurrence of stringToFind is found, then the result is 0.

Attention: All strings are handled as null-terminated byte streams

Example:

myString :String(10) := 'Test';
toFind 	 :String(10) := 'est';
result	 :UDINT;
------------------
FindInUtf8String(
	stringToCheckAdress := ADR(myString),
	stringToFindAdress  := ADR(toFind),
	result 				=> result
);	// result is 2 here

Return type: UDINT

Parameters

Name	Type	Comment	Kind
stringToCheckAdress	POINTER TO BYTE	Address of the string to search in	input
stringToFindAdress	POINTER TO BYTE	Address of the substring to search for	input
normalizeStrings	BOOL	normalize both strings for check	input
result	UDINT	the result of the function call	output

Code

`Declaration`

(*
	Searches the UTF-8 string at ``stringToCheckAdress`` for the first occurrence of
	the UTF-8 string at ``stringToFindAdress``.

	Both strings are decoded into code points and, when ``normalizeStrings`` is TRUE,
	passed through ``NormalizeCodepointsFormD`` before they are compared.

	On a match, the function value and ``result`` are both set to the 1-based index of
	the match within the (optionally normalized) code point sequence of the searched
	string. On every other path neither is assigned, so ``result`` keeps its declared
	initial value 0 (the function value is presumably 0 as well - confirm for the
	target runtime).
*)
FUNCTION FindInUtf8String : UDINT
VAR_INPUT
	(* Address of the string to search in; a null address makes the function return immediately *)
	stringToCheckAdress					:POINTER TO BYTE;
	(* Address of the substring to search for; a null address makes the function return immediately *)
	stringToFindAdress					:POINTER TO BYTE;
	(* when TRUE (default), both strings are run through ``NormalizeCodepointsFormD`` before the comparison *)
	normalizeStrings 					:BOOL := TRUE;
END_VAR
VAR_OUTPUT
	(* 1-based code point position of the first match; stays 0 when nothing was found *)
	result 								:UDINT := 0;
END_VAR
VAR
	(* byte count of ``stringToCheck`` as reported by ``GetUtf8StringLength`` *)
	bytesOfStringToCheck				:UDINT;
	(* byte count of ``stringToFind`` as reported by ``GetUtf8StringLength`` *)
	bytesOfStringToFind 				:UDINT;
	(* dynamically allocated buffer holding the codepoints of ``stringToCheck``;
	   points to the normalized buffer once normalization has run *)
	codepointsToCheck					:POINTER TO UnicodeCodePoint;
	(* dynamically allocated buffer holding the codepoints of ``stringToFind``;
	   points to the normalized buffer once normalization has run *)
	codepointsToFind					:POINTER TO UnicodeCodePoint;
	(* codepoint count of ``stringToCheck``; replaced by the normalized count when normalizing *)
	codepointCountOfStringToCheck		:UDINT;
	(* codepoint count of ``stringToFind``; replaced by the normalized count when normalizing *)
	codepointCountOfStringToFind		:UDINT;
	(* dynamically allocated buffer that receives ``stringToCheck`` in normalized form *)
	normalizedCodePointsToCheck 		:POINTER TO UnicodeCodePoint;
	(* dynamically allocated buffer that receives ``stringToFind`` in normalized form *)
	normalizedCodePointsToFind 			:POINTER TO UnicodeCodePoint;
	(* codepoint count of ``stringToCheck`` in normalized form *)
	countOfNormalizedCodePointsToCheck 	:UDINT;
	(* codepoint count of ``stringToFind`` in normalized form *)
	countOfNormalizedCodePointsToFind 	:UDINT;
	(* loop var: zero-based start index of the comparison window in ``codepointsToCheck`` *)
	curCodePoint						:UDINT := 0;
END_VAR

`Implementation`

// Implementation of FindInUtf8String.
//
// Steps:
//   1. Reject null addresses and strings with a byte length of 0.
//   2. Decode both UTF-8 strings into dynamically allocated code point buffers.
//   3. Optionally replace both buffers by their normalized form (NormalizeCodepointsFormD).
//   4. Slide a window of the needle's length over the haystack and compare with MEMCMP.
//
// On a match the function value and ``result`` receive the 1-based code point index of
// the match; on every other path neither is assigned. Every exit path releases all
// buffers that were allocated up to that point.

// Nothing to search when either address is null.
RETURN( stringToCheckAdress = 0);
RETURN( stringToFindAdress = 0);

// An empty haystack or an empty needle never yields a match.
bytesOfStringToCheck := GetUtf8StringLength(stringToCheckAdress);
bytesOfStringToFind := GetUtf8StringLength(stringToFindAdress);
RETURN( bytesOfStringToCheck = 0);
RETURN( bytesOfStringToFind = 0);

// One buffer element per input byte plus one spare element; a string cannot decode to
// more code points than it has bytes.
codepointsToCheck := __NEW(UnicodeCodePoint, (bytesOfStringToCheck + 1));
IF (codepointsToCheck = 0) THEN
	// allocation failed, nothing to release yet
	RETURN;
END_IF
codepointsToFind := __NEW(UnicodeCodePoint, (bytesOfStringToFind + 1));
IF (codepointsToFind = 0) THEN
	__DELETE(codepointsToCheck);
	RETURN;
END_IF

// Decode both strings. The size passed here excludes the spare element allocated above.
GetCodepointsFromUtf8String(
	utf8StringAddress := stringToCheckAdress,
	utf8StringByteCount := bytesOfStringToCheck,
	codePointBuffer := codepointsToCheck,
	bufferSize := bytesOfStringToCheck * SIZEOF(UnicodeCodepoint),
	codePointsCount => codepointCountOfStringToCheck
);
GetCodepointsFromUtf8String(
	utf8StringAddress := stringToFindAdress,
	utf8StringByteCount := bytesOfStringToFind,
	codePointBuffer := codepointsToFind,
	bufferSize := bytesOfStringToFind * SIZEOF(UnicodeCodepoint),
	codePointsCount => codepointCountOfStringToFind
);

IF (codepointCountOfStringToCheck = 0 OR codepointCountOfStringToFind = 0) THEN // string was not utf8 encoded
	__DELETE(codepointsToCheck);
	__DELETE(codepointsToFind);
	RETURN;
END_IF

IF (normalizeStrings) THEN
	// The normalized buffers hold four elements per input byte.
	// NOTE(review): presumably sized for the worst-case expansion of the
	// normalization - confirm against NormalizeCodepointsFormD.
	normalizedCodePointsToCheck := __NEW(UnicodeCodePoint, (bytesOfStringToCheck*4));
	IF (normalizedCodePointsToCheck = 0) THEN
		__DELETE(codepointsToCheck);
		__DELETE(codepointsToFind);
		RETURN;
	END_IF
	normalizedCodePointsToFind := __NEW(UnicodeCodePoint, (bytesOfStringToFind*4));
	IF (normalizedCodePointsToFind = 0) THEN
		__DELETE(codepointsToCheck);
		__DELETE(codepointsToFind);
		__DELETE(normalizedCodePointsToCheck);
		RETURN;
	END_IF

	// Normalize the haystack, then let the working pointer/count refer to the
	// normalized data so the search below is identical for both modes.
	NormalizeCodepointsFormD(
		codePoints 					:= codepointsToCheck,
		codePointsCount 			:= codepointCountOfStringToCheck,
		normalizedCodepoints 		:= normalizedCodePointsToCheck,
		bufferSize 					:= (bytesOfStringToCheck*4) * SIZEOF(UnicodeCodePoint),
		normalizedCodepointsCount	=> countOfNormalizedCodePointsToCheck
	);
	__DELETE(codepointsToCheck);
	codepointsToCheck := normalizedCodePointsToCheck;
	codepointCountOfStringToCheck := countOfNormalizedCodePointsToCheck;

	// Same for the needle.
	NormalizeCodepointsFormD(
		codePoints 					:= codepointsToFind,
		codePointsCount 			:= codepointCountOfStringToFind,
		normalizedCodepoints 		:= normalizedCodePointsToFind,
		bufferSize 					:= (bytesOfStringToFind*4) * SIZEOF(UnicodeCodePoint),
		normalizedCodepointsCount	=> countOfNormalizedCodePointsToFind
	);
	__DELETE(codepointsToFind);
	codepointsToFind := normalizedCodePointsToFind;
	codepointCountOfStringToFind := countOfNormalizedCodePointsToFind;
END_IF

// No match is possible when the needle is longer than the haystack. This check also keeps
// the unsigned subtraction in the loop condition from wrapping around.
// The counts may have been replaced by the normalization output above, so a needle count
// of 0 is rejected here as well: a zero-length MEMCMP would otherwise presumably compare
// equal and report a false match at position 1.
IF( codepointCountOfStringToFind = 0 OR codepointCountOfStringToFind > codepointCountOfStringToCheck ) THEN
	__DELETE(codepointsToCheck);
	__DELETE(codepointsToFind);
	RETURN;
END_IF

// Try every start index at which the needle still fits into the haystack.
WHILE (curCodePoint <= (codepointCountOfStringToCheck - codepointCountOfStringToFind)) DO
	IF (
		Tc2_System.MEMCMP(
			ADR(codepointsToCheck[curCodePoint]),
			codepointsToFind,
			codepointCountOfStringToFind * SIZEOF(UnicodeCodePoint)
		) = 0
	) THEN
		// Match: release the buffers and report the 1-based position.
		__DELETE(codepointsToCheck);
		__DELETE(codepointsToFind);
		result := curCodePoint + 1;
		FindInUtf8String := curCodePoint + 1;
		RETURN;
	END_IF;

	curCodePoint := curCodePoint + 1;
END_WHILE

// No match found: release the buffers; result and function value stay unassigned.
__DELETE(codepointsToCheck);
__DELETE(codepointsToFind);

FindInUtf8String

Short summary​

Parameters​

Code​

Declaration​

Implementation​

Short summary

Parameters

Code

`Declaration`

`Implementation`