您应该使用UTF8 description自己解析 UTF8 字符串。我已经编写了一个快速的 UTF8 模拟ByteToCharIndex
并在西里尔字符串上进行了测试:
function UTF8PosToCharIndex(const S: UTF8String; Index: Integer): Integer;
var
I: Integer;
P: PAnsiChar;
begin
Result:= 0;
if (Index <= 0) or (Index > Length(S)) then Exit;
I:= 1;
P:= PAnsiChar(S);
while I <= Index do begin
if Ord(P^) and $C0 <> $80 then Inc(Result);
Inc(I);
Inc(P);
end;
end;
const TestStr: UTF8String = 'abФЫВА';
procedure TForm1.Button2Click(Sender: TObject);
begin
ShowMessage(IntToStr(UTF8PosToCharIndex(TestStr, 1))); // a = 1
ShowMessage(IntToStr(UTF8PosToCharIndex(TestStr, 2))); // b = 2
ShowMessage(IntToStr(UTF8PosToCharIndex(TestStr, 3))); // Ф = 3
ShowMessage(IntToStr(UTF8PosToCharIndex(TestStr, 5))); // Ы = 4
ShowMessage(IntToStr(UTF8PosToCharIndex(TestStr, 7))); // В = 5
end;
反向功能也没有问题:
function CharIndexToUTF8Pos(const S: UTF8String; Index: Integer): Integer;
var
P: PAnsiChar;
begin
Result:= 0;
P:= PAnsiChar(S);
while (Result < Length(S)) and (Index > 0) do begin
Inc(Result);
if Ord(P^) and $C0 <> $80 then Dec(Index);
Inc(P);
end;
if Index <> 0 then Result:= 0; // char index not found
end;