-1

I have a routine that processes a C-like string, resulting in usual Delphi string:

// Converts a C-style escaped string into a plain Delphi string by walking the
// characters through a pointer and collecting the output in a TStringBuilder.
// Escapes handled: \\ -> \, \" -> ", \n -> sLineBreak, \t -> #9; a backslash
// followed by any other character is copied through unchanged (both chars).
// NOTE(review): SB is not protected by try/finally, so it leaks if anything
// inside the loop raises.
class function UTIL.ProcessString(const S: string): string;
var
  SB:TStringBuilder;
  P:MarshaledString;
  // Appends S to the builder and advances P by I characters
  // (I defaults to 2: the backslash plus the character after it).
  procedure DoIt(const S:string;const I:Integer=2);
  begin
  SB.Append(S);
  Inc(P,I);
  end;
begin
SB:=TStringBuilder.Create;
P:=PChar(S);
// NOTE(review): P is only ever assigned from PChar(S) and then incremented;
// nothing in this loop sets it to nil, so this condition gives the loop no
// visible exit. The terminating #0 is never tested on its own.
while P<>nil do
  begin
  // Ordinary character: copy it and advance by one.
  if P^<>'\' then DoIt(P^,1) else
    // Backslash: decide based on the character that follows it.
    case (P+1)^ of
    '\','"':DoIt((P+1)^);
    // A backslash directly before #0 is treated like \n. DoIt then advances
    // P by 2, i.e. to the position after that #0.
    #0,'n':DoIt(sLineBreak);
    't':DoIt(#9);
    // Unknown escape: keep the backslash and the following character.
    else DoIt('\'+(P+1)^,2);
    end;
  end;
Result:=SB.ToString;
SB.Free;
end;

The problem is the loop never exits. Debugging shows the line while P<>nil do doesn't evaluate to False because P is '' at the end of processing, so the code tries to perform out-of-range operations on it. Since I didn't find any concise documentation on pointer math in Delphi, it's quite possible I'm at fault here.

EDIT: I've rewritten the function with everything read in mind like that:

// Converts a C-style escaped string into a plain Delphi string. Each pass of
// the loop reads one character into C and advances P before dispatching on C.
// Escapes handled: \\ -> \, \" -> ", \n -> sLineBreak, \t -> #9; a backslash
// followed by #0 also yields sLineBreak; any other escape is copied through.
// NOTE(review): SB is not protected by try/finally, so it leaks if anything
// inside the loop raises.
class function UTIL.ProcessString(const S: string): string;
var
  SB:TStringBuilder;
  P:PChar;
  C:Char;
begin
SB:=TStringBuilder.Create;
P:=PChar(S);
  repeat
  // Fetch the current character, then step past it.
  C:=P^;
  Inc(P);
    case C of
    // A #0 read here appends nothing. P has already been moved past it, so
    // the "until" test below looks at the character after that #0.
    #0:;
    '\':
      begin
      // Fetch the character following the backslash and step past it too.
      C:=P^;
      Inc(P);
        case C of
        // Backslash directly before #0 is treated like \n; P now points
        // past that #0.
        #0,'n':SB.Append(sLineBreak);
        '\','"':SB.Append(C);
        't':SB.Append(#9);
        // Unknown escape: keep the backslash and the following character.
        else SB.Append('\').Append(C);
        end;
      end;
    // Ordinary character: copy it through.
    else SB.Append(C);
    end;
  // NOTE(review): this tests the character at the already-advanced P, not
  // the one just processed; for an empty S the body runs once before the
  // first test.
  until P^=#0;
Result:=SB.ToString;
SB.Free;
end;

I check for #0 in the inner case statement for "such \ strings" being fed into the routine, i. e. a sequence of strings broken into pieces read from a source and then formatted one by one. So far this works great, however it fails to correctly parse '\\t' as '\t' and similar constructs, it returns just #9. I can't really think of any cause. Oh, and the old version also had this bug BTW.

4

1 回答 1

5

您的循环永远运行,是因为 P 从一开始就不可能为 nil,而不是因为指针运算有问题(不过我会在下面进一步讨论这一点)。PChar() 始终返回一个非 nil 指针。如果 S 不为空,PChar() 返回指向第一个 Char 的指针;但如果 S 为空,PChar() 返回指向常量内存中一个空终止符的指针。您的代码没有考虑后一种情况。

如果您想把 S 当作以空字符结尾的 C 字符串来处理(为什么不改为考虑 S 的完整 Length() 呢?),那么您需要使用 while P^ <> #0 do,而不是 while P <> nil do。

除此之外:

  • P 应声明为 PChar 而不是 MarshaledString。在这种情况下没有理由使用 MarshaledString,也没有理由以这种方式使用它。

  • 在您向 DoIt() 传递单个 Char 的情况下,直接调用 TStringBuilder.Append(Char) 比 TStringBuilder.Append(string) 更高效。事实上,我建议完全去掉 DoIt(),因为它并没有真正为您带来任何好处。

  • 您为什么把 '\'#0 当作换行符?是为了处理输入字符串末尾的 \ 字符吗?如果确实遇到这种情况,您会把 P 递增到空终止符之后,此后就进入了未定义行为的范围,因为您读取的是字符串周围的内存。还是说您的输入字符串确实含有嵌入的 #0 字符,最后才是真正的空终止符?对于文本数据来说,这是一种不寻常的格式。

请尝试类似下面的写法(如果确实有嵌入的 #0 字符):

// Converts a C-style escaped string into a plain Delphi string.
//
// Escapes handled:
//   \\  -> \
//   \"  -> "
//   \n  -> sLineBreak
//   \t  -> #9
//   \#0 -> sLineBreak (a backslash directly before a #0, whether that #0 is
//          embedded in S or is the string's own terminator, i.e. S ends
//          with a lone backslash)
//   any other \x is copied through unchanged as the two characters \x.
//
// Parameters:
//   S - the escaped input text.
// Returns:
//   The unescaped text; '' when S is empty or starts with #0.
//
// Scanning stops at the first #0 that is not preceded by a backslash, or at
// the end of S, whichever comes first.
class function UTIL.ProcessString(const S: string): string;
var
  SB: TStringBuilder;
  P, PEnd: PChar;
begin
  Result := '';
  P := PChar(S);
  if P^ = #0 then Exit;
  // PEnd points at the string's real null terminator. It is needed to tell
  // an embedded "\#0" apart from a backslash that is the last character.
  PEnd := P + Length(S);
  SB := TStringBuilder.Create;
  try
    repeat
      if P^ <> '\' then
      begin
        SB.Append(P^);
        Inc(P);
      end else
      begin
        // Step over the backslash and dispatch on the escaped character.
        Inc(P);
        case P^ of
          '\','"': SB.Append(P^);
          #0, 'n': SB.Append(sLineBreak);
          't':     SB.Append(#9);
          else     SB.Append('\'+P^);
        end;
        // If the escaped character was the real terminator, stop here.
        // Advancing again would move P outside the string and make the
        // "until" test read memory that does not belong to S.
        if P = PEnd then Break;
        Inc(P);
      end;
    until P^ = #0;
    Result := SB.ToString;
  finally
    SB.Free;
  end;
end;

或者这个(如果没有嵌入#0字符):

// Converts a C-style escaped string into a plain Delphi string, treating S
// as null-terminated text without embedded #0 characters.
//
// Escapes handled:
//   \\ -> \      \" -> "      \n -> sLineBreak      \t -> #9
//   any other \x is copied through unchanged as the two characters \x.
// A lone backslash at the very end of S is dropped.
//
// Parameters:
//   S - the escaped input text.
// Returns:
//   The unescaped text; '' when S is empty.
class function UTIL.ProcessString(const S: string): string;
var
  Builder: TStringBuilder;
  Cursor: PChar;
  Escaped: Char;
begin
  Result := '';
  Cursor := PChar(S);
  if Cursor^ = #0 then
    Exit;
  Builder := TStringBuilder.Create;
  try
    while Cursor^ <> #0 do
    begin
      // Plain character: copy it and move on.
      if Cursor^ <> '\' then
      begin
        Builder.Append(Cursor^);
        Inc(Cursor);
        Continue;
      end;

      // Backslash: look at the character that follows it.
      Inc(Cursor);
      Escaped := Cursor^;
      // Trailing backslash: stop without emitting anything for it and
      // without stepping past the terminator.
      if Escaped = #0 then
        Break;
      Inc(Cursor);

      if (Escaped = '\') or (Escaped = '"') then
        Builder.Append(Escaped)
      else if Escaped = 'n' then
        Builder.Append(sLineBreak)
      else if Escaped = 't' then
        Builder.Append(#9)
      else
        // Unknown escape: keep the backslash and the character as-is.
        Builder.Append('\' + Escaped);
    end;
    Result := Builder.ToString;
  finally
    Builder.Free;
  end;
end;
于 2015-06-10T04:32:02.363 回答