3

我们有一个像这样的库函数:

// Reads everything from AStream's current position to its end and returns it
// as a string, decoding the bytes with TEncoding.Unicode.
// Known problem (the subject of this question): the source bytes are copied
// verbatim, so a BOM present in AStream shows up as the first Char of Result.
class function TFileUtils.ReadTextStream(const AStream: TStream): string;
var
  StringStream: TStringStream;
begin
  StringStream := TStringStream.Create('', TEncoding.Unicode);
  try
    // This is WRONG: when the computed count is 0 (AStream is already at its
    // end), CopyFrom treats 0 as "copy the entire stream" and rewinds the
    // source first, instead of copying nothing (see Remy's comment).
    StringStream.CopyFrom(AStream, AStream.Size - AStream.Position);
    Result := StringStream.DataString;
  finally
    StringStream.Free;
  end;
end;

当我检查函数返回的字符串时,第一个 Char 是(小端)BOM。

为什么 TStringStream 不忽略 BOM?

有更好的方法吗?我不需要向后兼容旧版本的 Delphi,只要是能在 XE2 上正常工作的解决方案即可。

4

2 回答 2

9

BOM 一定来自源 TStream,因为 TStringStream 本身不会写入 BOM。如果您想忽略源数据中存在的 BOM,就必须在复制数据之前手动检测并跳过它,例如:

// Reads the text from AStream's current position to its end and returns it as
// a string. A leading BOM, if present, is used to pick the decoding and is
// skipped so that it does not appear in the result.
//
// AStream: source stream; reading starts at its current position.
// Returns: the decoded text, or '' when there is nothing (or only a BOM) left
//          to read.
class function TFileUtils.ReadTextStream(const AStream: TStream): string;
var
  StreamSize: Int64;
  Buf: TBytes;
  NumBytes: Integer;
  Encoding: TEncoding;
  StringStream: TStringStream;
begin
  Result := '';

  // Number of bytes between the current position and the end of the stream.
  StreamSize := AStream.Size - AStream.Position;

  // Anything available to read?
  if StreamSize < 1 then Exit;

  // Read the first few bytes from the stream...
  SetLength(Buf, 4);
  NumBytes := AStream.Read(Buf[0], Length(Buf));
  if NumBytes < 1 then Exit;
  Dec(StreamSize, NumBytes);

  // Detect the BOM. If you know for a fact what the TStream data is encoded as,
  // you can assign the Encoding variable to the appropriate TEncoding object and
  // GetBufferEncoding() will check for that encoding's BOM only...
  SetLength(Buf, NumBytes);
  Encoding := nil;
  // GetBufferEncoding returns the BOM length, so NumBytes becomes the count of
  // non-BOM bytes that were consumed by the probe read above.
  Dec(NumBytes, TEncoding.GetBufferEncoding(Buf, Encoding));

  // If any non-BOM bytes were read then rewind the stream back to that position...
  if NumBytes > 0 then
  begin
    AStream.Seek(-NumBytes, soCurrent);
    Inc(StreamSize, NumBytes);
  end else
  begin
    // Anything left to read after the BOM?
    if StreamSize < 1 then Exit;
  end;

  // Now read and decode whatever is left in the stream. StreamSize is always
  // greater than 0 here, so CopyFrom never sees its special-case count of 0.
  StringStream := TStringStream.Create('', Encoding);
  try
    StringStream.CopyFrom(AStream, StreamSize);
    Result := StringStream.DataString;
  finally
    StringStream.Free;
  end;
end;
于 2013-01-11T01:48:31.623 回答
3

显然TStreamReader没有遇到同样的问题:

var
  StreamReader: TStreamReader;
begin
  // Alternative body for ReadTextStream: let TStreamReader do the decoding.
  // NOTE(review): this relies on the reader handling a leading BOM by itself,
  // as reported in the accompanying text - confirm for the RTL version in use.
  StreamReader := TStreamReader.Create(AStream);
  try
    // Return everything the reader can still read as one string.
    Result := StreamReader.ReadToEnd;
  finally
    // Release the reader even if reading raised an exception.
    StreamReader.Free;
  end;
end;

TStringList也有效(感谢 whosrdaddy):

var
  Strings: TStringList;
begin
  // Alternative body for ReadTextStream: load the stream into a TStringList
  // and return its combined text.
  Strings := TStringList.Create;
  try
    Strings.LoadFromStream(AStream);
    // NOTE(review): Text is rebuilt from the parsed lines, so line breaks in
    // the result may not match the source byte-for-byte - confirm if it matters.
    Result := Strings.Text;
  finally
    // Release the list even if loading raised an exception.
    Strings.Free;
  end;
end;

我还测量了这两种方法,TStreamReader 似乎快了两倍。

于 2013-01-10T18:27:31.340 回答