下面是把 dnewcome 的答案实现为自定义 StreamReader 的版本。它只是包装了一个真正的 StreamReader,并在读取字符时把无效的 XML 字符替换掉。
为了节省时间,我只实现了其中几个方法。我把它与 XDocument.Load 和文件流配合使用,这种情况下只会调用 Read(char[] buffer, int index, int count) 方法,所以这样就能正常工作。要让它适用于你的应用程序,你可能还需要实现其他方法。我采用这种做法,是因为它看起来比其他答案更高效。我也只实现了一个构造函数;显然你可以按需实现任何 StreamReader 构造函数,因为它只是把参数原样转发给内部的 StreamReader。
我选择替换字符而不是删除它们,因为它大大简化了解决方案。这样,文本的长度保持不变,因此无需跟踪单独的索引。
/// <summary>
/// A <see cref="TextReader"/> that wraps a <see cref="StreamReader"/> and substitutes a
/// caller-supplied replacement character for every UTF-16 code unit that falls outside the
/// ranges accepted by this class, so that the text can be fed to an XML parser.
/// </summary>
/// <remarks>
/// <para>
/// Accepted code units are U+0009, U+000A, U+000D, U+0020..U+D7FF and U+E000..U+FFFD.
/// Everything else is replaced one-for-one, so the length of the text never changes and
/// no separate index bookkeeping is needed.
/// </para>
/// <para>
/// The check is applied to individual UTF-16 code units. Surrogate code units
/// (U+D800..U+DFFF) are outside the accepted ranges and are therefore replaced as well,
/// including when they form a pair.
/// </para>
/// </remarks>
public class InvalidXmlCharacterReplacingStreamReader : TextReader
{
    private readonly StreamReader implementingStreamReader;
    private readonly char replacementCharacter;

    /// <summary>
    /// Creates a reader over <paramref name="stream"/> using the default
    /// <see cref="StreamReader"/> settings.
    /// </summary>
    /// <param name="stream">The stream to read from; it is wrapped in a <see cref="StreamReader"/>.</param>
    /// <param name="replacementCharacter">The character substituted for every rejected code unit.</param>
    public InvalidXmlCharacterReplacingStreamReader(Stream stream, char replacementCharacter)
    {
        implementingStreamReader = new StreamReader(stream);
        this.replacementCharacter = replacementCharacter;
    }

    /// <summary>
    /// Disposes the wrapped <see cref="StreamReader"/>.
    /// </summary>
    /// <remarks>
    /// Overriding this method (rather than declaring a new parameterless <c>Dispose()</c>)
    /// ensures the wrapped reader is released no matter whether the caller uses
    /// <c>Close()</c>, <c>Dispose()</c> or a <c>using</c> statement on a
    /// <see cref="TextReader"/> reference; all of those end up here.
    /// </remarks>
    /// <param name="disposing"><c>true</c> when called from <c>Dispose()</c>/<c>Close()</c>.</param>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            implementingStreamReader.Dispose();
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Returns the next character without consuming it, with a rejected character
    /// reported as the replacement character.
    /// </summary>
    /// <returns>The (possibly replaced) next character, or -1 when the wrapped reader reports -1.</returns>
    public override int Peek()
    {
        return ReplaceIfInvalid(implementingStreamReader.Peek());
    }

    /// <summary>
    /// Reads the next character, with a rejected character reported as the replacement character.
    /// </summary>
    /// <returns>The (possibly replaced) character, or -1 when the wrapped reader reports -1.</returns>
    public override int Read()
    {
        return ReplaceIfInvalid(implementingStreamReader.Read());
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/> starting at
    /// <paramref name="index"/> and replaces rejected characters in place.
    /// </summary>
    /// <returns>The number of characters read, as reported by the wrapped reader.</returns>
    public override int Read(char[] buffer, int index, int count)
    {
        int readCount = implementingStreamReader.Read(buffer, index, count);
        ReplaceInvalidCharacters(buffer, index, readCount);
        return readCount;
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="Read(char[], int, int)"/>.
    /// </summary>
    /// <returns>A task producing the number of characters read.</returns>
    public override async Task<int> ReadAsync(char[] buffer, int index, int count)
    {
        int readCount = await implementingStreamReader.ReadAsync(buffer, index, count).ConfigureAwait(false);
        ReplaceInvalidCharacters(buffer, index, readCount);
        return readCount;
    }

    /// <summary>
    /// Blocking read via the wrapped reader's <c>ReadBlock</c>, with rejected characters
    /// replaced in place.
    /// </summary>
    /// <returns>The number of characters read, as reported by the wrapped reader.</returns>
    public override int ReadBlock(char[] buffer, int index, int count)
    {
        int readCount = implementingStreamReader.ReadBlock(buffer, index, count);
        ReplaceInvalidCharacters(buffer, index, readCount);
        return readCount;
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="ReadBlock(char[], int, int)"/>.
    /// </summary>
    /// <returns>A task producing the number of characters read.</returns>
    public override async Task<int> ReadBlockAsync(char[] buffer, int index, int count)
    {
        int readCount = await implementingStreamReader.ReadBlockAsync(buffer, index, count).ConfigureAwait(false);
        ReplaceInvalidCharacters(buffer, index, readCount);
        return readCount;
    }

    /// <summary>
    /// Reads one line from the wrapped reader and replaces rejected characters in it.
    /// </summary>
    /// <returns>The filtered line, or <c>null</c> when the wrapped reader returns <c>null</c>.</returns>
    public override string ReadLine()
    {
        return ReplaceInvalidCharacters(implementingStreamReader.ReadLine());
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="ReadLine"/>.
    /// </summary>
    /// <returns>A task producing the filtered line, or <c>null</c> when the wrapped reader returns <c>null</c>.</returns>
    public override async Task<string> ReadLineAsync()
    {
        string line = await implementingStreamReader.ReadLineAsync().ConfigureAwait(false);
        return ReplaceInvalidCharacters(line);
    }

    /// <summary>
    /// Reads all remaining text from the wrapped reader and replaces rejected characters in it.
    /// </summary>
    /// <returns>The filtered remaining text.</returns>
    public override string ReadToEnd()
    {
        return ReplaceInvalidCharacters(implementingStreamReader.ReadToEnd());
    }

    /// <summary>
    /// Asynchronous counterpart of <see cref="ReadToEnd"/>.
    /// </summary>
    /// <returns>A task producing the filtered remaining text.</returns>
    public override async Task<string> ReadToEndAsync()
    {
        string text = await implementingStreamReader.ReadToEndAsync().ConfigureAwait(false);
        return ReplaceInvalidCharacters(text);
    }

    /// <summary>
    /// Returns the string representation of the wrapped <see cref="StreamReader"/>.
    /// </summary>
    public override string ToString()
    {
        return implementingStreamReader.ToString();
    }

    /// <summary>
    /// Tells whether a code unit is outside the accepted ranges and must be replaced.
    /// </summary>
    /// <param name="ch">A UTF-16 code unit widened to <c>int</c>; callers never pass -1.</param>
    private static bool IsInvalidXmlCharacter(int ch)
    {
        bool accepted =
            ch == 0x0009 ||
            ch == 0x000A ||
            ch == 0x000D ||
            (ch >= 0x0020 && ch <= 0xD7FF) ||
            (ch >= 0xE000 && ch <= 0xFFFD);

        return !accepted;
    }

    /// <summary>
    /// Maps a <c>Read()</c>/<c>Peek()</c> result: -1 passes through untouched,
    /// rejected characters become the replacement character.
    /// </summary>
    private int ReplaceIfInvalid(int ch)
    {
        if (ch != -1 && IsInvalidXmlCharacter(ch))
        {
            return replacementCharacter;
        }

        return ch;
    }

    /// <summary>
    /// Replaces rejected characters in <paramref name="buffer"/> within
    /// [<paramref name="index"/>, <paramref name="index"/> + <paramref name="length"/>).
    /// </summary>
    private void ReplaceInvalidCharacters(char[] buffer, int index, int length)
    {
        int end = index + length;
        for (int i = index; i < end; i++)
        {
            if (IsInvalidXmlCharacter(buffer[i]))
            {
                buffer[i] = replacementCharacter;
            }
        }
    }

    /// <summary>
    /// Returns <paramref name="text"/> with rejected characters replaced; <c>null</c> stays <c>null</c>.
    /// </summary>
    private string ReplaceInvalidCharacters(string text)
    {
        if (text == null)
        {
            return null;
        }

        // Copy lazily: the common case is text with nothing to replace.
        char[] replaced = null;
        for (int i = 0; i < text.Length; i++)
        {
            if (IsInvalidXmlCharacter(text[i]))
            {
                if (replaced == null)
                {
                    replaced = text.ToCharArray();
                }

                replaced[i] = replacementCharacter;
            }
        }

        return replaced == null ? text : new string(replaced);
    }
}