-1

我有一个 XML 字符串,其中隐藏了一个 0x11 十六进制值,这破坏了我的 XmlDocument.LoadXml 调用。

有人可以告诉我,如何在不逐个遍历字符串全部 50000 个字符的情况下,找到并删除这个 0x11 吗?

谢谢

4

1 回答 1

1

我以前需要这样做,这是我的逐字代码。它读取 LineNumber 和 LinePosition 属性以查找违规字符。

它仅在 en-US 区域设置下进行过测试,但我不确定这是否重要,因为它只是在异常消息中查找 "0x"。

  /// <summary>
  /// Loads <paramref name="xml"/> into a new <see cref="XmlDocument"/>. When loading fails
  /// with an <see cref="XmlException"/> whose message contains "0x", the character at the
  /// reported line/position is removed and the load is retried.
  /// </summary>
  /// <param name="xml">
  /// The XML text to parse. It is reassigned to the cleaned-up text every time a character
  /// is removed, so the caller sees the version that was last attempted.
  /// </param>
  /// <param name="errorComment">Passed through unchanged to <paramref name="onXmlExceptionDelegate"/>.</param>
  /// <param name="badCharRetryCount">
  /// Maximum number of characters that may be removed (i.e. retries after the first attempt).
  /// Negative values are treated as zero.
  /// </param>
  /// <param name="onXmlExceptionDelegate">
  /// Optional callback invoked for every "0x" failure (before any character is removed) with
  /// the current text buffer, the exception, and <paramref name="errorComment"/>.
  /// </param>
  /// <returns>The successfully loaded document.</returns>
  /// <exception cref="XmlException">
  /// The failure is not a "0x" failure, the retries are exhausted, or the reported position
  /// cannot be mapped to a character inside the text.
  /// </exception>
  internal static XmlDocument ParseWithRetry(ref string xml, string errorComment, int badCharRetryCount, Action<StringBuilder,XmlException,string> onXmlExceptionDelegate)
  {
     StringBuilder xmlBuff = null;
     if (badCharRetryCount < 0)
        badCharRetryCount = 0;
     XmlDocument doc = new XmlDocument();

     // The loop is left only by returning the loaded document or by rethrowing.
     // Comparing against badCharRetryCount directly avoids the overflow of
     // "badCharRetryCount + 1" when the caller passes int.MaxValue.
     for (int attempt = 0; ; attempt++)
     {
        try
        {
           doc.LoadXml(xml);
           return doc;
        }
        catch (XmlException xe)
        {
           // Only failures that mention a hex character code are treated as repairable.
           // NOTE: this relies on the text of the exception message.
           if (!xe.Message.Contains("0x"))
              throw;

           if (xmlBuff == null)
              xmlBuff = new StringBuilder(xml);
           // else, it's already synchronized with xml... no need to create a new buffer.

           // Write to the log... or whatever the caller wants to do.
           if (onXmlExceptionDelegate != null)
              onXmlExceptionDelegate(xmlBuff, xe, errorComment);

           // No retry left: surface the failure instead of returning a document
           // that was never loaded.
           if (attempt >= badCharRetryCount)
              throw;

           // Remove the offending character and try again.
           int badCharPosition = GetCharacterPosition(xml, xe.LineNumber, xe.LinePosition);

           // If the position cannot be mapped into the buffer, retrying would parse the
           // same text again (or make Remove throw), so give up with the original error.
           if (badCharPosition < 0 || badCharPosition >= xmlBuff.Length)
              throw;

           xmlBuff.Remove(badCharPosition, 1);
           xml = xmlBuff.ToString();
        }
     }
  }

  // Characters that terminate a line; a CR immediately followed by LF counts as one break.
  static readonly char[] LineBreakCharacters = { '\r', '\n' };

  /// <summary>
  /// Converts a one-based line number and line position into a zero-based index into
  /// <paramref name="xml"/>.
  /// </summary>
  /// <param name="xml">The text the line/position pair refers to.</param>
  /// <param name="lineNumber">One-based line number.</param>
  /// <param name="linePosition">One-based position within that line.</param>
  /// <returns>
  /// The zero-based character index, or a negative value when the text has fewer line
  /// breaks than <paramref name="lineNumber"/> requires. The result is not checked
  /// against the length of the text.
  /// </returns>
  internal static int GetCharacterPosition (string xml, int lineNumber, int linePosition)
  {
     // Index of the last character of the most recently skipped line break;
     // -1 means "before the start of the text", which is where line 1 begins.
     int lineBreakEnd = -1;

     // Each iteration skips one line break, so (lineNumber - 1) breaks are skipped in total.
     int linesLeft = lineNumber;
     while (linesLeft > 1)
     {
        int found = xml.IndexOfAny(LineBreakCharacters, lineBreakEnd + 1);
        if (found < 0)
           return found; // The text ends before the requested line.

        // Treat CR+LF as a single line break by stepping onto the LF.
        bool isCrLf = xml[found] == '\r'
                      && found + 1 < xml.Length
                      && xml[found + 1] == '\n';
        lineBreakEnd = isCrLf ? found + 1 : found;

        linesLeft--;
     }

     // The first character of the line sits right after the break, and linePosition is one-based.
     return lineBreakEnd + linePosition;
  }
于 2012-06-11T21:31:05.940 回答