0

我有一个不断被追加内容的 XML 文件。我需要反复从这个 XML 中读取数据,但每次读取时都不想再取回上一次运行中已经处理过的数据。

由于我知道上次处理时文件的长度,我想可以用这个长度(减去结尾的 </Contacts> 标记)来确定上次处理到的位置。在已知该位置的前提下,从文件中的某个字节偏移开始检索所有 Contact 标签的最佳方法是什么?

<?xml version="1.0"?>
<Contacts>
    <Contact>
      <Name>Todd</Name>
      <Email>todd@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Sarah</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
</Contacts>

下面的代码会获取所有联系人。我想加以限制,让它只获取第一个 Contact 元素之后的数据(即从字节 116 处开始)。

// Load the entire document, then materialize the text content of every
// <Contact> element (the explicit string conversion yields the element's
// concatenated text) into an array.
var xdoc = XDocument.Load(PATH_TO_FILE);
var contact = (from element in xdoc.Descendants("Contact")
               select (string)element).ToArray();
4

4 回答 4

2

如果您仍想从特定偏移量开始读取数据,同时继续使用高层 API,可以使用下面的 XmlTailReader:它把只剩下结束标记的文档尾部内容包装到另一个根元素中:

// Presents the tail of an XML document (a run of sibling elements followed by
// the original root's closing tag) as a well-formed document by wrapping it in
// a synthetic root element.
//
// The wrapped reader must be created with ConformanceLevel.Fragment so that it
// accepts several top-level elements. The node sequence produced is:
//   <rootTag> , nodes of the tail starting at its first element ,
//   <!--dummy--> , </rootTag>
class XmlTailReader : XmlReader
{
    private readonly XmlReader _reader;      // real reader over the tail
    private readonly XmlReader _fakeReader;  // supplies the synthetic root start/end
    private int _level;                      // open elements, counting the synthetic root as 1
    enum Fake { Start, Align, None, End };
    private Fake _fake;

    // reader:  fragment-level reader positioned before the tail content.
    // rootTag: name of the synthetic root element that wraps the tail.
    public XmlTailReader(XmlReader reader, string rootTag = "root")
    {
        _reader = reader;
        _fake = Fake.Start;

        var doc = new XmlDocument();
        var root = doc.CreateElement(rootTag);
        doc.AppendChild(root);
        // make sure that we'll get Element/EndElement
        // (a childless element would be reported as a single empty element)
        root.AppendChild(doc.CreateComment("dummy"));
        _fakeReader = new XmlNodeReader(root);
    }

    // The reader whose current node is exposed to the consumer in the current state.
    private XmlReader Proxy
    {
        get
        {
            switch (_fake)
            {
            case Fake.Start:
            case Fake.Align:
            case Fake.End:
                return _fakeReader;
            default:
                return _reader;
            }
        }
    }

    // Advances the real reader. Returns false when the tail is exhausted, either
    // by a regular end of input or by the parse error raised for the original
    // root's unmatched closing tag. That error can only occur while no tail
    // element is open (_level == 1); an error raised inside an open element is
    // genuinely malformed XML and is rethrown instead of being swallowed.
    private bool ReadReal()
    {
        try
        {
            return _reader.Read();
        }
        catch (XmlException)
        {
            if (_level > 1)
            {
                throw;
            }
            return false;
        }
    }

    // Switches to the synthetic closing nodes (comment, then end of the fake root).
    private bool BeginFakeEnd()
    {
        _fake = Fake.End;
        return _fakeReader.Read();
    }

    // Counts a newly read start tag. Empty elements (<x/>) never produce an
    // EndElement node, so they must not be counted as open.
    private void TrackStartElement()
    {
        if (!_reader.IsEmptyElement)
        {
            ++_level;
        }
    }

    public override bool Read()
    {
        switch (_fake)
        {
        case Fake.Start:
            // Emit the synthetic root start tag.
            if (!_fakeReader.Read())
            {
                return false;
            }
            if (_fakeReader.NodeType == XmlNodeType.Element)
            {
                ++_level;
                _fake = Fake.Align;
            }
            return true;
        case Fake.Align:
            // Align to the first element of the tail; anything before it is dropped.
            while (ReadReal())
            {
                if (_reader.NodeType == XmlNodeType.Element)
                {
                    TrackStartElement();
                    _fake = Fake.None;
                    return true;
                }
            }
            // No element at all (nothing new was appended): close the fake root
            // so the consumer still receives a complete, empty document.
            return BeginFakeEnd();
        case Fake.None:
            if (!ReadReal())
            {
                return BeginFakeEnd();
            }
            switch (_reader.NodeType)
            {
            case XmlNodeType.Element:
                TrackStartElement();
                break;
            case XmlNodeType.EndElement:
                // Defensive: an end tag that would close the synthetic root.
                if (--_level == 0)
                {
                    return BeginFakeEnd();
                }
                break;
            }
            return true;
        default:
            return _fakeReader.Read();
        }
    }

    public override string Value
    {
        get { return Proxy.Value; }
    }

    public override XmlNodeType NodeType
    {
        get { return Proxy.NodeType; }
    }

    // Everything below forwards to the reader that is currently exposed.

    public override string Name
    {
        get { return Proxy.Name; }
    }

    public override string LocalName
    {
        get { return Proxy.LocalName; }
    }

    public override string NamespaceURI
    {
        get { return Proxy.NamespaceURI; }
    }

    public override string Prefix
    {
        get { return Proxy.Prefix; }
    }

    public override bool HasValue
    {
        get { return Proxy.HasValue; }
    }

    // Nodes of the real reader are nested inside the synthetic root, hence one level deeper.
    public override int Depth
    {
        get { return _fake == Fake.None ? _reader.Depth + 1 : _fakeReader.Depth; }
    }

    public override string BaseURI
    {
        get { return Proxy.BaseURI; }
    }

    public override bool IsEmptyElement
    {
        get { return Proxy.IsEmptyElement; }
    }

    public override int AttributeCount
    {
        get { return Proxy.AttributeCount; }
    }

    public override bool EOF
    {
        get { return Proxy.EOF; }
    }

    public override ReadState ReadState
    {
        get { return Proxy.ReadState; }
    }

    public override XmlNameTable NameTable
    {
        get { return Proxy.NameTable; }
    }

    public override string GetAttribute(string name)
    {
        return Proxy.GetAttribute(name);
    }

    public override string GetAttribute(string name, string namespaceURI)
    {
        return Proxy.GetAttribute(name, namespaceURI);
    }

    public override string GetAttribute(int i)
    {
        return Proxy.GetAttribute(i);
    }

    public override bool MoveToAttribute(string name)
    {
        return Proxy.MoveToAttribute(name);
    }

    public override bool MoveToAttribute(string name, string ns)
    {
        return Proxy.MoveToAttribute(name, ns);
    }

    public override bool MoveToFirstAttribute()
    {
        return Proxy.MoveToFirstAttribute();
    }

    public override bool MoveToNextAttribute()
    {
        return Proxy.MoveToNextAttribute();
    }

    public override bool MoveToElement()
    {
        return Proxy.MoveToElement();
    }

    public override bool ReadAttributeValue()
    {
        return Proxy.ReadAttributeValue();
    }

    public override string LookupNamespace(string prefix)
    {
        return Proxy.LookupNamespace(prefix);
    }

    public override void ResolveEntity()
    {
        Proxy.ResolveEntity();
    }

    // Closes both the wrapped reader and the synthetic one.
    public override void Close()
    {
        _reader.Close();
        _fakeReader.Close();
    }
}

// Demo: drop everything up to and including the first </Contact> closing tag,
// then load the remaining tail through XmlTailReader under a synthetic "xxx" root.
void Main()
{
    const string tag = "</Contact>";
    var fullDocument = "<?xml version=\"1.0\"?>" + @"
<Contacts>
    <Contact>
      <Name>Todd</Name>
      <Email>todd@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Sarah</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Peter</Name>
      <Email>peter@blah.com</Email>
  </Contact>
</Contacts>";

    // Index of the first character after the first closing Contact tag.
    var tailStart = fullDocument.IndexOf(tag) + tag.Length;

    // The tail has several top-level elements, so the reader must accept fragments.
    var fragmentSettings = new XmlReaderSettings();
    fragmentSettings.ConformanceLevel = ConformanceLevel.Fragment;

    using (var tailText = new StringReader(fullDocument.Substring(tailStart)))
    using (var fragmentReader = XmlReader.Create(tailText, fragmentSettings))
    using (var tailReader = new XmlTailReader(fragmentReader, "xxx"))
    {
        XDocument.Load(tailReader).Descendants("Contact").Dump();
    }
}

请注意,以这种方式读取时,ConformanceLevel 必须设置为 Fragment。

于 2012-11-29T14:33:44.393 回答
1

我找到了一种按索引位置保存/检索的方法。这也可以。

// Number of leading <Contact> elements that were already processed in earlier runs.
int position = 1;
// Skip the already-processed elements; the query remains lazily evaluated
// and yields the remaining <Contact> elements in document order.
var contacts = xdoc
    .Descendants("Contact")
    .Skip(position);
于 2012-11-28T15:04:17.667 回答
1

您可以创建一个取巧的 Stream,让它在自定义的位置上模拟文档(Document)的起始元素。代码相当粗糙,但确实可以工作:

// Demo: start reading the UTF-8 encoded document in the middle and let
// CustomReader re-inject the root start tag in front of the remaining bytes.
void Main()
{
    var xml =
    @"<Contacts><Contact><Name>Todd</Name><Email>todd@blah.com</Email></Contact><Contact>
      <Name>Sarah1</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
  <Contact>
      <Name>Sarah2</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
</Contacts>";

    // Dispose the stream, the wrapper and the reader deterministically.
    using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
    {
        // 74 = byte offset just past the first </Contact> of the sample above.
        ms.Position = 74;
        using (var prefixed = new CustomReader("<Contacts>", ms))
        using (var reader = XmlReader.Create(prefixed))
        {
            var xdoc = XDocument.Load(reader);
            var contact = xdoc.Descendants("Contact").ToArray();

            contact.Dump();
        }
    }
}

/// <summary>
/// Read-only, forward-only stream that first yields the UTF-8 bytes of a fixed
/// text prefix and then the remaining content of the wrapped stream, starting
/// at the wrapped stream's current position.
/// </summary>
public class CustomReader : Stream
{
    private readonly string _element;
    private readonly Stream _stream;
    private readonly byte[] _prefix;  // UTF-8 bytes of _element
    private int _offset;              // number of prefix bytes already handed out

    /// <param name="element">Text emitted before the wrapped stream's data.</param>
    /// <param name="stream">Source stream; it is closed together with this instance.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public CustomReader(string element, Stream stream)
    {
        if (element == null)
        {
            throw new ArgumentNullException("element");
        }
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        _element = element;
        _stream = stream;
        _prefix = Encoding.UTF8.GetBytes(_element);
        _offset = 0;
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    /// <summary>Closes the wrapped stream, then this stream.</summary>
    public override void Close()
    {
        _stream.Close();
        base.Close();
    }

    /// <summary>No-op: a read-only stream has nothing to flush.</summary>
    public override void Flush()
    {
    }

    public override long Length
    {
        get { throw new NotSupportedException(); }
    }

    public override long Position
    {
        get { throw new NotSupportedException(); }
        set { throw new NotSupportedException(); }
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes: pending prefix bytes first,
    /// data of the wrapped stream once the prefix has been fully delivered.
    /// A single call never mixes prefix and stream data.
    /// </summary>
    /// <returns>Number of bytes stored in <paramref name="buffer"/>; 0 at end of stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException("offset");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        if (buffer.Length - offset < count)
        {
            throw new ArgumentException("offset and count exceed the buffer length.");
        }
        if (count == 0)
        {
            return 0;
        }

        int pending = _prefix.Length - _offset;
        if (pending > 0)
        {
            // Never copy more than the caller asked for; the rest of the prefix
            // is delivered by subsequent calls.
            int chunk = Math.Min(count, pending);
            Buffer.BlockCopy(_prefix, _offset, buffer, offset, chunk);
            _offset += chunk;
            return chunk;
        }

        return _stream.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }
}
于 2012-11-28T15:55:40.470 回答
1

如果您不想破坏 XML 读取的一致性(conformance),可以让 XDocument 在构建时跳过开头的若干个元素,例如:

// Wraps an XmlReader over a complete document and hides the first `skip` child
// elements of the root element (together with their content). Every other node
// is passed through unchanged.
class XmlSkipReader : XmlReader
{
    private readonly XmlReader _reader;
    private readonly int _skip;   // number of leading root children to hide
    private int _skipped;         // number of children hidden so far
    private bool _insideRoot;     // true once the root start tag has been passed through

    // reader: reader over the whole document.
    // skip:   number of leading child elements of the root to hide;
    //         zero or a negative value hides nothing.
    public XmlSkipReader(XmlReader reader, int skip)
    {
        _reader = reader;
        _skip = skip;
    }

    public override bool Read()
    {
        // Nothing (more) to hide: plain pass-through.
        if (_skipped >= _skip)
        {
            return _reader.Read();
        }

        if (!_reader.Read())
        {
            return false;
        }

        if (!_insideRoot)
        {
            // Prolog nodes and the root start tag are always exposed.
            if (_reader.NodeType == XmlNodeType.Element)
            {
                _insideRoot = true;
            }
            return true;
        }

        // While elements are still being hidden no child start tag has been
        // exposed, so any element seen here is a direct child of the root.
        while (_reader.NodeType == XmlNodeType.Element)
        {
            if (!SkipCurrentElement())
            {
                return false;
            }
            ++_skipped;
            if (!_reader.Read())
            {
                return false;
            }
            if (_skipped >= _skip)
            {
                break;
            }
        }
        return true;
    }

    // Advances the wrapped reader to the end tag matching the current start tag.
    // An empty element (<x/>) has no end tag and is already complete.
    // Returns false if the input ends before the end tag is found.
    private bool SkipCurrentElement()
    {
        if (_reader.IsEmptyElement)
        {
            return true;
        }

        int depth = _reader.Depth;
        while (_reader.Read())
        {
            if (_reader.NodeType == XmlNodeType.EndElement && _reader.Depth == depth)
            {
                return true;
            }
        }
        return false;
    }

    // Everything below is a plain proxy to the wrapped reader.

    public override XmlNodeType NodeType
    {
        get { return _reader.NodeType; }
    }

    public override string Name
    {
        get { return _reader.Name; }
    }

    public override string LocalName
    {
        get { return _reader.LocalName; }
    }

    public override string NamespaceURI
    {
        get { return _reader.NamespaceURI; }
    }

    public override string Prefix
    {
        get { return _reader.Prefix; }
    }

    public override bool HasValue
    {
        get { return _reader.HasValue; }
    }

    public override string Value
    {
        get { return _reader.Value; }
    }

    public override int Depth
    {
        get { return _reader.Depth; }
    }

    public override string BaseURI
    {
        get { return _reader.BaseURI; }
    }

    public override bool IsEmptyElement
    {
        get { return _reader.IsEmptyElement; }
    }

    public override int AttributeCount
    {
        get { return _reader.AttributeCount; }
    }

    public override bool EOF
    {
        get { return _reader.EOF; }
    }

    public override ReadState ReadState
    {
        get { return _reader.ReadState; }
    }

    public override XmlNameTable NameTable
    {
        get { return _reader.NameTable; }
    }

    public override string GetAttribute(string name)
    {
        return _reader.GetAttribute(name);
    }

    public override string GetAttribute(string name, string namespaceURI)
    {
        return _reader.GetAttribute(name, namespaceURI);
    }

    public override string GetAttribute(int i)
    {
        return _reader.GetAttribute(i);
    }

    public override bool MoveToAttribute(string name)
    {
        return _reader.MoveToAttribute(name);
    }

    public override bool MoveToAttribute(string name, string ns)
    {
        return _reader.MoveToAttribute(name, ns);
    }

    public override bool MoveToFirstAttribute()
    {
        return _reader.MoveToFirstAttribute();
    }

    public override bool MoveToNextAttribute()
    {
        return _reader.MoveToNextAttribute();
    }

    public override bool MoveToElement()
    {
        return _reader.MoveToElement();
    }

    public override bool ReadAttributeValue()
    {
        return _reader.ReadAttributeValue();
    }

    public override string LookupNamespace(string prefix)
    {
        return _reader.LookupNamespace(prefix);
    }

    public override void ResolveEntity()
    {
        _reader.ResolveEntity();
    }

    public override void Close()
    {
        _reader.Close();
    }
}

// Demo: load the sample document through XmlSkipReader so that the first
// <Contact> element is left out of the resulting XDocument.
void Main()
{
    var document = "<?xml version=\"1.0\"?>" + @"
<Contacts>
    <Contact>
      <Name>Todd</Name>
      <Email>todd@blah.com</Email>
  </Contact>
    <Contact>
      <Name>Sarah</Name>
      <Email>sarah@blah.com</Email>
  </Contact>
</Contacts>";

    using (var text = new StringReader(document))
    using (var plainReader = XmlReader.Create(text))
    using (var skippingReader = new XmlSkipReader(plainReader, 1))
    {
        XDocument.Load(skippingReader).Descendants("Contact").Dump();
    }
}
于 2012-11-29T11:51:49.950 回答