我有来自第 3 方的 xml 文件。因为 xml 文件很大(40MB),所以我使用 XmlTextReader 逐步解析文件,使用这种方式:
class Parser
{
// ...
public void ImportFile(string fileName)
{
bool isCountryStateFile = IsCountryStateFile(fileName);
XmlTextReader xmlReader = new XmlTextReader(fileName);
namespaceManager = new XmlNamespaceManager(xmlReader.NameTable);
namespaceManager.FillRuianNamespaces();
xmlReader.WhitespaceHandling = WhitespaceHandling.None;
while (xmlReader.Read())
{
if (xmlReader.NodeType == XmlNodeType.Element &&
xmlReader.IsStartElement() == true)
{
// skip items in country state file (are included in sity state files)
if (isCountryStateFile && nodesToSkipInCountryStateFile.Contains(xmlReader.Name))
{
Console.WriteLine("Skiping {0}", xmlReader.Name);
xmlReader.Skip();
}
if (xmlReader.Name == "vf:Vusc")
{
var item = ParseRegion(xmlReader);
repository.Save(item);
xmlReader.Skip();
}
if (xmlReader.Name == "vf:Obec")
{
var item = ParseCity(xmlReader);
repository.Save(item);
xmlReader.Skip();
}
}
}
xmlReader.Close();
}
private Region ParseRegion(XmlTextReader xmlReader)
{
XmlDocument node = new XmlDocument();
node.LoadXml(xmlReader.ReadOuterXml());
// parse and compose Region
// ...
return result;
}
// ...
}
通常节点的结构为<Cities><City/></Cities>
,一切正常,但有些节点包含与其父节点同名的子节点:
<?xml version="1.0" encoding="UTF-8"?>
<vf:VymennyFormat xsi:schemaLocation="urn:cz:isvs:ruian:schemas:VymennyFormatTypy:v1 ../ruian/xsd/vymenny_format/VymennyFormatTypy.xsd" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ami="urn:cz:isvs:ruian:schemas:AdrMistoIntTypy:v1" xmlns:base="urn:cz:isvs:ruian:schemas:BaseTypy:v1" xmlns:coi="urn:cz:isvs:ruian:schemas:CastObceIntTypy:v1" xmlns:com="urn:cz:isvs:ruian:schemas:CommonTypy:v1" xmlns:kui="urn:cz:isvs:ruian:schemas:KatUzIntTypy:v1" xmlns:kri="urn:cz:isvs:ruian:schemas:KrajIntTypy:v1" xmlns:mci="urn:cz:isvs:ruian:schemas:MomcIntTypy:v1" xmlns:mpi="urn:cz:isvs:ruian:schemas:MopIntTypy:v1" xmlns:obi="urn:cz:isvs:ruian:schemas:ObecIntTypy:v1" xmlns:oki="urn:cz:isvs:ruian:schemas:OkresIntTypy:v1" xmlns:opi="urn:cz:isvs:ruian:schemas:OrpIntTypy:v1" xmlns:pai="urn:cz:isvs:ruian:schemas:ParcelaIntTypy:v1" xmlns:pui="urn:cz:isvs:ruian:schemas:PouIntTypy:v1" xmlns:rsi="urn:cz:isvs:ruian:schemas:RegSouIntiTypy:v1" xmlns:spi="urn:cz:isvs:ruian:schemas:SpravObvIntTypy:v1" xmlns:sti="urn:cz:isvs:ruian:schemas:StatIntTypy:v1" xmlns:soi="urn:cz:isvs:ruian:schemas:StavObjIntTypy:v1" xmlns:uli="urn:cz:isvs:ruian:schemas:UliceIntTypy:v1" xmlns:vci="urn:cz:isvs:ruian:schemas:VuscIntTypy:v1" xmlns:vf="urn:cz:isvs:ruian:schemas:VymennyFormatTypy:v1" xmlns:zji="urn:cz:isvs:ruian:schemas:ZsjIntTypy:v1">
<vf:Data>
<vf:Vusc> <!-- HERE -->
<vf:Vusc gml:id="VC.19">
<vci:Kod>19</vci:Kod>
<vci:Nazev>Hlavní město Praha</vci:Nazev>
<vci:RegionSoudrznosti>
<rsi:Kod>19</rsi:Kod>
</vci:RegionSoudrznosti>
<vci:PlatiOd>2013-06-11T00:00:00</vci:PlatiOd>
<vci:IdTransakce>241736</vci:IdTransakce>
<vci:GlobalniIdNavrhuZmeny>413702</vci:GlobalniIdNavrhuZmeny>
<vci:NutsLau>CZ010</vci:NutsLau>
<vci:Geometrie>
<vci:DefinicniBod>
<gml:Point gml:id="DVC.19" srsName="urn:ogc:def:crs:EPSG::5514" srsDimension="2">
<gml:pos>-743100.00 -1043300.00</gml:pos>
</gml:Point>
</vci:DefinicniBod>
</vci:Geometrie>
</vf:Vusc>
</vf:Vusc>
...
</vf:Data>
</vf:VymennyFormat>
在这种情况下如何选择正确的节点来解析唯一的子节点,以及如何使用 XmlTextReader.Skip() 跳过父节点,如顶级节点及其所有子节点?如果我跳过它,它无论如何都会读儿童。