我采用了 Legoless 的回答,并将其扩展为适合我自己需求的版本,在此分享出来。我需要在每个输出文件中放入多个条目,而不只是像原始问题那样每个文件一个条目;这意味着必须保留更高层级的元素,才能确保生成的 XML 文件有效。
因此,您需要提供要拆分的层级,以及每个文件所需的条目数。
/// <summary>
/// Splits a large XML file into several smaller, well-formed XML files.
/// </summary>
/// <remarks>
/// Elements at a chosen depth are treated as "entries". Each output file receives up to a
/// fixed number of entries, wrapped in a replay of every node that was seen above the
/// entry depth so far, so each output file is valid on its own.
/// Only element, text, CDATA and comment nodes are copied; other node types (whitespace,
/// processing instructions, the document type declaration, ...) are not written.
/// </remarks>
public class XMLFileManager
{
    /// <summary>A single attribute captured from an element above the entry depth.</summary>
    private sealed class RecordedAttribute
    {
        public string Prefix;
        public string LocalName;
        public string NamespaceUri;
        public string Value;
    }

    /// <summary>A node seen above the entry depth, kept so it can be replayed into later files.</summary>
    private sealed class RecordedNode
    {
        public XmlNodeType NodeType;
        public string Prefix;
        public string LocalName;
        public string NamespaceUri;
        public string Value;
        public List<RecordedAttribute> Attributes;
    }

    /// <summary>
    /// Splits <paramref name="fileName"/> into numbered files that each hold at most
    /// <paramref name="numEntriesPerFile"/> entries.
    /// </summary>
    /// <param name="fileName">Path of the XML file to split.</param>
    /// <param name="startingLevel">
    /// Depth of the elements to split on; the root element is level 1.
    /// </param>
    /// <param name="numEntriesPerFile">
    /// Number of entries after which the current output file is closed. A value that the
    /// entry counter never reaches (for example zero) results in a single output file.
    /// </param>
    /// <returns>The names of the output files, in the order they were created.</returns>
    public List<string> SplitXMLFile(string fileName, int startingLevel, int numEntriesPerFile)
    {
        List<string> resultingFilesList = new List<string>();

        XmlReaderSettings readerSettings = new XmlReaderSettings();
        // NOTE(review): DTD processing is enabled as in the original implementation;
        // reconsider this if the input can come from an untrusted source.
        readerSettings.DtdProcessing = DtdProcessing.Parse;

        XmlWriterSettings writerSettings = new XmlWriterSettings();
        writerSettings.Indent = true;
        writerSettings.NewLineOnAttributes = true;

        // Every node seen above the entry depth, in document order.
        List<RecordedNode> higherLevelNodes = new List<RecordedNode>();

        XmlWriter writer = null; // null means "no output file is currently open"
        int fileNum = 1;
        int entryNum = 0;
        int treeDepth = 0;

        try
        {
            using (XmlReader reader = XmlReader.Create(fileName, readerSettings))
            {
                writer = OpenOutputFile(fileName, fileNum, writerSettings, resultingFilesList);

                while (reader.Read())
                {
                    // Set when the current node closes an element: either a real end tag
                    // or a self-closing element, which never produces an EndElement node.
                    bool elementEnded = false;

                    switch (reader.NodeType)
                    {
                        case XmlNodeType.Element:
                        {
                            // Capture before touching attributes; the flag belongs to the element node.
                            bool isEmpty = reader.IsEmptyElement;
                            treeDepth++;

                            if (treeDepth == startingLevel)
                            {
                                entryNum++;
                                if (entryNum == 1 && fileNum > 1)
                                {
                                    // First entry of a follow-up file: open it and rebuild the
                                    // surrounding structure from the recorded nodes.
                                    writer = OpenOutputFile(fileName, fileNum, writerSettings, resultingFilesList);
                                    foreach (RecordedNode recorded in higherLevelNodes)
                                    {
                                        Replay(writer, recorded);
                                    }
                                }
                            }

                            if (writer != null)
                            {
                                writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
                                writer.WriteAttributes(reader, false);
                            }

                            if (treeDepth < startingLevel)
                            {
                                higherLevelNodes.Add(CaptureElement(reader));
                            }

                            elementEnded = isEmpty;
                            break;
                        }

                        case XmlNodeType.Text:
                            if (writer != null)
                            {
                                writer.WriteString(reader.Value);
                            }
                            if (treeDepth < startingLevel)
                            {
                                higherLevelNodes.Add(CaptureValue(XmlNodeType.Text, reader.Value));
                            }
                            break;

                        case XmlNodeType.CDATA:
                            if (writer != null)
                            {
                                writer.WriteCData(reader.Value);
                            }
                            if (treeDepth < startingLevel)
                            {
                                higherLevelNodes.Add(CaptureValue(XmlNodeType.CDATA, reader.Value));
                            }
                            break;

                        case XmlNodeType.Comment:
                            if (writer != null)
                            {
                                writer.WriteComment(reader.Value);
                            }
                            if (treeDepth < startingLevel)
                            {
                                higherLevelNodes.Add(CaptureValue(XmlNodeType.Comment, reader.Value));
                            }
                            break;

                        case XmlNodeType.EndElement:
                            elementEnded = true;
                            break;
                    }

                    if (elementEnded)
                    {
                        bool entryLimitReached = entryNum == numEntriesPerFile && treeDepth == startingLevel;
                        if (entryLimitReached || treeDepth == 1)
                        {
                            if (writer != null)
                            {
                                fileNum++;
                                // WriteEndDocument closes every element that is still open.
                                writer.WriteEndDocument();
                                writer.Close();
                                writer = null;
                                entryNum = 0;
                            }
                        }
                        else
                        {
                            if (writer != null)
                            {
                                writer.WriteEndElement();
                            }
                            if (treeDepth < startingLevel)
                            {
                                higherLevelNodes.Add(CaptureValue(XmlNodeType.EndElement, string.Empty));
                            }
                        }
                        treeDepth--;
                    }
                }
            }
        }
        finally
        {
            // Only reached with an open writer when reading or writing failed part-way;
            // release the file handle instead of leaking it.
            if (writer != null)
            {
                writer.Close();
            }
        }

        return resultingFilesList;
    }

    /// <summary>
    /// Creates the output file for <paramref name="fileNum"/>, registers its name in
    /// <paramref name="resultingFilesList"/> and writes the XML declaration.
    /// </summary>
    private XmlWriter OpenOutputFile(string fileName, int fileNum, XmlWriterSettings settings, List<string> resultingFilesList)
    {
        string outputName = GetIncrementedFileName(fileName, fileNum);
        resultingFilesList.Add(outputName);
        XmlWriter writer = XmlWriter.Create(outputName, settings);
        writer.WriteStartDocument();
        return writer;
    }

    /// <summary>
    /// Records the element the reader is positioned on, including its explicitly specified
    /// attributes, and leaves the reader positioned on that element.
    /// </summary>
    private static RecordedNode CaptureElement(XmlReader reader)
    {
        RecordedNode node = new RecordedNode();
        node.NodeType = XmlNodeType.Element;
        node.Prefix = reader.Prefix;
        node.LocalName = reader.LocalName;
        node.NamespaceUri = reader.NamespaceURI;
        node.Attributes = new List<RecordedAttribute>();

        if (reader.MoveToFirstAttribute())
        {
            do
            {
                // Skip attributes that were defaulted from a DTD, matching WriteAttributes(reader, false).
                if (!reader.IsDefault)
                {
                    RecordedAttribute attribute = new RecordedAttribute();
                    attribute.Prefix = reader.Prefix;
                    attribute.LocalName = reader.LocalName;
                    attribute.NamespaceUri = reader.NamespaceURI;
                    attribute.Value = reader.Value;
                    node.Attributes.Add(attribute);
                }
            }
            while (reader.MoveToNextAttribute());

            reader.MoveToElement();
        }

        return node;
    }

    /// <summary>Records a non-element node (text, CDATA, comment or end tag).</summary>
    private static RecordedNode CaptureValue(XmlNodeType nodeType, string value)
    {
        RecordedNode node = new RecordedNode();
        node.NodeType = nodeType;
        node.Value = value;
        return node;
    }

    /// <summary>Writes a previously recorded node to <paramref name="writer"/>.</summary>
    private static void Replay(XmlWriter writer, RecordedNode node)
    {
        switch (node.NodeType)
        {
            case XmlNodeType.Element:
                writer.WriteStartElement(node.Prefix, node.LocalName, node.NamespaceUri);
                foreach (RecordedAttribute attribute in node.Attributes)
                {
                    writer.WriteAttributeString(attribute.Prefix, attribute.LocalName, attribute.NamespaceUri, attribute.Value);
                }
                break;
            case XmlNodeType.Text:
                writer.WriteString(node.Value);
                break;
            case XmlNodeType.CDATA:
                writer.WriteCData(node.Value);
                break;
            case XmlNodeType.Comment:
                writer.WriteComment(node.Value);
                break;
            case XmlNodeType.EndElement:
                writer.WriteEndElement();
                break;
        }
    }

    /// <summary>
    /// Builds the name of output file number <paramref name="fileNum"/>:
    /// "name.xml" becomes "name_{fileNum}_.xml".
    /// </summary>
    private string GetIncrementedFileName(string fileName, int fileNum)
    {
        const string extension = ".xml";

        // Strip only a trailing ".xml"; other occurrences (for example inside a
        // directory name) are part of the path and must stay intact.
        string baseName = fileName;
        if (fileName.EndsWith(extension, System.StringComparison.Ordinal))
        {
            baseName = fileName.Substring(0, fileName.Length - extension.Length);
        }

        return baseName + "_" + fileNum + "_" + extension;
    }
}
/// <summary>
/// Simple holder that pairs an XML node type with a string value
/// (for example an element name, text content or comment text).
/// </summary>
public class XmlNodeItem
{
    /// <summary>Kind of XML node this item stands for.</summary>
    public System.Xml.XmlNodeType XmlNodeType
    {
        get;
        set;
    }

    /// <summary>String payload associated with the node.</summary>
    public string NodeValue
    {
        get;
        set;
    }
}
示例用法:
// <EMR> is level 1; the CustomTextBox and AllControlsCount entries are at level 2.
// The question wants to split on those level-2 items, so this parameter is 2.
int startingLevel = 2;

// The question wants one entry per file, which results in three files,
// each with a single entry.
int numEntriesPerFile = 1;

var xmlFileManager = new XMLFileManager();
List<string> resultingFilesList = xmlFileManager.SplitXMLFile(
    fileName: "before_split.xml",
    startingLevel: startingLevel,
    numEntriesPerFile: numEntriesPerFile);
对问题中的 XML 文件使用时的结果:
文件 1:
<?xml version="1.0" encoding="utf-8"?>
<EMR>
<CustomTextBox>
<Text>WNL</Text>
<Type>TextBox</Type>
<Width>500</Width>
<id>txt1</id>
</CustomTextBox>
</EMR>
文件 2:
<?xml version="1.0" encoding="utf-8"?>
<EMR>
<CustomTextBox>
<Text>WNL</Text>
<Type>TextBox</Type>
<Width>500</Width>
<id>txt2</id>
</CustomTextBox>
</EMR>
文件 3:
<?xml version="1.0" encoding="utf-8"?>
<EMR>
<AllControlsCount>
<Width>0</Width>
<id>ControlsID</id>
</AllControlsCount>
</EMR>
另一个具有更大级别深度并显示每个文件的多个条目的示例:
// Split on the 4th level down, which is <ITEM>.
int startingLevel = 4;

// Two entries per file. If 3 were used instead, the result would be
// 3 entries in the first file and 1 entry in the second file.
int numEntriesPerFile = 2;

var xmlFileManager = new XMLFileManager();
List<string> resultingFilesList = xmlFileManager.SplitXMLFile(
    fileName: "another_example.xml",
    startingLevel: startingLevel,
    numEntriesPerFile: numEntriesPerFile);
原始文件:
<?xml version="1.0" encoding="utf-8"?>
<TOP_LEVEL>
<RESPONSE>
<DATETIME>2019-04-03T21:39:40Z</DATETIME>
<ITEM_LIST>
<ITEM>
<ID>1</ID>
<ABC>Some Text 1</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>42</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
<ITEM>
<ID>2</ID>
<ABC>Some Text 2</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>53</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
<ITEM>
<ID>3</ID>
<ABC>Some Text 3</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>1128</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
<ITEM>
<ID>4</ID>
<ABC>Some Text 4</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>1955</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
</ITEM_LIST>
</RESPONSE>
</TOP_LEVEL>
结果文件:
第一个文件:
<?xml version="1.0" encoding="utf-8"?>
<TOP_LEVEL>
<RESPONSE>
<DATETIME>2019-04-03T21:39:40Z</DATETIME>
<ITEM_LIST>
<ITEM>
<ID>1</ID>
<ABC>Some Text 1</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>42</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
<ITEM>
<ID>2</ID>
<ABC>Some Text 2</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>53</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
</ITEM_LIST>
</RESPONSE>
</TOP_LEVEL>
第二个文件:
<?xml version="1.0" encoding="utf-8"?>
<TOP_LEVEL>
<RESPONSE>
<DATETIME>2019-04-03T21:39:40Z</DATETIME>
<ITEM_LIST>
<ITEM>
<ID>3</ID>
<ABC>Some Text 3</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>1128</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
<ITEM>
<ID>4</ID>
<ABC>Some Text 4</ABC>
<TESTDATA><![CDATA[Here is some c data]]></TESTDATA>
<A_DATETIME>2019-04-01T01:00:00Z</A_DATETIME>
<A_DEEPER_LIST>
<DEEPER_LIST_ITEM>
<DLID>1955</DLID>
<TYPE>Example</TYPE>
<IS_ENABLED>1</IS_ENABLED>
</DEEPER_LIST_ITEM>
</A_DEEPER_LIST>
</ITEM>
</ITEM_LIST>
</RESPONSE>
</TOP_LEVEL>