XML 由节点组成,并且有许多不同类型的节点(元素、属性、文本、名称空间、处理指令、注释、文档等)。
包含文本内容的 XML 元素节点会有一个名为 #text 的子节点。这是由 XML 规范规定的。因此,在您的示例中,grandchild1、grandchild2、grandchild3 和 grandchild4 都各有一个 #text 子节点,例如:
文档
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ 元素:"Parent"
     |
     |_ 元素:"child1"
     |   |
     |   |_ 元素:"grandchild1"
     |   |   |
     |   |   |_ #text "someinfo1"
     |   |
     |   |_ 元素:"grandchild2"
     |       |
     |       |_ #text "someinfo2"
     |
     |_ 元素:"child2"
         |
         |_ 元素:"grandchild3"
         |   |
         |   |_ #text "someinfo3"
         |
         |_ 元素:"grandchild4"
             |
             |_ #text "someinfo4"
元素之间的空白(即使只是换行符)也会被存储为额外的文本节点(因为您将 preserveWhiteSpace 选项设置为了 true),例如:
文档
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ #text "\r\n"
 |
 |_ 元素:"Parent"
     |
     |_ #text "\r\n "
     |
     |_ 元素:"child1"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ 元素:"grandchild1"
     |   |   |
     |   |   |_ #text "someinfo1"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ 元素:"grandchild2"
     |       |
     |       |_ #text "someinfo2"
     |
     |_ #text "\r\n "
     |
     |_ 元素:"child2"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ 元素:"grandchild3"
     |   |   |
     |   |   |_ #text "someinfo3"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ 元素:"grandchild4"
     |   |   |
     |   |   |_ #text "someinfo4"
     |   |
     |   |_ #text "\r\n "
     |
     |_ #text "\r\n"
XPath 会搜索所有节点,但 * 通配符只匹配元素节点。然而,您是在手动遍历已找到元素的子节点,因此会遇到 #text 节点。要实现您想要的效果,请关闭空白保留,以去除不需要的空白文本节点,然后只处理元素类型的子节点,例如:
// Lists every element matched by "//Parent/*" and, beneath each one, the
// names of its direct element children (two levels in total).
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long length = 0;
// create pXMLDom as needed ...
pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
// Whitespace preservation is switched off so that whitespace-only text
// nodes between elements do not show up while walking the children.
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <--
BSTR parentNode = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(parentNode, &pNodes);
SysFreeString(parentNode);
if (SUCCEEDED(hRes))
{
    pNodes->get_length(&length);
    for (int i = 0; i < length; ++i)
    {
        hRes = pNodes->get_item(i, &pNode);
        if (SUCCEEDED(hRes))
        {
            // First level: print the matched element's own name.
            BSTR name = NULL;
            hRes = pNode->get_nodeName(&name);
            if (SUCCEEDED(hRes))
            {
                printf("Node (%d), <%S>:\n", i, name);
                SysFreeString(name);
            }

            // Second level: walk the sibling chain of direct children.
            // Only S_OK hands back a child pointer that can be used.
            IXMLDOMNode *pChild = NULL;
            hRes = pNode->get_firstChild(&pChild);
            if (hRes == S_OK)
            {
                do
                {
                    DOMNodeType type;
                    hRes = pChild->get_nodeType(&type);
                    if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
                    {
                        // Ask the child (not its parent) for the name;
                        // otherwise every line repeats the parent's name.
                        BSTR childName = NULL;
                        hRes = pChild->get_nodeName(&childName);
                        if (SUCCEEDED(hRes))
                        {
                            printf(" %S\n", childName);
                            SysFreeString(childName);
                        }
                    }

                    // Fetch the next sibling before releasing the current
                    // child; on the last child the loop exits and the
                    // release after the loop drops the final reference.
                    IXMLDOMNode *pSibling = NULL;
                    hRes = pChild->get_nextSibling(&pSibling);
                    if (hRes != S_OK) break;
                    pChild->Release();
                    pChild = pSibling;
                }
                while (true);
                pChild->Release();
            }
            pNode->Release();
        }
    }
    pNodes->Release();
}
...
pXMLDom->Release();
如果您需要处理超过 2 层的嵌套,则应该使用递归函数,例如:
// Prints the name of pNode, then recursively does the same for each of its
// element children (depth-first). Children of any other node type are
// skipped. pNode is not released here; the caller keeps its reference.
void processNode(IXMLDOMNode *pNode)
{
    // Local result code; the function previously used hRes without
    // declaring it.
    HRESULT hRes;

    BSTR name = NULL;
    hRes = pNode->get_nodeName(&name);
    if (SUCCEEDED(hRes))
    {
        printf("%S\n", name);
        SysFreeString(name);
    }

    // Only S_OK hands back a child pointer that can be walked.
    IXMLDOMNode *pChild = NULL;
    hRes = pNode->get_firstChild(&pChild);
    if (hRes == S_OK)
    {
        do
        {
            DOMNodeType type;
            hRes = pChild->get_nodeType(&type);
            if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
            {
                processNode(pChild);
            }

            // Fetch the next sibling before releasing the current child;
            // on the last child the loop exits and the release after the
            // loop drops the final reference.
            IXMLDOMNode *pSibling = NULL;
            hRes = pChild->get_nextSibling(&pSibling);
            if (hRes != S_OK) break;
            pChild->Release();
            pChild = pSibling;
        }
        while (true);
        pChild->Release();
    }
}
...
// Driver for the recursive walk: selects the nodes matching "//Parent/*"
// and hands each one to processNode(), releasing it afterwards.
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long nodeCount = 0;
// create pXMLDom as needed ...
pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <--
BSTR xpathQuery = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(xpathQuery, &pNodes);
SysFreeString(xpathQuery);
if (SUCCEEDED(hRes))
{
    pNodes->get_length(&nodeCount);
    int index = 0;
    while (index < nodeCount)
    {
        hRes = pNodes->get_item(index, &pNode);
        ++index;
        // Skip entries that could not be fetched; nothing to release then.
        if (FAILED(hRes))
            continue;
        processNode(pNode);
        pNode->Release();
    }
    pNodes->Release();
}
...
pXMLDom->Release();