c++ - Xerces C++ - 加载、读取和保存，替代方案？

Question

我正在寻找一个教程来加载一个 XML 文件，阅读它，改变它，最后用 C++ 保存它。我正在使用 Linux Ubuntu 并尝试使用 Xerces。用谷歌和很多时间，我只能加载一个 XML 文件：

#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/sax/HandlerBase.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>

#include <iostream>

using namespace std;
using namespace xercesc;

int main (int argc, char* args[]) {

    try {
        XMLPlatformUtils::Initialize();
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        cout << "Error during initialization! :\n"
             << message << "\n";
        XMLString::release(&message);
        return 1;
    }

    XercesDOMParser* parser = new XercesDOMParser();
    parser->setValidationScheme(XercesDOMParser::Val_Always);
    parser->setDoNamespaces(true);    // optional

    ErrorHandler* errHandler = (ErrorHandler*) new HandlerBase();
    parser->setErrorHandler(errHandler);

    const char* xmlFile = "demo.xml";

    try {
        parser->parse(xmlFile);
    }
    catch (const XMLException& toCatch) {
        char* message = XMLString::transcode(toCatch.getMessage());
        cout << "Exception message is: \n"
             << message << "\n";
        XMLString::release(&message);
        return -1;
    }
    catch (const DOMException& toCatch) {
        char* message = XMLString::transcode(toCatch.msg);
        cout << "Exception message is: \n"
             << message << "\n";
        XMLString::release(&message);
        return -1;
    }
    catch (...) {
        cout << "Unexpected Exception \n" ;
        return -1;
    }

    DOMNode* docRootNode;
//  DOMNode* aNode;
    DOMDocument* doc;
    doc = parser->getDocument();
    docRootNode = doc->getDocumentElement();
    cout << docRootNode->getAttributes() << endl; //returns Hex



    delete parser;
    delete errHandler;
    return 0;
}

如何读取、操作 XML 文件并最终保存它？有替代库吗？（我尝试过 tinyxml，但在编译时它的文件报错。）

score 3 · Accepted Answer

下面是保存 Xerces DOMDocument 的示例。保存文档使用的是以下方法：

DOMLSSerializer::write(const DOMNode* nodeToWrite, DOMLSOutput* const destination)

参见代码示例：http://xerces.apache.org/xerces-c/domprint-2.html

    DOMLSSerializer * theSerializer = impl->createLSSerializer();
    DOMPrintFilter   *myFilter = new DOMPrintFilter(DOMNodeFilter::SHOW_ELEMENT   |
                                      DOMNodeFilter::SHOW_ATTRIBUTE |
                                      DOMNodeFilter::SHOW_DOCUMENT_TYPE);
    theSerializer->setFilter(myFilter);

    DOMLSOutput       *theOutputDesc = ((DOMImplementationLS*)impl)->createLSOutput();
    XMLFormatTarget *myFormTarget  = new LocalFileFormatTarget(XMLString::transcode("C:\\target.xml"));
    theOutputDesc->setByteStream(myFormTarget);
    theOutputDesc->setEncoding(XMLString::transcode("ISO-8859-1"));

    theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMXMLDeclaration, true);

    theSerializer->getDomConfig()->setParameter(XMLUni::fgDOMWRTFormatPrettyPrint, true);
    theSerializer->write(doc, theOutputDesc);

    myFormTarget->flush();

    delete myFormTarget;

    theOutputDesc->release();
    theSerializer->release();

还有一个过滤器实现的例子。您可以在 DOMPrint 示例中找到它。

// Serializer filter from the Xerces-C++ DOMPrint sample. The filter records
// a ShowType mask at construction, reports it through getWhatToShow(), and
// decides per node in acceptNode() what the serializer should do with it.
class DOMPrintFilter : public DOMLSSerializerFilter {
public:

    // whatToShow: mask of node types this filter wants to be consulted about;
    // defaults to DOMNodeFilter::SHOW_ALL.
    DOMPrintFilter(ShowType whatToShow = DOMNodeFilter::SHOW_ALL);
    ~DOMPrintFilter() {}

    // Returns the filter action for the given node.
    virtual FilterAction acceptNode(const DOMNode*) const;

    // Returns the mask that was passed to the constructor.
    virtual ShowType getWhatToShow() const
    {
        return fWhatToShow;
    }

private:
    ShowType fWhatToShow;

    // Copying is disabled: the copy constructor and the assignment operator
    // are declared private and intentionally left unimplemented.
    DOMPrintFilter(const DOMPrintFilter&);
    DOMPrintFilter & operator = (const DOMPrintFilter&);
};
#include "DOMPrintFilter.hpp"
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/XMLString.hpp>

// Null-terminated XMLCh spelling of the element name "person".
static const XMLCh element_person[] = {
    chLatin_p, chLatin_e, chLatin_r, chLatin_s, chLatin_o, chLatin_n,
    chNull
};

// Null-terminated XMLCh spelling of the element name "link".
static const XMLCh element_link[] = {
    chLatin_l, chLatin_i, chLatin_n, chLatin_k,
    chNull
};

// Stores the node-type mask that getWhatToShow() later reports.
DOMPrintFilter::DOMPrintFilter(ShowType whatToShow)
    : fWhatToShow(whatToShow)
{
}

// Decides what the serializer should do with `node`.
//
// Returns FILTER_ACCEPT for any node whose type is not in the getWhatToShow()
// mask. For types that are in the mask:
//   - element named "person"                 -> FILTER_SKIP
//   - element named "link"                   -> FILTER_REJECT
//   - any other element                      -> FILTER_ACCEPT
//   - comment, text, document type, document -> FILTER_REJECT
//   - every other node type                  -> FILTER_ACCEPT
DOMNodeFilter::FilterAction DOMPrintFilter::
acceptNode(const DOMNode* node) const
{
    const int kind = node->getNodeType();

    // The DOMLSSerializer is expected to consult getWhatToShow() before it
    // calls acceptNode(), so only node types in the mask should arrive here.
    // The DOMLS specification does not say whether a filter must re-check
    // this itself; this implementation does, and accepts anything that is
    // outside the mask. The check could be dropped for performance.
    const bool inMask = (getWhatToShow() & (1 << (kind - 1))) != 0;
    if (!inMask)
        return DOMNodeFilter::FILTER_ACCEPT;

    switch (kind)
    {
    case DOMNode::ELEMENT_NODE:
        {
            const XMLCh* const tagName = node->getNodeName();

            // "person" elements are skipped.
            if (XMLString::compareString(tagName, element_person) == 0)
                return DOMNodeFilter::FILTER_SKIP;

            // "link" elements are rejected.
            if (XMLString::compareString(tagName, element_link) == 0)
                return DOMNodeFilter::FILTER_REJECT;

            // Every other element is accepted.
            return DOMNodeFilter::FILTER_ACCEPT;
        }

    // Per the original sample's notes, none of these rejections has a visible
    // effect: comment and text nodes are excluded by the what-to-show mask,
    // and DOMLSSerializerImpl (XercesC's default DOMLSSerializer) does not
    // pass DocumentType nodes to the filter; the document node is noted as
    // being in the same situation.
    case DOMNode::COMMENT_NODE:
    case DOMNode::TEXT_NODE:
    case DOMNode::DOCUMENT_TYPE_NODE:
    case DOMNode::DOCUMENT_NODE:
        return DOMNodeFilter::FILTER_REJECT;

    default:
        return DOMNodeFilter::FILTER_ACCEPT;
    }
}

score 1 · Accepted Answer

LibXML++ 似乎是 C++ 的最佳选择。就功能而言，它非常完整，包括 XPath、字符集转换（由 Glibmm 提供）以及您对 XML 库所期望的一切。它使用传统的 DOM 和 SAX API——这算是优点还是缺点，取决于您问的是谁。一个可能的问题是该库的依赖非常重（因为使用了 Glibmm）。尽管如此，它似乎是唯一适合 C++ 的 XML 库。

http://libxmlplusplus.sourceforge.net/docs/manual/html/index.html

TinyXML 不根据规范解析 XML，因此我建议不要使用它，即使它适用于简单文档。

score 0 · Accepted Answer

Xerces 附带的示例 CreateDOMDocument 向您展示了如何将节点等添加到 DOM 文档中。到目前为止，您的代码创建了文档，因此您需要调整第二个示例中的代码以添加节点、属性等。

另外，请注意，当您说：

 cout << docRootNode->getAttributes() << endl;

getAttributes 函数返回一个属性集合——您需要对该集合应用进一步的 Xerces 函数以获取包含的信息。

请注意，如果您想提取 XML 文件中的数据子集，使用事件驱动的 SAX 解析器（Xerces 包括其中之一）可能比构建和遍历完整的 DOM 文档更容易。

score 0 · Accepted Answer

如果您想查看如何使用 Xerces-C++ 执行此操作的示例，请查看以下代码：

http://libprf1.tigris.org/files/documents/1338/13256/libprf1-0.1R3.tar.gz

我很久以前把它作为一个大学项目写的。它很可能基于过时的 Xerces-C++ 版本，但我认为 API 的变化不至于大到造成问题。它至少能给您一个思路。

score 0 · Accepted Answer

以下链接是一个很好的教程，展示了如何读取 XML 文件并使用 XERCES 解析它的基础知识。

http://www.yolinux.com/TUTORIALS/XML-Xerces-C.html

完成后，XERCES API 应该足以进行进一步的操作：

http://xerces.apache.org/xerces-c/apiDocs-2/classes.html

要编写（序列化）文档，请使用类 DOMWriter http://xerces.apache.org/xerces-c/apiDocs-2/classDOMWriter.html#a0ddcef5fed6b49e03e53334fedca4b2

c++ - Xerces C++ - 加载、读取和保存，替代方案？

5 回答 5

Related

Reference