7

我正在寻找一个基本示例,说明如何配置 XQilla,以便对保存在 std::string 中的 XML 执行 XPath 查询。XQilla 网站上的示例似乎都是针对文件或 URL 执行 XQuery。

4

1 回答 1

6

这是一个很老的问题,但我自己也一直在寻找答案,却始终没有找到。现在我已经解决了它,觉得应该把代码分享出来。

--编辑:如果以下代码需要许可证,可以按 MIT、BSD 或其他任意许可证使用……

XPathExtracter.h

#ifndef JOPPLI_XPATHEXTRACTER_H
#define JOPPLI_XPATHEXTRACTER_H

#include <string>
#include <vector>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>

namespace Joppli
{
  using namespace xercesc;

  /**
   * Runs XPath 2 queries (through XQilla on top of Xerces-C) against XML
   * that is held in memory as a std::string.
   *
   * The constructor initialises the XQilla platform when the first instance
   * is created and the destructor terminates it when the last instance is
   * destroyed; the static `count` member tracks the number of instances.
   *
   * NOTE(review): per the Xerces-C DOMLSParser documentation the document
   * returned by getDocument() is owned by the parser, so it presumably must
   * not be used after this object is destroyed - confirm.
   */
  class XPathExtracter
  {
    public:
      /// String values collected from the nodes matched by a query.
      typedef std::vector<std::string> Result;

      XPathExtracter();
     ~XPathExtracter();

      // Not copyable: the destructor releases `parser` and decrements
      // `count`, so an implicit copy would release the same parser twice
      // and unbalance the platform initialise/terminate pairing.
      XPathExtracter(const XPathExtracter &) = delete;
      XPathExtracter & operator=(const XPathExtracter &) = delete;

      /**
       * Parses `xml` and returns the resulting DOM document.
       */
      DOMDocument * getDocument(const std::string & xml);

      /**
       * Evaluates the XPath expression `query` against `document` and
       * appends the string value of every match to `*result`.
       */
      void extract(const std::string & query, DOMDocument * document,
                   Result * result);

    protected:
      DOMLSParser * parser;                      // created in the constructor, released in the destructor
      DOMImplementation * xqillaImplementation;  // the "XPath2 3.0" DOM implementation

    private:
      static int count;                          // number of live instances
  };
}

#endif

XPathExtracter.cpp

#include "XPathExtracter.h"
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xqilla/xqilla-dom3.hpp>

namespace Joppli
{
  /**
   * Brings up the XQilla/Xerces-C platform when this is the first instance,
   * then obtains the XQilla DOM implementation and a synchronous LS parser
   * from it.
   */
  XPathExtracter::XPathExtracter()
  {
    // Only the very first instance has to initialise the platform; the
    // counter is bumped before the call, exactly as a post-increment would.
    const bool firstInstance = (count == 0);
    ++count;
    if(firstInstance)
    {
      XQillaPlatformUtils::initialize();
    }

    // Look the implementation up by its feature string
    xqillaImplementation =
      DOMImplementationRegistry::getDOMImplementation(X("XPath2 3.0"));

    // The parser must be created after the implementation it comes from
    parser = xqillaImplementation->createLSParser(
      DOMImplementationLS::MODE_SYNCHRONOUS, 0);
  }

  /**
   * Releases the parser and, when this was the last remaining instance,
   * shuts the XQilla/Xerces-C platform down again.
   */
  XPathExtracter::~XPathExtracter()
  {
    // The parser has to go before the platform may be terminated
    parser->release();

    --count;
    if(count == 0)
    {
      XQillaPlatformUtils::terminate();
    }
  }

  /**
   * Parses XML held in memory into a DOM document.
   *
   * @param xml  The XML text. It is converted to XMLCh with
   *             XMLString::transcode before being handed to the parser.
   *             NOTE(review): that overload transcodes from the local code
   *             page, so non-ASCII input may need the byte-stream
   *             alternative below - confirm against the expected encoding.
   * @return The document produced by the parser.
   *         NOTE(review): per the Xerces-C documentation the parser keeps
   *         ownership of the returned document - confirm.
   */
  DOMDocument * XPathExtracter::getDocument(const std::string & xml)
  {
    /* 
    // An alternative to simply setting the string input, as shown below

    MemBufInputSource * memBuf = new MemBufInputSource(
      (const XMLByte *) xml.c_str(),
      xml.size(),
      "xml (in memory)");

    DOMLSInput * input = this->xqillaImplementation->createLSInput();
    input->setByteStream(memBuf);

    DOMDocument * document = parser->parse(input);

    input->release();
    delete memBuf;

    return document;
    */

    DOMLSInput * input = this->xqillaImplementation->createLSInput();
    XMLCh * stringData = XMLString::transcode(xml.c_str());
    input->setStringData(stringData);
    DOMDocument * document = parser->parse(input);

    input->release();

    // Buffers handed out by XMLString::transcode come from the Xerces-C
    // memory manager and must be returned through XMLString::release;
    // a plain `delete` mismatches the allocator.
    XMLString::release(&stringData);

    return document;
  }

  /**
   * Evaluates an XPath 2 expression against a document and collects the
   * string value of every match.
   *
   * @param query     The XPath expression.
   * @param document  The document used both to compile the expression and
   *                  as the context node for evaluation.
   * @param result    Receives one appended entry per match; existing
   *                  entries are left untouched.
   */
  void XPathExtracter::extract(const std::string & query,
                               DOMDocument * document, Result * result)
  {
    // Parse an XPath 2 expression
    AutoRelease<DOMXPathExpression> expression(
      document->createExpression(X(query.c_str()), nullptr));

    // Execute the query
    AutoRelease<DOMXPathResult> xQillaResult(
      expression->evaluate(
        document,
        DOMXPathResult::ITERATOR_RESULT_TYPE, nullptr));

    // Iterate over the results
    while(xQillaResult->iterateNext())
    {
      char * content = XMLString::transcode(
        xQillaResult->getStringValue());

      // push_back copies the text into a std::string
      result->push_back(content);

      // Buffers handed out by XMLString::transcode must be returned
      // through XMLString::release rather than freed with `delete`.
      XMLString::release(&content);
    }
  }

  // Instance counter: incremented by the constructor (which initialises the
  // XQilla platform when it was 0) and decremented by the destructor (which
  // terminates the platform when it drops back to 0).
  int XPathExtracter::count = 0;
}

主文件(main.cpp)

#include <iostream>
#include "XPathExtracter.h"

/**
 * Example driver: parses an XML/HTML string and prints the string values
 * matched by a few XPath queries, one per line.
 *
 * @return 0 on success, 1 when no document could be obtained from the body.
 */
int main()
{
  // Automatic storage instead of new/delete: nothing here needs to outlive
  // main, and cleanup no longer depends on reaching the delete statements.
  std::string body;

  // ... (logic to fill the body string with an xml/html value)

  // Extract
  using namespace xercesc;

  Joppli::XPathExtracter driver;
  Joppli::XPathExtracter::Result results;

  DOMDocument * document = driver.getDocument(body);
  if(document == nullptr)
  {
    // extract() dereferences the document, so bail out instead of crashing
    std::cerr << "failed to parse the XML body" << std::endl;
    return 1;
  }

  driver.extract("html/head//title", document, &results);
  driver.extract("html/head//meta//@name", document, &results);
  driver.extract("html//body//a[@id=\"link_mx_es\"]", document, &results);

  for(const auto & result : results)
    std::cout << result << '\n';

  return 0;
}

我通过 valgrind 运行了这段代码,它没有显示任何泄漏。

于 2014-12-20T22:39:41.917 回答