2

是否有任何 Delphi 库允许使用 XPath 或 XQuery 解析 HTML。类似于 PHP 默认内置的。例如 FLWOR..

4

2 回答 2

2

我不知道任何库,但这是我的 XE2 辅助类单元,它带有我用来“执行”XPath 的 SelectNode(s) 函数。RemoveNameSpaces 和 XMLToTree 函数不适用,但谁知道它们什么时候会派上用场 ;-)

unit uXMLHelper;

interface

Uses
   System.SysUtils, System.Classes, System.TypInfo, Vcl.ComCtrls,
   XML.XMLDoc, XMLDom, XML.XMLIntf;

type
   TXMLHelper = class
   public
      class function SelectNode(StartNode: IXmlNode; const NodeXPath: WideString): IXmlNode;
      class function SelectNodes(StartNode: IXmlNode; const NodeXPath: WideString): IXMLNodeList;
      class function RemoveNameSpaces(XMLString: String): String;
      class procedure XMLToTree(XmlDoc: IXMLDocument; TV: TTreeView);
   end;

function ConcatNodeNames(NodeNames: Array of String): String;
// Concatenates the strings in NodeNames to /name1/name2/.../namex

implementation

Uses
   MSXML2_TLB; // IXMLDOMdocument

class function TXMLHelper.RemoveNameSpaces(XMLString: String): String;
const
  // An XSLT script for removing the namespaces from any document. It will remove the prefix as well.
  // From http://wiki.tei-c.org/index.php/Remove-Namespaces.xsl
  cRemoveNSTransform =
    '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">' +
    '<xsl:output method="xml" indent="no"/>' +

    '<xsl:template match="/|comment()|processing-instruction()">' +
    '    <xsl:copy>' +
    '      <xsl:apply-templates/>' +
    '    </xsl:copy>' +
    '</xsl:template>' +

    '<xsl:template match="*">' +
    '    <xsl:element name="{local-name()}">' +
    '      <xsl:apply-templates select="@*|node()"/>' +
    '    </xsl:element>' +
    '</xsl:template>' +

    '<xsl:template match="@*">' +
    '    <xsl:attribute name="{local-name()}">' +
    '      <xsl:value-of select="."/>' +
    '    </xsl:attribute>' +
    '</xsl:template>' +

    '</xsl:stylesheet>';

var
  Doc, XSL: IXMLDOMdocument2;
  Res     : string;
  p       : Integer;
begin
  Doc := ComsDOMDocument.Create;
  Doc.ASync := false;
  XSL := ComsDOMDocument.Create;
  XSL.ASync := false;
  try
     Doc.loadXML(XMLString);
     XSL.loadXML(cRemoveNSTransform);
     Res := Doc.TransFormNode(XSL);
     // This now contains the original text with a <?xml version="1.0" encoding="UTF-16"?> prepended; remove it:
     p := Pos('?>',Res);
     result := Copy(Res,P+2);
  except
     on E:Exception do Result := E.Message;
  end;
end; { RemoveNameSpaces }


class function TXMLHelper.SelectNode(StartNode: IXmlNode; const NodeXPath: WideString): IXmlNode;
// Geeft de node in path NodeXPath onder StartNode
// http://delphi.about.com/od/delphi-tips-2011/qt/select-single-node-ixmlnode-txmlnode-xpath-delphi-xmldom.htm
var
  intfSelect    : IDomNodeSelect;
  dnResult      : IDomNode;
  intfDocAccess : IXmlDocumentAccess;
  XMLDoc        : TXmlDocument;
begin
  Result := nil;
  if not Assigned(StartNode)
    or not Supports(StartNode.DOMNode, IDomNodeSelect, intfSelect) then
    Exit;

  dnResult := intfSelect.selectNode(NodeXPath);
  if Assigned(dnResult) then
  begin
    if Supports(StartNode.OwnerDocument, IXmlDocumentAccess, intfDocAccess) then
      XMLDoc := intfDocAccess.DocumentObject
    else
      XMLDoc := nil;
    Result := TXmlNode.Create(dnResult, nil, XMLDoc);
  end;
end; { SelectNode }


class function TXMLHelper.SelectNodes(StartNode: IXmlNode; const NodeXPath: WideString): IXMLNodeList;
(* Returns a list of all nodes in path NodeXPath below StartNode.
 * NodeXPath is relative; e.g. with:
 *
 * <Envelope>                            <= DocumentElement root
 *    <Body>
 *       <FindItemResponse>
 *          <ResponseMessages>
 *             <FindItemResponseMessage>
 *                <RootFolder>           <= IRootNode
 *                   <Items>
 *                      <CalendarItem>
 *
 * these are identical:
 *   SelectNodes(DocumentElement,'Envelope/Body/FindItemResponse/ResponseMessages/FindItemResponseMessage/RootFolder/Items/CalendarItem')
 *   SelectNodes(DocumentElement,'/Envelope/Body/FindItemResponse/ResponseMessages/FindItemResponseMessage/RootFolder/Items/CalendarItem')
 *   SelectNodes(IRootNode,'Items/CalendarItem')
 *
 * http://delphi.about.com/od/vclusing/qt/delphi-select-xml-nodes-ixmlnodelist-selectnodes-xpath-xmldom.htm
 *)
var
  intfSelect    : IDomNodeSelect;
  intfAccess    : IXmlNodeAccess;
  dnlResult     : IDomNodeList;
  intfDocAccess : IXmlDocumentAccess;
  XMLDoc        : TXmlDocument;
  i             : Integer;
  dn            : IDomNode;
begin
  Result := nil;
  if not Assigned(StartNode)
    or not Supports(StartNode, IXmlNodeAccess, intfAccess)
    or not Supports(StartNode.DOMNode, IDomNodeSelect, intfSelect) then
    Exit;

  dnlResult := intfSelect.selectNodes(NodeXPath);
  if Assigned(dnlResult) then
  begin
    // Since the XPath implementation of SelectNodes returns an IDomNodeList and we need an IXMLNodeList
    // we need to "wrap" a call to IDomNodeSelect.selectNodes into a function that will result in IXMLNodeList.
    Result := TXmlNodeList.Create(intfAccess.GetNodeObject, '', nil);
    if Supports(StartNode.OwnerDocument, IXmlDocumentAccess, intfDocAccess) then
      XMLDoc := intfDocAccess.DocumentObject
    else
      XMLDoc := nil;

    for i := 0 to dnlResult.length - 1 do
    begin
      dn := dnlResult.item[i];
      Result.Add(TXmlNode.Create(dn, nil, XMLDoc));
    end;
  end;
end; { SelectNodes }


procedure DomToTree(XmlNode: IXMLNode; TV: TTreeView; TreeNode: TTreeNode);
var
   I: Integer;
   NewTreeNode: TTreeNode;
   NodeText: string;
   AttrNode: IXMLNode;
begin
  // Skip text nodes and other special cases
  if XmlNode.NodeType <> ntElement then Exit;
  try
     // Add the node itself
     NodeText := XmlNode.NodeName;
     if XmlNode.IsTextElement then
       NodeText := NodeText + ' = ' + XmlNode.NodeValue;
     NewTreeNode := TV.Items.AddChild(TreeNode, NodeText);
     // Add attributes
     for I := 0 to xmlNode.AttributeNodes.Count - 1 do
     begin
       AttrNode := xmlNode.AttributeNodes.Nodes[I];
       TV.Items.AddChild(NewTreeNode,
         '[' + AttrNode.NodeName + ' = "' + AttrNode.Text + '"]');
     end;
     // add each child node
     if XmlNode.HasChildNodes then
       for I := 0 to xmlNode.ChildNodes.Count - 1 do
         DomToTree (xmlNode.ChildNodes.Nodes [I], TV, NewTreeNode);
  except
     on E:Exception do
        TV.Items.AddChild(TreeNode, E.Message);
  end;
end; { DomToTree }


class procedure TXMLHelper.XMLToTree(XmlDoc: IXMLDocument; TV: TTreeView);
begin
   XMlDoc.Active := true;
   TV.Items.Clear;
   DomToTree(XMLDoc.DocumentElement,TV,nil);
end; { XMLToTree }


function ConcatNodeNames(NodeNames: Array of String): String;
var Res,Nam : String;
begin
   for Nam in NodeNames do Res := Res + '/' + Nam;
   Result := Res;
end;

end.
于 2012-11-30T07:31:16.097 回答
1

可能OmniXML会有用(参见 XPath 演示)。另一个 用于 HTML 解析库包括 XPath 2 支持;

于 2012-11-30T08:14:00.800 回答