还可以将HTMLP 解析器与 THtmlFormatter 和OXml XPath 解析结合使用
uses
// Htmlp
HtmlParser,
DomCore,
Formatter,
// OXml
OXmlPDOM,
OXmlUtils;
function HtmlToXHtml(const Html: string): string;
var
HtmlParser: THtmlParser;
HtmlDoc: TDocument;
Formatter: THtmlFormatter;
begin
HtmlParser := THtmlParser.Create;
try
HtmlDoc := HtmlParser.ParseString(Html);
try
Formatter := THtmlFormatter.Create;
try
Result := Formatter.GetText(HtmlDoc);
finally
Formatter.Free;
end;
finally
HtmlDoc.Free;
end;
finally
HtmlParser.Free;
end;
end;
type
TCard = record
Store: string;
Quality: string;
Quantity: string;
Price: string;
end;
TCards = array of TCard;
function ParseCard(const Node: PXMLNode): TCard;
const
StoreXPath = 'div[1]/ax';
QualityXPath = 'div[3]';
QuantityXPath = 'div[4]';
PriceXPath = 'div[5]';
var
CurrentNode: PXMLNode;
begin
Result := Default(TCard);
if Node.SelectNode(StoreXPath, CurrentNode) then
Result.Store := CurrentNode.Text;
if Node.SelectNode(QualityXPath, CurrentNode) then
Result.Quality := CurrentNode.Text;
if Node.SelectNode(QuantityXPath, CurrentNode) then
Result.Quantity := CurrentNode.Text;
if Node.SelectNode(PriceXPath, CurrentNode) then
Result.Price := CurrentNode.Text;
end;
procedure THTMLForm.OpenButtonClick(Sender: TObject);
var
Html: string;
Xml: string;
FXmlDocument: IXMLDocument;
QueryNode: PXMLNode;
XPath: string;
NodeList: IXMLNodeList;
i: Integer;
Card: TCard;
begin
Html := System.IOUtils.TFile.ReadAllText(FileNameEdit.Text, TEncoding.UTF8);
Xml := HtmlToXHtml(Html);
Memo.Lines.Text := Xml;
// Parse with XPath
FXMLDocument := CreateXMLDoc;
FXMLDocument.WriterSettings.IndentType := itIndent;
if not FXMLDocument.LoadFromXML(Xml) then
raise Exception.Create('Source document is not valid');
QueryNode := FXmlDocument.DocumentElement;
XPath := '//div[@class="row pricetableline"]';
NodeList := QueryNode.SelectNodes(XPath);
for i := 0 to NodeList.Count -1 do
begin
Card := ParseCard(NodeList[i]);
Memo.Lines.Text := Memo.Lines.Text + sLineBreak +
Format('%0:s %1:s %2:s %3:s', [Card.Store, Card.Quality, Card.Quantity, Card.Price]);
end;
Memo.SelStart := 0;
Memo.SelLength := 0;
end;