1

我正在尝试基于 MSHTML 的 IHTMLDocument2 接口编写一个简单的 HTML 解析器。不幸的是,尝试将文档切换到设计(编辑)模式后,readyState 永远不会变为 'complete',因此应用程序挂起。

{$APPTYPE CONSOLE}

{ Question snippet: feeds the text of `doc` into a freshly created MSHTML
  document so that its body can be processed, and returns a new TStringList.
  Parts of the routine are elided ("...") in the original post.

  NOTE(review): as posted, this routine hangs in the first wait loop below. }
function ParseHtml(doc: TStringList): TStringList;
var
  iHtml: IHTMLDocument2;
  v: Variant;
  msg: tagMSG;
begin
  // Create an MSHTML document object through COM.
  iHtml := CreateComObject(CLASS_HTMLDocument) as IHTMLDocument2;
  Result := TStringList.Create;
  try
    try
      iHtml.designMode := 'on';
      // NOTE(review): nothing has been loaded or written into the document
      // yet. According to the accepted answer, readyState then keeps its
      // initial value 'uninitialized', so this condition never becomes false.
      // NOTE(review): PM_NOREMOVE appears to only peek at the queue; no
      // message is removed or dispatched here - confirm whether a real
      // message pump was intended.
      while iHtml.readyState <> 'complete' do
        PeekMessage(msg, 0, 0, 0, PM_NOREMOVE);
//    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
//    above loop never finishes  
      // Wrap the HTML text in a one-element variant array and pass its
      // underlying SAFEARRAY to IHTMLDocument2.write.
      v := VarArrayCreate([0, 0], varVariant);
      v[0] := doc.Text;
      iHtml.write( PSafeArray(TVarData(v).VArray) );
      iHtml.designMode := 'off';
      // Same polling pattern as above; the document is written but never
      // closed in the visible code.
      while iHtml.readyState <> 'complete' do
        PeekMessage(msg, 0, 0, 0, PM_NOREMOVE);
      // processing iHtml.body
      ...
    except
      ...
    end;
  finally
    ...
  end;
  ...
end;

// Program entry point of the question snippet: initializes COM on the main
// thread, runs the elided program body, then uninitializes COM.
begin
  CoInitialize(nil);
  ...
  // NOTE(review): any interface references still held when this runs would
  // outlive COM; the elided body is not visible, so this cannot be confirmed.
  CoUninitialize;  
end.

只是好奇:根据官方文档,IHTMLDocument2 接口的 readyState 属性应该会变为 'complete',为什么它始终没有被设置为该值?

4

2 回答 2

4

readyState 属性没有被设置为 'complete',是因为您尚未让 IHTMLDocument2 对象实际加载文档。您必须加载一个文档,哪怕是一个空白文档(即 'about:blank' 这个 URL),才能影响 readyState 属性,否则它将保持其初始值 'uninitialized'。

于 2011-12-29T01:54:14.497 回答
3

无需将 designMode 设置为 on,也无需轮询 readyState。只要对文档执行 write 并随后 close,readyState 就会变为 "complete":

program Test;
{$APPTYPE CONSOLE}
uses
  SysUtils,
  MSHTML,
  ActiveX,
  ComObj;

{ Writes the markup in S into Document and closes the document.

  Document - the MSHTML document that receives the markup.
  S        - the HTML source text to write. }
procedure DocumentFromString(Document: IHTMLDocument2; const S: WideString);
var
  Payload: OleVariant;
  Cells: PSafeArray;
begin
  // IHTMLDocument2.write expects a SAFEARRAY of variants; build a
  // one-element variant array holding the markup and take its raw pointer.
  Payload := VarArrayOf([S]);
  Cells := PSafeArray(TVarData(Payload).VArray);
  Document.write(Cells);
  // Close the document after writing, as the original answer does.
  Document.close;
end;

var
  Document: IHTMLDocument2;
  Elements: IHTMLElementCollection;
  Element: IHTMLElement;
// Demo entry point: builds an in-memory HTML document from a string, prints
// its readyState, then prints the inner text of the first <b> element.
begin
  // Fail loudly if COM cannot be initialized instead of continuing blindly.
  OleCheck(CoInitialize(nil));
  try
    Document := CreateComObject(CLASS_HTMLDocument) as IHTMLDocument2;
    DocumentFromString(Document, '<b>Hello</b>');
    Writeln(string(Document.readyState));

    // process the Document here
    Elements := Document.all.tags('b') as IHTMLElementCollection;
    Element := Elements.item(0, '') as IHTMLElement;
    Writeln(string(Element.innerText));
    Readln;
  finally
    // These are global interface variables. Without explicit release they
    // would only be released at program finalization, i.e. after
    // CoUninitialize has already torn COM down. Drop them first.
    Element := nil;
    Elements := nil;
    Document := nil;
    CoUninitialize;
  end;
end.
于 2012-02-23T18:05:46.083 回答