// 获取HTML页面的文本内容 (Get the text content of an HTML page)
// Very easy way to parse text from HTML using IHTMLDocument2.
uses
mshtml, ActiveX, ComObj;
{ Lets the user pick an HTML file with OpenDialog1, writes the file's markup
  into a freshly created MSHTML document object and shows the plain text of
  the document body in Memo1.

  Sender is the control that fired the event; it is not used.
  Does nothing when the open dialog is cancelled. Exceptions raised while
  loading the file or creating the COM object propagate to the caller. }
procedure TForm1.Button1Click(Sender: TObject);
var
  Doc: IHTMLDocument2;
  Body: IHTMLElement;
  Source: TStringList;
  Markup: Variant;
begin
  if not OpenDialog1.Execute then
    Exit;

  Source := TStringList.Create;
  try
    Source.LoadFromFile(OpenDialog1.FileName);

    Doc := CreateComObject(CLASS_HTMLDocument) as IHTMLDocument2;

    // NOTE(review): design mode is switched on before the write and off
    // afterwards, presumably so the page is handled as editable content
    // while it is being loaded - confirm the exact reason.
    Doc.designMode := 'on';
    // Keep pumping the message queue until the document reports it is ready.
    // This loop has no timeout.
    while Doc.readyState <> 'complete' do
      Application.ProcessMessages;

    // IHTMLDocument2.write takes a SAFEARRAY of variants, so the markup is
    // wrapped in a one-element variant array and its raw array pointer passed.
    Markup := VarArrayCreate([0, 0], varVariant);
    Markup[0] := Source.Text;
    Doc.write(PSafeArray(System.TVarData(Markup).VArray));

    Doc.designMode := 'off';
    while Doc.readyState <> 'complete' do
      Application.ProcessMessages;

    // Guard against a document without a body element instead of
    // dereferencing a nil interface.
    Body := Doc.body;
    if Assigned(Body) then
      Memo1.Lines.Text := Body.innerText
    else
      Memo1.Lines.Clear;
  finally
    Source.Free;
  end;
  // Doc, Body and Markup are released automatically when the procedure exits.
end;