您可以使用 IHTMLDocument2 DOM 从 HTML 中解析出所需的任何元素:
uses ActiveX, MSHTML;
const
  // Sample markup parsed by both button handlers in this file.
  // NOTE(review): the tags inside these literals were lost when the snippet
  // was extracted (only 'Name:' and 'Value' survived, leaving unterminated
  // string literals). The markup here is reconstructed from what the handlers
  // search for: a LABEL with class "tvLabel", a SPAN with class "tvValue",
  // and an enclosing DIV whose class attribute is exactly
  // "tvRow tvFirst hasLabel tvFirst". The trailing "clear" DIV is a guess --
  // confirm against the real page.
  HTML =
    '<div class="tvRow tvFirst hasLabel tvFirst">' +
    '<label class="tvLabel">Name:</label>' +
    '<span class="tvValue">Value</span>' +
    '<div class="clear"></div>' +
    '</div>';
// Parses the HTML constant with the MSHTML document object (late-bound via
// OleVariant), shows the resulting body markup, then walks every element under
// <body> and shows the inner text of each LABEL.tvLabel and SPAN.tvValue.
procedure TForm1.Button1Click(Sender: TObject);
var
  htmlDoc: OleVariant;
  elements: OleVariant;
  current: OleVariant;
  idx: Integer;
begin
  htmlDoc := coHTMLDocument.Create as IHTMLDocument2;
  htmlDoc.write(HTML);
  htmlDoc.close;

  ShowMessage(htmlDoc.body.innerHTML);

  // Fetch the element collection once instead of re-resolving it per item.
  elements := htmlDoc.body.all;
  for idx := 0 to elements.length - 1 do
  begin
    current := elements.item(idx);
    // tagName is compared in upper case, as the original code does.
    // className is only read when the tag already matches.
    if ((current.tagName = 'LABEL') and (current.className = 'tvLabel')) or
       ((current.tagName = 'SPAN') and (current.className = 'tvValue')) then
      ShowMessage(current.innerText);
  end;
end;
另外值得一提的是我今天发现的另一个非常好用的 HTML 解析器:htmlp(Delphi DOM HTML 解析器和转换器)。它不如 IHTMLDocument2 灵活,但易于使用、速度快、免费,并且在旧版本的 Delphi 中也支持 Unicode。
示例用法:
uses HtmlParser, DomCore;
// Returns the first direct child of the document's root element whose node
// name is 'body', or nil when there is none.
//
// HtmlDoc: the parsed document to search. A nil document, or a document with
//          no root element, yields nil instead of an access violation.
// Result:  the body element, owned by HtmlDoc (the caller must not free it).
function GetDocBody(HtmlDoc: TDocument): TElement;
var
  i: Integer;
  root: TNode;
  node: TNode;
begin
  Result := nil;

  // Guard against a missing document or a parse that produced no root.
  if not Assigned(HtmlDoc) then
    Exit;
  root := HtmlDoc.documentElement;
  if not Assigned(root) then
    Exit;

  for i := 0 to root.childNodes.length - 1 do
  begin
    node := root.childNodes.item(i);
    if node.nodeName = 'body' then
    begin
      Result := node as TElement;
      Break;
    end;
  end;
end;
// Parses the HTML constant with the htmlp parser, locates <body> through
// GetDocBody, and for every direct child <div> whose class attribute equals
// TargetClass shows the number of child nodes it has.
// The parser and the parsed document are both freed before returning.
procedure THTMLForm.Button2Click(Sender: TObject);
const
  TargetClass = 'tvRow tvFirst hasLabel tvFirst';
var
  parser: THtmlParser;
  parsedDoc: TDocument;
  bodyElem, child: TElement;
  current: TNode;
  childIdx: Integer;
begin
  parser := THtmlParser.Create;
  try
    parsedDoc := parser.parseString(HTML);
    try
      bodyElem := GetDocBody(parsedDoc);
      // Exit inside try..finally still runs both finally sections.
      if not Assigned(bodyElem) then
        Exit;

      for childIdx := 0 to bodyElem.childNodes.length - 1 do
      begin
        current := bodyElem.childNodes.item(childIdx);
        // Skip anything that is not an element node.
        if not (current is TElement) then
          Continue;

        child := TElement(current);
        if (child.tagName = 'div') and (child.GetAttribute('class') = TargetClass) then
        begin
          // iterate child.childNodes here...
          ShowMessage(IntToStr(child.childNodes.length));
        end;
      end;
    finally
      parsedDoc.Free;
    end;
  finally
    parser.Free;
  end;
end;