最近经常会模拟网页提交返回网页源码,然后获得网页中相应的元素,于是需要常常解析Html中相应的各种元素。网络是个好东西,搜索一番,就找到了好几个Delphi版本的HtmlParser的类库,试着使用了几个,发现解析起来都不完整,或多或少地会出现一些问题!于是想到了如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交,界面上是不一定要出现WebBrowser的,肯定有办法不通过WebBrowser就直接解析HTML,那便是我不要WebBrowser这个外壳,只要它里面的Document文档接口对象就能实现对Html的解析了。查找了一番MSDN,然后Google一下,果然可行,构建方法如下:
// Create an MSHTML document object directly (no WebBrowser control is hosted)
// and request its IHTMLDocument2 interface into FHtmlDoc.
// NOTE(review): the HRESULT returned by CoCreateInstance is discarded here, so a
// failed creation would go unnoticed; consider checking it.
// NOTE(review): presumably COM has already been initialized on the calling
// thread; confirm against the full source.
CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
接口创建好了之后就能够对文档元素进行解析了,很是爽快!
结合了我自己的特有操作,我对Combobox、Table、Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下:
这里只给出声明,代码请在最后下载
代码
(* **************************************************** *)
(* 得闲工作室 *)
(* 网页元素操作类库 *)
(* *)
(* DxHtmlElement Unit *)
(* Copyright(c) 2008-2010 不得闲 *)
(* email:appleak46@yahoo.com.cn QQ:75492895 *)
(* **************************************************** *)
unit DxHtmlElement;
interface
uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;
{ Element-type predicates.
  Every function below takes a single IHTMLElement and returns a Boolean.
  Only the declarations are visible here; judging by the names, each one
  presumably reports whether the element is of the named kind. Confirm the
  exact matching rules against the implementation section.
  NOTE(review): the first two declarations name their parameter eleElement,
  the remaining ones use element. }
// Presumably a <select> (combo box) element.
function IsSelectElement(eleElement: IHTMLElement): Boolean;
// Presumably a password input.
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
// Presumably a checkbox input.
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
// Presumably a multi-line text area; verify.
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
// Presumably an <input type="image"> as opposed to an <img>; verify.
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
// Presumably an input of button type; verify.
function IsInPutBtnElement(element: IHTMLElement): boolean;
// Presumably a hidden input; verify.
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
{ Image-element helpers.
  Only the declarations are visible here; the behaviour described below is
  inferred from names and signatures and should be confirmed against the
  implementation section. }
// Returns an Integer for the image identified by Src / Alt; presumably its
// index among the document's images. The "not found" value is not visible here.
function GetPicIndex(doc: IHTMLDocument2; Src: string ; Alt: string ): Integer;
// Returns an IHTMLImgElement selected by imgName / src / Alt. How the three
// criteria are combined is not visible here.
function GetPicElement(doc: IHTMLDocument2;imgName: string ;src: string ;Alt: string ): IHTMLImgElement;
// Three overloads that return a TPicture for an image located by
// name/src/alt, by index, or passed directly as an element.
// "RegCode" presumably means a verification-code (captcha) image.
// NOTE(review): ownership of the returned TPicture is not visible here;
// presumably the caller must free it. Confirm.
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string ; Src: string ; Alt: string ): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture; overload ;
type
// stdcall function-pointer type. Its parameter list matches the
// ObjectFromLresult export of oleacc.dll; presumably it is used to bind that
// routine dynamically. Confirm in the implementation section.
TObjectFromLResult = function (LRESULT: lResult; const IID: TIID; WPARAM: wParam; out pObject): HRESULT; stdcall ;
// Enumeration of the element kinds this unit distinguishes (18 values).
// ELE_UNKNOW is the first value (ordinal 0).
// NOTE(review): there is no table or submit value here, although
// IsTableElement and IsSubmitElement are declared in this unit.
TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Classifies an element as one of the TEleMentType values. The classification
// rules are in the implementation section, not visible here.
function GetElementType(element: IHTMLELEMENT): TEleMentType;
// Returns a string for the element's type; presumably the name of its
// TEleMentType value. Verify.
function GetElementTypeName(element: IHTMLELEMENT): string ;
// Returns the cell of aTable at (aRow, aCol) as an IHTMLElement.
// NOTE(review): whether the indices are 0- or 1-based, and the result for
// out-of-range indices, are not visible here.
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
// Returns the table selected by aIndex from aDoc; presumably the aIndex-th
// table in the document. The index base is not visible here.
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
(* 得闲工作室 *)
(* 网页元素操作类库 *)
(* *)
(* DxHtmlElement Unit *)
(* Copyright(c) 2008-2010 不得闲 *)
(* email:appleak46@yahoo.com.cn QQ:75492895 *)
(* **************************************************** *)
unit DxHtmlElement;
interface
uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;
{ Element-type predicates.
  Every function below takes a single IHTMLElement and returns a Boolean.
  Only the declarations are visible here; judging by the names, each one
  presumably reports whether the element is of the named kind. Confirm the
  exact matching rules against the implementation section.
  NOTE(review): the first two declarations name their parameter eleElement,
  the remaining ones use element. }
// Presumably a <select> (combo box) element.
function IsSelectElement(eleElement: IHTMLElement): Boolean;
// Presumably a password input.
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
// Presumably a checkbox input.
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
// Presumably a multi-line text area; verify.
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
// Presumably an <input type="image"> as opposed to an <img>; verify.
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
// Presumably an input of button type; verify.
function IsInPutBtnElement(element: IHTMLElement): boolean;
// Presumably a hidden input; verify.
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
{ Image-element helpers.
  Only the declarations are visible here; the behaviour described below is
  inferred from names and signatures and should be confirmed against the
  implementation section. }
// Returns an Integer for the image identified by Src / Alt; presumably its
// index among the document's images. The "not found" value is not visible here.
function GetPicIndex(doc: IHTMLDocument2; Src: string ; Alt: string ): Integer;
// Returns an IHTMLImgElement selected by imgName / src / Alt. How the three
// criteria are combined is not visible here.
function GetPicElement(doc: IHTMLDocument2;imgName: string ;src: string ;Alt: string ): IHTMLImgElement;
// Three overloads that return a TPicture for an image located by
// name/src/alt, by index, or passed directly as an element.
// "RegCode" presumably means a verification-code (captcha) image.
// NOTE(review): ownership of the returned TPicture is not visible here;
// presumably the caller must free it. Confirm.
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string ; Src: string ; Alt: string ): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture; overload ;
type
// stdcall function-pointer type. Its parameter list matches the
// ObjectFromLresult export of oleacc.dll; presumably it is used to bind that
// routine dynamically. Confirm in the implementation section.
TObjectFromLResult = function (LRESULT: lResult; const IID: TIID; WPARAM: wParam; out pObject): HRESULT; stdcall ;
// Enumeration of the element kinds this unit distinguishes (18 values).
// ELE_UNKNOW is the first value (ordinal 0).
// NOTE(review): there is no table or submit value here, although
// IsTableElement and IsSubmitElement are declared in this unit.
TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Classifies an element as one of the TEleMentType values. The classification
// rules are in the implementation section, not visible here.
function GetElementType(element: IHTMLELEMENT): TEleMentType;
// Returns a string for the element's type; presumably the name of its
// TEleMentType value. Verify.
function GetElementTypeName(element: IHTMLELEMENT): string ;
// Returns the cell of aTable at (aRow, aCol) as an IHTMLElement.
// NOTE(review): whether the indices are 0- or 1-based, and the result for
// out-of-range indices, are not visible here.
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
// Returns the table selected by aIndex from aDoc; presumably the aIndex-th
// table in the document. The index base is not visible here.
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;