前段时间作了一个HTML的解析类,方便在蜘蛛、信息发布、小偷程序中正确抓取网页内容。
有人可能会说,为啥不用Webbrowser呢。
1,首先是效率问题,Webbrowser太慢了。
2,你无法操作Webbrowser上传一个文件。
3,你无法操作Webbrowser跨域的Iframe
4,你不能不按套路出牌,例如页面setTimeout 100秒输出一个div,你也只能100秒后才能获取到。
5,如果你只想在后台解析HTML,不想让用户看到浏览器,Webbrowser会很碍事。
6,阻止浏览器下载图片、CSS、Flash不是一件容易的事情,而这写内容通常没啥用。
。。。。
因为是一个商业软件,不太方便直接公开全部源代码,列出各头大家了解一下吧。如果大家比较感兴趣,稍后我会核心部分做一个说明。
HTTP通讯部分
type
ETCPSocket = class(Exception);
ESSLTCPSocket = class(Exception);
ECookies = class(Exception);
EHTTP = class(Exception);
TSockEvent = (sSend,sRecv);
TSocketNotify = procedure(Event:TSockEvent; Bytes: Int64) of object;
TTCPSocket = class
strict private
FTimeOut: DWORD;
FSocketNotify: TSocketNotify;
procedure SetTimeOut(Ms : DWORD);
function GetTimeOut : DWORD;
protected
FSocket: TSocket;
public
constructor Create;
destructor Destroy; override;
function Open(const Host : String; const Port : WORD) : Boolean;
function Send(const Buffer : RawByteString) : Boolean;
function Recv(var Buffer : RawByteString) : Boolean;
procedure Close; virtual;
function HostToIP(const Name:String):String;
property TimeOut : DWORD read GetTimeOut write SetTimeOut;
property OnNotify: TSocketNotify read FSocketNotify write FSocketNotify;
end;
TSSLTCPSocket = class(TTCPSocket)
strict private
FCTXClient: PSSL_CTX;
FSSLClient: PSSL;
protected
{ protected declarations }
public
constructor Create;
function Open(const Host : String; const Port : WORD) : Boolean;
function Send(const Buffer : RawByteString) : Boolean;
function Recv(var Buffer : RawByteString) : Boolean;
procedure Close; override;
end;
TCookieData = class
Domain,
Path,
Name,
Value:String;
end;
TCookies = class
private
FList:TObjectList;
function GetCount:Integer;
function GetItems(Index: Integer):TCookieData;
procedure SetItems(Index: Integer; CookieData:TCookieData);
function InDomain(const Domain,Domain2:String):Boolean;
function InPath(const Path,Path2:String):Boolean;
procedure UpdateCookies(const CookieDomain,CookiePath,CookieName,CookieValue:String);
protected
{ protected declarations }
public
constructor Create;
destructor Destroy; override;
procedure Clear;
procedure Add(Domain,Path,Name,Value:String); overload;
procedure Add(Domain,Path,Cookie:String); overload;
procedure Delete(Index:Integer);
property Count:Integer read GetCount;
property Items[Index: Integer]:TCookieData read GetItems write SetItems; default;
procedure Update(const Domain,Path,Header:String);
function GetCookies(const Domain,Path:String): String;
procedure SetCookies(Domain,Path,Cookies:String);
end;
THTTPProtocol = (hpHTTP, hpHTTPS);
THTTPCommand = (hcGet, hcPost, hcHead, hcDownload);
TLocation = record
Protocol: THTTPProtocol;
Domain: String;
Path: String;
Port: Integer;
end;
THTTPRequest = record
Url:String;
CharSet:String;
Referer:String;
CMD:THTTPCommand;
Buffer:RawByteString;
PostData:RawByteString;
AddonsHeader:String;
ContentType:RawByteString;
StartTime:Cardinal;
FinishTime:Cardinal;
end;
THTTPResponse = record
Url:String;
Header:RawByteString;
RawCode:RawByteString;
RedirectCount:Integer;
RedirectPages: Array of RawByteString;
end;
THTTPOnRequest = function(Sender:TObject):Boolean of object;
THTTPOnResponse = function(Sender:TObject):Boolean of object;
THTTPOnProgress = function(Sender:TObject; ContentSize,Received:Integer):Boolean of object;
THTTPManage = class;
THTTP = class
private
FOwner:THTTPManage;
FCookies:TCookies;
FFreeCookies:Boolean;
FTCPSocket:TTCPSocket;
FSSLTCPSocket:TSSLTCPSocket;
FFileHandle:THandle;
FAgentHost:String;
FAgentPort:Integer;
FMaxRedirect:Integer;
FMaxRecvByte:Int64;
FInterval:Cardinal;
FTimeOut:Cardinal;
FDownloadTimeOut:Cardinal;
FSupportSSL:Boolean;
FOnNotify: TSocketNotify;
FOnRequest: THTTPOnRequest;
FOnResponse: THTTPOnResponse;
FOnProgress: THTTPOnProgress;
procedure SetTimeOut(val: DWORD);
function GetCookie : String;
procedure SetCookie(val : String);
procedure SetNotify(const Value: TSocketNotify);
protected
function SendRequest:Boolean;
procedure MakeRequestBuffer;
function ParseUrl(const URL:String):Boolean;
property TCPSocket:TTCPSocket read FTCPSocket;
property SSLSocket: TSSLTCPSocket read FSSLTCPSocket;
public
Location:TLocation;
Request:THTTPRequest;
Response:THTTPResponse;
constructor Create(const SupportSSL:Boolean=True; const ACookies:TCookies=nil; const AOwner:THTTPManage=nil);
destructor Destroy; override;
procedure Init;
function Head(Url:string):Integer;
function Get(Url:string):Integer; overload;
function Get(Url:string; GetField:array of String):Integer; overload;
function Post(Url:string; PostData:RawByteString):Integer; overload;
function Post(Url:string; PostField:array of String):Integer; overload;
function Post(Url:string; PostField:array of String; FileIndex: array of String; Multipart:Boolean):Integer; overload;
function Redirect(CMD: THTTPCommand; Url:String; PostData:RawByteString; ContentType:RawByteString):Integer;
function DoRequest(CMD:THTTPCommand; Url:String; PostData:RawByteString; ContentType:RawByteString):Integer;
function Download(const Url: string; const SaveTo: string):Integer;
function Encoding(Text:String; HTTPConvert:Boolean):RawByteString;
property Cookie : String read GetCookie write SetCookie;
property Cookies : TCookies read FCookies write FCookies;
property Interval: Cardinal read FInterval write FInterval;
property AgentHost : String read FAgentHost write FAgentHost;
property AgentPort : Integer read FAgentPort write FAgentPort;
property TimeOut : Cardinal read FTimeOut write SetTimeOut;
property DownloadTimeOut : Cardinal read FDownloadTimeOut write FDownloadTimeOut;
property MaxRedirect : Integer read FMaxRedirect write FMaxRedirect;
property MaxRecvByte : Int64 read FMaxRecvByte write FMaxRecvByte;
property OnNotify: TSocketNotify read FOnNotify write SetNotify;
property OnRequest : THTTPOnRequest read FOnRequest write FOnRequest;
property OnResponse : THTTPOnResponse read FOnResponse write FOnResponse;
property OnProgress : THTTPOnProgress read FOnProgress write FOnProgress;
end;
THTTPManage = class
private
FCri:TRTLCriticalSection;
FSendBytes,
FRecvBytes,
FLastSendBytes,
FLastRecvBytes,
FSeekSend,
FSeekRecv:Int64;
FSupportSSL:Boolean;
FSendAverage:Array [0..AverageCycle-1] Of Int64;
FRecvAverage:Array [0..AverageCycle-1] Of Int64;
//Config
FTimeOut:Cardinal;
FAgentHost:String;
FAgentPort,
FDownloadTimeOut,
FInterval,
FMaxRedirect,
FMaxRecvByte :Integer;
function GetAgentHost:String;
procedure SetAgentHost(val:String);
procedure SetAgentPort(val:Integer);
procedure SetTimeOut(val:Cardinal);
procedure SetDownloadTimeOut(val:Integer);
procedure SetInterval(val:Integer);
procedure SetMaxRedirect(val:Integer);
procedure SetMaxRecvByte(val:Integer);
protected
FHTTPList:TList;
procedure Lock;
procedure UnLock;
public
constructor Create(const SupportSSL:Boolean=True);
destructor Destroy; override;
function CreateHTTP(const Cookies:TCookies=nil):THTTP;
procedure FreeHTTP(HTTP:THTTP);
procedure Close; virtual;
procedure Notify(AObject: Tobject; Operation: TOperation); virtual;
procedure SocketEvent(Event:TSockEvent; Bytes: Int64);
procedure Average(var AvgSend,AvgRecv:Int64);
procedure StatClear;
property SendBytes: Int64 read FSendBytes;
property RecvBytes: Int64 read FRecvBytes;
//Config
property AgentHost: String read GetAgentHost write SetAgentHost;
property AgentPort: Integer read FAgentPort write SetAgentPort;
property TimeOut: Cardinal read FTimeOut write SetTimeOut;
property DownloadTimeOut: Integer read FDownloadTimeOut write SetDownloadTimeOut;
property Interval: Integer read FInterval write SetInterval;
property MaxRedirect: Integer read FMaxRedirect write SetMaxRedirect;
property MaxRecvByte: Integer read FMaxRecvByte write SetMaxRecvByte;
end;
function HostToIP(const Name:String):String;
procedure LoadSSLLibrary;
HTML解析部分
const
DoNotPush:set of TLabelEnum=[csTitle,csImg,csStyle,csScript,csInput,csTextArea];
TLabelNames:Array [TLabelEnum] of String = (
'a','img','style','link','script','title','form','input','select',
'button','textarea','option','');
DelimiterWord:array[0..6] of String = ('','//','/*','*/','');
DelimiterChar= ['<','>','/','\','=','&',';','"','''',' ',#9,#13,#10];
WhiteSpaceChar= [' ',#9,#13,#10];
type
TFindin = (fiAll,fiForms,fiFields,fiAnchors,fiImages,fiStyles,fiScripts);
THTMLDocument = class;
THTMLParser = class;
THTMLElementCollection = class;
THTMLElement = class
private
FID:String;
FName:String;
FTagName:String;
FParent:THTMLElement;
FChildElements:THTMLElementCollection;
FDocument:THTMLDocument;
FAttributes:TDictionary;
procedure Assign(Target:THTMLElement); virtual;
function GetInnerHTML:String;
procedure SetInnerHTML(val:String);
function GetOuterHTML:string;
procedure SetOuterHTML(val:String);
procedure AppendChildElement(Element:THTMLElement);
procedure RemoveChildElement(Element:THTMLElement);
public
constructor Create(Parent:THTMLElement); virtual;
destructor Destroy; override;
function Clone:THTMLElement; virtual;
procedure SetAttribute(AName,AValue:String); virtual;
function GetAttribute(AName:String):String; virtual;
function GetElementById(AID:String):THTMLElement;
function GetElementsByName(AName:String):THTMLElementCollection;
function GetElementsByTagName(ATagName:String):THTMLElementCollection;
function HasChildElement(Child:THTMLElement):Boolean;
property Document:THTMLDocument read FDocument write FDocument;
property Attributes: TDictionary read FAttributes;
property ChildElements: THTMLElementCollection read FChildElements;
property Attribute[Name:String]:string read GetAttribute write SetAttribute;
property Parent:THTMLElement read FParent;
property ParentNode:THTMLElement read FParent;
property ID:String read FID write FID;
property Name:String read FName write FName;
property TagName:String read FTagName write FTagName;
property InnerHTML:String read GetInnerHTML write SetInnerHTML;
property OuterHTML:String read GetOuterHTML write SetOuterHTML;
end;
THTMLClass = class of THTMLElement;
THTMLCollection = class(TList)
private
FDocument:THTMLDocument;
function GetCount: Integer;
public
function Item(Index:Integer): T;
property Length:Integer read GetCount;
property Document:THTMLDocument read FDocument write FDocument;
end;
THTMLScript = class(THTMLElement)
private
FSrc:String;
FType:String;
FText:String;
FLanguage:String;
public
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property Src:String read FSrc write FSrc;
property AType:String read FType write FType;
property Text:String read FText write FText;
property Language:String read FLanguage write FLanguage;
end;
THTMLStyle = class(THTMLElement)
private
FCSSText :String;
public
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property CSSText:String read FCSSText write FCSSText;
end;
THTMLImage = class(THTMLElement)
private
FAlt:String;
FTitle:String;
FSrc:String;
function GetTextValue: string;
public
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property Alt:String read FAlt write FAlt;
property Title:String read FTitle write FTitle;
property Src:String read FSrc write FSrc;
property TextValue:string read GetTextValue;
end;
THTMLAnchor =class(THTMLElement)
private
FHref:String;
FDisplay:String;
public
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property Href:String read FHref write FHref;
property Display:String read FDisplay write FDisplay;
end;
THTMLForm = class;
THTMLField = class(THTMLElement)
private
FValue:String;
FDisplay:String;
procedure Assign(Target:THTMLElement); override;
function GetActive:Boolean; virtual; abstract;
protected
FParentForm:THTMLForm;
public
destructor Destroy; override;
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property Active:Boolean read GetActive;
property ParentForm:THTMLForm read FParentForm;
property Value:String read FValue write FValue;
property Display:String read FDisplay write FDisplay;
end;
THTMLTextArea = class(THTMLField)
private
function GetActive:Boolean; override;
public
procedure Random;
end;
THTMLButton = class(THTMLField)
private
FType:String;
procedure Assign(Target:THTMLElement); override;
function GetActive:Boolean; override;
public
constructor Create(Parent:THTMLElement); override;
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property AType:String read FType write FType;
end;
THTMLSelect = class;
THTMLOption = class(THTMLField)
private
FSelected: Boolean;
FParentSelect: THTMLSelect;
procedure Assign(Target:THTMLElement); override;
function GetActive:Boolean; override;
procedure SetSelected(val:Boolean);
public
destructor Destroy; override;
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
property ParentSelect:THTMLSelect read FParentSelect;
property Selected: Boolean read FSelected write SetSelected;
end;
THTMLInput = class(THTMLField)
private
FType:String;
FChecked: Boolean;
procedure Assign(Target:THTMLElement); override;
function GetActive:Boolean; override;
procedure SetChecked(val:Boolean);
public
constructor Create(Parent:THTMLElement); override;
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
procedure Random;
property AType:string read FType write FType;
property Checked: Boolean read FChecked write SetChecked;
end;
THTMLOptionCollection = class;
THTMLSelect = class(THTMLField)
private
FSelected: THTMLOption;
FOptions: THTMLOptionCollection;
procedure Assign(Target:THTMLElement); override;
function GetActive:Boolean; override;
procedure SetSelected(val: THTMLOption);
procedure SetParentFrom(const Value: THTMLForm);
public
constructor Create(Parent:THTMLElement); override;
destructor Destroy; override;
procedure Random;
procedure AddOption(val:THTMLOption);
procedure Notify(Element: THTMLOption; Operation: TOperation);
property ParentFrom:THTMLForm read FParentForm write SetParentFrom;
property Selected:THTMLOption read FSelected write SetSelected;
property Options:THTMLOptionCollection read FOptions;
end;
TRadioFields = Array of THTMLInput;
THTMLFieldCollection = class;
THTMLForm = class(THTMLElement)
private
FMethod:String;
FENCType:String;
FAction:String;
FReferer:String;
FFields:THTMLFieldCollection;
procedure Assign(Target:THTMLElement); override;
function GetRadioFields(AName:String):TRadioFields;
function GetActiveFields(AName:String):THTMLField;
function GetFieldValues(AName:String):String;
procedure SetFieldValues(AName,AValue:String);
public
constructor Create(Parent:THTMLElement); override;
destructor Destroy; override;
procedure AddField(val:THTMLField);
procedure Notify(Element: THTMLField; Operation: TOperation);
function Find(Keys,Values:string):THTMLField; overload;
function Find(Keys,Values:string; Match:TMatch):THTMLField; overload;
function Find(Keys,Values:array of string):THTMLField; overload;
function Find(Keys,Values:array of string; Match:TMatch):THTMLField; overload;
function GetAttribute(AName:String):String; override;
procedure SetAttribute(AName,AValue:String); override;
function Submit(PostField:array of String; const ButtonName:string=''):Integer;
function CheckBox(Checked:Boolean):Boolean; overload;
function CheckBox(AName:String; Checked:Boolean):Boolean; overload;
property Fields:THTMLFieldCollection read FFields;
property ActiveFields[Name:String]:THTMLField read GetActiveFields;
property RadioFields[Name:String]:TRadioFields read GetRadioFields;
property FieldValues[Name:String]:String read GetFieldValues write SetFieldValues;
property Method:String read FMethod write FMethod;
property ENCType:String read FENCType write FENCType;
property Action:String read FAction write FAction;
end;
THTMLElementCollection = class(THTMLCollection);
THTMLScriptCollection = class(THTMLCollection);
THTMLStyleCollection = class(THTMLCollection);
THTMLImageCollection = class(THTMLCollection);
THTMLAnchorCollection = class(THTMLCollection);
THTMLFieldCollection = class(THTMLCollection);
THTMLOptionCollection = class(THTMLCollection);
THTMLFormCollection = class(THTMLCollection)
public
function Find(Keys,Values:String):THTMLForm; overload;
function Find(Keys,Values:String; Match:TMatch):THTMLForm; overload;
function Find(Keys,Values:array of String; Match:TMatch):THTMLForm; overload;
function FindByField(Keys,Value:String):THTMLForm; overload;
function FindByField(Keys,Value:String; Match:TMatch):THTMLForm; overload;
function FindByField(Keys,Values:array of String; Match:TMatch):THTMLForm; overload;
function FindWithField(FormKeys,FormValues:String;
FieldKeys,FieldValues:String):THTMLForm; overload;
function FindWithField(FormKeys,FormValues:String; FormMatch:TMatch;
FieldKeys,FieldValues:String; FieldMatch:TMatch):THTMLForm; overload;
function FindWithField(FormKeys,FormValues:array of String; FormMatch:TMatch;
FieldKeys,FieldValues:array of String; FieldMatch:TMatch):THTMLForm; overload;
end;
TEvalEvent = function(Sender:TObject; Script:string):Variant of object;
TSubmitEvent = procedure(Sender:TObject; Return:Integer) of object;
TChangedEvent = procedure(Sender:TObject; Obj:THTMLElement; Operation:TOperation) of object;
THTMLDocument = class
private
FHTTP:THTTP;
FURL:String;
FTitle:String;
FSource:String;
FDisplay:String;
FRoot:THTMLElementCollection;
FAll:THTMLElementCollection;
FForms:THTMLFormCollection;
FFields:THTMLFieldCollection;
FAnchors:THTMLAnchorCollection;
FImages:THTMLImageCollection;
FStyles:THTMLStyleCollection;
FScripts:THTMLScriptCollection;
FFinds:THTMLElementCollection;
FOnEval: TEvalEvent;
FOnSubmit:TSubmitEvent;
FOnChanged:TChangedEvent;
function GetMetaRefresh: string;
public
constructor Create(AHTTP:THTTP);
destructor Destroy; override;
procedure Clear;
procedure Notify(Element: THTMLElement; Operation: TOperation); virtual;
function Find(Keys,Values:array of String):THTMLElement; overload;
function Find(Keys,Values:array of String; Match:TMatch):THTMLElement; overload;
function Find(Keys,Values:array of String; FindIn:TFindin):THTMLElement; overload;
function Find(Keys,Values:array of String; Parent:THTMLElement):THTMLElement; overload;
function Find(Keys,Values:array of String; Match:TMatch; FindIn:TFindin):THTMLElement; overload;
function Find(Keys,Values:array of String; Match:TMatch; Parent:THTMLElement):THTMLElement; overload;
function Find(Keys,Values:array of String; Match:TMatch; FindIn:TFindin; Parent:THTMLElement):THTMLElement; overload;
function Finds(Keys,Values:array of String):THTMLElementCollection; overload;
function Finds(Keys,Values:array of String; Match:TMatch):THTMLElementCollection; overload;
function Finds(Keys,Values:array of String; FindIn:TFindin):THTMLElementCollection; overload;
function Finds(Keys,Values:array of String; Parent:THTMLElement):THTMLElementCollection; overload;
function Finds(Keys,Values:array of String; Match:TMatch; FindIn:TFindin):THTMLElementCollection; overload;
function Finds(Keys,Values:array of String; Match:TMatch; Parent:THTMLElement):THTMLElementCollection; overload;
function Finds(Keys,Values:array of String; Match:TMatch; FindIn:TFindin; Parent:THTMLElement):THTMLElementCollection; overload;
property URL:String read FURL;
property Title:String read FTitle;
property Source:String read FSource;
property Display:String read FDisplay;
property All:THTMLElementCollection read FAll;
property Root:THTMLElementCollection read FRoot;
property Forms:THTMLFormCollection read FForms;
property Fields:THTMLFieldCollection read FFields;
property Anchors:THTMLAnchorCollection read FAnchors;
property Images:THTMLImageCollection read FImages;
property Styles:THTMLStyleCollection read FStyles;
property Scripts:THTMLScriptCollection read FScripts;
property MetaRefresh:string read GetMetaRefresh;
property OnEval:TEvalEvent read FOnEval write FOnEval;
property OnSubmit:TSubmitEvent read FOnSubmit write FOnSubmit;
property OnChanged:TChangedEvent read FOnChanged write FOnChanged;
end;
THTMLToken = class
private
FIndex: Integer;
FTokenList: TList;
function GetEOF:Boolean;
function GetCurrToken:String;
function GetNextToken:string;
function GetNextNextToken:string;
function GetPrevToken:String;
function GetToken(Index:Integer):String;
public
constructor Create;
destructor Destroy; override;
procedure Clear;
procedure Process(Source:String);
function IsSpace(Token:String):Boolean;
function MoveNext:Boolean;
function SkipSpace:Boolean;
function SkipToken(Token:String):Boolean;
function Preview(Tokens:array of String):Boolean;
function MatchToken(Token:String):Boolean; overload;
function MatchToken(Tokens:array of String):Boolean; overload;
property EOF:Boolean read GetEOF;
property CurrToken:String read GetCurrToken;
property NextToken:String read GetNextToken;
property NextNextToken:String read GetNextNextToken;
property PrevToken:String read GetPrevToken;
property Items[Index:Integer]: String read GetToken; default;
end;
TParseEvent = procedure(Sender:TObject) of object;
THTMLParser = class
type
TransferType = (ttNone,ttHTML,ttScript,ttStyle);
BlockType = (btNone,btRegExp,btString,btComments);
private
FOwner:THTMLParser;
FDocument:THTMLDocument;
FLastContent:String;
FBlock: BlockType;
FEndBlock: TList;
FTransferType:TransferType;
FTokenList:THTMLToken;
FStack:TStack;
procedure CheckBlock;
function GetValue:String;
function GetTransfer:String;
function GetInner(TagName:String):String;
function CheckName(Name:String):Boolean;
procedure Pop(T:TClass);
function Peek:THTMLElement;
procedure Push(Element:THTMLElement);
procedure AddContent(Text:String);
procedure Addobject(Element:THTMLElement);
procedure UpdateParent(Text:String; Element:THTMLElement);
function LabelInfo(TagName:String; var LabelClass:THTMLClass):TLabelEnum;
procedure Process(ASource:String);
procedure ProcessToken;
procedure ProcessLabel;
procedure ProcessCDATA;
procedure ProcessContent;
procedure ProcessComments;
procedure ProcessLabelOpen(Name:String);
procedure ProcessLabelClose(Name:String);
function ProcessLabelValue(Element:THTMLElement):Boolean;
public
constructor Create(ADocument:THTMLDocument; const AOwner:THTMLParser=nil);
destructor Destroy; override;
procedure Clear;
end;
THTMLEngine = class(THTMLDocument)
private
FCharset:String;
FParser:THTMLParser;
FRawCode:RawByteString;
FMetaRefreshEnable:Boolean;
FOnBeforeParse:TParseEvent;
FOnAfterParse:TParseEvent;
function GetCharset:String; overload;
procedure SetCharset(const Value: String);
public
constructor Create(AHTTP:THTTP);
destructor Destroy; override;
procedure Clear;
procedure Load(AUrl:string; ARawCode:RawByteString);
procedure InsertSource(AddCode:String);
function Translate(const Text:string):String;
function GetCharset(const RawCode:RawByteString):String; overload;
function Decoding(const RawCode:RawByteString):String; overload;
function Decoding(const RawCode:RawByteString; ACharSet:String):String; overload;
property Charset:String read FCharset write SetCharset;
property MetaRefreshEnable:Boolean read FMetaRefreshEnable write FMetaRefreshEnable;
property OnBeforeParse:TParseEvent read FOnBeforeParse write FOnBeforeParse;
property OnAfterParse:TParseEvent read FOnAfterParse write FOnAfterParse;
end;
Javascript模拟部分
type
TScriptEngine=Class;
TScriptBase = class
private
FHTML:THTMLEngine;
FScript:TScriptEngine;
public
constructor Create(AScriptEngine:TScriptEngine);
property HTMLEngine:THTMLEngine read FHTML write FHTML;
property ScriptEngine:TScriptEngine read FScript write FScript;
end;
TScriptNavigator = class(TScriptBase)
private
FappCodeName,FappMinorVersion,FappName,FappVersion,
FbrowserLanguage,FcookieEnabled,FcpuClass,FonLine,
Fplatform,FsystemLanguage,FuserAgent,FuserLanguage:Variant;
public
constructor Create(AScriptEngine:TScriptEngine);
function javaEnabled:Boolean;
function taintEnabled:Boolean;
property appCodeName:Variant read FappCodeName write FappCodeName;
property appMinorVersion:Variant read FappMinorVersion write FappMinorVersion;
property appName:Variant read FappName write FappName;
property appVersion:Variant read FappVersion write FappVersion;
property browserLanguage:Variant read FbrowserLanguage write FbrowserLanguage;
property cookieEnabled:Variant read FcookieEnabled write FcookieEnabled;
property cpuClass:Variant read FcpuClass write FcpuClass;
property onLine:Variant read FonLine write FonLine;
property platform:Variant read Fplatform write Fplatform;
property systemLanguage:Variant read FsystemLanguage write FsystemLanguage;
property userAgent:Variant read FuserAgent write FuserAgent;
property userLanguage:Variant read FuserLanguage write FuserLanguage;
end;
TScriptScreen = class(TScriptBase)
private
FavailHeight,FavailWidth,FbufferDepth,FcolorDepth,
FdeviceXDPI,FdeviceYDPI,FfontSmoothingEnabled,
Fheight,FlogicalXDPI,FlogicalYDPI,FpixelDepth,
FupdateInterval,Fwidth:Variant;
public
constructor Create(AScriptEngine:TScriptEngine);
property availHeight:Variant read FavailHeight write FavailHeight;
property availWidth:Variant read FavailWidth write FavailWidth;
property bufferDepth:Variant read FbufferDepth write FbufferDepth;
property colorDepth:Variant read FcolorDepth write FcolorDepth;
property deviceXDPI:Variant read FdeviceXDPI write FdeviceXDPI;
property deviceYDPI:Variant read FdeviceYDPI write FdeviceYDPI;
property fontSmoothingEnabled:Variant read FfontSmoothingEnabled write FfontSmoothingEnabled;
property height:Variant read Fheight write Fheight;
property logicalXDPI:Variant read FlogicalXDPI write FlogicalXDPI;
property logicalYDPI:Variant read FlogicalYDPI write FlogicalYDPI;
property pixelDepth:Variant read FpixelDepth write FpixelDepth;
property updateInterval:Variant read FupdateInterval write FupdateInterval;
property width:Variant read Fwidth write Fwidth;
end;
TScriptHistory = class(TScriptBase)
private
Flength:Integer;
public
constructor Create(AScriptEngine:TScriptEngine);
property length:Integer read Flength write Flength;
procedure back();
procedure forward();
procedure go();
end;
TScriptLocation = class(TScriptBase)
private
Fhash,Fhost,Fhostname,Fhref,Fpathname,Fport,Fprotocol,Fsearch:Variant;
procedure SetHref(const Value: Variant);
public
constructor Create(AScriptEngine:TScriptEngine);
procedure assign(URL:Variant);
procedure reload();
procedure replace(URL:Variant);
property hash:Variant read Fhash write Fhash;
property host:Variant read Fhost write Fhost;
property hostname:Variant read Fhostname write Fhostname;
property href:Variant read Fhref write SetHref;
property pathname:Variant read Fpathname write Fpathname;
property port:Variant read Fport write Fport;
property protocol:Variant read Fprotocol write Fprotocol;
property search:Variant read Fsearch write Fsearch;
end;
TScriptDocument = class(TScriptBase)
private
Fall,Fanchors,Fapplets,Fforms,Fimages,Flinks,Fbody:Variant;
Fcookie,Fdomain,FlastModified,Freferrer,Ftitle,FURL:Variant;
public
constructor Create(AScriptEngine:TScriptEngine);
procedure close();
function getElementById(id:Variant):Variant;
function getElementsByName(name:Variant):Variant;
function getElementsByTagName(tagname:Variant):Variant;
function open(mimetype,replace:Variant):Variant;
procedure write(exp:Variant);
procedure writeln(exp:Variant);
property cookie:Variant read Fcookie write Fcookie;
property domain:Variant read Fdomain write Fdomain;
property lastModified:Variant read FlastModified write FlastModified;
property referrer:Variant read Freferrer write Freferrer;
property title:Variant read Ftitle write Ftitle;
property URL:Variant read FURL write FURL;
property all:Variant read Fall write Fall;
property anchors:Variant read Fanchors write Fanchors;
property applets:Variant read Fapplets write Fapplets;
property forms:Variant read Fforms write Fforms;
property images:Variant read Fimages write Fimages;
property links:Variant read Flinks write Flinks;
property body:Variant read Fbody write Fbody;
end;
TScriptWindow = class(TScriptBase)
private
ObjDocument:TScriptDocument;
ObjHistory:TScriptHistory;
ObjNavigator:TScriptNavigator;
ObjScreen:TScriptScreen;
ObjLocation:TScriptLocation;
FTemp,
Fclosed,FdefaultStatus,Finnerheight,Finnerwidth,Flength,
Fname,Fouterheight,Fouterwidth,FpageXOffset,FpageYOffset,
Fstatus,FscreenLeft,FscreenTop,FscreenX,FscreenY,
Fframes,Fdocument,Fhistory,FNavigator,FScreen,Fwindow,Flocation:Variant;
procedure SetLocation(const Value: Variant);
public
constructor Create(AScriptEngine:TScriptEngine);
destructor Destroy; override;
procedure alert(msg:Variant);
procedure blur();
procedure clearInterval(id:Variant);
procedure clearTimeout(id:Variant);
procedure close();
procedure confirm(msg:Variant);
function createPopup():Variant;
procedure focus();
procedure moveBy(x,y:Variant);
procedure moveTo(x,y:Variant);
function open(url,name,features,replace:Variant):Variant;
procedure print();
function prompt(text,defaultText:Variant):Variant;
procedure resizeBy(width,height:Variant);
procedure resizeTo(width,height:Variant);
procedure scrollBy(xnum,ynum:Variant);
procedure scrollTo(xpos,ypos:Variant);
function setInterval(code,millisec:Variant):Variant;
function setTimeout(code,millisec:Variant):Variant;
function getElementById(id:Variant):Variant;
function getElementsByName(name:Variant):Variant;
function getElementsByTagName(tagname:Variant):Variant;
property closed:Variant read Fclosed write Fclosed;
property defaultStatus:Variant read FdefaultStatus write FdefaultStatus;
property innerheight:Variant read Finnerheight write Finnerheight;
property innerwidth:Variant read Finnerwidth write Finnerwidth;
property length:Variant read Flength write Flength;
property name:Variant read Fname write Fname;
property outerheight:Variant read Fouterheight write Fouterheight;
property outerwidth:Variant read Fouterwidth write Fouterwidth;
property pageXOffset:Variant read FpageXOffset write FpageXOffset;
property pageYOffset:Variant read FpageYOffset write FpageYOffset;
property status:Variant read Fstatus write Fstatus;
property screenLeft:Variant read FscreenLeft write FscreenLeft;
property screenTop:Variant read FscreenTop write FscreenTop;
property screenX:Variant read FscreenX write FscreenX;
property screenY:Variant read FscreenY write FscreenY;
property frames:Variant read Fframes write Fframes;
property self:Variant read Fwindow write FTemp;
property opener:Variant read Fwindow write FTemp;
property top:Variant read Fwindow write FTemp;
property window:Variant read Fwindow write FTemp;
property document:Variant read Fdocument write FTemp;
property history:Variant read Fhistory write Fhistory;
property Navigator:Variant read FNavigator write FNavigator;
property Screen:Variant read FScreen write FScreen;
property location:Variant read Flocation write SetLocation;
end;
TScriptLanguage = (slJavaScript,slVBScript);
TScriptEngine=Class
private
FHTTP:THTTP;
FDocument:THTMLEngine;
FWindow:TScriptWindow;
FRefManage:TRefManage;
FLanguage:TScriptLanguage;
FVBScriptEngine:Variant;
FJavaScriptEngine:Variant;
FNeedRedirect:Boolean;
FRedirectUrl:String;
procedure CreateInstance;
public
constructor Create(AHTTP:THTTP; AHTML:THTMLEngine);
destructor Destroy; override;
procedure Reset;
procedure BeforeProcess(Sender:TObject);
procedure AfterProcess(Sender:TObject);
function ProcessEval(Sender:TObject; Script:string):Variant;
procedure ProcessObject(Sender:TObject; Obj:THTMLElement; Operation:TOperation);
procedure Execute(const Code:string; ALanguage:TScriptLanguage);
function Eval(const Expression:string; ALanguage:TScriptLanguage):Variant;
function Transform(Obj:TObject; Owned:Boolean=False):Variant;
property Document:THTMLEngine read FDocument write FDocument;
property Language:TScriptLanguage read FLanguage;
End;
三个部分协调控制类
type
TWebbotSupport = set of (wsHTTPS,wsHTML,wsScript);
TWebbotManage = class;
TWebbot = class
private
FHTTP:THTTP;
FDocument:THTMLEngine;
FCookies:TCookies;
FFreeCookies:Boolean;
FScript:TScriptEngine;
FOwner:TWebbotManage;
FSupport:TWebbotSupport;
procedure OnSubmit(Sender:TObject; Return:Integer);
public
constructor Create(const ASupport:TWebbotSupport=[wsHTTPS,wsHTML,wsScript]; const ACookies:TCookies=nil; const AOwner:TWebbotManage=nil);
destructor Destroy; override;
function Clone:TWebbot;
function Get(Url:string):Integer;
function Post(Url:string; PostData:AnsiString):Integer; overload;
function Post(Url:string; PostField:array of String):Integer; overload;
property HTTP:THTTP read FHTTP;
property Document:THTMLEngine read FDocument;
property Cookies:TCookies read FCookies;
property Script:TScriptEngine read FScript;
end;
TWebbotManage = class(THTTPManage)
private
FWebbotList:TList<TWebbot>;
FSupport:TWebbotSupport;
public
constructor Create(const ASupport:TWebbotSupport=[wsHTTPS,wsHTML,wsScript]); overload;
destructor Destroy; override;
procedure Notify(AObject: Tobject; Operation: TOperation); override;
function CreateWebbot(const Cookies:TCookies=nil):TWebbot;
procedure FreeWebbot(Browser:TWebbot);
end;