function GetHtml(const WebBrowser:TWebBrowser): string; const BufSize = $10000; var Size: Int64; Stream: IStream; hHTMLText: HGLOBAL; psi: IPersistStreamInit; begin if not Assigned(WebBrowser.Document) then Exit; OleCheck(WebBrowser.Document.QueryInterface(IPersistStreamInit, psi)); try hHTMLText := GlobalAlloc(GPTR, BufSize); if0= hHTMLText then Exit;// RaiseLastWin32Error; OleCheck(CreateStreamOnHGlobal(hHTMLText, True, Stream)); try OleCheck(psi.Save(Stream, False)); Size := StrLen(PChar(hHTMLText)); SetLength(Result, Size); CopyMemory(PChar(Result), Pointer(hHTMLText), Size); finally Stream := nil; end; finally psi := nil; end; end; procedure TForm1.OutExcel(const WebBrowser:TWebBrowser); //导出为excel const //行列的分别起止 rc =3; rs =12; cc =1; cs =7; str='EXCEL.EXE'; var Excelid :variant; ri,ci :Integer; //当前行和当前列 abc :array[cc..cs] of string; sqlstr :String; H :THandle; P :DWORD; begin try Excelid:=CreateOleObject( 'Excel.Application' ); except on Exception do raise exception.Create('无法创建Xls文件,请确认是否安装EXCEL') end; Excelid.Visible := False; Excelid.WorkBooks.Add; WebBrowser.ExecWB(OLECMDID_SELECTALL,OLECMDEXECOPT_DODEFAULT); WebBrowser.ExecWB(OLECMDID_COPY, OLECMDEXECOPT_DODEFAULT); //复制网页 Excelid.worksheets[1].Paste; //excel文档粘贴 WebBrowser.ExecWB(OLECMDID_UNDO, OLECMDEXECOPT_DODEFAULT); //取消全选 //Excel文件操作 Excelid.Range[Excelid.cells[1,1],Excelid.cells[100,20]].select; Excelid.selection.MergeCells := False; //取消合并 ri:=4; ci:=0; while ri <100do begin if ci >70 then Break; if Trim(AnsiReplaceText(Excelid.Cells[ri,1].Value,'?','')) ='' then begin Excelid.ActiveSheet.Rows[ri].Delete; //删除行 inc(ci); end else begin Inc(ri); end; end; ri:=2; ci:=0; while ri <20do begin if ci >20 then Break; if Trim(AnsiReplaceText(Excelid.Cells[2,ri].Value,'?','')) ='' then begin Excelid.ActiveSheet.Columns[ri].Delete; //删除行 inc(ci); end else Inc(ri); end; for ri:= rc to rs do begin for ci:=cc to cs do begin abc[ci]:= Trim(AnsiReplaceText(Excelid.Cells[ri,ci].Value,'?','')); end; sqlstr:='Insert Into 建筑业企业资质数据库(序号,企业名称,'+ '资质证书编号,主项资质,增项资质,原发证日期,主管部门'+ ') Values ('+QuotedStr(abc[1])+','+QuotedStr(abc[2])+ ','+QuotedStr(abc[3])+','+QuotedStr(abc[4])+',' +QuotedStr(abc[5])+','+QuotedStr(abc[6])+',' +QuotedStr(abc[7])+')'; // ShowMessage(sqlstr); if not od.SetExecSql(sqlstr) then begin od.SetExecSql('Insert into 日志表(日志) values (' +QuotedStr('系统在取第'+inttostr(Count)+'页,第'+abc[1]+'行时遇到错误!')+')'); Break; // ShowMessage('第'+inttostr(Count)+'页。第'+abc[1]+'行'); end; ProgressBar1.StepIt; end; Excelid.ActiveWorkBook.Saved := True; Excelid.WorkBooks.Close; Excelid.quit; //杀死进程 H:=FindWindow(nil,pchar(Str)); if H<>0 then begin GetWindowThreadProcessId(H,@P); if P<>0 then TerminateProcess(OpenProcess(PROCESS_TERMINATE,False,P),$FFFFFFFF); end; end;
当一页处理完毕后,自动处理下一页
procedure TForm1.WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin Label6.Caption:=IntToStr(count); if Count < SumCount then begin // htmlstr:= GetHtml(WebBrowser1); //取得HTML源代码 OutExcel(WebBrowser1); Inc(Count); httpaddress:=httpaddress1+inttostr(Count)+httpaddress2; WebBrowser1.Navigate(httpaddress); end; end;
全部源代码如下:
unit Unit1; interface uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, StdCtrls, OleCtrls, SHDocVw, ExtCtrls,MSHTML, ActiveX,comobj, StrUtils, DB, ADODB,UOperationData, ComCtrls; type TForm1 =class(TForm) Panel1: TPanel; Panel2: TPanel; WebBrowser1: TWebBrowser; Button1: TButton; GroupBox1: TGroupBox; Label1: TLabel; Edit1: TEdit; Label2: TLabel; Edit2: TEdit; Edit3: TEdit; Label3: TLabel; Edit4: TEdit; Label4: TLabel; Label5: TLabel; ADOConnection1: TADOConnection; ProgressBar1: TProgressBar; Label6: TLabel; procedure Button1Click(Sender: TObject); procedure WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); procedure FormShow(Sender: TObject); private procedure OutExcel(const WebBrowser:TWebBrowser); //导出为excel ...{ Private declarations } public ...{ Public declarations } end; var Form1 : TForm1; httpaddress1 :string; httpaddress2 :String; SumCount :Integer; Count : Integer; //次数 M_Bool : Boolean; //鼠标模拟是否有效 httpaddress : String; od :TOperationData; implementation ...{$R *.dfm} function GetHtml(const WebBrowser:TWebBrowser): string; const BufSize = $10000; var Size: Int64; Stream: IStream; hHTMLText: HGLOBAL; psi: IPersistStreamInit; begin if not Assigned(WebBrowser.Document) then Exit; OleCheck(WebBrowser.Document.QueryInterface(IPersistStreamInit, psi)); try hHTMLText := GlobalAlloc(GPTR, BufSize); if0= hHTMLText then Exit;// RaiseLastWin32Error; OleCheck(CreateStreamOnHGlobal(hHTMLText, True, Stream)); try OleCheck(psi.Save(Stream, False)); Size := StrLen(PChar(hHTMLText)); SetLength(Result, Size); CopyMemory(PChar(Result), Pointer(hHTMLText), Size); finally Stream := nil; end; finally psi := nil; end; end; procedure TForm1.OutExcel(const WebBrowser:TWebBrowser); //导出为excel const //行列的分别起止 rc =3; rs =12; cc =1; cs =7; str='EXCEL.EXE'; var Excelid :variant; ri,ci :Integer; //当前行和当前列 abc :array[cc..cs] of string; sqlstr :String; H :THandle; P :DWORD; begin try Excelid:=CreateOleObject( 'Excel.Application' ); except on Exception do raise exception.Create('无法创建Xls文件,请确认是否安装EXCEL') end; Excelid.Visible := False; Excelid.WorkBooks.Add; WebBrowser.ExecWB(OLECMDID_SELECTALL,OLECMDEXECOPT_DODEFAULT); WebBrowser.ExecWB(OLECMDID_COPY, OLECMDEXECOPT_DODEFAULT); //复制网页 Excelid.worksheets[1].Paste; //excel文档粘贴 WebBrowser.ExecWB(OLECMDID_UNDO, OLECMDEXECOPT_DODEFAULT); //取消全选 //Excel文件操作 Excelid.Range[Excelid.cells[1,1],Excelid.cells[100,20]].select; Excelid.selection.MergeCells := False; //取消合并 ri:=4; ci:=0; while ri <100do begin if ci >70 then Break; if Trim(AnsiReplaceText(Excelid.Cells[ri,1].Value,'?','')) ='' then begin Excelid.ActiveSheet.Rows[ri].Delete; //删除行 inc(ci); end else begin Inc(ri); end; end; ri:=2; ci:=0; while ri <20do begin if ci >20 then Break; if Trim(AnsiReplaceText(Excelid.Cells[2,ri].Value,'?','')) ='' then begin Excelid.ActiveSheet.Columns[ri].Delete; //删除行 inc(ci); end else Inc(ri); end; for ri:= rc to rs do begin for ci:=cc to cs do begin abc[ci]:= Trim(AnsiReplaceText(Excelid.Cells[ri,ci].Value,'?','')); end; sqlstr:='Insert Into 建筑业企业资质数据库(序号,企业名称,'+ '资质证书编号,主项资质,增项资质,原发证日期,主管部门'+ ') Values ('+QuotedStr(abc[1])+','+QuotedStr(abc[2])+ ','+QuotedStr(abc[3])+','+QuotedStr(abc[4])+',' +QuotedStr(abc[5])+','+QuotedStr(abc[6])+',' +QuotedStr(abc[7])+')'; // ShowMessage(sqlstr); if not od.SetExecSql(sqlstr) then begin od.SetExecSql('Insert into 日志表(日志) values (' +QuotedStr('系统在取第'+inttostr(Count)+'页,第'+abc[1]+'行时遇到错误!')+')'); Break; // ShowMessage('第'+inttostr(Count)+'页。第'+abc[1]+'行'); end; ProgressBar1.StepIt; end; Excelid.ActiveWorkBook.Saved := True; Excelid.WorkBooks.Close; Excelid.quit; //杀死进程 H:=FindWindow(nil,pchar(Str)); if H<>0 then begin GetWindowThreadProcessId(H,@P); if P<>0 then TerminateProcess(OpenProcess(PROCESS_TERMINATE,False,P),$FFFFFFFF); end; end; procedure TForm1.Button1Click(Sender: TObject); begin ProgressBar1.Max:= SumCount; ProgressBar1.Min:= Count; httpaddress1:=Edit1.Text; httpaddress2:=Edit2.Text; Count:=StrToIntDef(Edit3.Text,1); SumCount:=StrToIntDef(Edit4.Text,1); httpaddress:=httpaddress1+inttostr(Count)+httpaddress2; WebBrowser1.Navigate(httpaddress); end; procedure TForm1.WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin Label6.Caption:=IntToStr(count); if Count < SumCount then begin // htmlstr:= GetHtml(WebBrowser1); //取得HTML源代码 OutExcel(WebBrowser1); Inc(Count); httpaddress:=httpaddress1+inttostr(Count)+httpaddress2; WebBrowser1.Navigate(httpaddress); end; end; procedure TForm1.FormShow(Sender: TObject); begin od:= TOperationData.Create(ADOConnection1); end; initialization OleInitialize(nil); finalization try OleUninitialize; except end; end.