本程序编写初衷只是想稍微帮下一朋友从某网页抓取数据资料,代码及逻辑都很简单(目标网页貌似很不怎样,各方面性能都较差...),可修改优化之处很多,仅供业余摆弄。
{
  问题来源: http://jdxx.zhs.mofcom.gov.cn/website/btgs.jsp
   实现步骤:
    1、浏览指定网页,使用者输入查询条件查询;
    2、点击 获取网页内容 按钮,进行
      2.1、跳到 第N页,保存其内容到 ListView;重复...
      2.2、保存 ListView 内容 到  Excel;
}
unit NetFetch;
interface
uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, Buttons, ExtCtrls, OleCtrls, SHDocVw, XPMan, ComCtrls;
type
  Tfrm_Main = class(TForm)
    XPManifest1: TXPManifest;
    wb_WebContent: TWebBrowser;
    Panel1: TPanel;
    Panel2: TPanel;
    Label1: TLabel;
    btn_GotoURL: TSpeedButton;
    edt_URL: TEdit;
    btn_Close: TButton;
    dlg_SaveInfo: TSaveDialog;
    Tmr_ClsDlg: TTimer;
    btn_GetNetInfo: TButton;
    lv_Info: TListView;
    lbl_Hints: TLabel;
    Tmr_GotoURL: TTimer;
    procedure FormShow(Sender: TObject);
    procedure FormResize(Sender: TObject);
    procedure edt_URLKeyDown(Sender: TObject; var Key: Word;
      Shift: TShiftState);
    procedure btn_GotoURLClick(Sender: TObject);
    procedure btn_GetNetInfoClick(Sender: TObject);
    procedure btn_CloseClick(Sender: TObject);
    procedure Tmr_GotoURLTimer(Sender: TObject);
    procedure Tmr_ClsDlgTimer(Sender: TObject);
  private
    { Private declarations }
    FTotalCount: string;                              // 总数据条数
    FTotalPageCount: string;                          // 总页数
  public
    { Public declarations }
    function GetDatasInfo: Boolean;                   // 得到数据条数、数据总页数
    function GotoNthPage(PageIndex: string): Boolean; // 转至第 PageIndex 页
    function SaveInfoToLV: Boolean;                   // 保存抓取的数据入 ListView
    function SaveLVToXLS(FileName: string): Boolean;  // 将 ListView 内容存入 Excel
  end;
var
  frm_Main: Tfrm_Main;
implementation
uses
  MSHtml, ComObj, ShellAPI;
{$R *.dfm}
procedure Tfrm_Main.FormShow(Sender: TObject);
begin
  Tmr_GotoURL.Enabled:= True;
  edt_URL.SetFocus;
end;
procedure Tfrm_Main.FormResize(Sender: TObject);
begin
  btn_GetNetInfo.Left:= (Width - 190) div 2;
  btn_Close.Left:= btn_GetNetInfo.Left + 130;
end;
procedure Tfrm_Main.edt_URLKeyDown(Sender: TObject; var Key: Word;
  Shift: TShiftState);
begin
  if Key = VK_RETURN then
  begin
    Key:= 0;
    wb_WebContent.Navigate(edt_URL.Text);
  end;
end;
procedure Tfrm_Main.btn_GotoURLClick(Sender: TObject);
begin
  wb_WebContent.Navigate(edt_URL.Text);
end;
procedure Tfrm_Main.btn_GetNetInfoClick(Sender: TObject);
var
  I: Integer;
  J: Integer;
begin
  if lv_Info.Items.Count > 0 then
  begin
    with lv_Info do
    begin
      Items.BeginUpdate;
      Items.Clear;
      Items.EndUpdate;
    end;
  end;
  btn_GetNetInfo.Enabled:= False;
  btn_Close.Enabled:= False;
  try
    if not GetDatasInfo then
      Exit;
    if FTotalPageCount = '1' then // 只 1页,直接保存退出即可
    begin
      SaveInfoToLV;
    end
    else
    begin                        // 多于 1页,先保存第 1 页内容,再...
      SaveInfoToLV;
      for I:= 2 to StrToInt(FTotalPageCount) do
      begin
        if not GotoNthPage(IntToStr(I)) then
        begin
          MessageBox(
                     Handle,
                     '网页跳转失败,程序无法继续抓取数据! ',
                     '错误',
                     MB_OK+ MB_ICONERROR
                    );
          Exit;
        end;
        lbl_Hints.Caption:= '正在获取第 ' + IntToStr(I) + '/' + FTotalPageCount + ' 页数据信息...';
        Update;
        Application.ProcessMessages;
        while wb_WebContent.Busy do
          Application.ProcessMessages;
        ShowMessage('转页...');        // 延时,可以优化...
        for J:= 0 to 20 do
          Application.ProcessMessages;
        ShowMessage('转页毕...');
        Application.ProcessMessages;
        Sleep(200);
        Application.ProcessMessages;
        while True do
        begin
          if SaveInfoToLV then
            Break
          else
          begin
            if not GotoNthPage(IntToStr(I)) then
            begin
              MessageBox(
                         Handle,
                         PChar('网页跳转失败(第 ' + IntToStr(I) + ' 页),程序无法继续抓取数据! '),
                         '错误',
                         MB_OK+ MB_ICONERROR
                        );
              Exit;
            end;
          end;
        end;
        Application.ProcessMessages;
      end;
    end;
  finally
    btn_GetNetInfo.Enabled:= True;
    btn_Close.Enabled:= True;
    lbl_Hints.Caption:= '';
    Update;
  end;
  if dlg_SaveInfo.Execute then
  begin
    if SaveLVToXLS(dlg_SaveInfo.FileName) then
    begin
      if MessageBox(
                    Handle,
                    PChar(
                          '所获取的网页数据信息已成功保存至 ' + dlg_SaveInfo.FileName +
                          ',是否现在查看?'
                         ),
                    '提示',
                    MB_YESNO + MB_ICONQUESTION
                   ) = IDYES then
       begin
         if ShellExecute(0, 'Open', PChar(dlg_SaveInfo.FileName), nil, nil, SW_SHOW) <= 32 then
           MessageBox(Handle, '打开文件失败! ', '提示', MB_OK + MB_ICONINFORMATION);
       end;
    end;
  end;
  with lv_Info do
  begin
    Items.BeginUpdate;
    Items.Clear;
    Items.EndUpdate;
  end;
end;
procedure Tfrm_Main.btn_CloseClick(Sender: TObject);
begin
  Close;
end;
procedure Tfrm_Main.Tmr_GotoURLTimer(Sender: TObject);
begin
  Tmr_GotoURL.Enabled:= False;
  Screen.Cursor:= crHourGlass;
  wb_WebContent.Navigate(edt_URL.Text);
  while wb_WebContent.Busy do
    Application.ProcessMessages;
  Screen.Cursor:= crDefault;
end;
procedure Tfrm_Main.Tmr_ClsDlgTimer(Sender: TObject);
var
  H: THandle;
begin
  H:= FindWindow('TMessageForm', PChar(Application.Title));
  if H > 0 then
  begin
    SendMessage(H, WM_KEYDOWN, VK_SPACE, 0);
    SendMessage(H, WM_CLOSE, 0, 0);
  end;
end;
function Tfrm_Main.GetDatasInfo: Boolean;
var
  ws: string;
begin
  Result:= False;
  ws:= (wb_WebContent.Document as IHTMLDocument2).Body.outerHTML;
  // 总数据条数
  Delete(ws, 1, Pos('<TD>共 ', ws) + 6);
  FTotalCount:= Copy(ws, 1, Pos(' 条信息', ws) - 1);
  if FTotalCount = '0' then
  begin
    MessageBox(Handle, '没有可供获取的网页数据信息! ', '提示', MB_OK + MB_ICONINFORMATION);
    Exit;
  end;

  // 总分页数
  Delete(ws, 1, Pos('页次:', ws) + 7);
  FTotalPageCount:= Copy(ws, 1, Pos('页 20篇', ws) - 1);
  if FTotalPageCount = '0' then
  begin
    MessageBox(Handle, '没有可供获取的网页数据信息!', '提示', MB_OK + MB_ICONINFORMATION);
    Exit;
  end;
  Result:= True;
end;
function Tfrm_Main.GotoNthPage(PageIndex: string): Boolean;
var
  Doc: IHTMLDocument2;
  Form: IHTMLFormElement;
  Elements: IHTMLElementCollection;
  InputElem: IHTMLInputElement;
  I: Integer;
begin
  Result:= False;
  Doc:= wb_WebContent.Document as IHTMLDocument2;
  Elements:= Doc.Forms as IHTMLElementCollection;
  Form:= Elements.Item(0, varEmpty) as IHTMLFormElement;
  Elements:= (Doc.All as IHTMLElementCollection).tags('input') as IHTMLElementCollection;
  for I:= 0 to Elements.Length - 1 do // 找到,并填充页码文本框
  begin
    InputElem:= Elements.Item(I, varEmpty) as IHTMLInputElement;
    if UpperCase(Trim(InputElem.Name)) = 'CPF.CPAGE' then
    begin