一个简单(搜索EMail)的蜘蛛程序

unit UIDTcpClientThread;

interface
uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient, IdHTTP,
  IniFiles;
 const
  MAXBUFF = 100000;

type
  TIDTcpClientThread = class(TThread)
  private
    bRun: Boolean;
  protected
   // Response2: TStringStream;
    StringList: TStringList;
    IdHTTPWeb: TIdHTTP;
    procedure Execute; override;
    procedure OutPutData();
    procedure ThreadExeProcedure();
    procedure AnalysisData();
  public
    URLBuffer: array[1..MAXBUFF] of string;
    iCurrentRecord: integer;
    iAlreadyRecord: integer;
    bBeginToTerminate: Boolean;
    constructor Create();
    destructor Destroy; override;
    procedure AppendURLToIDTcpClientThread(URL: string);
    procedure CloseThread();
    procedure OutPutURL(URL: string);
  end;

 

implementation

uses Unit1, UHandleURLThread;

{ TIDTcpClientThread }

procedure TIDTcpClientThread.ThreadExeProcedure;
var
  s: string;
begin
  if iCurrentRecord <> iAlreadyRecord then begin
    inc(iAlreadyRecord);
    if iAlreadyRecord > MAXBUFF then iAlreadyRecord := 1;
    try
     // Response2.Free;
      //Response2 := TStringStream.Create('');
      StringList.Clear;
      s := IdHTTPWeb.Get(URLBuffer[iAlreadyRecord]);
    //  s := Response2.DataString;
      //s := Utf8ToAnsi(s);
      StringList.Text := s;
      AnalysisData();

    except
      on E: Exception do begin
      //
      end;
    end;
  end;
end;

procedure TIDTcpClientThread.AppendURLToIDTcpClientThread(URL: string);
var
  iCurrentRecord_Temp: integer;
begin
  iCurrentRecord_Temp := iCurrentRecord;
  inc(iCurrentRecord_Temp);
  if iCurrentRecord_Temp > MAXBUFF then iCurrentRecord_Temp := 1;
  URLBuffer[iCurrentRecord_Temp] := URL;
  iCurrentRecord := iCurrentRecord_Temp;
end;

procedure TIDTcpClientThread.CloseThread;
begin

end;

constructor TIDTcpClientThread.Create;
begin
  inherited Create(false);
  StringList := TStringList.Create;
  IdHTTPWeb := TIdHTTP.Create(nil);

//  Response2 := TStringStream.Create('');
  IdHTTPWeb.HandleRedirects := true;
  IdHTTPWeb.Request.ContentType := 'application/x-www-form-urlencoded';

  StringList.Clear;

  bBeginToTerminate := true;
  bRun := false;
  iCurrentRecord := 1;
  iAlreadyRecord := 1;
end;

destructor TIDTcpClientThread.Destroy;
begin
  StringList.Clear;
  StringList.Free;
  IdHTTPWeb.Free;
  inherited;
end;

procedure TIDTcpClientThread.Execute;
begin
  inherited;
  while bBeginToTerminate do begin
    if not bRun then begin
      bRun := true;
      try
        sleep(10);
        ThreadExeProcedure;
      except
        on E: Exception do begin
        //
        end;
      end;

      bRun := false;
    end;
  end;

  bBeginToTerminate := true;
end;

procedure TIDTcpClientThread.OutPutData;
begin

end;

procedure TIDTcpClientThread.OutPutURL(URL: string);
begin
  EnterCriticalSection(CriticalSection);
  if assigned(HandleURLThread) then HandleURLThread.AppendURLToThread(URL);
  LeaveCriticalSection(CriticalSection);
end;
{var
  indexHashed: integer;
begin
  indexHashed := HashedStringList.IndexOf(URL);
  if indexHashed = -1 then begin
    HashedStringList.Add(URL);
    inc(indexofthread);
    if indexofthread >= MAXTHREAD then indexofthread := 0;
    IDTcpClientThread[indexofthread].AppendURLToIDTcpClientThread(URL);
   // Form1.Memo1.Lines.Add(URL);
  end;
end;
}
procedure TIDTcpClientThread.AnalysisData();
const
  HEADSTRING = 'href="';
  ENDSTRING = '"';
var
  ts, TempURL: string;
  i, j, sIndex, sEnd, sLen, ps: integer;
begin
//<a href=" ">
// " , http ,
//½ØÈ¡ Ö÷ÍøÒ³£¬Á½¸öhTTP
//<a href="mailto:sales@calarm.cn">sales@calarm.cn</a>
  for i := 0 to StringList.Count - 1 do begin
    ts := Lowercase(StringList.Strings[i]);
    sIndex := Pos(HEADSTRING, ts);
    sLen := Length(ts);
    while sIndex > 0 do begin
      delete(ts, 1, sIndex + Length(HEADSTRING) - 1);

      for j := 1 to MaxMailz do begin
        if Pos(Mailz[j], ts) > 0 then begin
          if assigned(SendEmailThread) then SendEmailThread.AppendURLToThread(ts);

          break;
        end;
      end;

      sEnd := Pos(ENDSTRING, ts);
      TempURL := Copy(ts, 1, sEnd - 1);
      if
        (Pos('.com', ts) > 0) or (Pos('.cn', ts) > 0) or (Pos('.mobi', ts) > 0) or (Pos('.tel', ts) > 0) or
        (Pos('.asia', ts) > 0) or (Pos('.net', ts) > 0) or (Pos('.org', ts) > 0) or (Pos('.name', ts) > 0) or
        (Pos('.me', ts) > 0) or (Pos('.tv', ts) > 0) or (Pos('.hk', ts) > 0) or (Pos('.biz', ts) > 0) or
        (Pos('.info', ts) > 0)
        then begin //.com.cn.mobi.tel.asia.net.org.name.me.tv.hk.biz.info
        OutPutURL(TempURL);
      end;

      delete(ts, 1, sEnd);
      sIndex := Pos(HEADSTRING, ts);
    end;
  end;
end;

end.

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值