自己用C#写一个采集器、蜘蛛(zz)

效果图:

C# 采集器 蜘蛛

代码如下:

 

using  System;
using  System.Collections.Generic;
using  System.Text;
using  System.Net;
using  System.Web;
using  System.IO;
using  System.Collections;
using  System.Text.RegularExpressions;


namespace  chinaz
{
    
/// <summary>
    /// Console entry point of the e-mail harvester. It downloads a numbered range of
    /// pages, extracts every e-mail address found in them, prints each new address and
    /// appends it to <c>mail.txt</c>.
    /// </summary>
    /// <remarks>
    /// Standard input supplies four lines, in this order: the first page number, the last
    /// page number (inclusive), a URL template containing <c>{0}</c> for the page number,
    /// and the name of the text encoding used to decode the pages.
    /// </remarks>
    class Program
    {
        /// <summary>Optional file whose content is sent verbatim as the Cookie header.</summary>
        private const string CookieFile = "cookie.txt";

        /// <summary>File that harvested addresses are appended to.</summary>
        private const string OutputFile = "mail.txt";

        /// <summary>Pages whose length is not greater than this many characters are skipped.</summary>
        private const int MinPageLength = 1000;

        /// <summary>E-mail pattern, built once instead of once per downloaded page.</summary>
        private static readonly Regex MailPattern = new Regex(
            @"\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Reads the run parameters from standard input, crawls the requested pages and
        /// records every distinct e-mail address.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string cookie = ReadCookie();

            int firstPage;
            int lastPage;
            if (!int.TryParse(Console.ReadLine(), out firstPage) ||
                !int.TryParse(Console.ReadLine(), out lastPage))
            {
                Console.Error.WriteLine("The first two input lines must be integer page numbers.");
                return;
            }

            string url = Console.ReadLine();
            if (string.IsNullOrEmpty(url))
            {
                Console.Error.WriteLine("The third input line must be a URL template such as http://host/list?page={0}.");
                return;
            }

            Encoding encoding;
            try
            {
                // GetEncoding throws ArgumentException (or its subclass ArgumentNullException)
                // for a missing or unknown encoding name.
                encoding = Encoding.GetEncoding(Console.ReadLine());
            }
            catch (ArgumentException)
            {
                Console.Error.WriteLine("The fourth input line must be a valid encoding name, e.g. utf-8 or gb2312.");
                return;
            }

            HashSet<string> seen = new HashSet<string>();

            // One writer for the whole run; AutoFlush keeps every address on disk as soon
            // as it is found, which is what reopening the file per match used to achieve.
            using (StreamWriter writer = new StreamWriter(OutputFile, true))
            {
                writer.AutoFlush = true;

                for (int page = firstPage; page <= lastPage; page++)
                {
                    string pageUrl;
                    try
                    {
                        pageUrl = string.Format(url, page);
                    }
                    catch (FormatException)
                    {
                        Console.Error.WriteLine("The URL template is not a valid format string.");
                        break;
                    }

                    string html = SRWebClient.GetPage(pageUrl, encoding, cookie);
                    if (html == null || html.Length <= MinPageLength)
                    {
                        continue;
                    }

                    for (Match m = MailPattern.Match(html); m.Success; m = m.NextMatch())
                    {
                        // Add returns false when the address was already recorded.
                        if (seen.Add(m.Value))
                        {
                            Console.WriteLine(m.Value);
                            writer.WriteLine(m.Value);
                        }
                    }
                }
            }

            Console.Write(" 完成 ");
            Console.ReadLine();
        }

        /// <summary>
        /// Loads the cookie header value from <see cref="CookieFile"/>.
        /// </summary>
        /// <returns>
        /// The trimmed file content, or null when the file is missing or blank, in which
        /// case no cookie value is passed to the page download.
        /// </returns>
        private static string ReadCookie()
        {
            if (!File.Exists(CookieFile))
            {
                return null;
            }

            string text = File.ReadAllText(CookieFile).Trim();
            if (text.Length == 0)
            {
                return null;
            }

            return text;
        }
    }


    
public   class  SRWebClient
    {
        
/// <summary>
        /// Cookies attached to the requests made by this instance; null means none.
        /// </summary>
        public CookieCollection cookie;

        /// <summary>
        /// Creates a client that starts without any cookies.
        /// </summary>
        public SRWebClient()
        {
            this.cookie = null;
        }

        
#region  从包含多个 Cookie 的字符串读取到 CookieCollection 集合中
        
/// <summary>
        /// Splits a raw cookie header on ';' and adds every fragment that
        /// GetCookieFromString turns into a cookie to <paramref name="cookieCol"/>.
        /// </summary>
        /// <param name="cookieCol">Target collection; a new one is created when it is null.</param>
        /// <param name="cookieHead">Raw header text; when null nothing is added.</param>
        /// <param name="defaultDomain">Domain handed to GetCookieFromString for each fragment.</param>
        private static void AddCookieWithCookieHead(ref CookieCollection cookieCol, string cookieHead, string defaultDomain)
        {
            if (cookieCol == null)
            {
                cookieCol = new CookieCollection();
            }

            if (cookieHead == null)
            {
                return;
            }

            foreach (string fragment in cookieHead.Split(';'))
            {
                Cookie ck = GetCookieFromString(fragment.Trim(), defaultDomain);
                if (ck != null)
                {
                    cookieCol.Add(ck);
                }
            }
        }
        
#endregion

        
#region  读取某一个 Cookie 字符串到 Cookie 变量中
        
/// <summary>
        /// Builds a single <see cref="Cookie"/> from a comma-separated list of
        /// <c>key=value</c> pairs.
        /// </summary>
        /// <remarks>
        /// The keys <c>path</c> and <c>domain</c> (compared case-insensitively) set the
        /// matching cookie properties, <c>expires</c> and <c>max-age</c> are ignored, and any
        /// other key becomes the cookie name with its value. Pairs are applied in the order
        /// they appear, so a later pair overrides an earlier one. Pieces without '=' or
        /// with an empty key are skipped.
        /// </remarks>
        /// <param name="cookieString">Text to parse; null yields null.</param>
        /// <param name="defaultDomain">Domain used when the text does not contain one.</param>
        /// <returns>The cookie, or null when no name/value pair was found.</returns>
        private static Cookie GetCookieFromString(string cookieString, string defaultDomain)
        {
            if (cookieString == null)
            {
                return null;
            }

            Cookie ck = new Cookie();

            foreach (string piece in cookieString.Split(','))
            {
                string pair = piece.Trim();
                int index = pair.IndexOf('=');
                if (index <= 0)
                {
                    continue;
                }

                string key = pair.Substring(0, index);
                string value = pair.Substring(index + 1);

                if (string.Equals(key, "path", StringComparison.OrdinalIgnoreCase))
                {
                    ck.Path = value;
                }
                else if (string.Equals(key, "expires", StringComparison.OrdinalIgnoreCase) ||
                         string.Equals(key, "max-age", StringComparison.OrdinalIgnoreCase))
                {
                    // Lifetime attributes are deliberately not applied.
                }
                else if (string.Equals(key, "domain", StringComparison.OrdinalIgnoreCase))
                {
                    ck.Domain = value;
                }
                else
                {
                    ck.Name = key;
                    ck.Value = value;
                }
            }

            if (string.IsNullOrEmpty(ck.Name))
            {
                return null;
            }

            if (string.IsNullOrEmpty(ck.Domain))
            {
                ck.Domain = defaultDomain;
            }

            return ck;
        }
        
#endregion



        
/// <summary>
        /// Downloads the body of <paramref name="URL"/> as text.
        /// </summary>
        /// <remarks>
        /// Redirects are not followed, the timeout is 20 seconds and the body is decoded
        /// with <see cref="Encoding.Default"/>. Any failure is traced and reported as an
        /// empty string rather than an exception.
        /// </remarks>
        /// <param name="URL">Address to download; only HTTP(S) requests are handled.</param>
        /// <param name="CreateCookie">
        /// When true, the cookies returned by the server replace the <c>cookie</c> field.
        /// </param>
        /// <returns>The response text, or an empty string on any failure.</returns>
        public string DownloadHtml(string URL, bool CreateCookie)
        {
            try
            {
                HttpWebRequest request = WebRequest.Create(URL) as HttpWebRequest;
                if (request == null)
                {
                    return string.Empty;
                }

                // A container must always be assigned: without one HttpWebResponse.Cookies
                // stays empty, so CreateCookie could never capture anything on the first call.
                request.CookieContainer = new CookieContainer();
                if (cookie != null)
                {
                    request.CookieContainer.Add(cookie);
                }

                request.AllowAutoRedirect = false;
                request.Timeout = 20000;

                using (HttpWebResponse res = (HttpWebResponse)request.GetResponse())
                using (StreamReader reader = new StreamReader(res.GetResponseStream(), Encoding.Default))
                {
                    string body = reader.ReadToEnd();
                    if (CreateCookie)
                    {
                        cookie = res.Cookies;
                    }

                    return body;
                }
            }
            catch (Exception ex)
            {
                // Best-effort download: callers receive an empty string, but the cause is
                // no longer lost.
                System.Diagnostics.Trace.TraceError("DownloadHtml failed for " + URL + ": " + ex);
                return string.Empty;
            }
        }

        
/// <summary>
        /// Downloads <paramref name="FileURL"/> and writes the body to <paramref name="FileSavePath"/>.
        /// </summary>
        /// <remarks>
        /// Redirects are followed. Errors raised while creating the request or obtaining
        /// the response propagate to the caller. Errors raised while copying the body to
        /// disk are traced and otherwise ignored, so the returned length does not prove
        /// that the whole file was written.
        /// </remarks>
        /// <param name="FileURL">Address of the file; must produce an HTTP(S) request.</param>
        /// <param name="FileSavePath">Path of the file to create or overwrite.</param>
        /// <param name="CreateCookie">
        /// When true, the cookies returned by the server replace the <c>cookie</c> field.
        /// </param>
        /// <returns>The Content-Length reported by the server.</returns>
        /// <exception cref="NotSupportedException">The URL does not produce an HTTP(S) request.</exception>
        public long DownloadFile(string FileURL, string FileSavePath, bool CreateCookie)
        {
            HttpWebRequest req = WebRequest.Create(FileURL) as HttpWebRequest;
            if (req == null)
            {
                throw new NotSupportedException("Only HTTP(S) URLs are supported: " + FileURL);
            }

            // A container must always be assigned: without one HttpWebResponse.Cookies
            // stays empty, so CreateCookie could never capture anything on the first call.
            req.CookieContainer = new CookieContainer();
            if (cookie != null)
            {
                req.CookieContainer.Add(cookie);
            }

            req.AllowAutoRedirect = true;

            long fileLength = 0;

            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            {
                if (CreateCookie)
                {
                    cookie = res.Cookies;
                }

                using (Stream stream = res.GetResponseStream())
                {
                    try
                    {
                        fileLength = res.ContentLength;

                        byte[] buffer = new byte[8192];

                        // Read the first chunk before creating the file so that a failed
                        // read does not leave an empty file behind.
                        int read = stream.Read(buffer, 0, buffer.Length);

                        using (FileStream fs = File.Create(FileSavePath))
                        {
                            while (read > 0)
                            {
                                fs.Write(buffer, 0, read);
                                read = stream.Read(buffer, 0, buffer.Length);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best-effort copy, kept for existing callers; the failure is
                        // traced instead of being dropped silently.
                        System.Diagnostics.Trace.TraceError("DownloadFile failed for " + FileURL + ": " + ex);
                    }
                }
            }

            return fileLength;
        }

        
/// <summary>
        /// Sends a request to <paramref name="RequestPageURL"/> and returns the response text.
        /// </summary>
        /// <remarks>
        /// When <paramref name="Data"/> yields a non-empty string it is sent as a UTF-8
        /// <c>application/x-www-form-urlencoded</c> POST body; otherwise the request keeps
        /// its default method. Redirects are not followed, the timeout is 20 seconds and
        /// the response is decoded with <see cref="Encoding.Default"/>. Any failure is
        /// traced and reported as an empty string.
        /// </remarks>
        /// <param name="RequestPageURL">Target address, also sent as the Referer.</param>
        /// <param name="Data">Form fields to submit; null is treated as "no fields".</param>
        /// <param name="CreateCookie">
        /// When true, the response's Set-Cookie header is parsed and the resulting cookies
        /// are added to the <c>cookie</c> field.
        /// </param>
        /// <returns>The response text, or an empty string on any failure.</returns>
        public string Request(string RequestPageURL, RequestData Data, bool CreateCookie)
        {
            try
            {
                HttpWebRequest request = WebRequest.Create(RequestPageURL) as HttpWebRequest;
                if (request == null)
                {
                    return string.Empty;
                }

                string postdata = Data == null ? string.Empty : Data.GetData();

                request.Referer = RequestPageURL;
                request.AllowAutoRedirect = false;
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; Maxthon; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                request.Timeout = 20000;

                if (cookie != null)
                {
                    request.CookieContainer = new CookieContainer();
                    request.CookieContainer.Add(cookie);
                }

                // Data to submit means the request goes out as a form POST.
                if (postdata.Length > 0)
                {
                    request.ContentType = "application/x-www-form-urlencoded";
                    request.Method = "POST";

                    byte[] payload = Encoding.UTF8.GetBytes(postdata);
                    request.ContentLength = payload.Length;

                    using (Stream body = request.GetRequestStream())
                    {
                        body.Write(payload, 0, payload.Length);
                    }
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (CreateCookie)
                    {
                        AddCookieWithCookieHead(ref cookie, response.Headers["Set-Cookie"], request.RequestUri.Host);
                    }

                    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort request: callers receive an empty string, but the cause is
                // no longer lost.
                System.Diagnostics.Trace.TraceError("Request failed for " + RequestPageURL + ": " + ex);
            }

            return string.Empty;
        }


        
/// <summary>
        /// Posts <paramref name="Data"/> to the fixed ring-tone download page and saves the
        /// returned file under <c>f:/mobileMusic/</c>.
        /// </summary>
        /// <remarks>
        /// The file name is taken from the response's Content-Disposition header. Because
        /// that header is supplied by the server, quotes, trailing parameters and any
        /// directory part are stripped before the name is used. The file is placed in a
        /// sub-directory named with a random number below 100, created on demand.
        /// </remarks>
        /// <param name="Data">Form fields to submit; null is treated as "no fields".</param>
        /// <param name="file">
        /// On success, the saved path relative to the save root (<c>NN/name</c>);
        /// null on failure.
        /// </param>
        /// <returns>True when the file was saved; false on any failure.</returns>
        public bool PostDownload(RequestData Data, out string file)
        {
            const string downloadUrl = "http://www.imobile.com.cn/wapdiyringdownload.php";
            const string saveRoot = "f:/mobileMusic/";
            const string nameMarker = "filename=";

            file = null;

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(downloadUrl);

                string postdata = Data == null ? string.Empty : Data.GetData();

                request.Referer = downloadUrl;
                request.AllowAutoRedirect = false;
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; Maxthon; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                request.Timeout = 20000;

                if (cookie != null)
                {
                    request.CookieContainer = new CookieContainer();
                    request.CookieContainer.Add(cookie);
                }

                // Data to submit means the request goes out as a form POST.
                if (postdata.Length > 0)
                {
                    request.ContentType = "application/x-www-form-urlencoded";
                    request.Method = "POST";

                    byte[] payload = Encoding.UTF8.GetBytes(postdata);
                    request.ContentLength = payload.Length;

                    using (Stream body = request.GetRequestStream())
                    {
                        body.Write(payload, 0, payload.Length);
                    }
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    string disposition = response.Headers["Content-Disposition"];
                    if (disposition == null)
                    {
                        return false;
                    }

                    int markerAt = disposition.IndexOf(nameMarker, StringComparison.OrdinalIgnoreCase);
                    if (markerAt < 0)
                    {
                        return false;
                    }

                    string name = disposition.Substring(markerAt + nameMarker.Length);

                    // Drop any header parameters that follow the file name.
                    int semicolon = name.IndexOf(';');
                    if (semicolon >= 0)
                    {
                        name = name.Substring(0, semicolon);
                    }

                    // Strip surrounding quotes and any directory part so the server cannot
                    // steer the write outside the save root.
                    name = Path.GetFileName(name.Trim().Trim('"'));
                    if (string.IsNullOrEmpty(name))
                    {
                        return false;
                    }

                    string relativePath = new Random().Next(100).ToString() + "/" + name;
                    string targetPath = saveRoot + relativePath;

                    // File.Create does not create missing directories.
                    Directory.CreateDirectory(Path.GetDirectoryName(targetPath));

                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fs = File.Create(targetPath))
                    {
                        byte[] buffer = new byte[8192];
                        int read = stream.Read(buffer, 0, buffer.Length);
                        while (read > 0)
                        {
                            fs.Write(buffer, 0, read);
                            read = stream.Read(buffer, 0, buffer.Length);
                        }
                    }

                    file = relativePath;
                    return true;
                }
            }
            catch (Exception ex)
            {
                // Report the failure through the return value instead of always claiming success.
                System.Diagnostics.Trace.TraceError("PostDownload failed: " + ex);
                file = null;
                return false;
            }
        }
        
#region  GetPage
        
/// <summary>
        /// Downloads a page without cookies, retrying until a non-empty result is obtained
        /// or the attempt limit is reached.
        /// </summary>
        /// <param name="url">Address of the page.</param>
        /// <param name="encoding">Encoding passed on to the single-attempt overload.</param>
        /// <param name="TryCount">Maximum number of attempts; zero or less means no attempt is made.</param>
        /// <returns>
        /// The first non-empty page text, or an empty string when every attempt came back
        /// null or empty.
        /// </returns>
        public static string GetPage(string url, Encoding encoding, int TryCount)
        {
            int attempt = 0;
            while (attempt < TryCount)
            {
                // The cast selects the (string, Encoding, string) overload explicitly.
                string page = GetPage(url, encoding, (string)null);
                if (!string.IsNullOrEmpty(page))
                {
                    return page;
                }

                attempt++;
            }

            return string.Empty;
        }

        
/// <summary>
        /// Downloads a page in a single attempt and returns its text.
        /// </summary>
        /// <remarks>
        /// Redirects are not followed and the timeout is 20 seconds. Only responses with
        /// status 200 whose reported Content-Length is below 1 MiB are read. Any failure
        /// is traced and reported as an empty string.
        /// </remarks>
        /// <param name="url">Address of the page.</param>
        /// <param name="encoding">Encoding used to decode the body; null falls back to UTF-8.</param>
        /// <param name="cookie">Raw value for the Cookie request header; null sends none.</param>
        /// <returns>The page text, or an empty string when it was not read or an error occurred.</returns>
        public static string GetPage(string url, Encoding encoding, string cookie)
        {
            const int maxPageBytes = 1024 * 1024;

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2;)";
                request.Timeout = 20000;
                request.AllowAutoRedirect = false;

                if (cookie != null)
                {
                    request.Headers["Cookie"] = cookie;
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= maxPageBytes)
                    {
                        return string.Empty;
                    }

                    // A null encoding used to make StreamReader throw, which the blanket
                    // catch turned into a permanently empty result.
                    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding ?? Encoding.UTF8))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort fetch: callers (including the retrying overload) receive an
                // empty string, but the cause is no longer lost.
                System.Diagnostics.Trace.TraceError("GetPage failed for " + url + ": " + ex);
            }

            return string.Empty;
        }
        
#endregion
    }

    
/// <summary>
    /// Collects form fields and renders them as a <c>key=value&amp;key=value</c> string
    /// for use as a request body.
    /// </summary>
    /// <remarks>
    /// Fields are rendered in the order they were first added. Names and values are
    /// written exactly as supplied.
    /// NOTE(review): nothing is URL-encoded here, so callers must pass already-encoded
    /// text if a value can contain '&amp;', '=' or non-ASCII characters — confirm with callers.
    /// </remarks>
    public class RequestData
    {
        // Field names in first-insertion order, so the rendered string is deterministic.
        private readonly List<string> order = new List<string>();

        // Current value of every field.
        private readonly Dictionary<string, string> values = new Dictionary<string, string>();

        /// <summary>Creates an empty field set.</summary>
        public RequestData()
        {
        }

        /// <summary>
        /// Renders all fields as <c>name=value</c> pairs joined with '&amp;'.
        /// </summary>
        /// <returns>The joined string; empty when no field has been added.</returns>
        public string GetData()
        {
            StringBuilder sb = new StringBuilder();

            foreach (string key in order)
            {
                if (sb.Length > 0)
                {
                    sb.Append('&');
                }

                // A null value renders as an empty string, i.e. "name=".
                sb.Append(key).Append('=').Append(values[key]);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Adds a field, or replaces the value of a field that already exists (its
        /// position in the output is kept).
        /// </summary>
        /// <param name="Field">Field name; must not be null.</param>
        /// <param name="Value">Field value; may be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="Field"/> is null.</exception>
        public void AddField(string Field, string Value)
        {
            if (!values.ContainsKey(Field))
            {
                order.Add(Field);
            }

            values[Field] = Value;
        }
    }
}

转载于:https://www.cnblogs.com/stu-acer/archive/2008/10/11/1308712.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值