多线程抓取mm8mm8.com数据

最新推荐文章于 2024-05-28 09:15:00 发布

weixin_30323631

最新推荐文章于 2024-05-28 09:15:00 发布

阅读量916

点赞数

原文链接：http://www.cnblogs.com/i799/archive/2013/02/06/2906894.html

版权

 /// <summary>
 /// Program entry point. Declares the site categories to crawl and starts one
 /// dedicated thread per category, each running <c>ExcuteThread</c>.
 /// </summary>
 /// <param name="args">Command-line arguments (not used).</param>
 static void Main(string[] args)
 {
     // Each entry: number of list pages, list-page URL template ("{*}" is
     // replaced by the page number in ExcuteThread) and the category name.
     var sites = new List<Site>
     {
         new Site { page = 32, url = "http://www.mm8mm8.com/model/p{*}.html", type = "model" },
         new Site { page = 38, url = "http://www.mm8mm8.com/sexy/p{*}.html", type = "sexy" },
         new Site { page = 33, url = "http://www.mm8mm8.com/belle/p{*}.html", type = "belle" },
         new Site { page = 26, url = "http://www.mm8mm8.com/stars/p{*}.html", type = "stars" },
         new Site { page = 7, url = "http://www.mm8mm8.com/rihan/list_11_{*}.html", type = "rihan" },
         new Site { page = 6, url = "http://www.mm8mm8.com/siwa/list_12_{*}.html", type = "siwa" }
     };

     // Start the workers in list order; the site is handed over as the thread argument.
     sites.ForEach(site =>
     {
         var worker = new Thread(ExcuteThread);
         worker.Start(site);
     });
 }

      /// <summary>
      /// Thread entry point: walks every list page of one site category, follows each
      /// gallery link found on it and saves the image of every gallery sub-page into
      /// "Images/&lt;type&gt;".
      /// </summary>
      /// <param name="obsite">
      /// The <c>Site</c> to crawl, passed as <see cref="object"/> so the method can be
      /// started through <c>Thread.Start(object)</c>.
      /// </param>
      /// <exception cref="InvalidCastException"><paramref name="obsite"/> is not a <c>Site</c>.</exception>
      public static void ExcuteThread(object obsite)
      {
          Site site = (Site)obsite;
          for (int i = 1; i <= site.page; i++)
          {
              string listUrl = site.url.Replace("{*}", i.ToString());

              string listHtml;
              try
              {
                  listHtml = DownloadString(listUrl);
              }
              catch (Exception ex)
              {
                  // A failed list page must not escape the thread method: report it
                  // and carry on with the next list page.
                  Console.WriteLine(ex.Message);
                  continue;
              }

              // Group 1 captures the href of every "<li><a href="..." title" link.
              MatchCollection galleryLinks = Regex.Matches(listHtml, "<li><a href=\\s*(?:\"(?<1>[^\"]*)\") title");
              foreach (Match link in galleryLinks)
              {
                  try
                  {
                      string galleryUrl = "http://www.mm8mm8.com" + link.Groups[1].Value;
                      string galleryHtml = DownloadString(galleryUrl);

                      // The gallery page announces its page count as "共N页:".
                      int page;
                      string pageText = Regex.Match(galleryHtml, "共\\s*(\\d+)\\s*页:").Groups[1].Value;
                      if (!int.TryParse(pageText, out page))
                      {
                          Console.WriteLine("Page count not found: " + galleryUrl);
                          continue;
                      }

                      // Sub-pages are addressed as "<gallery>_<k>.html".
                      string baseUrl = galleryUrl.Substring(0, galleryUrl.LastIndexOf('.'));

                      // NOTE(review): the loop starts at 2, so page 1 of the gallery
                      // (galleryUrl itself, already downloaded above) is never scanned
                      // for an image - confirm whether that is intentional.
                      for (int k = 2; k <= page; k++)
                      {
                          string contentUrl = baseUrl + "_" + k + ".html";
                          try
                          {
                              string contentHtml = DownloadString(contentUrl);
                              string imageUrl = Regex.Match(contentHtml, "<img src=\\s*(?:\"(?<1>[^\"]*)\") /></a></p>").Groups[1].Value;
                              if (string.IsNullOrEmpty(imageUrl))
                              {
                                  Console.WriteLine("Image not found: " + contentUrl);
                                  continue;
                              }

                              SaveFile(imageUrl, "Images/" + site.type);
                          }
                          catch (Exception ex)
                          {
                              // One broken page or image should not abort the rest of the gallery.
                              Console.WriteLine(ex.Message);
                          }
                      }
                  }
                  catch (Exception ex)
                  {
                      // The gallery page itself could not be processed: skip to the next link.
                      Console.WriteLine(ex.Message);
                  }
              }
          }
      }


       /// <summary>
       /// Sends a web request to <paramref name="url"/> with a 5 second timeout and
       /// returns the stream of the response body.
       /// </summary>
       /// <param name="url">Address to request.</param>
       /// <returns>The response stream; it is not closed here, so the caller has to dispose it.</returns>
       public static Stream DownloadStream(string url)
       {
           HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
           request.Timeout = 1000 * 5; // 5 s timeout

           HttpWebResponse response = (HttpWebResponse)request.GetResponse();
           Stream body = response.GetResponseStream();
           return body;
       }
         
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns its body decoded as GB2312 text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The complete response body as a string.</returns>
        /// <remarks>
        /// No explicit timeout is set on the request. The response and the reader are
        /// disposed before returning so the underlying connection is released even when
        /// reading fails; this matters because several threads download from the same host.
        /// </remarks>
        public static String DownloadString(string url)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);
            using (var response = (HttpWebResponse)request.GetResponse())
            using (var reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312")))
            {
                return reader.ReadToEnd();
            }
        }


   /// <summary>
   /// Decodes an image from <paramref name="instream"/> and saves it to the file
   /// named by <paramref name="path"/> followed directly by <paramref name="filename"/>.
   /// </summary>
   /// <param name="path">Target directory; it is concatenated as-is, so it must already end with a separator.</param>
   /// <param name="filename">File name appended to <paramref name="path"/>.</param>
   /// <param name="instream">Stream containing the image data; it is not closed by this method.</param>
   public static void SaveFile(String path, String filename, Stream instream)
   {
       var localFile = path + filename;

       // Dispose the image even when Save throws, so its resources are always released.
       using (Image image = Image.FromStream(instream))
       {
           image.Save(localFile);
       }
   }

      /// <summary>
      /// Downloads the file at <paramref name="url"/> into the directory <paramref name="path"/>
      /// under a freshly generated unique name that keeps the URL's file extension.
      /// </summary>
      /// <param name="url">Absolute address of the file to download.</param>
      /// <param name="path">Target directory (relative or absolute); created when it does not exist.</param>
      /// <exception cref="UriFormatException"><paramref name="url"/> is not a valid absolute URI.</exception>
      /// <exception cref="WebException">The download failed.</exception>
      public static void SaveFile(String url,String path="Images")
      {
          var fullpath = Path.GetFullPath(path); // resolve to an absolute path
          Directory.CreateDirectory(fullpath);   // does nothing when the directory already exists

          // Take the extension from the path part of the URL only, so a query string
          // or the dots of the host name never end up in the file name.
          string filetype = Path.GetExtension(new Uri(url).AbsolutePath);

          // A GUID gives a unique name even when several threads save at the same
          // instant; a Random seeded from the clock could repeat in that case.
          string target = Path.Combine(fullpath, Guid.NewGuid().ToString("N") + filetype);

          using (WebClient wb = new WebClient())
          {
              wb.Proxy = null;
              wb.DownloadFile(url, target);
          }
      }

转载于:https://www.cnblogs.com/i799/archive/2013/02/06/2906894.html

weixin_30323631

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
多线程抓取mm8mm8.com数据

static void Main(string[] args) { List<Site> sites = new List<Site>{ new Site{page=32,url="http://www.mm8mm8.com/model/p{*}.html",type="model"}, ...
复制链接

扫一扫