多线程下载网站图片

  /// <summary>
  /// Console crawler that starts one thread per site category and downloads
  /// the first image found on every gallery page of that category.
  /// </summary>
  class Program
    {
        // Timeout applied to both page and image requests (5 seconds).
        private const int RequestTimeoutMs = 1000 * 5;

        // Built once and shared; Regex instances are safe to use from several threads.
        private static readonly Regex ImgTagRegex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

        // Candidate output folders; one is picked at random for every image.
        private static readonly string[] TargetDirectories = { @"d:\MM\1\", @"D:\MM\2\", @"d:\MM\3\", @"d:\MM\4\" };

        // A single shared generator: creating a new Random per call from several
        // threads yields correlated values, and Random itself is not thread-safe,
        // so every access goes through DirectoryPickerLock.
        private static readonly Random DirectoryPicker = new Random();
        private static readonly object DirectoryPickerLock = new object();

        /// <summary>
        /// Starts one download thread per category, then blocks on console input.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string[] categories = { "model", "sexy", "belle", "stars" };

            foreach (string category in categories)
            {
                Thread thread = new Thread(DownLoad);

                thread.Start(category);
            }
            Console.Read();
        }

        /// <summary>
        /// Walks gallery ids 9014 down to 11 of the given category and, for each id,
        /// up to 20 sub-pages. The first image of every page is saved to disk.
        /// A page without any image ends the current gallery; any other failure is
        /// logged to the console and the next page is tried.
        /// </summary>
        /// <param name="category">Category path segment; converted to text by string concatenation.</param>
        public static void DownLoad(object category)
        {
            string url = string.Empty;

            for (int purl = 9014; purl > 10; purl--)
            {
                for (int pageSize = 0; pageSize < 20; pageSize++)
                {
                    try
                    {
                        url = BuildPageUrl(category, purl, pageSize);

                        var list = GetHtmlImageUrlList(FetchPageHtml(url));

                        // Checked before touching the file system so no directory is
                        // created for a page that has nothing to save.
                        if (list.Count == 0)
                        {
                            Console.WriteLine("时间:" + DateTime.Now + " 当前网址:" + url + "  未发现图片");
                            break;
                        }

                        var directoryName = PickTargetDirectory();

                        // CreateDirectory is a no-op when the folder already exists, which
                        // also avoids a check-then-create race between download threads.
                        Directory.CreateDirectory(directoryName);

                        var fileName = category + "_" + purl + "_" + (pageSize + 1) + ".jpg";
                        var localFile = directoryName + fileName;

                        try
                        {
                            SaveImage(list[0], localFile);

                            Console.WriteLine("时间:" + DateTime.Now + "  图片:" + fileName + " 已经下载   存入磁盘位置:" + localFile);
                        }
                        catch (Exception e)
                        {
                            // Best effort: a broken image must not stop the crawl.
                            Console.WriteLine("时间:" + DateTime.Now + " 当前图片:" + fileName + " 错误信息:" + e.Message);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log the failed page and move on to the next one.
                        Console.WriteLine("时间:" + DateTime.Now + " 当前网址:" + url + " 错误信息:" + ex.Message);
                    }
                }
            }
        }

        // Builds the page address: the first page has no "_n" suffix, later pages do.
        private static string BuildPageUrl(object category, int galleryId, int pageIndex)
        {
            if (pageIndex == 0)
                return "http://www.mm8mm8.com/" + category + "/" + galleryId + ".html";

            return "http://www.mm8mm8.com/" + category + "/" + galleryId + "_" + pageIndex + ".html";
        }

        // Downloads a page and returns its body as text, releasing the connection afterwards.
        private static string FetchPageHtml(string url)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);

            request.Timeout = RequestTimeoutMs;

            // The response must be closed, otherwise the pooled connection is never
            // returned and later requests to the same host run into timeouts.
            using (var response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }

        // Picks one of the target directories at random, in a thread-safe way.
        private static string PickTargetDirectory()
        {
            lock (DirectoryPickerLock)
            {
                return TargetDirectories[DirectoryPicker.Next(0, TargetDirectories.Length)];
            }
        }

        // Downloads the image at imageUrl, decodes it and writes it to localFile.
        private static void SaveImage(string imageUrl, string localFile)
        {
            var imageRequest = (HttpWebRequest)WebRequest.Create(imageUrl);

            imageRequest.Timeout = RequestTimeoutMs;

            // The source stream has to stay open for the lifetime of the Image, so the
            // image is the innermost using and is disposed before the stream.
            using (var imageResponse = (HttpWebResponse)imageRequest.GetResponse())
            using (Stream imageStream = imageResponse.GetResponseStream())
            using (Image image = Image.FromStream(imageStream))
            {
                image.Save(localFile);
            }
        }

        /// <summary>
        /// Returns the URLs of all images referenced by img tags in the HTML.
        /// </summary>
        /// <param name="sHtmlText">HTML source; null or empty yields an empty list.</param>
        /// <returns>The src value of every matched img tag, in document order.</returns>
        public static List<string> GetHtmlImageUrlList(string sHtmlText)
        {
            List<string> sUrlList = new List<string>();

            // Regex.Matches throws on null; an empty document simply has no images.
            if (string.IsNullOrEmpty(sHtmlText))
                return sUrlList;

            foreach (Match match in ImgTagRegex.Matches(sHtmlText))
                sUrlList.Add(match.Groups["imgUrl"].Value);

            return sUrlList;
        }
    }

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值