/// <summary>
/// Console crawler that walks the gallery pages of a fixed set of site categories,
/// one thread per category, and saves the first image found on each page to disk.
/// </summary>
class Program
{
    /// <summary>Base address every page URL is built from.</summary>
    private const string BaseUrl = "http://www.mm8mm8.com/";

    /// <summary>Timeout applied to every page and image request, in milliseconds (5 s).</summary>
    private const int RequestTimeoutMs = 1000 * 5;

    /// <summary>Target folders; each downloaded image is written into one of them, picked at random.</summary>
    private static readonly string[] TargetDirectories = { @"d:\MM\1\", @"D:\MM\2\", @"d:\MM\3\", @"d:\MM\4\" };

    // System.Random is not thread-safe, and on .NET Framework instances created in quick
    // succession can share a time-based seed. One shared instance guarded by a lock avoids both.
    private static readonly Random DirectoryPicker = new Random();
    private static readonly object DirectoryPickerLock = new object();

    /// <summary>Matches an img tag and captures the value of its src attribute in the "imgUrl" group.</summary>
    private static readonly Regex ImgTagRegex = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

    /// <summary>
    /// Starts one download thread per category, then waits for console input.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string[] categories = { "model", "sexy", "belle", "stars" };
        foreach (string category in categories)
        {
            Thread worker = new Thread(DownLoad);
            worker.Start(category);
        }

        // Block on console input before Main returns.
        Console.Read();
    }

    /// <summary>
    /// Crawls one category: for gallery ids 9014 down to 11 it requests up to 20 pages,
    /// extracts the image URLs from each page and saves the first one to disk.
    /// A page without any image ends the current gallery; any other failure is logged
    /// and the crawl moves on to the next page.
    /// </summary>
    /// <param name="category">
    /// Category path segment of the URL. Typed as <see cref="object"/> so the method can be
    /// used directly as a parameterized thread entry point.
    /// </param>
    public static void DownLoad(object category)
    {
        string url = string.Empty;
        for (int purl = 9014; purl > 10; purl--)
        {
            for (int pageSize = 0; pageSize < 20; pageSize++)
            {
                try
                {
                    url = BuildPageUrl(category, purl, pageSize);
                    List<string> imageUrls = GetHtmlImageUrlList(FetchPage(url));
                    if (imageUrls.Count == 0)
                    {
                        Console.WriteLine("时间:" + DateTime.Now + " 当前网址:" + url + " 未发现图片");
                        break;
                    }

                    string directoryName = PickTargetDirectory();
                    // CreateDirectory does nothing when the directory already exists,
                    // so no separate existence check is needed.
                    Directory.CreateDirectory(directoryName);

                    string fileName = category + "_" + purl + "_" + (pageSize + 1) + ".jpg";
                    DownloadImage(imageUrls[0], directoryName, fileName);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("时间:" + DateTime.Now + " 当前网址:" + url + " 错误信息:" + ex.Message);
                }
            }
        }
    }

    /// <summary>
    /// Builds the URL of one gallery page: "{category}/{id}.html" for the first page,
    /// "{category}/{id}_{page}.html" for the following ones.
    /// </summary>
    private static string BuildPageUrl(object category, int purl, int pageSize)
    {
        if (pageSize == 0)
        {
            return BaseUrl + category + "/" + purl + ".html";
        }

        return BaseUrl + category + "/" + purl + "_" + pageSize + ".html";
    }

    /// <summary>
    /// Downloads a page and returns its body as text. The response, its stream and the
    /// reader are disposed before returning so the underlying connection is released.
    /// </summary>
    private static string FetchPage(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = RequestTimeoutMs;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>Returns one of the target directories, chosen at random.</summary>
    private static string PickTargetDirectory()
    {
        lock (DirectoryPickerLock)
        {
            return TargetDirectories[DirectoryPicker.Next(0, TargetDirectories.Length)];
        }
    }

    /// <summary>
    /// Downloads one image and saves it as <paramref name="directoryName"/> + <paramref name="fileName"/>.
    /// Failures are logged with the file name and swallowed so the caller continues with the next page.
    /// </summary>
    private static void DownloadImage(string imageUrl, string directoryName, string fileName)
    {
        try
        {
            string localFile = directoryName + fileName;
            HttpWebRequest imageRequest = (HttpWebRequest)WebRequest.Create(imageUrl);
            imageRequest.Timeout = RequestTimeoutMs;

            // The using blocks unwind in reverse order, so the image is disposed
            // before the stream it was decoded from.
            using (HttpWebResponse imageResponse = (HttpWebResponse)imageRequest.GetResponse())
            using (Stream imageStream = imageResponse.GetResponseStream())
            using (Image image = Image.FromStream(imageStream))
            {
                image.Save(localFile);
            }

            Console.WriteLine("时间:" + DateTime.Now + " 图片:" + fileName + " 已经下载 存入磁盘位置:" + localFile);
        }
        catch (Exception e)
        {
            Console.WriteLine("时间:" + DateTime.Now + " 当前图片:" + fileName + " 错误信息:" + e.Message);
        }
    }

    /// <summary>
    /// Collects the src value of every img tag found in an HTML string.
    /// </summary>
    /// <param name="sHtmlText">HTML markup to scan; null or empty yields an empty list.</param>
    /// <returns>The captured image URLs, in document order.</returns>
    public static List<string> GetHtmlImageUrlList(string sHtmlText)
    {
        List<string> sUrlList = new List<string>();
        if (string.IsNullOrEmpty(sHtmlText))
        {
            return sUrlList;
        }

        foreach (Match match in ImgTagRegex.Matches(sHtmlText))
        {
            sUrlList.Add(match.Groups["imgUrl"].Value);
        }

        return sUrlList;
    }
}
多线程下载网站图片
最新推荐文章于 2023-12-22 12:22:59 发布