运用正则表达式匹配链接,实现爬取煎蛋网的图片。代码很短,新手值得一试。
不说废话了,直接上代码。
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace Crawler
{
/// <summary>
/// Console crawler that downloads every image referenced by an &lt;img&gt; tag
/// on a single web page into a local directory.
/// </summary>
class Program
{
    // Captures the src (or any attribute ending in "src") value of an <img> tag
    // when it ends in .png/.jpg/.gif. "[^>]" keeps the search inside one tag and
    // "[^'\"]" keeps the capture inside one attribute value; the "Ext" group
    // exposes the extension so files can be saved with their real type.
    private static readonly Regex ImageRegex = new Regex(
        "<img[^>]*?src=['\"](?<Collect>[^'\"]*?\\.(?<Ext>png|jpg|gif))['\"]");

    /// <summary>
    /// Fetches the page, extracts image links with <see cref="ImageRegex"/>,
    /// and saves each image as a sequentially numbered file.
    /// Progress and errors are written to the console; the program waits for
    /// a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string url = "http://jandan.net/top-zoo";
        string path = @"D:\Picture\";

        string str;
        try
        {
            str = DownloadPage(url);
        }
        catch (WebException ex)
        {
            // The page itself could not be fetched; nothing to crawl.
            Console.WriteLine("-------------" + ex);
            Console.ReadKey();
            return;
        }

        if (string.IsNullOrEmpty(str))
        {
            Console.WriteLine("————————-错误—————————");
            Console.ReadKey();
            return;
        }

        Uri baseUri = new Uri(url);
        int name = 0;
        try
        {
            // DownloadFile does not create missing directories.
            Directory.CreateDirectory(path);

            using (WebClient client = new WebClient())
            {
                foreach (Match match1 in ImageRegex.Matches(str))
                {
                    string src = match1.Groups["Collect"].Value;

                    // Resolve against the page URL so protocol-relative ("//host/a.jpg"),
                    // root-relative ("/a.jpg") and absolute links all work.
                    Uri imageUri;
                    if (!Uri.TryCreate(baseUri, src, out imageUri))
                    {
                        continue;
                    }

                    name++;
                    string fileName = path + name + "." + match1.Groups["Ext"].Value;
                    try
                    {
                        client.DownloadFile(imageUri, fileName);
                        Console.WriteLine("\n正在爬取———————" + "|" + imageUri);
                    }
                    catch (WebException ex)
                    {
                        // One broken image must not abort the remaining downloads.
                        Console.WriteLine("-------------" + ex);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort tool: report anything unexpected (e.g. the target
            // drive is missing) instead of crashing.
            Console.WriteLine("-------------" + ex);
        }

        Console.ReadKey();
    }

    /// <summary>
    /// Performs an HTTP GET on <paramref name="url"/> and returns the response
    /// body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Absolute HTTP/HTTPS address of the page.</param>
    /// <returns>The response body as text.</returns>
    /// <exception cref="WebException">The request failed.</exception>
    private static string DownloadPage(string url)
    {
        HttpWebRequest webRequest = WebRequest.CreateHttp(url);
        webRequest.Method = "GET";
        webRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ";

        // Dispose the response and reader even if reading throws.
        using (WebResponse webResponse = webRequest.GetResponse())
        using (StreamReader streamReader = new StreamReader(webResponse.GetResponseStream(), Encoding.UTF8))
        {
            return streamReader.ReadToEnd();
        }
    }
}
}
希望能给有需要的人一些启示和帮助。