为了防止目标站点封禁本机IP,这里通过一个HTTP代理来发送GET请求,下面上代码
// Fetches Url with an HTTP GET routed through the proxy at ip:port and returns
// the response body as a string. On any failure the exception message is
// written to the console and execution falls through past this block.
try
{
    // ip is the proxy host address, port is the proxy port number.
    WebProxy proxyObject = new WebProxy(ip, port);
    HttpWebRequest Req = (HttpWebRequest)WebRequest.Create(Url);
    Req.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; QQWubi 133; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; CIBA; InfoPath.2)";
    Req.Proxy = proxyObject; // route the request through the proxy
    Req.Method = "GET";
    Req.Timeout = 15000; // milliseconds

    // NOTE(review): the body is decoded with Encoding.Default rather than the
    // charset declared by the response; confirm this matches the target site.
    Encoding code = Encoding.Default;

    // Dispose the response as well as the reader so the underlying connection
    // is released even if the reader cannot be constructed.
    using (HttpWebResponse Resp = (HttpWebResponse)Req.GetResponse())
    using (StreamReader sr = new StreamReader(Resp.GetResponseStream(), code))
    {
        return sr.ReadToEnd();
    }
}
catch (Exception ex)
{
    // Best-effort: report the failure and let the surrounding code continue.
    Console.WriteLine(ex.Message);
}
什么?您是说您没有代理IP吗?
刚刚百度了一下免费代理IP的网站,然后写下了一个比较简单的爬代理IP的方法,下面上代码
// Scrapes proxy candidates from the listing page and collects them in IPList.
// Each entry is a two-element array: [0] = IP address, [1] = port.
List<string[]> IPList = new List<string[]>();
string[] str;
// Despite its name, "url" holds the HTML returned by Htmlbase.GetHtml.
// NOTE(review): Htmlbase.GetHtml is defined elsewhere; presumably it returns the
// page HTML as a non-null string - confirm before relying on that.
var url = Htmlbase.GetHtml("http://www.xicidaili.com/wt/");
// Each match is a "<tr class..." line plus the three lines that follow it.
var reg = new Regex("<tr class.+[\r\n].+[\r\n].+[\r\n].+[\r\n]").Matches(url);
foreach (Match m in reg)
{
    // First dotted-quad IPv4 address found inside the matched row.
    var ip = new Regex(@"((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)").Match(m.Value).Value;
    var html = new Regex("<td>.+</td>").Matches(m.Value);

    // Skip rows that carry no IP or fewer than two <td> cells; indexing html[1]
    // on such a row would throw.
    if (ip.Length == 0 || html.Count < 2)
    {
        continue;
    }

    // The port is taken from the second <td>...</td> match of the row.
    string port = html[1].Value.Replace("</td>", "").Replace("<td>", "");
    Console.WriteLine(ip);

    // Skip addresses that were already collected. The check has to sit at the
    // level of the outer loop: a "continue" inside a nested foreach would only
    // advance that inner loop and the duplicate would still be added.
    if (IPList.Exists(s => s[0] == ip))
    {
        continue;
    }

    str = new string[2];
    str[0] = ip;
    str[1] = port;
    IPList.Add(str);
}
接下来您是不是发现,上面用到的 Htmlbase.GetHtml 方法并没有给出实现?请自行百度获取指定页面Html的方法吧~哈哈
玩笑玩笑~至于GetHtml方法的实现,可以参考我的另一篇文章,仅供参考,你懂的。效果还不错
IP爬下来之后,又发现一个问题:有很多IP对于本机或者目标站点来说是无效的。那么下面就使用一个验证代理IP是否可用的方法吧,下面上代码
// Validates every proxy in IPList by sending a GET request through it.
// Proxies that respond are appended to IP.txt as "ip:port"; proxies that fail
// are removed from IPList.
for (int i = 0; i < IPList.Count; i++)
{
    var ip = $"{IPList[i][0]}";
    Console.WriteLine(i);
    try
    {
        // Parsed inside the try so that a malformed port drops this entry
        // instead of aborting the whole validation loop.
        var port = Convert.ToInt32(IPList[i][1]);

        // ip is the proxy host address, port is the proxy port number.
        WebProxy proxyObject = new WebProxy(ip, port);
        HttpWebRequest Req = (HttpWebRequest)WebRequest.Create("目标URL或者IP");
        Req.Proxy = proxyObject; // route the request through the proxy
        Req.Method = "GET";
        Req.Timeout = 4000; // milliseconds

        // A proxy counts as usable as soon as GetResponse() returns; the body
        // is not read. Disposing the response releases the connection.
        using (HttpWebResponse Resp = (HttpWebResponse)Req.GetResponse())
        {
            Console.WriteLine($"IP可用 {ip}");
            // Append mode; the using block closes the file even if the write throws.
            using (StreamWriter writer = new StreamWriter("IP.txt", true))
            {
                writer.WriteLine($"{ip}:{port}");
            }
        }
    }
    catch
    {
        // Any failure (bad port, connection error, timeout, file write error)
        // is treated as an unusable proxy.
        Console.WriteLine(ip + " 失效,删除");
        IPList.RemoveAt(i);
        // The following entries shifted down by one; step back so the entry
        // that now sits at index i is not skipped.
        i--;
    }
}
嗯,这样子就完美了,程序仅提供一个思路,欢迎您拿去使用