通过代理IP来获取HTML源码。这里使用的是免费的代理IP,并通过这些代理IP来获取数据。
下面是获取并测试代理的代码,代理IP来自cnproxy、heibai、proxy360等网站;这些网站对抓取有一些限制,具体的解决办法请见代码。
View Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Net;
using System.IO;
using System.Threading.Tasks;
using System.Threading;
using System.Data.SqlClient;
using System.Data;
using HtmlAgilityPack;
using System.Web;
using System.Text.RegularExpressions;
namespace ProxyTools
{
public class ProxyTest
{
// Set to true by the constructor when the reference download has length 0.
// It is static and nothing in this class sets it back to false.
public static bool Error = false;
// Sum of the per-site result counts accumulated by GetProxyFromWeb
// (counted per site, before the sets are merged).
public int GetProxyTotal = 0;
// Number of scraped entries the constructor could turn into WebProxy objects.
public int ProxyHashCount = 0;
// Proxy strings ("ip:port"). The constructor fills it with every candidate;
// SaveProxy replaces it with the candidates that have no recorded error.
public List<string> proxyResultList;
private int RepeateTimes = 5; // number of download test rounds
private int MaxThread = 300; // maximum degree of parallelism
private int FileLength; // size in bytes of the reference download
private readonly ProxyDetail[] Proxys; // proxies under test
private Uri TestUri = new Uri("http://www.baidu.com/"); // default test page; the constructor replaces it with downUrl
// Whole string is four dot-separated digit groups (heibai / proxy360 cells).
Regex ipPattern = new Regex(@"^\d+\.\d+\.\d+\.\d+$", RegexOptions.Compiled);
// Whole string consists of digits only (port cells).
Regex numPattern = new Regex(@"^\d+$", RegexOptions.Compiled);
// cnproxy: letter="digit" pairs used to decode the obfuscated port.
Regex numCnPattern = new Regex(@"(?<word>[a-z])=""(?<num>\d)""", RegexOptions.Compiled);
// cnproxy: an IP followed by a "+x+y..." letter sequence that encodes the port.
Regex ipPortPattern = new Regex(@"(?is)(?<ip>\d+\.\d+\.\d+\.\d+)[^+]*?(?<word>(\+[a-z])+)", RegexOptions.Compiled);
// dailiip: <td>ip</td> ... <td>port</td>.
Regex dailiipPattern = new Regex(@"(?is)<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td>.*?<td>(?<port>\d+)</td>", RegexOptions.Compiled);
// xker: ip followed by ':' / whitespace / a div ending in port...">, then the port.
Regex xkerPattern = new Regex(@"(?is)(?<ip>(\d{1,3}\.){3}\d{1,3})([:\s]|(</?div>.*?(port\d*"">)))(?<port>\d+)", RegexOptions.Compiled);
// 18daili: <td>ip</td><td>port.
Regex daili18Pattern = new Regex(@"(?is)<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td><td>(?<port>\d+)", RegexOptions.Compiled);
// 35wl: <td ...>ip</td> followed by the next tag and the port.
Regex wl35Pattern = new Regex(@"(?is)<td[^>]*?>(?<ip>(\d{1,3}\.){3}\d{1,3})\s*</td>[^>]*>(?<port>\d+)", RegexOptions.Compiled);
// 51proxied: <td>ip</td> <td ...>port</td>.
Regex proxiedPattern = new Regex(@"<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td>\s*?<td[^>]*>(?<port>\d+)</td>", RegexOptions.Compiled);
// nntime: letter=digit pairs used to decode the obfuscated port.
Regex numnntimePattern = new Regex(@"(?<word>[a-z])=(?<num>\d)", RegexOptions.Compiled);
// nntime: <td>ip ... (+x+y...) where the letters encode the port.
Regex nntimePattern = new Regex(@"<td>(?<ip>(\d{1,3}\.){3}\d{1,3}).*?(?<word>(\+[a-z])*)\)", RegexOptions.Compiled);
// realbooster: ip:port</td>.
Regex realboosterPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)</td>", RegexOptions.Compiled);
// simpleproxylist: ip ... &#NN&#NN...</td> where each NN is a character code of the port.
Regex simpleproxylistPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}).*?(?<word>(&#\d{2})+)</td>", RegexOptions.Compiled);
/// <summary>
/// Downloads the reference page, scrapes the selected proxy-list sites and
/// prepares every scraped entry for testing with <c>Start</c>.
/// </summary>
/// <param name="maxThread">Maximum degree of parallelism used while testing.</param>
/// <param name="repeateTimes">Number of test rounds.</param>
/// <param name="downUrl">URL downloaded through each proxy; must be a valid absolute URI.</param>
/// <param name="checkWeb">Flags selecting which sites are scraped.</param>
/// <remarks>
/// When the reference download fails (length 0) the static <c>Error</c> flag is set
/// and the instance is left with an empty proxy list.
/// NOTE(review): <c>Error</c> is static and is not reset here, so it stays true for
/// later instances once a single construction has failed - confirm this is intended.
/// </remarks>
public ProxyTest(int maxThread,int repeateTimes,string downUrl,CheckProxyWeb checkWeb)
{
    this.MaxThread = maxThread;
    this.RepeateTimes = repeateTimes;
    this.TestUri = new Uri(downUrl);

    // Give the result members non-null defaults so that the early return below
    // does not leave the instance in a state where Start()/SaveProxy() would throw.
    proxyResultList = new List<string>();
    Proxys = new ProxyDetail[0];

    // Size of the reference download; proxies are later validated against it.
    FileLength = GetFileLength();
    if (FileLength == 0)
    {
        Error = true;
        return;
    }

    // Turn every scraped "ip:port" string into a WebProxy, skipping malformed entries.
    List<ProxyDetail> pList = new List<ProxyDetail>();
    foreach (string item in GetProxyFromWeb(checkWeb))
    {
        WebProxy proxy;
        try { proxy = new WebProxy(item); }
        catch (UriFormatException) { continue; }

        ProxyDetail pDetail = new ProxyDetail();
        pDetail.Proxy = proxy;
        pDetail.ProxyString = item;
        pList.Add(pDetail);
    }

    proxyResultList = pList.Select(a => a.ProxyString).ToList();
    ProxyHashCount = pList.Count;
    Proxys = pList.ToArray();
}
// Downloads TestUri directly (no proxy configured here) and returns the size of
// the response in bytes. Returns 0 when the download fails for any reason.
private int GetFileLength()
{
    try
    {
        // Dispose the client deterministically instead of leaving it to the finalizer.
        using (WebClient client = new WebClient())
        {
            return client.DownloadData(TestUri).Length;
        }
    }
    catch { return 0; } // best effort: the caller treats 0 as "could not download"
}
// Scrapes pages 1-10 of cnproxy.com and returns the "ip:port" entries found.
// The site writes the port through JavaScript: each page defines letter="digit"
// pairs and every row carries a "+x+y..." letter sequence, which is decoded here.
// Pages that fail to download or decode are skipped.
private HashSet<string> GetFromCnproxy()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> proxyHash = new HashSet<string>();
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        for (int pageIndex = 1; pageIndex < 11; ++pageIndex)
        {
            Uri uri = new Uri(string.Format("http://www.cnproxy.com/proxy{0}.html", pageIndex));

            // Assign (rather than Add) all headers before every request, so each page is
            // fetched with exactly one value per header regardless of what the previous
            // request left in the collection.
            // NOTE(review): WebClient appears to drop User-Agent/Content-Type/Referer from
            // Headers after a request on .NET Framework - confirm for the target runtime.
            client.Headers["User-Agent"] = userAgent;
            client.Headers["Content-Type"] = contentType;
            client.Headers["Referer"] = uri.AbsoluteUri;

            string content;
            try
            {
                content = Encoding.GetEncoding("gb2312").GetString(client.DownloadData(uri));
            }
            catch { continue; } // best effort: skip pages that cannot be fetched/decoded

            // Build the letter -> digit table for this page (first definition wins).
            Dictionary<string, string> wordToNum = new Dictionary<string, string>();
            foreach (Match m in numCnPattern.Matches(content))
            {
                string letter = m.Groups["word"].Value;
                if (!wordToNum.ContainsKey(letter))
                    wordToNum.Add(letter, m.Groups["num"].Value);
            }

            // Decode every ip + "+x+y..." match into "ip:port".
            foreach (Match m in ipPortPattern.Matches(content))
            {
                string[] words = m.Groups["word"].Value.Split(new char[] { '+' }, StringSplitOptions.RemoveEmptyEntries);
                StringBuilder proxy = new StringBuilder(m.Groups["ip"].Value).Append(':');
                bool decoded = true;
                foreach (string word in words)
                {
                    string digit;
                    if (!wordToNum.TryGetValue(word, out digit))
                    {
                        // Unknown letter: the port cannot be reconstructed, drop the entry.
                        decoded = false;
                        break;
                    }
                    proxy.Append(digit);
                }
                if (decoded)
                    proxyHash.Add(proxy.ToString());
            }
        }
    }
    return proxyHash;
}
// Scrapes up to ten list pages of heibai.net and returns the "ip:port" entries found.
// For each <tr class='cells'> row the 2nd cell is taken as the IP and the 3rd as the port.
// Pages that fail to download are skipped; scraping stops at the first page without rows.
private HashSet<string> GetFromHeibai()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    HtmlDocument htmlDoc = new HtmlDocument();
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        for (int pageIndex = 1; pageIndex < 11; ++pageIndex)
        {
            Uri uri = new Uri(string.Format("http://www.heibai.net/proxy/index.php?act=list&port=&type=&country=&page={0}", pageIndex));

            // Assign (rather than Add) all headers before every request so each page is
            // fetched with exactly one value per header.
            // NOTE(review): WebClient appears to drop these headers after a request on
            // .NET Framework - confirm for the target runtime.
            client.Headers["User-Agent"] = userAgent;
            client.Headers["Content-Type"] = contentType;
            client.Headers["Referer"] = uri.AbsoluteUri;

            string content;
            try
            {
                byte[] bytes = client.DownloadData(uri);
                content = WebUtility.HtmlDecode(Encoding.GetEncoding("utf-8").GetString(bytes));
            }
            catch { continue; } // best effort: skip pages that cannot be fetched

            htmlDoc.LoadHtml(content);
            var paraNodes = htmlDoc.DocumentNode.SelectNodes(@"//tr[@class='cells']");
            if (paraNodes == null)
                return ipHash; // no rows on this page: stop paging

            // Each row is re-parsed on its own so that the //td[n] queries only see that row.
            HtmlDocument rowDoc = new HtmlDocument();
            foreach (var node in paraNodes)
            {
                rowDoc.LoadHtml(node.OuterHtml);
                var ipNode = rowDoc.DocumentNode.SelectSingleNode(@"//td[2]");
                var portNode = rowDoc.DocumentNode.SelectSingleNode(@"//td[3]");
                if (ipNode == null || portNode == null)
                    continue;

                string ip = ipNode.InnerText.Trim();
                string port = portNode.InnerText.Trim();
                if (ipPattern.IsMatch(ip) && numPattern.IsMatch(port))
                    ipHash.Add(ip + ":" + port);
            }
        }
    }
    return ipHash;
}
// Scrapes the proxy360.cn front page and returns the "ip:port" entries found.
// For each div.proxylistitem the 1st span.tbBottomLine is taken as the IP and the
// 2nd as the port. Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFromProxy360()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://www.proxy360.cn/default.aspx");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("utf-8").GetString(bytes));
    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(content);

    var paraNodes = htmlDoc.DocumentNode.SelectNodes(@"//div[@class='proxylistitem']");
    if (paraNodes == null)
        return ipHash;

    // Each item is re-parsed on its own so that the span queries only see that item.
    HtmlDocument itemDoc = new HtmlDocument();
    foreach (var node in paraNodes)
    {
        itemDoc.LoadHtml(node.OuterHtml);
        var ipNode = itemDoc.DocumentNode.SelectSingleNode(@"//span[@class='tbBottomLine'][1]");
        var portNode = itemDoc.DocumentNode.SelectSingleNode(@"//span[@class='tbBottomLine'][2]");
        if (ipNode == null || portNode == null)
            continue;

        string ip = ipNode.InnerText.Trim();
        string port = portNode.InnerText.Trim();
        if (ipPattern.IsMatch(ip) && numPattern.IsMatch(port))
            ipHash.Add(ip + ":" + port);
    }
    return ipHash;
}
// Scrapes the dailiip.com front page (decoded as gb2312) and returns every
// "ip:port" pair matched by dailiipPattern.
// Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFromDailiip()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://www.dailiip.com/");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("gb2312").GetString(bytes));
    foreach (Match m in dailiipPattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Scrapes xker.com/ip/ (decoded as gb2312) and returns every "ip:port" pair
// matched by xkerPattern.
// Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFromXker()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://www.xker.com/ip/");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("gb2312").GetString(bytes));
    foreach (Match m in xkerPattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Scrapes 18daili.com's SearchLocationForAjax.php (decoded as UTF-8) and returns
// every "ip:port" pair matched by daili18Pattern.
// Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFrom18daili()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://www.18daili.com/SearchLocationForAjax.php");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));
    foreach (Match m in daili18Pattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Scrapes 35wl.com/tools/dlfwq.htm (decoded as gb2312) and returns every
// "ip:port" pair matched by wl35Pattern.
// Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFrom35wl()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://www.35wl.com/tools/dlfwq.htm");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("gb2312").GetString(bytes));
    foreach (Match m in wl35Pattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Scrapes the four 51proxied.com list pages (fast, anonymous, non-anonymous, socks5)
// and returns every "ip:port" pair matched by proxiedPattern.
// Pages that fail to download are skipped.
private HashSet<string> GetFrom51proxied()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    Uri[] pages =
    {
        new Uri("http://www.51proxied.com/http_fast.html"),
        new Uri("http://www.51proxied.com/http_anonymous.html"),
        new Uri("http://www.51proxied.com/http_non_anonymous.html"),
        new Uri("http://www.51proxied.com/socks5.html")
    };

    HashSet<string> ipHash = new HashSet<string>();
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        foreach (Uri uri in pages)
        {
            // Assign (rather than Add) all headers before every request so each page is
            // fetched with exactly one value per header.
            // NOTE(review): WebClient appears to drop these headers after a request on
            // .NET Framework - confirm for the target runtime.
            client.Headers["User-Agent"] = userAgent;
            client.Headers["Content-Type"] = contentType;
            client.Headers["Referer"] = uri.AbsoluteUri;

            byte[] bytes;
            try { bytes = client.DownloadData(uri); }
            catch { continue; } // best effort: skip pages that cannot be fetched

            string content = WebUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));
            foreach (Match m in proxiedPattern.Matches(content))
            {
                ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
            }
        }
    }
    return ipHash;
}
// Scrapes the nntime.com front page (decoded as iso-8859-1) and returns the
// "ip:port" entries found. The page defines letter=digit pairs and each row
// carries a "+x+y..." letter sequence that is decoded into the port here.
// Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFromnntime()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://nntime.com/");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("iso-8859-1").GetString(bytes));

    // Build the letter -> digit table (first definition wins).
    Dictionary<string, string> charDic = new Dictionary<string, string>();
    foreach (Match m in numnntimePattern.Matches(content))
    {
        string letter = m.Groups["word"].Value;
        if (!charDic.ContainsKey(letter))
            charDic.Add(letter, m.Groups["num"].Value);
    }

    foreach (Match m in nntimePattern.Matches(content))
    {
        string[] words = m.Groups["word"].Value.Split(new char[] { '+' }, StringSplitOptions.RemoveEmptyEntries);
        StringBuilder proxy = new StringBuilder(m.Groups["ip"].Value).Append(':');
        bool decoded = true;
        foreach (string word in words)
        {
            string digit;
            if (!charDic.TryGetValue(word, out digit))
            {
                // Unknown letter: the port cannot be reconstructed, drop the entry.
                decoded = false;
                break;
            }
            proxy.Append(digit);
        }
        if (decoded)
            ipHash.Add(proxy.ToString());
    }
    return ipHash;
}
// Scrapes realbooster.com's free proxy list page (decoded as UTF-8) and returns
// every "ip:port" pair matched by realboosterPattern.
// Returns an empty set when the page cannot be downloaded.
private HashSet<string> GetFromrealbooster()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://realbooster.com/seo-services/free-proxy-list-tool/");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    string content = WebUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));
    // Reading matched groups and adding to the set cannot fail, so no try/catch is needed.
    foreach (Match m in realboosterPattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Scrapes result pages 1-19 of simpleproxylist.com (country=CN) and returns the
// "ip:port" entries found. The port is published as a run of "&#NN" character
// codes, which is decoded back into characters here.
// Pages that fail to download are skipped.
private HashSet<string> GetFromsimpleproxylist()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        for (int i = 1; i < 20; ++i)
        {
            Uri uri = new Uri("http://simpleproxylist.com/search.php?p=" + i.ToString() + "&country=CN");

            // Assign (rather than Add) all headers before every request so each page is
            // fetched with exactly one value per header.
            // NOTE(review): WebClient appears to drop these headers after a request on
            // .NET Framework - confirm for the target runtime.
            client.Headers["User-Agent"] = userAgent;
            client.Headers["Content-Type"] = contentType;
            client.Headers["Referer"] = uri.AbsoluteUri;

            byte[] bytes;
            try { bytes = client.DownloadData(uri); }
            catch { continue; } // best effort: skip pages that cannot be fetched

            string content = HttpUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes));
            foreach (Match m in simpleproxylistPattern.Matches(content))
            {
                string[] codes = m.Groups["word"].Value.Split(new string[] { "&#" }, StringSplitOptions.RemoveEmptyEntries);
                StringBuilder proxy = new StringBuilder(m.Groups["ip"].Value).Append(':');
                bool decoded = true;
                foreach (string code in codes)
                {
                    int value;
                    if (!int.TryParse(code, out value))
                    {
                        // Not a parsable character code: drop the entry.
                        decoded = false;
                        break;
                    }
                    proxy.Append((char)value);
                }
                if (decoded)
                    ipHash.Add(proxy.ToString());
            }
        }
    }
    return ipHash;
}
// Matches "ip:port</td>" cells on proxy-ip-list.com. Kept in a static field so the
// compiled regex is built once instead of on every call.
private static readonly Regex ProxyIpListPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)</td>", RegexOptions.Compiled);

// Scrapes the proxy-ip-list.com front page (UTF-8, HTML-decoded twice) and returns
// every "ip:port" pair found. Returns an empty set when the page cannot be downloaded.
private static HashSet<string> GetFromproxyiplist()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://proxy-ip-list.com/");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    // The text is HTML-decoded twice on purpose, exactly as before.
    // NOTE(review): presumably the page double-encodes its entities - confirm.
    string content = HttpUtility.HtmlDecode(HttpUtility.HtmlDecode(Encoding.GetEncoding("UTF-8").GetString(bytes)));
    foreach (Match m in ProxyIpListPattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Matches bare "ip:port" pairs on anonymous-proxy-list.webs.com. Kept in a static
// field so the compiled regex is built once instead of on every call.
private static readonly Regex WebsPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)", RegexOptions.Compiled);

// Scrapes anonymous-proxy-list.webs.com (ISO-8859-1, HTML-decoded twice) and returns
// every "ip:port" pair found. Returns an empty set when the page cannot be downloaded.
private static HashSet<string> GetFromWebs()
{
    const string userAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";
    const string contentType = "application/x-www-form-urlencoded";

    HashSet<string> ipHash = new HashSet<string>();
    Uri uri = new Uri("http://anonymous-proxy-list.webs.com/");

    byte[] bytes;
    // Dispose the client deterministically instead of leaving it to the finalizer.
    using (WebClient client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;
        client.Headers["User-Agent"] = userAgent;
        client.Headers["Content-Type"] = contentType;
        client.Headers["Referer"] = uri.AbsoluteUri;
        try { bytes = client.DownloadData(uri); }
        catch { return ipHash; } // best effort: site unreachable -> no proxies
    }

    // The text is HTML-decoded twice on purpose, exactly as before.
    // NOTE(review): presumably the page double-encodes its entities - confirm.
    string content = HttpUtility.HtmlDecode(HttpUtility.HtmlDecode(Encoding.GetEncoding("ISO-8859-1").GetString(bytes)));
    foreach (Match m in WebsPattern.Matches(content))
    {
        ipHash.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
    }
    return ipHash;
}
// Collects fresh proxy addresses from every site enabled in checkWeb.
// Sites are queried one after another in a fixed order; each site's result count is
// added to GetProxyTotal and its entries are merged into one de-duplicated set.
private HashSet<string> GetProxyFromWeb(CheckProxyWeb checkWeb)
{
    HashSet<string> merged = new HashSet<string>();

    MergeProxySource(checkWeb.CnProxy, GetFromCnproxy, merged);
    MergeProxySource(checkWeb.Heibai, GetFromHeibai, merged);
    MergeProxySource(checkWeb.Proxy360, GetFromProxy360, merged);
    MergeProxySource(checkWeb.Dailiip, GetFromDailiip, merged);
    MergeProxySource(checkWeb.Xker, GetFromXker, merged);
    MergeProxySource(checkWeb.Daili18, GetFrom18daili, merged);
    MergeProxySource(checkWeb.Wl35, GetFrom35wl, merged);
    MergeProxySource(checkWeb.Proxied51, GetFrom51proxied, merged);
    MergeProxySource(checkWeb.Nntime, GetFromnntime, merged);
    MergeProxySource(checkWeb.Realbooster, GetFromrealbooster, merged);
    MergeProxySource(checkWeb.Simpleproxylist, GetFromsimpleproxylist, merged);
    MergeProxySource(checkWeb.Proxyiplist, GetFromproxyiplist, merged);
    MergeProxySource(checkWeb.Webs, GetFromWebs, merged);

    return merged;
}

// Runs one site scraper when it is enabled, adds its result count to GetProxyTotal
// and merges its entries into target.
private void MergeProxySource(bool enabled, Func<HashSet<string>> fetch, HashSet<string> target)
{
    if (!enabled)
        return;

    HashSet<string> found = fetch();
    GetProxyTotal += found.Count;
    target.UnionWith(found);
}
// Tests every collected proxy by downloading TestUri through it.
// Runs RepeateTimes rounds; in each round all proxies without a recorded error are
// tested in parallel (at most MaxThread at a time). A proxy gets an error recorded
// when the download throws or when the downloaded size differs from FileLength.
// Progress lines are reported through help.AddText; afterwards SaveProxy() stores
// the proxies that never failed.
public void Start(ProxyHelp help)
{
    // Proxys is still null when the constructor returned early (reference download
    // failed); Parallel.ForEach and SaveProxy would throw on it, so skip the test.
    if (Proxys != null)
    {
        ParallelOptions taskParallet = new ParallelOptions();
        taskParallet.MaxDegreeOfParallelism = MaxThread;

        // Number of finished tests; only used in the log lines (read without synchronization).
        int count = 0;
        for (int i = 0; i < RepeateTimes; i++)
        {
            Parallel.ForEach(Proxys, taskParallet, current =>
            {
                // A proxy that failed in an earlier round is not tested again.
                if (current.Error != null)
                    return;

                // Dispose the client after every test instead of leaking one per proxy per round.
                using (WebClient wc = new WebClient())
                {
                    InitWebClient(wc, TestUri, TestUri);
                    wc.Proxy = current.Proxy;

                    // Time only the download itself, not the client setup.
                    Stopwatch sw = Stopwatch.StartNew();
                    try
                    {
                        byte[] data = wc.DownloadData(TestUri);
                        sw.Stop();
                        current.Milliseconds = sw.ElapsedMilliseconds;
                        current.ContentLength = data.Length;
                        // A different size means the proxy did not return the reference content.
                        if (current.ContentLength != FileLength)
                            current.Error = new Exception("下载错误");
                        string log = string.Format("{0} - {1} - {2} - {3}", current.ProxyString, current.ContentLength, current.Milliseconds, count);
                        help.AddText(log);
                    }
                    catch (Exception e)
                    {
                        current.Error = e;
                        string log = string.Format("{0} - {1} - {2}", current.ProxyString, e.Message, count);
                        help.AddText(log);
                    }
                }
                Interlocked.Increment(ref count);
            });

            // Order by download time, largest first, before the next round.
            Array.Sort(Proxys, new Comparison<ProxyDetail>((a, b) => b.Milliseconds.CompareTo(a.Milliseconds)));
        }
        SaveProxy();
    }

    help.AddText("测试代理地址结束!");
    Console.WriteLine("结束");
}
// Stores the usable proxies: proxyResultList becomes the ProxyString of every
// tested proxy that has no recorded error.
// A stricter selection (for example sorting by Milliseconds and keeping only the
// proxies faster than 10000 ms) could be applied here instead.
private void SaveProxy()
{
    proxyResultList = (from candidate in Proxys
                       where candidate.Error == null
                       select candidate.ProxyString).ToList();
}
// Request header values applied by InitWebClient.
private const string DefaultUserAgent = @"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; CIBA; .NET4.0C; .NET4.0E)";
private const string DefaultPostContentType = "application/x-www-form-urlencoded";

// Prepares a WebClient for a request: default credentials, the User-Agent,
// Content-Type and Referer (pUri) headers, and Expect100Continue switched off
// on the service point that serves uri.
private static void InitWebClient(WebClient webClient, Uri uri, Uri pUri)
{
    webClient.Credentials = CredentialCache.DefaultCredentials;

    WebHeaderCollection headers = webClient.Headers;
    headers.Add("User-Agent", DefaultUserAgent);
    headers.Add("Content-Type", DefaultPostContentType);
    headers.Add("Referer", pUri.AbsoluteUri);

    ServicePoint target = ServicePointManager.FindServicePoint(uri);
    if (target.Expect100Continue)
    {
        target.Expect100Continue = false;
    }
}
}
/// <summary>
/// Per-proxy state used while testing a proxy.
/// </summary>
public class ProxyDetail
{
// WebProxy built from ProxyString; assigned to the WebClient used for the test download.
public WebProxy Proxy;
// The proxy address as scraped from the list sites.
public string ProxyString;
// Size in bytes of the most recent successful test download.
public int ContentLength;
// Duration in milliseconds of the most recent successful test download.
public long Milliseconds;
// Null while the proxy has not failed; otherwise the failure recorded for it.
public Exception Error;
}
/// <summary>
/// Selects which proxy-list sites are scraped; each flag enables one site.
/// </summary>
public class CheckProxyWeb
{
public bool CnProxy; // www.cnproxy.com
public bool Heibai; // www.heibai.net
public bool Proxy360; // www.proxy360.cn
public bool Dailiip; // www.dailiip.com
public bool Xker; // www.xker.com
public bool Daili18; // www.18daili.com
public bool Wl35; // www.35wl.com
public bool Proxied51; // www.51proxied.com
public bool Nntime; // nntime.com
public bool Realbooster; // realbooster.com
public bool Simpleproxylist; // simpleproxylist.com
public bool Proxyiplist; // proxy-ip-list.com
public bool Webs; // anonymous-proxy-list.webs.com
}
}
其中调用的代码就很简单了
由于我是隔段时间获取一次,所以用到了Timer的内容,也贴在这里了
View Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;
using System.Threading;
using System.IO;
namespace ProxyTest
{
/// <summary>
/// Console host that runs the proxy test from a timer: the first run fires one
/// minute after start-up, later runs every 24 hours.
/// </summary>
class Program
{
    // Relative path of the log file written by Write.
    const string LogPath = @"log/proxyLog.txt";

    static System.Timers.Timer proxyTimer;

    // 1 while a proxy test is in progress, 0 otherwise. Changed only through
    // Interlocked so that overlapping timer callbacks see a consistent value.
    static int proxyRunning = 0;

    // Serializes appends to the log file; timer callbacks may overlap.
    static readonly object logLock = new object();

    static void Main(string[] args)
    {
        proxyTimer = new System.Timers.Timer();
        proxyTimer.Interval = 1000 * 60; // first run after one minute
        proxyTimer.Elapsed += new System.Timers.ElapsedEventHandler(DoProxyTest);
        proxyTimer.Start();

        // Keep the process alive; all work happens in the timer callback.
        Thread.Sleep(Timeout.Infinite);
    }

    // Timer callback: logs the ProxyTable row count, runs the proxy test and logs
    // the row count again. Returns immediately when a previous run is still active.
    static void DoProxyTest(object sender, System.Timers.ElapsedEventArgs e)
    {
        Write("Start to running ProxyTest function!");

        // Atomically claim the "running" flag; a non-zero previous value means another
        // callback is still working.
        if (Interlocked.CompareExchange(ref proxyRunning, 1, 0) != 0)
        {
            Write("End ProxyTest function! by : ProxyTest is running now!" + Environment.NewLine);
            return;
        }

        try
        {
            proxyTimer.Interval = 1000 * 60 * 60 * 24; // from now on run once a day

            int rowCount = Pmars.DataBaseHelper.GetHelper().GetRowCount("ProxyTable");
            Write("ProxyTable Count:" + rowCount);

            Pmars.ProxyTest proxy = new Pmars.ProxyTest();
            proxy.Start();

            rowCount = Pmars.DataBaseHelper.GetHelper().GetRowCount("ProxyTable");
            Write("ProxyTable Count:" + rowCount);
            Write("End ProxyTest function!" + Environment.NewLine);
        }
        finally
        {
            // Always release the flag, even when the run failed, so later runs can start.
            Interlocked.Exchange(ref proxyRunning, 0);
        }
    }

    // Appends one timestamped line to the log file, creating the log directory on first use.
    static void Write(string contents)
    {
        string line = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " : " + contents + Environment.NewLine;
        lock (logLock)
        {
            string directory = Path.GetDirectoryName(LogPath);
            if (!string.IsNullOrEmpty(directory))
                Directory.CreateDirectory(directory); // no-op when it already exists
            File.AppendAllText(LogPath, line);
        }
    }
}
}
本文转自:
http://www.cnblogs.com/pmars/archive/2012/01/20/2327877.html