// 爬虫封装:HttpWebRequest 类版本(HttpClient 版本将在后续介绍)
public class HttpHelper
{
/// <summary>
/// Logger shared by the static helpers in this class; constructed with typeof(HttpHelper).
/// </summary>
private static Logger logger = new Logger(typeof(HttpHelper));
/// <summary>
/// Downloads the content at the given URL, decoding it as UTF-8.
/// (The original note says GB2312 was used previously.)
/// </summary>
/// <param name="url">Address to fetch; forwarded unchanged to DownloadHtml.</param>
/// <returns>The value returned by DownloadHtml for this URL with UTF-8 encoding.</returns>
public static string DownloadUrl(string url) => DownloadHtml(url, Encoding.UTF8);
/// <summary>
/// 下载html
/// http://tool.sufeinet.com/HttpHelper.aspx
/// HttpWebRequest功能比较丰富,WebClient使用比较简单
/// WebRequest
///
/// </summary>
/// <param name="url"></param>
/// <returns></returns>
public static string DownloadHtml(string url, Encoding encode)
{
string html = string.Empty;
try
{
HttpWebRequest request = HttpWebRequest.Create(url) as HttpWebRequest;//模拟请求
request.Timeout = 30 * 1000;//设置30s的超时
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36";
request.ContentType = "text/html; charset=utf-8";// "text/html;charset=gbk";//
request.CookieContainer = new CookieContainer();//1 给请求准备个container
using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)//发起请求
{
if (response.StatusCode != HttpStatusCode.OK)
{
logger.Warn(string.Format("抓取{0}地址返回失败,response.StatusCode为{1}", url, response.StatusCode));
}
else
{
try
{
StreamReader sr = new StreamReader(response.GetResponseStream(), encode);
html = sr.ReadToEnd();//读取数据
sr.Close();
}
catch (Exception ex)
{
logger.Error(string.Format($"DownloadHtml抓取{url}失败"), ex);
html = null;
}
}
}
}
catch (System.Net.WebException ex)
{