/// <summary>
/// HttpHelper: a helper class for HTTP access. Supports GET and POST requests — plain,
/// with cookies, or with client certificates — and allows a proxy to be configured.
/// </summary>
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.IO.Compression;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;
/// <summary>
/// Helper class for HTTP connection operations.
/// </summary>
public class HttpHelper
{
#region 预定义方法或者变更
// Encoding used to decode the response body into HttpResult.Html; starts as Encoding.Default.
// GetHtml only sniffs the charset (meta tag / response.CharacterSet) when this field is null.
// NOTE(review): presumably SetRequest (not visible here) can reset it to null — confirm.
private Encoding encoding = Encoding.Default;
// Encoding for POST data; starts as Encoding.Default.
// NOTE(review): its use is outside this view — presumably applied when the request body is built; confirm.
private Encoding postencoding = Encoding.Default;
// HttpWebRequest used to issue the request; GetHtml calls request.GetResponse() on it.
private HttpWebRequest request = null;
// HttpWebResponse for the current request; GetHtml reads the status, headers, cookies and body stream from it.
private HttpWebResponse response = null;
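/// <summary>
/// Fetches the page data for the request described by the supplied parameters.
/// </summary>
/// <param name="item">Request parameter object.</param>
/// <returns>An HttpResult describing the outcome.</returns>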
public HttpResult GetHtml(HttpItem item)
{
//返回参数
HttpResult result = new HttpResult();
try
{
//准备参数
SetRequest(item);
}
catch (Exception ex)
{
result = new HttpResult();
result.Cookie = string.Empty;
result.Header = null;
result.Html = ex.Message;
result.StatusDescription = "配置参数时出错:" + ex.Message;
return result;
}
try
{
#region 得到请求的response
using (response = (HttpWebResponse)request.GetResponse())
{
result.StatusCode = response.StatusCode;
result.StatusDescription = response.StatusDescription;
result.Header = response.Headers;
if (response.Cookies != null) result.CookieCollection = response.Cookies;
if (response.Headers["set-cookie"] != null) result.Cookie = response.Headers["set-cookie"];
MemoryStream _stream = new MemoryStream();
//GZIIP处理
if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
{
//开始读取流并设置编码方式
//new GZipStream(response.GetResponseStream(), CompressionMode.Decompress).CopyTo(_stream, 10240);
//.net4.0以下写法
_stream = GetMemoryStream(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress));
}
else
{
//开始读取流并设置编码方式
//response.GetResponseStream().CopyTo(_stream, 10240);
//.net4.0以下写法
_stream = GetMemoryStream(response.GetResponseStream());
}
//获取Byte
byte[] ResponseByte = _stream.ToArray();
_stream.Close();
if (ResponseByte != null & ResponseByte.Length > 0)
{
//是否返回Byte类型数据
if (item.ResultType == ResultType.Byte) result.ResultByte = ResponseByte;
//从这里开始我们要无视编码了
if (encoding == null)
{
Match meta = Regex.Match(Encoding.Default.GetString(ResponseByte), "<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase);
string c = (meta.Groups.Count > 1) ? meta.Groups[2].Value.ToLower().Trim() : string.Empty;
if (c.Length > 2)
{
try
{
if (c.IndexOf(" ") > 0) c = c.Substring(0, c.IndexOf(" "));
encoding = Encoding.GetEncoding(c.Replace("\"", "").Replace("'", "").Replace(";", "").Replace("iso-8859-1", "gbk").Trim());
}
catch
{
if (string.IsNullOrEmpty(response.CharacterSet)) encoding = Encoding.UTF8;
else encoding = Encoding.GetEncoding(response.CharacterSet);
}
}
else
{
if (string.IsNullOrEmpty(response.CharacterSet)) encoding = Encoding.UTF8;
else encoding = Encoding.GetEncoding(response.CharacterSet);
}
}
//得到返回的HTML
result.Html = encoding.GetString(ResponseByte);
}
else
{
//得到返回的HTML
result.Html = "本次请求并未返回任何数据";
}
}
#endregion
}
catch (WebException ex)
{
//这里是在发生异常时返回的错误信息
response = (HttpWebResponse)ex.Response;
result.Html = ex.Message;
if (response != null)
{
C# 网络爬虫类
最新推荐文章于 2024-07-25 14:16:13 发布