C# HttpHelper

/// <summary>
/// Convenience wrapper around <see cref="HttpWebRequest"/> for issuing GET and POST
/// requests with optional headers, form/JSON parameters and a shared cookie container.
/// </summary>
public static class HttpHelper
	{
		/// <summary>
		/// Sends a GET or POST request and returns the server's response.
		/// </summary>
		/// <param name="getOrPost">"GET" (case-insensitive) for a GET request; any other value is sent as POST.</param>
		/// <param name="url">Absolute http/https URL.</param>
		/// <param name="headers">Optional extra request headers; may be null.</param>
		/// <param name="parameters">Optional parameters: appended to the query string for GET, written as the body for POST.</param>
		/// <param name="dataEncoding">Encoding used to encode parameters; required when <paramref name="parameters"/> is non-empty.</param>
		/// <param name="contentType">Content type; null selects "application/x-www-form-urlencoded;charset=UTF-8".</param>
		/// <param name="cookie">Cookie container shared across calls; a new one is created and handed back when null is passed.</param>
		/// <returns>
		/// The response, including error responses (4xx/5xx) delivered through a <see cref="WebException"/>.
		/// Returns null when the request failed without any server response (the exception carried no response).
		/// The caller owns the returned response and should dispose it.
		/// </returns>
		/// <exception cref="ArgumentNullException">url is null/empty, or parameters are given without an encoding.</exception>
		/// <exception cref="ArgumentException">url is not an http/https URL.</exception>
		public static HttpWebResponse HttpRequest(string getOrPost, string url, Dictionary<string, string> headers, Dictionary<string, string> parameters, Encoding dataEncoding, string contentType, ref CookieContainer cookie)
		{
			HttpWebRequest request = HttpHelper.CreateRequest(getOrPost, url, headers, parameters, dataEncoding, contentType);

			// A null container used to crash on cookie.Count; create one so the caller gets the cookies back.
			if (cookie == null)
			{
				cookie = new CookieContainer();
			}
			request.CookieContainer = cookie;

			// Same verb rule as CreateRequest: everything that is not GET is a POST and carries the body.
			bool hasBody = !HttpHelper.IsGet(getOrPost) && parameters != null && parameters.Count != 0;
			if (hasBody)
			{
				byte[] payload = HttpHelper.FormatPostParameters(parameters, dataEncoding, contentType);
				using (Stream requestStream = request.GetRequestStream())
				{
					requestStream.Write(payload, 0, payload.Length);
				}
			}

			try
			{
				return (HttpWebResponse)request.GetResponse();
			}
			catch (WebException ex)
			{
				// Protocol errors (4xx/5xx) carry the server response; transport failures carry none.
				// Calling GetResponse() again does not reissue the request, so no retry is attempted.
				return ex.Response as HttpWebResponse;
			}
		}

		/// <summary>
		/// Builds a configured <see cref="HttpWebRequest"/>; for GET the parameters are merged into the URL first.
		/// </summary>
		private static HttpWebRequest CreateRequest(string getOrPost, string url, Dictionary<string, string> headers, Dictionary<string, string> parameters, Encoding paraEncoding, string contentType)
		{
			if (string.IsNullOrEmpty(url))
			{
				throw new ArgumentNullException("url");
			}
			bool hasParameters = parameters != null && parameters.Count > 0;
			if (hasParameters && paraEncoding == null)
			{
				throw new ArgumentNullException("paraEncoding");
			}

			bool isGet = HttpHelper.IsGet(getOrPost);
			if (isGet && hasParameters)
			{
				// Must happen BEFORE WebRequest.Create: the request captures the URL at creation time.
				url = HttpHelper.FormatGetParametersToUrl(url, parameters, paraEncoding);
			}

			bool isHttps = url.StartsWith("https", StringComparison.OrdinalIgnoreCase);
			if (isHttps)
			{
				// SECURITY: this installs a process-wide callback that accepts ANY server certificate
				// (see CheckValidationResult). Kept for backward compatibility; prefer a validating or
				// per-request callback when the target runtime allows it.
				ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(HttpHelper.CheckValidationResult);
			}

			HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
			if (request == null)
			{
				throw new ArgumentException("Only http and https URLs are supported.", "url");
			}
			if (isHttps)
			{
				request.ProtocolVersion = HttpVersion.Version10;
			}

			request.Method = isGet ? "GET" : "POST";
			request.ContentType = contentType ?? "application/x-www-form-urlencoded;charset=UTF-8";
			request.ServicePoint.Expect100Continue = false;
			request.ServicePoint.ConnectionLimit = int.MaxValue;

			if (headers != null)
			{
				HttpHelper.FormatRequestHeaders(headers, request);
			}
			return request;
		}

		/// <summary>Returns true when the verb is "GET", ignoring case.</summary>
		private static bool IsGet(string getOrPost)
		{
			return string.Equals(getOrPost, "GET", StringComparison.OrdinalIgnoreCase);
		}

		/// <summary>Returns true when <paramref name="value"/>, trimmed, equals <paramref name="token"/> ignoring case.</summary>
		private static bool IsToken(string value, string token)
		{
			return value != null && string.Equals(value.Trim(), token, StringComparison.OrdinalIgnoreCase);
		}

		/// <summary>
		/// Copies caller-supplied headers onto the request. Headers that HttpWebRequest exposes as
		/// dedicated properties are routed to those properties; everything else is added verbatim.
		/// </summary>
		private static void FormatRequestHeaders(Dictionary<string, string> headers, HttpWebRequest request)
		{
			foreach (KeyValuePair<string, string> header in headers)
			{
				// Invariant lower-casing: culture-sensitive ToLower() mis-maps 'I' under e.g. Turkish culture.
				switch (header.Key.ToLowerInvariant())
				{
					case "connection":
						// Honour the requested value; anything other than keep-alive disables it (the old behaviour).
						request.KeepAlive = HttpHelper.IsToken(header.Value, "keep-alive");
						break;
					case "content-type":
						request.ContentType = header.Value;
						break;
					case "transfer-encoding":
						// TransferEncoding may only be set while SendChunked is true, and rejects the literal "chunked".
						request.SendChunked = true;
						if (!HttpHelper.IsToken(header.Value, "chunked"))
						{
							request.TransferEncoding = header.Value;
						}
						break;
					case "accept":
						request.Accept = header.Value;
						break;
					case "user-agent":
						request.UserAgent = header.Value;
						break;
					case "referer":
						request.Referer = header.Value;
						break;
					default:
						request.Headers.Add(header.Key, header.Value);
						break;
				}
			}
		}

		/// <summary>
		/// Appends URL-encoded parameters to <paramref name="url"/>, adding "?" or "&amp;" as needed so an
		/// existing query string is extended rather than corrupted.
		/// </summary>
		private static string FormatGetParametersToUrl(string url, Dictionary<string, string> parameters, Encoding paraEncoding)
		{
			if (parameters == null || parameters.Count == 0)
			{
				return url;
			}

			StringBuilder builder = new StringBuilder(url);
			if (url.IndexOf('?') < 0)
			{
				builder.Append('?');
			}
			else if (!url.EndsWith("?", StringComparison.Ordinal) && !url.EndsWith("&", StringComparison.Ordinal))
			{
				// URL already carries query parameters: separate ours from the last existing one.
				builder.Append('&');
			}

			bool first = true;
			foreach (KeyValuePair<string, string> pair in parameters)
			{
				if (!first)
				{
					builder.Append('&');
				}
				builder.Append(HttpUtility.UrlEncode(pair.Key, paraEncoding));
				builder.Append('=');
				builder.Append(HttpUtility.UrlEncode(pair.Value, paraEncoding));
				first = false;
			}
			return builder.ToString();
		}

		/// <summary>
		/// Serialises the parameters as a request body: a flat JSON object when the content type is
		/// application/json (with or without a charset suffix), otherwise URL-encoded form data.
		/// In JSON mode a value starting with "{" is embedded as raw JSON; a null value becomes null;
		/// every other value is emitted as an escaped JSON string.
		/// </summary>
		private static byte[] FormatPostParameters(Dictionary<string, string> parameters, Encoding dataEncoding, string contentType)
		{
			bool asJson = HttpHelper.IsJsonContentType(contentType);
			StringBuilder body = new StringBuilder();
			if (asJson)
			{
				body.Append('{');
			}

			bool first = true;
			foreach (KeyValuePair<string, string> pair in parameters)
			{
				if (asJson)
				{
					if (!first)
					{
						body.Append(',');
					}
					body.Append('"');
					HttpHelper.AppendJsonEscaped(body, pair.Key);
					body.Append("\":");

					string value = pair.Value;
					if (value == null)
					{
						body.Append("null");
					}
					else if (value.StartsWith("{", StringComparison.Ordinal))
					{
						// Caller supplied a nested JSON object; pass it through untouched.
						body.Append(value);
					}
					else
					{
						body.Append('"');
						HttpHelper.AppendJsonEscaped(body, value);
						body.Append('"');
					}
				}
				else
				{
					if (!first)
					{
						body.Append('&');
					}
					body.Append(HttpUtility.UrlEncode(pair.Key, dataEncoding));
					body.Append('=');
					body.Append(HttpUtility.UrlEncode(pair.Value, dataEncoding));
				}
				first = false;
			}

			if (asJson)
			{
				body.Append('}');
			}
			return dataEncoding.GetBytes(body.ToString());
		}

		/// <summary>Returns true when the media type (ignoring parameters such as charset) is application/json.</summary>
		private static bool IsJsonContentType(string contentType)
		{
			if (string.IsNullOrEmpty(contentType))
			{
				return false;
			}
			int semicolon = contentType.IndexOf(';');
			string mediaType = semicolon < 0 ? contentType : contentType.Substring(0, semicolon);
			return string.Equals(mediaType.Trim(), "application/json", StringComparison.OrdinalIgnoreCase);
		}

		/// <summary>Appends <paramref name="text"/> with the characters JSON requires to be escaped inside a string.</summary>
		private static void AppendJsonEscaped(StringBuilder target, string text)
		{
			foreach (char c in text)
			{
				switch (c)
				{
					case '"':
						target.Append("\\\"");
						break;
					case '\\':
						target.Append("\\\\");
						break;
					case '\b':
						target.Append("\\b");
						break;
					case '\f':
						target.Append("\\f");
						break;
					case '\n':
						target.Append("\\n");
						break;
					case '\r':
						target.Append("\\r");
						break;
					case '\t':
						target.Append("\\t");
						break;
					default:
						if (c < ' ')
						{
							// Remaining control characters must use the \uXXXX form.
							target.Append("\\u").Append(((int)c).ToString("x4"));
						}
						else
						{
							target.Append(c);
						}
						break;
				}
			}
		}

		/// <summary>
		/// Certificate validation callback that accepts every certificate.
		/// SECURITY: this disables TLS server authentication; only acceptable for trusted/test endpoints.
		/// </summary>
		private static bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
		{
			return true;
		}

		/// <summary>
		/// Dumps every cookie in the container, one per line, as
		/// "Domain;Name;Path;Port;Secure;Value" terminated by CRLF.
		/// </summary>
		/// <param name="cc">Container to inspect.</param>
		/// <returns>The formatted cookie list; empty when the container holds no cookies.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="cc"/> is null.</exception>
		/// <remarks>
		/// Reads the container's non-public fields "m_domainTable" and "m_list" through reflection.
		/// These are implementation details of the runtime, so this fails on a runtime where they are absent.
		/// </remarks>
		public static string GetAllCookies(CookieContainer cc)
		{
			if (cc == null)
			{
				throw new ArgumentNullException("cc");
			}

			const BindingFlags PrivateField = BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.GetField;
			Hashtable domainTable = (Hashtable)cc.GetType().InvokeMember("m_domainTable", PrivateField, null, cc, new object[0]);

			StringBuilder dump = new StringBuilder();
			foreach (object pathList in domainTable.Values)
			{
				SortedList byPath = (SortedList)pathList.GetType().InvokeMember("m_list", PrivateField, null, pathList, new object[0]);
				foreach (CookieCollection cookies in byPath.Values)
				{
					foreach (Cookie item in cookies)
					{
						dump.AppendFormat("{0};{1};{2};{3};{4};{5}\r\n", new object[]
						{
							item.Domain,
							item.Name,
							item.Path,
							item.Port,
							item.Secure.ToString(),
							item.Value
						});
					}
				}
			}
			return dump.ToString();
		}
	}

爬虫(Web Crawler)是一种自动化程序,用于从互联网上收集信息。其主要功能是访问网页、提取数据并存储,以便后续分析或展示。爬虫通常被搜索引擎、数据挖掘工具、监测系统等用于需要抓取网络数据的场景。 爬虫的工作流程包括以下几个关键步骤: URL收集: 爬虫从一个或多个初始URL开始,递归或迭代地发现新的URL,构建一个URL队列。这些URL可以通过链接分析、站点地图、搜索引擎等方式获取。 请求网页: 爬虫使用HTTP或其他协议向目标URL发起请求,获取网页的HTML内容。这通常通过HTTP请求库实现,如Python中的Requests库。 解析内容: 爬虫对获取的HTML进行解析,提取有用的信息。常用的解析工具有正则表达式、XPath、Beautiful Soup等。这些工具帮助爬虫定位和提取目标数据,如文本、图片、链接等。 数据存储: 爬虫将提取的数据存储到数据库、文件或其他存储介质中,以备后续分析或展示。常用的存储形式包括关系型数据库、NoSQL数据库、JSON文件等。 遵守规则: 为避免对网站造成过大负担或触发反爬虫机制,爬虫需要遵守网站的robots.txt协议,限制访问频率和深度,并模拟人类访问行为,如设置User-Agent。 反爬虫应对: 由于爬虫的存在,一些网站采取了反爬虫措施,如验证码、IP封锁等。爬虫工程师需要设计相应的策略来应对这些挑战。 爬虫在各个领域都有广泛的应用,包括搜索引擎索引、数据挖掘、价格监测、新闻聚合等。然而,使用爬虫需要遵守法律和伦理规范,尊重网站的使用政策,并确保对被访问网站的服务器负责。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值