C#爬虫必备:使用wininet接口发起http请求

这个博客介绍了如何使用C#的Wininet库进行HTTP的GET和POST请求,包括设置请求头、处理响应数据以及获取网页图片。示例代码展示了如何构造请求,发送数据,并从响应中读取内容。此外,还提供了获取和清除浏览器cookies的方法。
摘要由CSDN通过智能技术生成

 封装好的类:

using System;
using System.Collections;
using System.Drawing;
using System.IO;
using System.Net;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;

namespace 测试
{
    /// <summary>
    /// Minimal synchronous HTTP client built on the Windows WinINet API.
    /// Offers GET/POST text requests, image download, and helpers for reading,
    /// converting and clearing WinINet (Internet Explorer / WebBrowser) cookies.
    /// </summary>
    public class Wininet
    {
        #region WinINet API
        // WinINet exports use the WINAPI (stdcall) calling convention, so the
        // DllImport default is used instead of Cdecl. HINTERNET handles are
        // pointer-sized and are therefore declared as IntPtr.

        private const int INTERNET_OPEN_TYPE_DIRECT = 1;
        private const int INTERNET_SERVICE_HTTP = 3;
        private const uint INTERNET_FLAG_RELOAD = 0x80000000;
        private const uint INTERNET_FLAG_SECURE = 0x00800000;
        private const uint INTERNET_FLAG_IGNORE_REDIRECT_TO_HTTPS = 0x00004000;
        private const uint INTERNET_FLAG_NEED_FILE = 0x00000010;
        private const int INTERNET_COOKIE_HTTPONLY = 0x2000;
        private const int INTERNET_OPTION_END_BROWSER_SESSION = 42;

        [DllImport("wininet.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        private static extern IntPtr InternetOpen(string strAppName, int ulAccessType, string strProxy, string strProxyBypass, int ulFlags);

        [DllImport("wininet.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        private static extern IntPtr InternetConnect(IntPtr hSession, string strServer, ushort nPort, string strUser, string strPassword, int ulService, int ulFlags, IntPtr ulContext);

        [DllImport("wininet.dll", SetLastError = true)]
        private static extern bool InternetCloseHandle(IntPtr hInternet);

        [DllImport("wininet.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        private static extern bool HttpAddRequestHeaders(IntPtr hRequest, string szHeaders, uint headersLen, uint modifiers);

        // lplpszAcceptTypes is a pointer to a NULL-terminated array of strings,
        // not a string; it is declared as IntPtr so that NULL can be passed.
        [DllImport("wininet.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        private static extern IntPtr HttpOpenRequest(IntPtr hConnect, string szVerb, string szURI, string szHttpVersion, string szReferer, IntPtr lplpszAcceptTypes, uint dwFlags, IntPtr dwContext);

        // The ANSI entry point is used on purpose: headers are marshalled as an
        // ANSI string and the request body is passed as raw bytes with its
        // length given in bytes.
        [DllImport("wininet.dll", CharSet = CharSet.Ansi, ExactSpelling = true, SetLastError = true)]
        private static extern bool HttpSendRequestA(IntPtr hRequest, string szHeaders, int headersLen, byte[] optional, int optionalLen);

        [DllImport("wininet.dll", SetLastError = true)]
        private static extern bool InternetReadFile(IntPtr hRequest, byte[] pByte, int size, out int revSize);

        [DllImport("wininet.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        private static extern bool InternetGetCookieEx(string pchURL, string pchCookieName, StringBuilder pchCookieData, ref System.UInt32 pcchCookieData, int dwFlags, IntPtr lpReserved);

        [DllImport("wininet.dll", SetLastError = true)]
        private static extern bool InternetSetOption(IntPtr hInternet, int dwOption, IntPtr lpBuffer, int lpdwBufferLength);
        #endregion

        /// <summary>User-Agent string passed to InternetOpen for every session.</summary>
        private const string UserAgent = "Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; 125LA; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

        /// <summary>Size in bytes of the buffer used when draining a response.</summary>
        private const int ReadBufferSize = 4096;

        /// <summary>Locates the charset declared in an HTML meta tag; built once and reused.</summary>
        private static readonly Regex MetaCharsetRegex = new Regex("<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase);

        /// <summary>
        /// Sends a GET request and returns the response body as text.
        /// </summary>
        /// <param name="Url">Absolute request URL.</param>
        /// <param name="headers">
        /// Raw request headers ("Name:value" entries separated by CRLF), or
        /// <c>null</c> to send the built-in default headers.
        /// </param>
        /// <returns>
        /// The body decoded as UTF-8 when the page declares a UTF-8 meta charset,
        /// otherwise as GBK; an empty string when the request failed.
        /// </returns>
        public string Get(string Url, StringBuilder headers = null)
        {
            return DecodeBody(GetHtml(Url, null, headers));
        }

        /// <summary>
        /// Sends a POST request and returns the response body as text.
        /// </summary>
        /// <param name="Url">Absolute request URL.</param>
        /// <param name="postdata">
        /// Request body. When <c>null</c> the request is sent as a GET.
        /// </param>
        /// <param name="headers">
        /// Raw request headers ("Name:value" entries separated by CRLF), or
        /// <c>null</c> to send the built-in default headers.
        /// </param>
        /// <returns>
        /// The body decoded as UTF-8 when the page declares a UTF-8 meta charset,
        /// otherwise as GBK; an empty string when the request failed.
        /// </returns>
        public string Post(string Url, string postdata, StringBuilder headers = null)
        {
            return DecodeBody(GetHtml(Url, postdata, headers));
        }

        /// <summary>
        /// Downloads an image with a GET request.
        /// </summary>
        /// <param name="Url">Absolute image URL.</param>
        /// <returns>The decoded image.</returns>
        /// <exception cref="ArgumentException">
        /// The download failed (an <see cref="ArgumentNullException"/> is raised
        /// for the missing stream) or the data is not a valid image.
        /// </exception>
        public Image GetImage(string Url)
        {
            // The stream is intentionally not disposed: Image.FromStream requires
            // it to stay open for the lifetime of the returned image.
            MemoryStream ms = GetHtml(Url);
            return Image.FromStream(ms);
        }

        /// <summary>
        /// Decodes a response body, choosing the encoding from the HTML meta charset.
        /// </summary>
        /// <param name="body">Response stream, or <c>null</c> when the request failed.</param>
        /// <returns>The decoded text, or an empty string when <paramref name="body"/> is null.</returns>
        private static string DecodeBody(MemoryStream body)
        {
            if (body == null)
            {
                return string.Empty;
            }

            byte[] raw = body.ToArray();

            // First pass with the system default encoding only to find the meta
            // tag; the markup around it is ASCII so the encoding used here does
            // not matter.
            Match meta = MetaCharsetRegex.Match(Encoding.Default.GetString(raw));
            bool isUtf8 = meta.Success
                && meta.Groups[2].Value.IndexOf("UTF-8", StringComparison.OrdinalIgnoreCase) >= 0;

            Encoding encoding = isUtf8 ? Encoding.UTF8 : Encoding.GetEncoding("GBK");
            return encoding.GetString(raw);
        }

        /// <summary>
        /// Builds the header block sent when the caller supplies no headers.
        /// </summary>
        /// <param name="Url">Request URL, reused as the Referer.</param>
        private static string BuildDefaultHeaders(string Url)
        {
            StringBuilder sb = new StringBuilder();
            sb.Append("Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n");
            sb.Append("Content-Type:application/x-www-form-urlencoded\r\n");
            sb.Append("Accept-Language:zh-cn\r\n");
            sb.Append("Referer:" + Url);
            return sb.ToString();
        }

        /// <summary>Closes a WinINet handle if it was actually opened.</summary>
        private static void CloseHandle(IntPtr handle)
        {
            if (handle != IntPtr.Zero)
            {
                InternetCloseHandle(handle);
            }
        }

        /// <summary>
        /// Performs the HTTP request and buffers the whole response body.
        /// </summary>
        /// <param name="Url">Absolute request URL.</param>
        /// <param name="postdata">Request body; a non-null value makes the request a POST.</param>
        /// <param name="headers">Raw request headers, or <c>null</c> for the defaults.</param>
        /// <returns>
        /// A stream positioned at the start of the response body, or <c>null</c>
        /// when the URL is invalid, a handle could not be opened, or sending failed.
        /// </returns>
        private MemoryStream GetHtml(string Url, string postdata = null, StringBuilder headers = null)
        {
            IntPtr hSession = IntPtr.Zero;
            IntPtr hConnect = IntPtr.Zero;
            IntPtr hRequest = IntPtr.Zero;
            try
            {
                Uri uri = new Uri(Url);
                bool isPost = postdata != null;

                hSession = InternetOpen(UserAgent, INTERNET_OPEN_TYPE_DIRECT, "", "", 0);
                if (hSession == IntPtr.Zero)
                {
                    return null;
                }

                hConnect = InternetConnect(hSession, uri.Host, (ushort)uri.Port, "", "", INTERNET_SERVICE_HTTP, 0, IntPtr.Zero);
                if (hConnect == IntPtr.Zero)
                {
                    return null;
                }

                // Same flag values as before (0x80800010 for https, 0x80004010
                // otherwise), now spelled with their WinINet names. Uri lower-cases
                // the scheme, so "HTTPS://" is recognised as well.
                uint flags = INTERNET_FLAG_RELOAD | INTERNET_FLAG_NEED_FILE;
                if (string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))
                {
                    flags |= INTERNET_FLAG_SECURE;
                }
                else
                {
                    flags |= INTERNET_FLAG_IGNORE_REDIRECT_TO_HTTPS;
                }

                hRequest = HttpOpenRequest(hConnect, isPost ? "POST" : "GET", uri.PathAndQuery, "HTTP/1.1", "", IntPtr.Zero, flags, IntPtr.Zero);
                if (hRequest == IntPtr.Zero)
                {
                    return null;
                }

                string headerText = headers != null ? headers.ToString() : BuildDefaultHeaders(Url);
                bool hasHeaders = headerText.Length > 0;

                // The body length must be a byte count; using the character count
                // truncated bodies containing multi-byte characters.
                byte[] body = isPost ? Encoding.Default.GetBytes(postdata) : null;
                int bodyLength = body != null ? body.Length : 0;

                // A header length of -1 tells WinINet to measure the
                // NUL-terminated header string itself.
                bool sent = HttpSendRequestA(hRequest, hasHeaders ? headerText : null, hasHeaders ? -1 : 0, body, bodyLength);
                if (!sent)
                {
                    return null;
                }

                MemoryStream ms = new MemoryStream();
                byte[] buffer = new byte[ReadBufferSize];
                int read;
                while (InternetReadFile(hRequest, buffer, buffer.Length, out read) && read > 0)
                {
                    ms.Write(buffer, 0, read);
                }

                // Rewind so that stream consumers start at the first byte.
                ms.Position = 0;
                return ms;
            }
            catch (Exception)
            {
                // Best effort by design: any failure (malformed URL, marshalling
                // error, ...) is reported to callers as a null stream.
                return null;
            }
            finally
            {
                // Release handles in reverse order of creation on every path.
                CloseHandle(hRequest);
                CloseHandle(hConnect);
                CloseHandle(hSession);
            }
        }

        #region Read WebBrowser cookies
        /// <summary>
        /// Reads the WinINet cookies stored for a URL, including HttpOnly cookies.
        /// </summary>
        /// <param name="url">Complete URL whose cookies are requested.</param>
        /// <returns>
        /// The cookie string with a trailing ";", or <c>null</c> when no cookie
        /// data could be retrieved.
        /// </returns>
        public string GetCookies(string url)
        {
            uint datasize = 256;
            StringBuilder cookieData = new StringBuilder((int)datasize);
            if (!InternetGetCookieEx(url, null, cookieData, ref datasize, INTERNET_COOKIE_HTTPONLY, IntPtr.Zero))
            {
                // datasize now holds the size reported by WinINet; retry once
                // with a buffer of that size unless the value is unusable.
                if (datasize == 0 || datasize > int.MaxValue)
                {
                    return null;
                }

                cookieData = new StringBuilder((int)datasize);
                if (!InternetGetCookieEx(url, null, cookieData, ref datasize, INTERNET_COOKIE_HTTPONLY, IntPtr.Zero))
                {
                    return null;
                }
            }
            return cookieData.ToString() + ";";
        }
        #endregion

        #region String <-> CookieContainer conversion
        /// <summary>
        /// Converts a "name=value; name2=value2" cookie string into a <see cref="CookieContainer"/>.
        /// </summary>
        /// <param name="url">URL the cookies belong to.</param>
        /// <param name="cookie">
        /// Semicolon-separated cookie string; may be <c>null</c> or empty (for
        /// example a failed <see cref="GetCookies"/> result).
        /// </param>
        /// <returns>A container holding the parsed cookies; empty when there are none.</returns>
        public static CookieContainer StringToCookie(string url, string cookie)
        {
            Uri uri = new Uri(url);
            CookieContainer container = new CookieContainer();
            if (string.IsNullOrEmpty(cookie))
            {
                return container;
            }

            foreach (string segment in cookie.Split(';'))
            {
                string pair = segment.Trim();
                if (pair.Length == 0)
                {
                    continue;
                }

                // Skip only the "expires" attribute itself. Matching on the
                // attribute name (not on any occurrence of the word) keeps
                // cookies whose name or value merely contains "expires".
                int separator = pair.IndexOf('=');
                string name = separator >= 0 ? pair.Substring(0, separator).Trim() : pair;
                if (string.Equals(name, "expires", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                container.SetCookies(uri, pair);
            }
            return container;
        }

        /// <summary>
        /// Serialises every cookie in a <see cref="CookieContainer"/> as "name=value;" pairs.
        /// </summary>
        /// <param name="cc">Container to read.</param>
        /// <returns>The concatenated "name=value;" pairs; empty when the container has no cookies.</returns>
        public static string CookieToString(CookieContainer cc)
        {
            // CookieContainer exposes no enumeration API on older frameworks, so
            // its private domain table and per-path lists are read via reflection.
            const System.Reflection.BindingFlags PrivateField =
                System.Reflection.BindingFlags.NonPublic
                | System.Reflection.BindingFlags.GetField
                | System.Reflection.BindingFlags.Instance;

            Hashtable domainTable = (Hashtable)cc.GetType().InvokeMember("m_domainTable", PrivateField, null, cc, new object[] { });
            StringBuilder sb = new StringBuilder();
            foreach (object pathList in domainTable.Values)
            {
                SortedList cookiesByPath = (SortedList)pathList.GetType().InvokeMember("m_list", PrivateField, null, pathList, new object[] { });
                foreach (CookieCollection cookies in cookiesByPath.Values)
                {
                    foreach (Cookie c in cookies)
                    {
                        sb.Append(c.Name).Append("=").Append(c.Value).Append(";");
                    }
                }
            }
            return sb.ToString();
        }
        #endregion

        #region Clear cookies
        /// <summary>
        /// Ends the current WinINet browser session, which discards its session cookies.
        /// </summary>
        public static void ClearCookies()
        {
            InternetSetOption(IntPtr.Zero, INTERNET_OPTION_END_BROWSER_SESSION, IntPtr.Zero, 0);
        }
        #endregion
    }
}

调用示例:

Wininet wininet = new Wininet();
// Build the custom request headers: one "Name:value" entry per line, each terminated by CRLF.
StringBuilder headers = new StringBuilder();
headers.Append("User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11\r\n");
headers.Append("Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n");
headers.Append("Content-Type:application/x-www-form-urlencoded\r\n");
headers.Append("Accept-Language:zh-CN,zh;q=0.8,en-us;q=0.6,en;q=0.5;q=0.4\r\n");
headers.Append("Connection:keep-alive\r\n");
headers.Append("X-Requested-With:XMLHttpRequest\r\n");
// Send the request; the headers must be passed explicitly, otherwise the built-in defaults are sent.
string result = wininet.Get("https://请求链接地址", headers);

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值