C#爬虫帮助类

2 篇文章 0 订阅

最近刚好在做一个爬虫项目，借鉴了网上的一些资料，整理出下面这个 HTTP 帮助类，代码如下：

/// <summary>
    /// HTTP helper for a simple crawler: issues GET/POST requests through
    /// <see cref="WebRequest"/> / <see cref="HttpWebRequest"/> and returns the response
    /// body as text or raw bytes, optionally together with cookies or response headers.
    /// </summary>
    public class SpiderHelper
    {
        // User-Agent sent by the cookie-aware GET and POST helpers.
        private const string BrowserUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)";

        // Accept header sent by the cookie-aware POST helper.
        private const string PostAcceptHeader = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";

        /// <summary>
        /// Downloads the resource at <paramref name="URL"/> and decodes the body as UTF-8.
        /// </summary>
        /// <param name="URL">Address to request.</param>
        /// <returns>The response body as a string.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        public string GetHtml(string URL)
        {
            WebRequest request = WebRequest.Create(URL);
            request.Credentials = CredentialCache.DefaultCredentials;

            // Disposing the response releases the underlying connection even when reading fails.
            using (WebResponse response = request.GetResponse())
            {
                return ReadResponseText(response, Encoding.UTF8);
            }
        }

        /// <summary>
        /// Downloads the resource at <paramref name="URL"/> and also reports the cookie
        /// the server asked the client to store.
        /// </summary>
        /// <param name="URL">Address to request.</param>
        /// <param name="cookie">
        /// Receives the value of the response's <c>Set-Cookie</c> header, or <c>null</c>
        /// when the server did not send one.
        /// </param>
        /// <returns>The response body decoded as UTF-8.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        public string GetHtml(string URL, out string cookie)
        {
            WebRequest request = WebRequest.Create(URL);
            request.Credentials = CredentialCache.DefaultCredentials;

            using (WebResponse response = request.GetResponse())
            {
                // Set-Cookie is a response header; the request's own header
                // collection never contains it.
                cookie = response.Headers.Get("Set-Cookie");
                return ReadResponseText(response, Encoding.UTF8);
            }
        }

        /// <summary>
        /// Posts a fixed parameter string to a fixed local endpoint and returns the reply.
        /// </summary>
        /// <returns>The response body decoded as UTF-8.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        public string GetWeb()
        {
            byte[] body = Encoding.ASCII.GetBytes("hl=zh-CN&newwindow=1");

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://localhost:30237/D_PCS_Select.ashx");
            request.Method = "POST";
            // NOTE(review): the body is a form-style key=value string although the declared
            // content type is JSON; kept unchanged because the endpoint may depend on it.
            request.ContentType = "application/json; charset=utf-8";
            request.ContentLength = body.Length;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }

            using (WebResponse response = request.GetResponse())
            {
                return ReadResponseText(response, Encoding.UTF8);
            }
        }

        /// <summary>
        /// Posts <paramref name="indata"/> to <paramref name="url"/> while collecting cookies,
        /// then requests the same address again with those cookies attached.
        /// </summary>
        /// <param name="url">Address used for both requests.</param>
        /// <param name="indata">Request body for the first (POST) request; sent as UTF-8.</param>
        /// <returns>The body of the second response, decoded as UTF-8.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="indata"/> is null.</exception>
        /// <exception cref="WebException">Either request failed.</exception>
        public string GetCookie(string url, string indata)
        {
            if (indata == null)
            {
                throw new ArgumentNullException("indata");
            }

            // Encode first so that Content-Length is the byte count, not the character
            // count; no byte-order mark is emitted so the declared length stays exact.
            byte[] body = new UTF8Encoding(false).GetBytes(indata);

            // The container is shared by both requests: the first response fills it,
            // the second request sends its contents back.
            CookieContainer cookies = new CookieContainer();

            HttpWebRequest postRequest = (HttpWebRequest)WebRequest.Create(url);
            postRequest.ContentType = "application/json; charset=utf-8";
            postRequest.Method = "POST";
            postRequest.ContentLength = body.Length;
            postRequest.CookieContainer = cookies;

            using (Stream requestStream = postRequest.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }

            using (WebResponse postResponse = postRequest.GetResponse())
            {
                // The first reply is decoded as gb2312 and echoed to the console.
                Console.WriteLine(ReadResponseText(postResponse, Encoding.GetEncoding("gb2312")));
            }

            HttpWebRequest followUpRequest = (HttpWebRequest)WebRequest.Create(url);
            followUpRequest.CookieContainer = cookies;

            using (WebResponse followUpResponse = followUpRequest.GetResponse())
            {
                return ReadResponseText(followUpResponse, Encoding.UTF8);
            }
        }

        /// <summary>
        /// POST overload that takes the same arguments as
        /// <see cref="GetHtml(string, string, string, string, out string)"/> in a different order.
        /// </summary>
        /// <param name="URL">Address to post to.</param>
        /// <param name="postData">Request body text.</param>
        /// <param name="cookie">Cookie string to send; may be null or empty.</param>
        /// <param name="header">Receives the response headers as text.</param>
        /// <param name="server">Address used as Referer and as the cookie's URI.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public string GetHtml(string URL, string postData, string cookie, out string header, string server)
        {
            return GetHtml(server, URL, postData, cookie, out header);
        }

        /// <summary>
        /// Posts <paramref name="postData"/>, encoded as gb2312, to <paramref name="URL"/>.
        /// </summary>
        /// <param name="server">Address used as Referer and as the cookie's URI.</param>
        /// <param name="URL">Address to post to.</param>
        /// <param name="postData">Request body text.</param>
        /// <param name="cookie">Cookie string to send; may be null or empty.</param>
        /// <param name="header">Receives the response headers as text.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public string GetHtml(string server, string URL, string postData, string cookie, out string header)
        {
            byte[] body = Encoding.GetEncoding("gb2312").GetBytes(postData);
            return GetHtml(server, URL, body, cookie, out header);
        }

        /// <summary>
        /// Posts raw bytes to <paramref name="URL"/> and decodes the reply as UTF-8.
        /// </summary>
        /// <param name="server">Address used as Referer and as the cookie's URI.</param>
        /// <param name="URL">Address to post to.</param>
        /// <param name="byteRequest">Request body.</param>
        /// <param name="cookie">Cookie string to send; may be null or empty.</param>
        /// <param name="header">Receives the response headers as text.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public string GetHtml(string server, string URL, byte[] byteRequest, string cookie, out string header)
        {
            byte[] responseBytes = GetHtmlByBytes(server, URL, byteRequest, cookie, out header);

            // A StreamReader (rather than Encoding.GetString) keeps the original
            // byte-order-mark handling.
            using (StreamReader reader = new StreamReader(new MemoryStream(responseBytes), Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Posts raw bytes to <paramref name="URL"/> as a form submission and returns the raw reply.
        /// </summary>
        /// <param name="server">Address used as Referer and as the cookie's URI.</param>
        /// <param name="URL">Address to post to.</param>
        /// <param name="byteRequest">Request body.</param>
        /// <param name="cookie">Cookie string to send; may be null or empty.</param>
        /// <param name="header">Receives the response headers as text.</param>
        /// <returns>The complete response body.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="byteRequest"/> is null.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        public byte[] GetHtmlByBytes(string server, string URL, byte[] byteRequest, string cookie, out string header)
        {
            if (byteRequest == null)
            {
                throw new ArgumentNullException("byteRequest");
            }

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
            request.CookieContainer = CreateCookieContainer(server, cookie);
            request.ContentType = "application/x-www-form-urlencoded";
            request.Accept = PostAcceptHeader;
            request.Referer = server;
            request.UserAgent = BrowserUserAgent;
            request.Method = "POST";
            request.ContentLength = byteRequest.Length;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(byteRequest, 0, byteRequest.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            {
                header = response.Headers.ToString();

                // Read until end of stream instead of sizing a buffer from ContentLength:
                // that property is -1 when the server does not declare a length.
                return ReadFully(responseStream);
            }
        }

        /// <summary>
        /// Reads <paramref name="stream"/> from its current position to the end.
        /// </summary>
        /// <param name="stream">Readable stream to drain; it is not closed.</param>
        /// <returns>All bytes read; an empty array when the stream has no more data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public byte[] ReadFully(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            using (MemoryStream buffer = new MemoryStream())
            {
                stream.CopyTo(buffer);
                return buffer.ToArray();
            }
        }

        /// <summary>
        /// Issues a GET request with the given cookie attached.
        /// </summary>
        /// <param name="URL">Address to request.</param>
        /// <param name="cookie">Cookie string to send; may be null or empty.</param>
        /// <param name="header">Receives the response headers as text.</param>
        /// <param name="server">Address used as Referer and as the cookie's URI.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        public string GetHtml(string URL, string cookie, out string header, string server)
        {
            return GetHtml(URL, cookie, out header, server, "");
        }

        /// <summary>
        /// Issues a GET request with the given cookie attached.
        /// </summary>
        /// <param name="URL">Address to request.</param>
        /// <param name="cookie">Cookie string to send; may be null or empty.</param>
        /// <param name="header">Receives the response headers as text.</param>
        /// <param name="server">Address used as Referer and as the cookie's URI.</param>
        /// <param name="val">Not used by this method; kept so existing callers still compile.</param>
        /// <returns>The response body decoded as UTF-8.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        public string GetHtml(string URL, string cookie, out string header, string server, string val)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
            request.Accept = "*/*";
            request.Referer = server;
            request.CookieContainer = CreateCookieContainer(server, cookie);
            request.UserAgent = BrowserUserAgent;
            request.Method = "GET";

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                header = response.Headers.ToString();
                return ReadResponseText(response, Encoding.UTF8);
            }
        }

        // Builds a cookie container holding `cookie` for the `server` URI; returns an
        // empty container when there is no cookie text to register.
        private static CookieContainer CreateCookieContainer(string server, string cookie)
        {
            CookieContainer container = new CookieContainer();
            if (!string.IsNullOrEmpty(cookie))
            {
                container.SetCookies(new Uri(server), cookie);
            }

            return container;
        }

        // Reads the whole response body as text in the given encoding and disposes
        // the response stream and reader.
        private static string ReadResponseText(WebResponse response, Encoding encoding)
        {
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, encoding))
            {
                return reader.ReadToEnd();
            }
        }

    }

 

  • 3
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值