C# 抓取网页并用 HtmlAgilityPack 解析

在 NuGet 程序包管理器控制台 (PM>) 中安装依赖:Install-Package HtmlAgilityPack -Version 1.11.12

        /// <summary>
        /// Click handler that downloads one hard-coded chapter page, parses it and
        /// shows the first <c>h1</c> element and the <c>div#content</c> element in
        /// message boxes.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            string url_base = "https://www.qiushuzw.com/t";
            string novel = "21868";
            string section = "22173677.html";
            string url = string.Format("{0}/{1}/{2}", url_base, novel, section);

            // NOTE(review): HDA is presumably a using-alias for the HtmlAgilityPack
            // namespace declared elsewhere in the file — confirm.
            HDA.HtmlDocument doc = new HDA.HtmlDocument();
            doc.LoadHtml(WebGetter.GetHtmlContent(url));

            // SelectSingleNode returns null when the XPath matches nothing (for
            // example when the download was empty or the page layout changed), so
            // each result must be checked before it is dereferenced.
            HDA.HtmlNode node = doc.DocumentNode.SelectSingleNode("//h1");
            HDA.HtmlNode nodeContent = doc.DocumentNode.SelectSingleNode("//div[@id='content']");

            if (node == null || nodeContent == null)
            {
                MessageBox.Show(string.Format("Could not find the title (//h1) or the content (//div[@id='content']) in {0}", url));
                return;
            }

            MessageBox.Show(node.InnerHtml);
            MessageBox.Show(nodeContent.InnerHtml);
        }
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns its body
        /// decoded as text.
        /// </summary>
        /// <param name="url">Address to fetch. A null or empty value yields an empty string without any network access.</param>
        /// <param name="encoding">
        /// Name of the text encoding used to decode the body. Null, empty or the
        /// legacy default "utf8" select UTF-8; any other value is passed to
        /// <see cref="Encoding.GetEncoding(string)"/>.
        /// </param>
        /// <returns>The decoded response body, or <see cref="string.Empty"/> when <paramref name="url"/> is null or empty.</returns>
        /// <exception cref="ArgumentException">The <paramref name="encoding"/> name is not a supported encoding.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        public static string GetHtmlContent(string url, string encoding = "utf8")
        {
            if (string.IsNullOrEmpty(url))
            {
                return string.Empty;
            }

            Encoding encode = ResolveHtmlEncoding(encoding);
            WebRequest request = WebRequest.Create(url);

            // using blocks guarantee the response and streams are released even
            // when reading or decompressing throws.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (MemoryStream buffer = new MemoryStream())
            {
                if (stream == null)
                {
                    return string.Empty;
                }

                // Buffer the body so its first bytes can be inspected: only a
                // payload that starts with the gzip magic number (0x1F 0x8B) is
                // decompressed, everything else is decoded as-is.
                stream.CopyTo(buffer);
                buffer.Position = 0;
                bool isGzip = buffer.Length >= 2
                    && buffer.ReadByte() == 0x1F
                    && buffer.ReadByte() == 0x8B;
                buffer.Position = 0;

                Stream payload = buffer;
                if (isGzip)
                {
                    payload = new GZipStream(buffer, CompressionMode.Decompress);
                }

                // Disposing the reader also disposes the payload stream it wraps.
                using (StreamReader reader = new StreamReader(payload, encode))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Maps an encoding name to an <see cref="Encoding"/>, treating null, empty
        /// and "utf8" (the public default, compared case-insensitively) as UTF-8.
        /// </summary>
        private static Encoding ResolveHtmlEncoding(string encoding)
        {
            if (string.IsNullOrEmpty(encoding)
                || string.Equals(encoding, "utf8", StringComparison.OrdinalIgnoreCase))
            {
                return Encoding.UTF8;
            }

            return Encoding.GetEncoding(encoding);
        }

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值