HtmlAgilityPack爬虫实战-百度经验悬赏爬取

2 篇文章 0 订阅
1 篇文章 0 订阅

完整代码如下:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Maticsoft.BLL;
using Maticsoft.Model;
using System.Net;
using System.IO;
using System.Runtime.InteropServices;
using Maticsoft.DBUtility;
namespace getXsjy_爬取悬赏经验_
{
    /// <summary>
    /// Main window of the Baidu Jingyan bounty crawler.
    /// One action downloads the category list from the bounty page and stores it
    /// as <c>lb = "1"</c> records; the other walks every stored category page by
    /// page and stores the bounty entries as <c>lb = "2"</c> records.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Bounty landing page; also the base of every category/page URL.
        string url = "https://jingyan.baidu.com/patch";
        // HtmlAgilityPack downloader shared by every request made by this form.
        HtmlAgilityPack.HtmlWeb webClient = new HtmlAgilityPack.HtmlWeb();
        // Business-layer object through which all ddlbn records are read and written.
        bl_ddlbn lb_ddlb = new bl_ddlbn();
        // Step applied to the "pn" query parameter between consecutive pages.
        int pagesize = 15;

        /// <summary>
        /// Forces the downloader to decode responses as UTF-8.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            webClient.OverrideEncoding = Encoding.UTF8;
        }

        /// <summary>
        /// Blocks the calling thread for the given number of seconds.
        /// </summary>
        /// <param name="delayTime">
        /// Number of seconds to wait. Zero or negative values return immediately.
        /// Values above roughly 24 days exceed what <c>Thread.Sleep</c> accepts and throw.
        /// </param>
        /// <returns>Always <c>true</c>.</returns>
        public static bool Delay(int delayTime)
        {
            if (delayTime <= 0)
            {
                return true;
            }

            // Sleep instead of spinning on DateTime.Now: the previous loop compared
            // TimeSpan.Seconds (the 0-59 component), so any delay of 60 seconds or
            // more never finished, and it kept a CPU core busy while waiting.
            System.Threading.Thread.Sleep(TimeSpan.FromSeconds(delayTime));
            return true;
        }

        /// <summary>
        /// Crawls the bounty entries of every stored category, then waits and repeats
        /// forever. This method never returns and blocks the calling thread; it is
        /// invoked from <see cref="button1_Click"/>.
        /// </summary>
        /// <exception cref="FormatException">
        /// The page-count or interval text box does not contain a valid integer.
        /// </exception>
        private void GetXsjy()
        {
            // Loop instead of calling GetXsjy() recursively at the end of each pass,
            // which grew the call stack by one frame per synchronisation cycle.
            while (true)
            {
                CrawlAllCategories();

                // No crawling between 00:00 and 07:59 local time; re-check once an hour.
                while (DateTime.Now.Hour <= 7)
                {
                    System.Threading.Thread.Sleep(1000 * 60 * 60);
                }

                // Interval between two passes, in minutes, taken from textBox2.
                System.Threading.Thread.Sleep(1000 * 60 * Convert.ToInt16(textBox2.Text));
            }
        }

        /// <summary>
        /// Runs one crawl pass: for each category record (<c>lb = '1'</c>) downloads up
        /// to the number of pages entered in tbx_page and stores the entries found,
        /// committing one page at a time.
        /// </summary>
        private void CrawlAllCategories()
        {
            // Parsed once, before the destructive Delete below, so malformed input
            // fails before any stored data is touched.
            int pageCount = int.Parse(tbx_page.Text);

            List<ddlbn> modellist = lb_ddlb.GetModelList(" lb='1'");
            lb_ddlb.Delete("2");

            foreach (ddlbn model in modellist)
            {
                for (int i = 0; i < pageCount; i++)
                {
                    // Build the URL from scratch for every page. The previous code
                    // appended " &pn=" (with a leading space) and re-appended it after
                    // a Substring, so one extra space accumulated per page.
                    string ls_url = url + "?cid=" + model.dm + "&pn=" + (pagesize * i).ToString();

                    HtmlAgilityPack.HtmlDocument doc = webClient.Load(ls_url);
                    HtmlAgilityPack.HtmlNodeCollection colist = doc.DocumentNode.SelectNodes("//*[@class='li-par']");
                    if (colist == null || colist.Count == 0)
                    {
                        // No entries on this page: stop paging this category.
                        break;
                    }

                    // One page of entries is committed in a single Add call.
                    List<ddlbn> pagelist = new List<ddlbn>();
                    foreach (HtmlAgilityPack.HtmlNode no in colist)
                    {
                        ddlbn newmodel = BuildBounty(no, ls_url);
                        if (newmodel == null)
                        {
                            continue;
                        }

                        // Replace any record already stored under the same id.
                        if (lb_ddlb.Exists(newmodel.dm, newmodel.lb))
                        {
                            lb_ddlb.Delete(newmodel.dm, newmodel.lb);
                        }
                        pagelist.Add(newmodel);
                    }

                    // Throttle requests to the remote site.
                    Delay(3);
                    if (pagelist.Count > 0)
                    {
                        lb_ddlb.Add(pagelist);
                    }
                }
            }
        }

        /// <summary>
        /// Converts one <c>li-par</c> node into a detail record (<c>lb = "2"</c>).
        /// </summary>
        /// <param name="no">The list-entry node selected from the page.</param>
        /// <param name="pageUrl">URL of the page the node came from; stored in <c>detail</c>.</param>
        /// <returns>
        /// The populated record, or <c>null</c> when the node lacks the title link,
        /// the cash span or the <c>data-queryid</c> attribute.
        /// </returns>
        private ddlbn BuildBounty(HtmlAgilityPack.HtmlNode no, string pageUrl)
        {
            if (!no.HasChildNodes)
            {
                return null;
            }

            HtmlAgilityPack.HtmlNode item = no.ChildNodes[0];
            HtmlAgilityPack.HtmlNode row = item.SelectSingleNode("a[@class='title query-item-id']");
            HtmlAgilityPack.HtmlNode cash = item.SelectSingleNode("span[@class='cash']");
            if (row == null || cash == null)
            {
                // SelectSingleNode yields null when the markup does not match.
                return null;
            }

            string queryId = row.GetAttributeValue("data-queryid", null);
            if (queryId == null)
            {
                return null;
            }

            ddlbn newmodel = new ddlbn();
            newmodel.lb = "2"; // bounty detail record
            newmodel.price = cash.InnerText;
            newmodel.mc = row.InnerText;
            newmodel.dm = queryId;
            newmodel.detail = pageUrl;
            return newmodel;
        }

        /// <summary>
        /// Downloads the bounty landing page and stores every link found under the
        /// <c>typeList</c> element as a category record (<c>lb = "1"</c>).
        /// </summary>
        private void getMl()
        {
            // NOTE(review): this clears the detail rows (lb='2'), not the category rows
            // (lb='1') that are inserted below - confirm this is intended and that
            // repeated runs do not duplicate categories.
            DbHelperSQL.ExecuteSql("delete from ddlbn where lb='2'");
            HtmlAgilityPack.HtmlDocument doc = webClient.Load(url);

            HtmlAgilityPack.HtmlNodeCollection colist = doc.DocumentNode.SelectNodes("//*[@id='typeList']");

            List<Maticsoft.Model.ddlbn> modellist = new List<Maticsoft.Model.ddlbn>();

            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (colist != null)
            {
                foreach (HtmlAgilityPack.HtmlNode node in colist)
                {
                    HtmlAgilityPack.HtmlNodeCollection ls_a = node.SelectNodes(".//a[@href]");
                    if (ls_a == null)
                    {
                        continue;
                    }

                    foreach (HtmlAgilityPack.HtmlNode no in ls_a)
                    {
                        // The category code is whatever follows the first '=' in the href.
                        string[] parts = no.GetAttributeValue("href", string.Empty).Split('=');
                        if (parts.Length < 2)
                        {
                            // Link without a query value: not a category link.
                            continue;
                        }

                        ddlbn model = new ddlbn();
                        model.lb = "1"; // category record
                        model.mc = no.InnerText;
                        model.dm = parts[1];
                        modellist.Add(model);
                    }
                }
            }

            if (modellist.Count > 0)
            {
                lb_ddlb.Add(modellist);
            }
        }

        /// <summary>
        /// Starts the endless bounty-detail crawl on the calling thread.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            GetXsjy();
        }

        /// <summary>
        /// Refreshes the stored category list.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            getMl();
        }
    }
}

界面如下，主要分为两个功能：经验分类目录爬取和悬赏明细数据爬取。明细爬取会读取已入库的分类记录，因此需要先爬取分类目录，再爬取明细数据。代码比较简单，就不详细介绍了。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值