使用 HtmlAgilityPack + WinForms 实现天天基金爬虫工具

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Text;
using System.Windows.Forms;
using CCWin;
using HtmlAgilityPack;
using HtmlDocument = HtmlAgilityPack.HtmlDocument;

namespace FundDataDemo
{
    public partial class Form1 : CCSkinMain
    {
        // Base URL of the per-fund detail page from which the standard-deviation and
        // Sharpe-ratio cells are read; the fund code and ".html" are appended.
        private readonly string funddetailPage = "http://fundf10.eastmoney.com/tsdata_";
        private readonly string line;
        // Funds loaded from AllFunds.txt by the constructor.
        private readonly List<Fund> myList = new List<Fund>();
        // Base URL of the per-fund overview page; the fund code and ".html" are appended.
        private readonly string sPage = "http://fund.eastmoney.com/";
        private readonly string[] subline;
        // HtmlAgilityPack page loader used by the background worker.
        private readonly HtmlWeb web = new HtmlWeb();
        // HTML table built by the background worker; displayed and then reset to null on completion.
        private string sContent;

        // Base URL of the valuation JSONP endpoint; "<fund code>.js?rt=<timestamp>" is appended.
        private readonly string gzPageUrl = "http://fundgz.1234567.com.cn/js/";
        //http://fundgz.1234567.com.cn/js/161725.js?rt=1600422522450
        //jsonpgz({ "fundcode":"161122","name":"易方达生物分级","jzrq":"2020-09-17","dwjz":"0.9039","gsz":"0.9127","gszzl":"0.97","gztime":"2020-09-18 15:00"});
        #region MyRegion
        //{
        // "fundcode": "161122",
        // "name": "易方达生物分级",
        // "jzrq": "2020-09-17",
        // "dwjz": "0.9039",
        // "gsz": "0.9127",
        // "gszzl": "0.97",
        // "gztime": "2020-09-18 15:00"
        //}
        #endregion

        /// <summary>
        /// Initializes the form, enables the refresh timer and loads the fund list from
        /// <c>AllFunds.txt</c> in the current working directory.
        /// </summary>
        /// <remarks>
        /// Every non-blank line must have the form <c>code,name,flag</c>; a flag of <c>y</c>
        /// marks the fund as held, anything else as not held.
        /// </remarks>
        /// <exception cref="InvalidDataException">
        /// A non-blank line contains fewer than three comma-separated fields.
        /// </exception>
        public Form1()
        {
            InitializeComponent();
            timer1.Enabled = true;

            // Load the fund definitions. File.OpenRead asks for read access only, and the
            // using blocks release the file handle even when a line fails to parse.
            var path = Path.Combine(Environment.CurrentDirectory, "AllFunds.txt");
            using (var stream = File.OpenRead(path))
            using (var reader = new StreamReader(stream))
            {
                var lineNumber = 0;
                string rawLine;
                while ((rawLine = reader.ReadLine()) != null)
                {
                    lineNumber++;
                    var trimmed = rawLine.Trim();
                    if (trimmed.Length == 0)
                    {
                        continue;
                    }

                    var parts = trimmed.Split(',');
                    if (parts.Length < 3)
                    {
                        // Report the offending line instead of failing with a bare IndexOutOfRangeException.
                        throw new InvalidDataException(
                            "AllFunds.txt line " + lineNumber + " must be 'code,name,flag': " + trimmed);
                    }

                    myList.Add(new Fund
                    {
                        Id = parts[0].Trim(),
                        Name = parts[1].Trim(),
                        Has = parts[2].Trim() == "y" ? " 持有" : " 未持有"
                    });
                }
            }
        }


        /// <summary>
        /// Starts a refresh: shows a placeholder text in the browser, disables the button
        /// and launches the background worker.
        /// </summary>
        /// <remarks>
        /// Does nothing while a refresh is still running. This handler is also invoked from
        /// the timer tick, and calling <c>RunWorkerAsync</c> on a busy worker throws
        /// <see cref="InvalidOperationException"/>.
        /// </remarks>
        private void button1_Click(object sender, EventArgs e)
        {
            if (backgroundWorker1.IsBusy)
            {
                return;
            }

            webBrowser1.DocumentText = "资料爬取中...";
            button1.Enabled = false;
            backgroundWorker1.RunWorkerAsync();
        }

        /// <summary>
        /// Timer callback: triggers the same refresh as clicking the button, unless a
        /// refresh is already in progress.
        /// </summary>
        private void timer1_Tick(object sender, EventArgs e)
        {
            // Starting the worker while it is busy throws InvalidOperationException,
            // so a tick that arrives mid-refresh is simply skipped.
            if (backgroundWorker1.IsBusy)
            {
                return;
            }

            button1_Click(null, null);
        }

        /// <summary>
        /// Background-worker body: downloads the overview page, the detail page and the
        /// valuation JSONP for every fund in <c>myList</c> and renders one HTML table row
        /// per fund into <c>sContent</c>.
        /// </summary>
        /// <remarks>
        /// Cells whose XPath lookup finds nothing are rendered empty instead of failing the
        /// whole run with a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <exception cref="Exception">
        /// A download failed; the original exception is preserved as the inner exception.
        /// </exception>
        private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
        {
            // Always fetch fresh pages rather than reading from the HtmlAgilityPack cache.
            web.CacheOnly = false;
            web.CachePath = null;
            web.UsingCache = false;

            // Shared XPath prefixes: the manager row on the overview page and the table on the
            // detail page that holds the standard-deviation (tr[2]) and Sharpe-ratio (tr[3]) rows.
            const string managerRow = "//li[@id='fundManagerTab']/div[1]/table/tr[2]";
            const string riskTable =
                "//div[@id='bodydiv']/div[8]/div[3]/div[2]/div[3]/div/div[1]/div/div[4]/table";

            var sb = new StringBuilder();
            sb.Append("<table id=\"tabledata\" width=\"1800\" border=\"1\" cellspacing=\"0\" cellpadding=\"0\">");
            sb.Append("<tr>" +
                      "<th>基金代码</th>" +
                      "<th>基金名称</th>" +
                      "<th>基金规模</th>" +
                      "<th>基金经理</th>" +
                      "<th>任职时间</th>" +
                      "<th>任职天数</th>" +
                      "<th>估算涨幅</th>" +
                      "<th>更新时间</th>" +
                      "<th>同类近一周排名</th>" +
                      "<th>近1年标准差</th>" +
                      "<th>近2年标准差</th>" +
                      "<th>近3年标准差</th>" +
                      "<th>近1年夏普比率</th>" +
                      "<th>近2年夏普比率</th>" +
                      "<th>近3年夏普比率</th>" +
                      "<th>是否持有</th>" +
                      "</tr>");

            foreach (var objFund in myList)
            {
                HtmlDocument doc;
                HtmlDocument docDetail;
                Valuation vt;
                try
                {
                    doc = web.Load(sPage + objFund.Id + ".html");
                    docDetail = web.Load(funddetailPage + objFund.Id + ".html");
                    // The rt query value is just the current timestamp, so each request URL is unique.
                    vt = CommonHelper.Get(gzPageUrl + string.Format("{0}.js?rt={1}", objFund.Id,
                        DateTime.Now.ToString("yyyyMMddHHmmss")));
                }
                catch (Exception ex)
                {
                    // Keep the original exception so the stack trace and type are not lost.
                    throw new Exception("Network Errors: " + ex.Message, ex);
                }

                var page = doc.DocumentNode;
                var detail = docDetail.DocumentNode;

                // Peer ranking over the past week.
                var ranking = page.SelectSingleNode("//div[@class='Rdata']");
                // Fund size.
                var guimoNode = page.SelectSingleNode(
                    "//div[@id='body']/div[12]/div/div/div[3]/div[1]/div[2]/table/tr[1]/td[2]");
                // Fund manager, tenure period and days in post.
                var managerNode = page.SelectSingleNode(managerRow + "/td[2]/a");
                var tenureNode = page.SelectSingleNode(managerRow + "/td[1]");
                var workingdaysNode = page.SelectSingleNode(managerRow + "/td[3]");

                // Standard deviation for the past 1 / 2 / 3 years.
                var oneyear = detail.SelectSingleNode(riskTable + "/tr[2]/td[2]");
                var twoyear = detail.SelectSingleNode(riskTable + "/tr[2]/td[3]");
                var threeyear = detail.SelectSingleNode(riskTable + "/tr[2]/td[4]");

                // Sharpe ratio for the past 1 / 2 / 3 years.
                var oneyearSharp = detail.SelectSingleNode(riskTable + "/tr[3]/td[2]");
                var twoyearSharp = detail.SelectSingleNode(riskTable + "/tr[3]/td[3]");
                var threeyearSharp = detail.SelectSingleNode(riskTable + "/tr[3]/td[4]");

                // Estimated change, taken from the valuation payload (may be missing).
                var rate = FormatEstimatedChange(vt == null ? null : vt.gszzl);

                // Time of the estimate; "--" or a missing element is shown as "暂无".
                var dateNode = doc.GetElementbyId("gz_gztime");
                var date = dateNode == null ? "--" : dateNode.InnerHtml;
                if (date == "--")
                {
                    date = "暂无";
                }

                sb.Append("<tr>")
                  .Append("<td>").Append(objFund.Id).Append("</td>")
                  .Append("<td>")
                  .Append("<a href=\"" + sPage + objFund.Id + ".html\" target=\"_blank\">")
                  .Append(objFund.Name)
                  .Append("</a>")
                  .Append("</td>")
                  .Append("<td>").Append(NodeText(guimoNode).Replace("基金规模:", "")).Append("</td>")
                  .Append("<td>").Append(NodeText(managerNode)).Append("</td>")
                  .Append("<td>").Append(NodeText(tenureNode)).Append("</td>")
                  .Append("<td>").Append(NodeText(workingdaysNode)).Append("</td>")
                  .Append("<td>").Append(rate).Append("</td>")
                  .Append("<td>").Append(date).Append("</td>")
                  .Append("<td>").Append(NodeText(ranking)).Append("</td>")
                  .Append("<td>").Append(NodeText(oneyear)).Append("</td>")
                  .Append("<td>").Append(NodeText(twoyear)).Append("</td>")
                  .Append("<td>").Append(NodeText(threeyear)).Append("</td>")
                  .Append("<td>").Append(NodeText(oneyearSharp)).Append("</td>")
                  .Append("<td>").Append(NodeText(twoyearSharp)).Append("</td>")
                  .Append("<td>").Append(NodeText(threeyearSharp)).Append("</td>")
                  .Append("<td>").Append(objFund.Has).Append("</td>")
                  .Append("</tr>");
            }

            sb.Append("</table >");
            sContent = sb.ToString();
        }

        /// <summary>
        /// Returns the inner text of <paramref name="node"/>, or an empty string when the
        /// lookup that produced it found nothing.
        /// </summary>
        private static string NodeText(HtmlNode node)
        {
            return node == null ? string.Empty : node.InnerText;
        }

        /// <summary>
        /// Converts the raw estimated-change value into the table cell content: "0" when the
        /// value is missing, "0.00%" for the "--" placeholder or blank text, red markup for
        /// positive values, and the raw text otherwise.
        /// </summary>
        private static string FormatEstimatedChange(string raw)
        {
            if (raw == null)
            {
                return "0";
            }

            var rate = raw.Trim();
            if (rate.Length == 0 || rate == "--")
            {
                // Checked before any numeric parsing: the placeholder is not a number.
                return "0.00%";
            }

            // The payload uses '.' as the decimal separator, so parse culture-independently;
            // TryParse keeps unexpected text from aborting the whole refresh.
            double value;
            var isPositive = rate.Contains("+") ||
                             (double.TryParse(rate, System.Globalization.NumberStyles.Float,
                                  System.Globalization.CultureInfo.InvariantCulture, out value) && value > 0);

            return isPositive ? "<font color=\"red\">" + rate + "</font>" : rate;
        }

        /// <summary>
        /// Runs when the background worker finishes: shows the generated table, or the
        /// failure message when the worker ended with an exception, then re-enables the
        /// button and clears the buffered HTML.
        /// </summary>
        private void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            if (e.Error != null)
            {
                // An exception thrown in DoWork is delivered here via e.Error; without this
                // branch the browser would be handed a null document and the failure hidden.
                webBrowser1.DocumentText = "资料爬取失败: " + System.Net.WebUtility.HtmlEncode(e.Error.Message);
            }
            else
            {
                webBrowser1.DocumentText = sContent;
            }

            button1.Enabled = true;
            sContent = null;
        }
    }
}

AllFunds.txt

000216,华安黄金易ETF联接A,y
040046,华安纳斯达克100指数,y
161130,易方达纳斯达克100人民币,y
001549,天弘上证50指数C,y
110011,易方达中小盘混合,y
163406,兴全合润分级混合,y
001632,天弘中证食品饮料指数C,y
000961,天弘沪深300ETF联接A,y
163415,兴全商业模式优选混合,y
001513,易方达信息产业混合,y
161033,富国中证智能汽车(LOF),y
003095,中欧医疗健康混合A,y
161903,万家行业优选混合(LOF),y
110023,易方达医疗保健行业混合,y
001550,天弘中证医药100A,y
519674,银河创新成长混合,y
006751,富国互联科技股票,y
161122,易方达生物分级,y
161725,招商中证白酒指数分级,y
001071,华安媒体互联网混合,y
150303,华安创业板50指数分级A,y
164906,交银中证海外中国互联网指数,n
110022,易方达消费行业股票,n

CommonHelper.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using EasyHttp.Http;
using Newtonsoft.Json;

namespace FundDataDemo
{
    /// <summary>
    /// Helper for downloading a fund's valuation data.
    /// </summary>
    public class CommonHelper
    {
        /// <summary>
        /// Sends a GET request to <paramref name="url"/>, strips the JSONP wrapper from the
        /// response body and deserializes the remaining JSON into a <see cref="Valuation"/>.
        /// </summary>
        /// <param name="url">Address of the JSONP resource.</param>
        /// <returns>
        /// The deserialized valuation, or <c>null</c> when the response has no usable body.
        /// </returns>
        public static Valuation Get(string url)
        {
            HttpResponse response = new HttpClient().Get(url);

            // A missing or blank body cannot be parsed; report "no valuation" instead of
            // letting the JSONP stripping fail on a null string.
            string rawText = response == null ? null : response.RawText;
            if (string.IsNullOrWhiteSpace(rawText))
            {
                return null;
            }

            return JsonConvert.DeserializeObject<Valuation>(rawText.JsonpTrim());
        }
    }

    /// <summary>
    /// Payload of the valuation JSONP response. Property names match the JSON keys,
    /// and every value is kept as the raw string.
    /// </summary>
    public class Valuation
    {
        /// <summary>Fund code, e.g. "161122".</summary>
        public string fundcode { get; set; }

        /// <summary>Fund name.</summary>
        public string name { get; set; }

        /// <summary>A date such as "2020-09-17" (presumably the date of the published net value; confirm).</summary>
        public string jzrq { get; set; }

        /// <summary>A decimal such as "0.9039" (presumably the published unit net value; confirm).</summary>
        public string dwjz { get; set; }

        /// <summary>A decimal such as "0.9127" (presumably the estimated net value; confirm).</summary>
        public string gsz { get; set; }

        /// <summary>Estimated change, e.g. "0.97"; shown in the "估算涨幅" column.</summary>
        public string gszzl { get; set; }

        /// <summary>A timestamp such as "2020-09-18 15:00" (presumably the time of the estimate; confirm).</summary>
        public string gztime { get; set; }
    }

    /// <summary>
    /// String helpers for the valuation JSONP response.
    /// </summary>
    public static class StringExtension
    {
        /// <summary>
        /// Removes the <c>jsonpgz(</c> … <c>);</c> wrapper from a JSONP response and returns
        /// the JSON inside it.
        /// </summary>
        /// <param name="jsonp">Raw response text; may be null or blank.</param>
        /// <returns>
        /// The unwrapped text with surrounding whitespace removed, or an empty string when
        /// <paramref name="jsonp"/> is null or blank. Text without the wrapper is returned
        /// trimmed but otherwise unchanged.
        /// </returns>
        public static string JsonpTrim(this string jsonp)
        {
            const string prefix = "jsonpgz(";

            if (string.IsNullOrWhiteSpace(jsonp))
            {
                return string.Empty;
            }

            var text = jsonp.Trim();

            // Only the leading call and the trailing terminator are removed, so a ");" or
            // "jsonpgz(" that appears inside a JSON string value is left intact.
            var wrapped = text.StartsWith(prefix, StringComparison.Ordinal);
            if (wrapped)
            {
                text = text.Substring(prefix.Length);
            }

            if (text.EndsWith(");", StringComparison.Ordinal))
            {
                text = text.Substring(0, text.Length - 2);
            }
            else if (wrapped && text.EndsWith(")", StringComparison.Ordinal))
            {
                // Tolerate a response that omits the trailing semicolon.
                text = text.Substring(0, text.Length - 1);
            }

            return text;
        }
    }
}

Fund.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace FundDataDemo
{
    /// <summary>
    /// One fund entry from the fund list.
    /// </summary>
    class Fund
    {
        /// <summary>Fund code.</summary>
        public string Id { get; set; }

        /// <summary>Fund name.</summary>
        public string Name { get; set; }

        /// <summary>Whether the fund is held.</summary>
        public string Has { get; set; }
    }
}

如图:

在这里插入图片描述
在这里插入图片描述
代码下载地址

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值