.net core 写个抓取博客之星的排名程序

2020年的博客之星正在火热投票中,在这里放出我抓取排名的.net core 代码,你也可以自己抓取了。
然后再为自己拉一波票,欢迎投票给我,1票2票不嫌少,10票20票不嫌多。来吧,朝我开炮。

拉拉票

大家有余票的,支持下我啊!编号150号: 支持点我投票
在这里插入图片描述

浏览器F12查看网络请求

博客之星的页面是个开放页面,没有任何的登录需求,我们只需要访问
https://bss.csdn.net/m/topic/blog_star2020/getUsers 即可获取数据,那么抓取数据和解析json对c#开发者来说,就是分分钟的事情了。

定义一个结果类

/// <summary>
/// Response envelope that the JSON body of the <c>getUsers</c> request is deserialized into.
/// </summary>
public class RtnBlogInfo
    {
        /// <summary>Result code; the ranking reader treats any value other than 200 as an error.</summary>
        public int Code { get; set; }
        /// <summary>Message text; printed by the ranking reader when <see cref="Code"/> is not 200.</summary>
        public string Msg { get; set; }
        /// <summary>The blogger entries carried by the response.</summary>
        public List<BlogInfo> Data { get; set; }
    }
    /// <summary>
    /// One blogger entry of the ranking data, plus fields that <c>RankingFeed</c> fills in locally.
    /// </summary>
    public class BlogInfo
    {
        /// <summary>Entry identifier; not read by the code in this file — presumably supplied by the API (TODO confirm).</summary>
        public string Id { get; set; }
        /// <summary>Detail/vote page URL; the account name is derived from it and it is printed as the vote link.</summary>
        public string Url { get; set; }
        /// <summary>Display name printed in the ranking table.</summary>
        public string Nick_name { get; set; }
        /// <summary>Account name; assigned by <c>RankingFeed</c> from <see cref="Url"/> with the detail-page prefix removed.</summary>
        public string AccountName { get; set; }
        /// <summary>Vote count; the ranking is sorted on this value in descending order.</summary>
        public int Vote_num { get; set; }

        /// <summary>Blog home page; assigned by <c>RankingFeed</c> as <c>https://blog.csdn.net/{AccountName}</c>.</summary>
        public string BlogUrl { get; set; }

        /// <summary>Neither read nor written by the code in this file — purpose unknown (TODO confirm).</summary>
        public string PullUrl { get; set; }

        /// <summary>Value printed after the "编号" label in the table — presumably the contestant number (TODO confirm).</summary>
        public string Number { get; set; }

        /// <summary>Used as a Markdown image target, but only in a table-row branch that is currently never taken.</summary>
        public string Avatar { get; set; }
    }

定义一个定时抓取的main函数

为了定时获取信息,我们使用 dotnet new console建立一个新项目,在Program.cs中编写如下代码。这里使用 ServiceProvider利用DI注入httpclient工厂类,然后开启一个循环即可。

/// <summary>
/// Console entry point: registers the named "csdn" HTTP client and prints the
/// ranking once per <see cref="PollInterval"/> until Ctrl+C is pressed.
/// </summary>
class Program
    {
        /// <summary>Pause between two ranking fetches.</summary>
        private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(60);

        private static ServiceProvider ServiceProvider = null;

        /// <summary>Signalled once shutdown has been requested (first Ctrl+C).</summary>
        public static ManualResetEvent _event = new ManualResetEvent(false);

        /// <summary>Cancels the wait between fetches so that Ctrl+C takes effect immediately.</summary>
        private static readonly CancellationTokenSource _shutdown = new CancellationTokenSource();

        /// <summary>
        /// Builds the service provider, then repeatedly fetches and prints the ranking.
        /// A failed fetch is reported and retried on the next cycle instead of ending the process.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            Console.CancelKeyPress += Console_CancelKeyPress;
            Console.WriteLine("Start!");
            var sc = new ServiceCollection()
                .AddHttpClient("csdn", c =>
                {
                    c.BaseAddress = new Uri("https://bss.csdn.net/");
                    c.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
                    c.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36");
                })
                // Retry transient HTTP failures three times, 600 ms apart.
                .AddTransientHttpErrorPolicy(p =>
                    p.WaitAndRetryAsync(3, _ => TimeSpan.FromMilliseconds(600)));

            ServiceProvider = sc.Services.BuildServiceProvider();
            try
            {
                // The factory and the feed hold no per-run state, so resolve them once.
                var cf = ServiceProvider.GetRequiredService<IHttpClientFactory>();
                var rss = new RankingFeed(cf);

                while (!_shutdown.IsCancellationRequested)
                {
                    try
                    {
                        await rss.InitAsync();
                        await rss.GetAllRankingAsync();
                    }
                    catch (Exception ex) when (ex is HttpRequestException || ex is TaskCanceledException)
                    {
                        // Network failure after the retries, or a request timeout (surfaced by
                        // HttpClient as a cancelled task): report it and try again next cycle.
                        Console.WriteLine($"Error: {ex.Message}");
                    }

                    try
                    {
                        // Non-blocking wait that ends early when shutdown is requested.
                        await Task.Delay(PollInterval, _shutdown.Token);
                    }
                    catch (OperationCanceledException)
                    {
                        break;
                    }
                }
            }
            finally
            {
                ServiceProvider.Dispose();
            }
        }

        /// <summary>
        /// Ctrl+C handler. The first press requests a graceful stop of the polling loop;
        /// a further press while shutdown is still in progress terminates the process at once.
        /// </summary>
        private static void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            if (_event.WaitOne(0))
            {
                // Shutdown was already requested and has not finished yet: force the exit.
                Environment.Exit(0);
            }

            // Keep the process alive so Main can leave its loop and dispose the provider.
            e.Cancel = true;
            _event.Set();
            _shutdown.Cancel();
        }
    }

解析内容和排序

排序利用 linq OrderByDescending即可,很简单。输出为 md格式,方便直接拷贝到博客上发布。

/// <summary>
/// Downloads the blog-star 2020 vote data through the named "csdn" HTTP client and
/// prints it to the console as a Markdown ranking table.
/// </summary>
public class RankingFeed
    {
        /// <summary>Maximum number of table rows printed per run.</summary>
        private const int MaxRows = 110;

        /// <summary>
        /// Number of leading rows that are rendered with the avatar image. Zero keeps the
        /// avatar format switched off, which is what the previous, never-true condition did.
        /// </summary>
        private const int AvatarRows = 0;

        /// <summary>Prefix of a detail-page URL; what follows it is the account name.</summary>
        private const string DetailUrlPrefix = "https://bss.csdn.net/m/topic/blog_star2020/detail?username=";

        public IHttpClientFactory HttpclientFactory;

        /// <summary>Creates a feed that obtains its HTTP clients from <paramref name="httpFactory"/>.</summary>
        /// <param name="httpFactory">Factory able to create the client named "csdn".</param>
        /// <exception cref="ArgumentNullException"><paramref name="httpFactory"/> is null.</exception>
        public RankingFeed(IHttpClientFactory httpFactory)
        {
            HttpclientFactory = httpFactory ?? throw new ArgumentNullException(nameof(httpFactory));
        }

        /// <summary>
        /// Requests the site root and reads the response body to the end. The content is
        /// discarded — presumably this is a warm-up request (TODO confirm).
        /// </summary>
        public async Task InitAsync()
        {
            var client = HttpclientFactory.CreateClient("csdn");
            using (var response = await client.GetAsync("/"))
            {
                await response.Content.ReadAsStringAsync();
            }
        }

        //https://bss.csdn.net/m/topic/blog_star2020
        /// <summary>
        /// Fetches all entries, sorts them by vote count (highest first), fills in
        /// <c>AccountName</c> and <c>BlogUrl</c> for every entry and prints the top rows as a
        /// Markdown table.
        /// </summary>
        /// <returns>
        /// The sorted entries; an empty list when the request fails, the body is empty or
        /// not valid JSON, or the service reports a code other than 200.
        /// </returns>
        public async Task<List<BlogInfo>> GetAllRankingAsync()
        {
            var client = HttpclientFactory.CreateClient("csdn");
            string content;
            using (var response = await client.GetAsync("m/topic/blog_star2020/getUsers"))
            {
                if (!response.IsSuccessStatusCode)
                {
                    // An error page is not JSON; report it instead of letting the parser throw.
                    Console.WriteLine($"Error: HTTP {(int)response.StatusCode} {response.ReasonPhrase}");
                    return new List<BlogInfo>();
                }

                content = await response.Content.ReadAsStringAsync();
            }

            if (string.IsNullOrEmpty(content))
            {
                return new List<BlogInfo>();
            }

            RtnBlogInfo rtn;
            try
            {
                rtn = JsonConvert.DeserializeObject<RtnBlogInfo>(content);
            }
            catch (JsonException ex)
            {
                Console.WriteLine($"Error: {ex.Message}");
                return new List<BlogInfo>();
            }

            // The deserializer yields null for a literal "null" body.
            if (rtn == null || rtn.Code != 200)
            {
                Console.WriteLine($"Error: {rtn?.Msg}");
                return new List<BlogInfo>();
            }

            var list = (rtn.Data ?? new List<BlogInfo>())
                .OrderByDescending(x => x.Vote_num)
                .ToList();

            // Enrich every entry, not only the printed ones, so the returned list is uniform.
            foreach (var blog in list)
            {
                blog.AccountName = GetAccount(blog.Url);
                blog.BlogUrl = $"https://blog.csdn.net/{blog.AccountName}";
            }

            PrintRanking(list);
            return list;
        }

        /// <summary>Writes the first <see cref="MaxRows"/> entries of an already sorted list as a Markdown table.</summary>
        private static void PrintRanking(List<BlogInfo> list)
        {
            Console.WriteLine("============Start stat csdn vote page=========");
            Console.WriteLine("|排名|博主|票数|投票");
            Console.WriteLine("|--|--|--|--|");

            var rank = 1;
            foreach (var blog in list.Take(MaxRows))
            {
                var who = $"博主:[{blog.Nick_name}]({blog.BlogUrl}) 编号:{blog.Number}";
                if (rank <= AvatarRows)
                {
                    who = $"![描述]({blog.Avatar})<br> " + who;
                }

                Console.WriteLine($"|{rank}|{who}|**{blog.Vote_num}**|[{blog.Url}]({blog.Url})|");
                rank++;
            }

            Console.WriteLine("============End stat csdn vote page======");
        }

        /// <summary>
        /// Extracts the number from a "当前票数: &lt;em&gt;N&lt;/em&gt; 票" HTML fragment.
        /// Not called anywhere in this class.
        /// </summary>
        /// <returns>The parsed vote count, or 0 when the input is null or not a number.</returns>
        private int GetVote(string html)
        {
            var vote = html?.Replace("当前票数: <em>", "").Replace("</em> 票", "");
            int.TryParse(vote, out var nVote);
            return nVote;
        }

        //https://bss.csdn.net/m/topic/blog_star2020/detail?username=qq_34361283
        /// <summary>Removes the detail-page prefix from <paramref name="url"/>, leaving the account name.</summary>
        /// <returns>The remainder of the URL, or null when <paramref name="url"/> is null.</returns>
        private string GetAccount(string url)
        {
            return url?.Replace(DetailUrlPrefix, "");
        }
    }

展示效果

在这里插入图片描述

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

webmote

如果能帮到你,请支持下博主

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值