python获取指定用户csdn博客列表并查询质量分，将结果保存到excel

hougang

已于 2023-12-04 14:55:42 修改

阅读量892

点赞数 13

分类专栏：运维文章标签： python

于 2023-12-04 14:55:14 首次发布

本文链接：https://blog.csdn.net/hougang/article/details/134770986

版权

运维专栏收录该内容

17 篇文章 0 订阅

订阅专栏

API接口

获取博文总数接口

username=hougang，表示获取用户hougang的所有博文数量

https://blog.csdn.net/community/home-api/v1/get-tab-total?username=hougang

获取博文列表接口

https://blog.csdn.net/community/home-api/v1/get-business-list

质量分接口

https://bizapi.csdn.net/trends/api/v1/get-article-score

代码实现

import pandas as pd
import requests
import math
import datetime

# 设置博文用户名
username = 'hougang'

# 设置获取质量分的请求参数，可以通过浏览器访问https://www.csdn.net/qc，获取请求头参数
X_Ca_Key = "203930474"
X_Ca_Nonce = "a88f3e4b-87ae-4d2e-b52b-cc342869829b"
X_Ca_Signature = "jm6aZZ6mMvAanqXJZR4E0LMKBElXbGWr3TPZyYVSph4="


# 获取所有博文数量
def get_articles_count():
    url = f"https://blog.csdn.net/community/home-api/v1/get-tab-total?username={username}"

    # 反爬，设置请求头
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
    }

    response = requests.get(url, headers=headers)
    # print(response.json())

    # 获取博文总数
    articles_count = response.json()['data']['blog']
    # print(articles_count)

    # 返回博文总数
    return articles_count


# 获取博文列表，包含质量分数
def get_article_list():
    # 定义变量存储所有文章信息
    articles = []

    # 设置每页查询数量
    page_size = 80

    # 获取所有博文数量
    articles_count = get_articles_count()

    # 计算分页数量
    count = math.ceil(articles_count / page_size)
    # print(count)

    # 分页循环获取文章列表
    for i in range(count):
        # 设置页码
        page = i + 1
        # print(page)

        # 构建请求url
        url = f"https://blog.csdn.net/community/home-api/v1/get-business-list?page={page}&size={page_size}&businessType=blog&orderby=&noMore=false&year=&month=&username={username}"

        # 反爬，设置请求头
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
        }
        response = requests.get(url, headers=headers)

        # 获取当前页文章列表
        data_list = response.json()['data']['list']
        # print(data_list)

        # 获取每篇博文信息，包含标题，url，发布时间，质量分和质量描述
        for article in data_list:
            # 博文url
            article_url = article['url']

            # 获取博文url质量分和message
            res = get_article_quality_score(article_url)

            articles.append(
                {
                    'title': article['title'],
                    'url': article_url,
                    'postTime': article['postTime'],
                    "score": res['score'],
                    "message": res['message']
                }
            )
    # print(articles)

    # 返回所有文章信息
    return articles


# 传入文章url,返回文章质量分和质量描述
def get_article_quality_score(article_url):
    # 查质量分接口
    url = f"https://bizapi.csdn.net/trends/api/v1/get-article-score"

    # 设置请求头
    headers = {
        "accept": "application/json, text/plain, */*",
        "x-ca-key": X_Ca_Key,
        "x-ca-nonce": X_Ca_Nonce,
        "x-ca-signature": X_Ca_Signature,
        "x-ca-signature-headers": "x-ca-key,x-ca-nonce",
        "x-ca-signed-content-type": "multipart/form-data"
    }

    # post提交data
    data = {"url": article_url}

    # 发送post请求
    response = requests.post(url=url, headers=headers, data=data)

    # 博文质量分
    score = response.json()['data']['score']

    # 描述
    message = response.json()['data']['message']

    score_obj = {
        "score": score,
        "message": message
    }

    # 返回包含分数和描述的对象
    return score_obj


# 将文章信息写入excel文件
def write_to_excel():
    # 获取博文数据
    print("开始获取博文数据")
    data = get_article_list()

    # 将当前时间作为excel文件名
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")

    # 将博文数据写入excel
    df = pd.DataFrame(data)
    df.to_excel(f"{now}.xlsx", index=False)
    print(f"将博文数据写入ecxel文件成功，文件名称：{now}.xlsx")


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # 获取文章总数量
    articles_sum = get_articles_count()
    print(f"博文总数为：{articles_sum}")

    # 获取博文列表,包含质量分
    # article = get_article_list()
    # print(article)

    # 获取博文数据，并写入excel
    write_to_excel()

    pass

用法示例

# python main.py
博文总数为：36
开始获取博文数据
将博文数据写入ecxel文件成功，文件名称：2023-12-04_144607.xlsx

Process finished with exit code 0

数据展示

种草

我的博文内容主要针对“计算机网络”、“安全”、“运维”和“云计算”方向，感兴趣朋友的请关注我，我将不定期发布新的博文并不断改进已发布博文。

后期依据大家对博文的评论，点赞及关注情况，针对大家感兴趣的内容我也会录制视频并整理出成套的学习资料免费分享给大家，期待能和大家一起交流学习。

hougang

关注

13
点赞
踩
7

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录