Python 实现:下载 CSDN 博客并保存为 Markdown 文件(可进一步转换为 epub)

import os

import html2text
import requests
from bs4 import BeautifulSoup
from parsel import Selector

from Novel import headers


def get_all_article_id(csdn):
    """Collect the article ids listed on every list page of a CSDN blog.

    Pages ``<csdn>/article/list/1``, ``/2``, ... are fetched in order until a
    page yields no article ids that have not been seen before.

    Args:
        csdn: Base URL of the blog, e.g. ``https://blog.csdn.net/<user>``.
            A trailing slash is tolerated.

    Returns:
        A list of article id strings in first-seen order, without duplicates.

    Raises:
        requests.RequestException: If a list page cannot be fetched
            (including when the request times out).
    """
    base_url = csdn.rstrip('/')
    article_ids = []
    seen = set()
    page = 1
    while True:
        # A timeout keeps a stalled connection from blocking the crawl forever.
        response = requests.get('%s/article/list/%s' % (base_url, page),
                                headers=headers, timeout=30)
        content = response.content.decode(errors='ignore')
        # Name the parser explicitly so the result does not depend on which
        # parsers happen to be installed; lxml is already required by parsel.
        soup = BeautifulSoup(content, 'lxml')
        fresh = []
        for link in soup.select('.article-list [data-articleid]'):
            article_id = link.attrs['data-articleid']
            if article_id not in seen:
                seen.add(article_id)
                fresh.append(article_id)
        # Stop on an empty page, and also when a page only repeats ids we
        # already have -- otherwise a site that keeps serving the last page
        # for out-of-range page numbers would make this loop endless.
        if not fresh:
            break
        article_ids.extend(fresh)
        page += 1
    return article_ids


def get_all_article(csdn):
    """Download every article of the blog at *csdn* as a Markdown file.

    Args:
        csdn: Base URL of the blog, e.g. ``https://blog.csdn.net/<user>``.
            A trailing slash is tolerated.
    """
    # Build article URLs from the blog that was actually requested; the
    # previous version always pointed at one fixed user's blog.
    base_url = csdn.rstrip('/')
    for article_id in get_all_article_id(csdn):
        article_url = '%s/article/details/%s' % (base_url, article_id)
        get_article(article_url)


def get_article(article_url):
    """Download a single article; public entry point for ``__down_article``.

    Args:
        article_url: Full URL of the article page to download.
    """
    __down_article(article_url)


def _safe_filename(name, fallback='untitled'):
    """Return *name* with characters that cannot appear in a file name replaced."""
    # Path separators, the Windows-reserved punctuation and control characters
    # would either make open() fail or place the file outside the target
    # directory, so each of them becomes an underscore.
    cleaned = ''.join(
        '_' if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch
        for ch in (name or '')
    ).strip()
    return cleaned or fallback


def __down_article(article_url):
    """Fetch one article page and save its body as ``./<user id>/<title>.md``.

    The element ``#content_views`` is converted to Markdown with html2text.
    The directory name comes from the text of ``#uid`` and the file name from
    the ``h1.title-article`` heading. An existing file is never overwritten.

    Args:
        article_url: Full URL of the article page.

    Raises:
        IndexError: If the page has no title heading or no content element.
        requests.RequestException: If the page cannot be fetched (including
            when the request times out).
    """
    # A timeout keeps a stalled connection from blocking the download forever.
    response = requests.get(article_url, headers=headers, timeout=30)
    page = response.content.decode(errors='ignore')
    # Name the parser explicitly so the result does not depend on which
    # parsers happen to be installed; lxml is already required by parsel.
    soup = BeautifulSoup(page, 'lxml')
    user_id = Selector(text=page).css("#uid::text").get()

    title_tags = soup.select('h1.title-article')
    body_tags = soup.select('#content_views')
    if not title_tags or not body_tags:
        raise IndexError('no article title/content found at %s' % article_url)

    # get_text() still yields the heading when it contains nested tags, where
    # `.string` is None and would have produced a file literally named "None".
    title = _safe_filename(title_tags[0].get_text())
    markdown = html2text.HTML2Text().handle(str(body_tags[0].prettify()))

    # A page without #uid previously ended up in a directory called "None".
    path = './%s' % _safe_filename(user_id, fallback='unknown')
    os.makedirs(path, exist_ok=True)
    file = '%s/%s.md' % (path, title)
    try:
        # Mode 'x' creates the file only if it is absent, so the existence
        # check and the creation cannot race with each other.
        with open(file, 'x', encoding='utf-8') as f:
            f.write(markdown)
    except FileExistsError:
        print("%s 已存在!" % file)
    else:
        print("%s 下载成功!" % file)


if __name__ == '__main__':
    # Batch mode (disabled): download every article of one blog.
    # get_all_article('https://blog.csdn.net/mbh12333')
    # Single-article mode: download just this one post.
    get_article('https://blog.csdn.net/u010842515/article/details/65443084')

支持下载单篇博客,也支持批量下载某个用户的所有博客。

评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值