taoqick 生成下一篇文章的脚本(偶尔更新)[Debug]

from bs4 import BeautifulSoup
import urllib
import urllib.request


def IsValidTitle(title):
    """Return True when *title* contains none of the blocked phrases.

    The comparison is a case-insensitive substring test, so a blocked phrase
    rejects the title wherever it occurs inside it.
    """
    blockedPhrases = (
        '帝都',
        'windows anaconda TypeError: LoadLibrary()',
        '[Debug]',
        '【Debug】',
        'iOS',
    )
    loweredTitle = title.lower()
    return not any(phrase.lower() in loweredTitle for phrase in blockedPhrases)


def _StripTypeLabel(rawText):
    """Return *rawText* without surrounding whitespace or a leading type label.

    A single leading '原创' or '转载' label is removed as a whole word.
    Passing those characters to str.strip() would instead treat them as a
    character set and also eat matching characters that belong to the title
    itself, at either end (e.g. '如何转载' would become '如何').
    """
    whitespace = '\t\n\r '
    text = rawText.strip(whitespace)
    for label in ('原创', '转载'):
        if text.startswith(label):
            # Remove the label exactly once, then the gap that followed it.
            # NOTE(review): assumes the label, when present, precedes the
            # title in the heading text - confirm against the page markup.
            return text[len(label):].strip(whitespace)
    return text


def FetchCSDNTitles(sourceUrl):
    """Download one page and return its accepted titles as Markdown links.

    Args:
        sourceUrl: URL of the page to fetch.

    Returns:
        A list of '[title](href)' strings, one for every <h4> element that
        contains at least one <a> and whose cleaned title passes
        IsValidTitle. The href is taken from the first <a> in the heading.
        The list is also printed before it is returned.
    """
    res = []
    with urllib.request.urlopen(sourceUrl) as httpResponse:
        htmlSource = httpResponse.read()
    # The body is fully read, so parsing can happen after the response closes.
    soup = BeautifulSoup(htmlSource, "html.parser")
    for tag in soup.find_all("h4"):
        urls = tag.find_all("a")
        if not urls:
            # Headings without a link cannot produce a Markdown link.
            continue
        title = _StripTypeLabel(tag.get_text())
        if IsValidTitle(title):
            res.append('[{0}]({1})'.format(title, urls[0]['href']))
    print(res)
    return res


# Crawl article-list pages 1 through 23 and write every accepted title link
# to the output file, one link per line.
outputPath = r'E:\Temp\taoqick.txt'
with open(outputPath, 'w', encoding='utf-8') as outputFile:
    for pageNumber in range(1, 24):
        pageUrl = 'https://blog.csdn.net/taoqick/article/list/{0}'.format(pageNumber)
        print('-----------------------------------------\nCrawling ' + pageUrl)
        outputFile.writelines(link + '\n' for link in FetchCSDNTitles(pageUrl))

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值