# -*- coding:utf-8 -*-
# Use a crawler to inflate CSDN blog post view counts
import requests
from bs4 import BeautifulSoup
# 解析源码
def GetHtmlText(url):
try:
r = requests.get(url, timeout = 30)
r.raise_for_status()
r.encoding = 'utf-8'
return r.text
except:
return ''
# 查找博文地址并进行一次点击
def Find_Click(soup):
Divs = soup.find_all('div', {'class': 'article-item-box csdn-tracking-statistics'})
for Div in Divs:
ClickUrl = Div.find('h4').find('a')['href']
# 点一下
Click = requests.get(ClickUrl, timeout = 30)
def main():
# 博文页数
Pages = int(input('Please enter the number of blog pages:'))
for Page in range(1, Pages + 1):
print('Page=', Page)
# 博客地址,这里是我的CSDN博客地址
url = 'https://blog.csdn.net/qq_44621510/article/list/' + str(Page)
html = GetHtmlText(url)
soup = BeautifulSoup(html, 'html.parser')
Find_Click(soup)
if __name__ == '__main__':
main()
# Usage notes:
# After starting the program, enter your blog's page count and it will
# visit every article once, i.e. add 1 to each article's view count.
#
# TODO / known limitations:
# 1. Each run adds only 1 view per article; to add more, run it again.
# 2. Clicking is slow — roughly 15 seconds per page (20 articles).
# 3. The page count must be re-entered on every run.