import requests as req
import threading
from bs4 import BeautifulSoup
import time
# ---- configuration --------------------------------------------------------
blog_url = 'csdn博客地址'  # URL of the blog overview page to scrape
is_proxy = True            # whether to route requests through the proxy below
user = '123456'            # proxy user name
password = '123456'        # proxy password
loop = 3                   # how many times each article gets requested

# Proxy map in the shape requests expects ({scheme: proxy-url}).
proxy = {
    "http": f"http://{user}:{password}@10.191.131.43:3128",
    "https": f"http://{user}:{password}@10.191.131.43:3128",
}

# Minimal headers so the request looks like a regular browser visit.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
}

if not is_proxy:
    proxy = None  # requests treats proxies=None as "connect directly"

# Fetch the overview page and collect the href of every article link.
overview_page = req.get(blog_url, proxies=proxy, headers=header)
soup = BeautifulSoup(overview_page.text, 'lxml')
blog_href_list = [anchor["href"] for anchor in soup.select("div.article-item-box h4 a")]

delay_time = 60  # seconds to wait between two requests for the same article
def up_click(href_list, proxy, header, loop_num):
    """Start a request chain for every article URL in *href_list*.

    Each URL is handed to ``req_get``, which performs the first request
    immediately and schedules the remaining ``loop_num - 1`` requests on
    a timer.
    """
    for article_url in href_list:
        req_get(article_url, proxy, header, loop_num)
def req_get(href, proxy, header, loop_num):
    """Request *href* once, then schedule the remaining requests.

    Performs a single GET on the article URL (one "click"), logs the
    timestamp, and re-schedules itself via ``threading.Timer`` every
    ``delay_time`` seconds until ``loop_num`` requests have been made.

    Bug fix: the original decremented first and only returned on
    ``loop_num == 0``, so a call with ``loop_num <= 0`` skipped past
    zero and kept scheduling timers forever. Guard on entry and use
    ``<=`` so non-positive counts are a no-op; for positive counts the
    total number of requests is unchanged.
    """
    if loop_num <= 0:
        return  # nothing (left) to do — also stops bad/zero input safely
    req.get(href, proxies=proxy, headers=header)
    print(time.time(), href)  # log when each click was sent
    remaining = loop_num - 1
    if remaining > 0:
        # Clicks within one minute are not counted by the site, so the
        # follow-up requests are spaced delay_time (60 s) apart.
        threading.Timer(delay_time, req_get, (href, proxy, header, remaining)).start()
up_click(blog_href_list,proxy,header,loop)  # kick off the click chain for every collected article
# NOTE: the requests must be sent with a delay, because clicks on an
# article page within one minute are not counted. That is why
# delay_time = 60 is set and threading.Timer is used as the scheduler.