#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
# @Time : 2019/11/26 11:26
# @Author : mason.tang
# @File : testclickcsdn.py
import requests
from lxml import html
import time
import random
def auto_click(url, pageNum=4, num=1):
    """Request every article linked from a blog's article-list pages.

    For each of ``num`` rounds a User-Agent header is chosen at random, the
    list pages ``<url>/article/list/0?`` .. ``<url>/article/list/<pageNum-1>?``
    are fetched, and every article link found on them is requested once,
    followed by a random pause of 120-300 seconds.

    Args:
        url: Base URL of the blog without a trailing slash.
        pageNum: Number of article-list pages to walk (indices start at 0).
        num: Number of complete rounds to perform. Values <= 0 do nothing
            (previously a negative value looped forever).

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # requests applies no timeout by default, so a stalled connection would
    # block the script forever; bound every request (seconds).
    request_timeout = 10

    headersList = [
        {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'},
        {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'},
    ]

    # Loop invariants, hoisted out of the request loops.
    base_page_link = url + '/article/list/'
    # etree is reached through lxml.html, exactly as the original code did.
    etree = html.etree
    article_xpath = '//div[@class="article-item-box csdn-tracking-statistics"]//h4/a/@href'

    # Count rounds upwards so the "N-th click" message reads correctly; the
    # default num=1 prints the same text as before.
    for round_no in range(1, num + 1):
        req_headers = random.choice(headersList)
        print("req_headers", req_headers)

        for i in range(pageNum):
            real_page_link = base_page_link + str(i) + '?'
            print("real_page_link", real_page_link)

            # Collect all article links on this list page.
            try:
                # NOTE(review): verify=False disables TLS certificate checks.
                # Kept to preserve existing behaviour; drop it if the host's
                # certificate validates in the target environment.
                resp = requests.get(real_page_link, verify=False,
                                    headers=req_headers,
                                    timeout=request_timeout)
            except requests.RequestException as exc:
                # One failed page must not abort a run that lasts hours.
                print("request failed", real_page_link, exc)
                continue

            if resp.status_code != requests.codes.ok:
                continue

            tree = etree.HTML(resp.text)
            if tree is None:
                # Presumably only happens for an empty/unparsable body;
                # skip instead of failing on ``None.xpath``.
                continue

            article_links = tree.xpath(article_xpath)
            print("article_links", article_links)

            # Request each article once, then pause before the next one.
            for article_link in article_links:
                try:
                    requests.get(article_link, verify=False,
                                 headers=req_headers,
                                 timeout=request_timeout)
                except requests.RequestException as exc:
                    print("request failed", article_link, exc)
                    continue
                print('正在第 [{0}] 次点击链接 {1}'.format(round_no, article_link))
                time.sleep(random.randint(120, 300))
# Script entry point: run one default pass (4 list pages, 1 round) over the
# blog below when the file is executed directly.
if __name__ == "__main__":
    auto_click(url="https://blog.csdn.net/qq_39813400")
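# Purpose of this script: increase web page views.
# (Blog-page footer captured with the code: "latest recommended article published 2021-02-25 20:42:08".)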