声明:仅供学习交流,请勿用于不正当用途
通过之前的学习,我们成功获取代理IP,有了代理IP,加上伪装用户UA,referer(告诉网站你是通过什么渠道进入这里的),我们可以完全制造一个虚拟的访客,来帮助我们达到一些目的,比如刷访问量
首先是代理IP的获取,直接从之前的博客搬过来:
from bs4 import BeautifulSoup
import requests
import random
import concurrent.futures,os
# Browser-like request headers sent with every scrape/probe request so the
# traffic does not look like a bare script.
headers = {'Upgrade-Insecure-Requests':'1',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
# NOTE(review): advertises sdch/br, which requests cannot decode without
# extra packages — presumably harmless since servers fall back to gzip; verify.
'Accept-Encoding':'gzip, deflate, sdch, br',
'Accept-Language':'zh-CN,zh;q=0.8',
# Ask the server to close the connection after each response; avoids piling
# up keep-alive connections across many short-lived proxy probes.
'Connection':'close',
}
# Echo endpoint used by ip_test to check that a proxy actually forwards traffic.
ip_url = 'http://httpbin.org/ip'
def get_ip_list(url):
    """Scrape candidate proxies ("ip:port") from one xicidaili listing page
    and probe them all concurrently via ip_test.

    Working proxies are appended to 可用IP.txt by ip_test as a side effect.

    :param url: listing-page URL to scrape.
    """
    page = requests.get(url, headers=headers)
    soup = BeautifulSoup(page.text, 'lxml')
    rows = soup.find_all('tr')
    # Skip the header row; in each data row, cell 1 is the IP, cell 2 the port.
    ip_list = []
    for row in rows[1:]:
        cells = row.find_all('td')
        ip_list.append(cells[1].text + ':' + cells[2].text)
    ip_list = list(set(ip_list))  # de-duplicate
    print(ip_list)
    # Guard: ThreadPoolExecutor raises ValueError for max_workers == 0, which
    # the original code hit whenever the scrape returned no rows.
    if not ip_list:
        return
    # One worker per candidate so every 3-second probe runs in parallel.
    with concurrent.futures.ThreadPoolExecutor(len(ip_list)) as pool:
        for ip in ip_list:
            pool.submit(ip_test, ip)
def ip_test(ip):
    """Probe one proxy by fetching ip_url through it; on HTTP 200, append the
    proxy to 可用IP.txt.

    :param ip: proxy address as "host:port".

    NOTE(review): the 'https' entry uses an 'https://' proxy URL, which tells
    requests to speak TLS *to the proxy itself*. Free HTTP proxies normally
    expect 'http://' + ip for both schemes — confirm before relying on HTTPS
    targets (ip_url here is plain http, so only the 'http' entry is exercised).
    """
    proxies = {
        'http': 'http://' + ip,
        'https': 'https://' + ip,
    }
    print(proxies)
    try:
        response = requests.get(ip_url, headers=headers, proxies=proxies, timeout=3)
        if response.status_code == 200:
            with open('可用IP.txt', 'a') as f:
                f.write(ip + '\n')  # one proxy per line
            print('测试通过')
            print(proxies)
            print(response.text)
    # Narrowed from bare Exception: only network/proxy failures are expected
    # here; anything else (e.g. a typo) should surface, not be swallowed.
    except requests.RequestException as e:
        print(e)
def get_random_ip(ip_list):
    """Pick one proxy at random and return a requests-style proxies mapping.

    :param ip_list: non-empty list of "host:port" strings.
    :return: dict with 'http' and 'https' proxy URLs for the chosen entry.
    :raises IndexError: if ip_list is empty (from random.choice).
    """
    # The original copied ip_list element-by-element into a second list before
    # choosing; random.choice can pick from the original sequence directly.
    proxy_ip = random.choice(ip_list)
    return {'http': 'http://' + proxy_ip, 'https': 'https://' + proxy_ip}
if __name__ == '__main__':
    # Scrape the first two pages of the HTTP-proxy listing. Any results file
    # left over from a previous run is removed so 可用IP.txt only contains
    # proxies validated during this run.
    url = 'https://www.xicidaili.com/wt'
    if os.path.exists('可用IP.txt'):
        os.remove('可用IP.txt')
    for page_url in (url, url + '/2'):
        get_ip_list(page_url)
获取到的代理IP储存在 可用IP.txt 文件里,刷访问量的主程序我们另外新建一个py文件
首先是头文件和多个可供使用的伪装UA,referer,以及目标网站:
import requests
import random
import time
# Pool of User-Agent strings rotated per request so consecutive hits look like
# distinct browsers. The first five entries had every space stripped by a
# copy-paste (e.g. 'Mozilla/5.0(compatible;MSIE9.0;...'), producing malformed
# UA values; restored to the canonical strings.
user_agent_list=[
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36'
]
# Referer values rotated per request, so each fake visit appears to arrive
# via a link rather than as a direct hit.
referer_list=[
    'https://blog.csdn.net/Xylon_/article/details/100053138',
    'http://blog.csdn.net/',
]

# Target page whose view count is being inflated.
url = 'https://blog.csdn.net/Xylon_/article/details/100053138'
接下来是主程序
读取文件:
if __name__ == '__main__':
    # Load the proxies harvested by the companion script, one "ip:port" per
    # line. Iterating the file object replaces the original
    # while True / readline() / break pattern; skipping falsy lines prevents
    # a trailing blank line from becoming a broken 'http://' proxy entry.
    ip_list = []
    with open('可用IP.txt', 'r') as f:
        for line in f:
            line = line.strip('\n')
            if line:
                ip_list.append(line)
    print(ip_list)
然后使用所有的代理IP进行模拟访问:
拼接proxies代理IP地址,然后headers随机获取UA和referer,请求页面,一次虚拟的“访问”就完成了
# Simulate one visit per working proxy: each request goes out through a
# different IP with a randomly chosen UA and Referer.
for ip in ip_list:
    proxies = {
        'http': 'http://' + ip,
        'https': 'https://' + ip,
    }
    headers = {
        'User-Agent': random.choice(user_agent_list),
        'Referer': random.choice(referer_list),
    }
    try:
        page = requests.get(url, headers=headers, proxies=proxies, timeout=3)
        if page.status_code == 200:
            print('可取 ' + str(proxies))
        # Random pause between hits — CSDN flags rapid repeat visits from the
        # same source as abuse (see the notes at the end of the post).
        time.sleep(random.randint(5, 30))
    # Narrowed from bare Exception: only network/proxy failures are expected;
    # a dead proxy should be reported and skipped, not crash the loop.
    except requests.RequestException as e:
        print(e)
访问页面是我的上一篇博客:https://blog.csdn.net/Xylon_/article/details/100053138
测试过程中发现CSDN做了反作弊处理,同一时间频繁访问将会被视为非正常行为,因此间隔时间随机取值(30s左右较好)
测试对比
刷之前:
刷之后:
大约十次有效访问,成功率一半一半,如果延长时间或采取其他应对措施可能会更好