IP 代理池(ProxyPool)安装:
环境要求:docker、docker-compose
1.git clone https://github.com/Python3WebSpider/ProxyPool.git
2.cd ProxyPool
3.sudo docker-compose up
启动:
4.sudo docker start proxypool
5.sudo docker start redis4proxypool
访问 http://localhost:5555/random ,如果返回一个代理 IP,则表示安装成功!
如果下载速度特别慢,可以自行修改 Dockerfile,修改:
RUN pip install -r requirements.txt
替换为:
RUN pip install -r requirements.txt -i https://pypi.douban.com/simple
获取代理之后动态调取有道翻译:
# -*- coding: UTF-8 -*-
import hashlib
import random
import requests
import time
is_file_ip = False # whether proxies are taken from the ips.txt file instead of the pool service
file_ips = [] # proxy strings loaded from ips.txt
index = 0 # position in file_ips of the proxy currently in use
# Load the proxy list from ips.txt into the module-level file_ips list.
def get_file_ips():
    """Append every non-blank line of ``ips.txt`` to the global ``file_ips``.

    Each line is stripped of surrounding whitespace (which also removes
    Windows line endings).  Blank lines are skipped so an empty string is
    never handed out as a proxy address.

    Raises:
        OSError: if ``ips.txt`` cannot be opened for reading.
    """
    global file_ips
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open('ips.txt', 'r', encoding='utf-8') as f:
        for line in f:
            proxy = line.strip()
            if proxy:
                file_ips.append(proxy)
# Return the proxy address to use for the next request.
def get_proxy():
    """Return one proxy address as a string.

    When the global ``is_file_ip`` is true, the proxy comes from the global
    ``file_ips`` list at position ``index``; the list is loaded lazily via
    ``get_file_ips()`` the first time it is found empty.  Otherwise a proxy
    is requested from the pool service at ``http://localhost:5555/random``.

    Raises:
        IndexError: in file mode, if ``index`` is past the end of
            ``file_ips`` (including when the file yielded no entries).
        requests.RequestException: if the pool service cannot be reached
            within the timeout.
    """
    if is_file_ip:
        if not file_ips:
            get_file_ips()
        return file_ips[index]
    # A timeout keeps a dead or stalled pool service from blocking forever.
    response = requests.get("http://localhost:5555/random", timeout=5)
    # Drop any surrounding whitespace so the value can be embedded in a URL.
    return response.text.strip()
# POST through a proxy, retrying with a different proxy after each failure.
def getHtml(s, url, headers, data, retry_count=5, proxy_2=None):
    """POST ``data`` to ``url`` through a proxy and return the decoded JSON.

    Args:
        s: session-like object whose ``post`` method performs the request.
        url: target URL.
        headers: request headers passed through to ``s.post``.
        data: form data passed through to ``s.post``.
        retry_count: maximum number of attempts.
        proxy_2: proxy to try on the first attempt; when ``None`` (or after
            it has failed once) a proxy is obtained from ``get_proxy()``.

    Returns:
        ``(parsed_json, proxy)`` for the first attempt that succeeds, or
        ``(None, None)`` when every attempt failed or, in file mode, the
        proxy list has been exhausted.
    """
    global index
    for attempt in range(retry_count):
        proxy = proxy_2
        try:
            # Only ask for a new proxy when no known-good one was supplied.
            # The lookup sits inside the try so that a failing pool service
            # or an exhausted file list counts as a failed attempt instead of
            # escaping the retry loop.
            if proxy is None:
                proxy = get_proxy()
            html = s.post(url, headers=headers, data=data,
                          proxies={"http": "http://{}".format(proxy)}, timeout=1)
            return html.json(), proxy
        except Exception:
            # The supplied proxy (if any) is only trusted for one attempt.
            proxy_2 = None
            print(proxy)
            # Attempts left after this failed one.
            print('代{过}{滤}理不通, 剩余机会:' + str(retry_count - attempt - 1), index)
            if is_file_ip:
                # Advance to the next proxy from the file; stop when none remain.
                index += 1
                if index >= len(file_ips):
                    break
    # Every attempt failed.
    return None, None
##################################
# HTTP session shared by the Youdao requests made in this module.
s = requests.Session()
# Module-level MD5 object. hashlib objects accumulate every update() call, so
# a digest taken from it covers all data fed so far, not just the latest input.
m = hashlib.md5()
class Dict:
    """Small client for the Youdao web translation endpoint.

    Requests go through the module-level session ``s`` and the proxy-aware
    helper ``getHtml``.
    """

    def __init__(self):
        """Build the request headers and URL, then run ``base_config()``."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
            'Referer': 'http://fanyi.youdao.com/',
            'contentType': 'application/x-www-form-urlencoded; charset=UTF-8',
            # The last cookie value is the current time in milliseconds.
            'Cookie': 'OUTFOX_SEARCH_USER_ID=-352392290@116.136.20.84; P_INFO=a121bc; OUTFOX_SEARCH_USER_ID_NCOO=710017829.1902944; JSESSIONID=aaaDa3sqezCDY-snjj91w; SESSION_FROM_COOKIE=unknown; ___rl__test__cookies=' + str(
                int(time.time() * 1000))
        }
        self.url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom='
        self.base_config()

    def base_config(self):
        """Set up the basic parameters (cookies) by requesting the Youdao
        home page with the shared session."""
        s.get('http://fanyi.youdao.com/')

    def translate(self, e, from_lang, to_lang, proxy_2):
        """Translate the text ``e`` from ``from_lang`` to ``to_lang``.

        Args:
            e: text to translate.
            from_lang: source language code sent as the ``from`` field.
            to_lang: target language code sent as the ``to`` field.
            proxy_2: proxy to try first, or ``None`` to let ``getHtml`` pick.

        Returns:
            ``(translated_text, proxy)`` on success, where the text joins
            the entries of the response with newlines; ``(None, None)`` when
            the request failed or the response has no ``translateResult``.
        """
        # No manual Content-Length header: the HTTP library derives it from
        # the encoded form body, and a hand-computed constant would be wrong
        # for most inputs.
        ts = str(int(time.time() * 1000))
        salt = ts + str(random.randint(0, 9))
        raw = 'fanyideskweb' + e + salt + "n%A-rKaT5fb[Gy?;N5@Tj"
        # Hash with a fresh MD5 object on every call. Reusing one shared
        # object would mix in the data of all earlier calls and produce a
        # wrong signature from the second call onwards.
        sign = hashlib.md5(raw.encode('utf-8')).hexdigest()
        data = {
            'i': e,
            'from': from_lang,
            'to': to_lang,
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': salt,
            'sign': sign,
            'ts': ts,
            'bv': '53539dde41bde18f4a71bb075fcf2e66',
            'doctype': 'json',
            'version': "2.1",
            'keyfrom': "fanyi.web",
            'action': "FY_BY_REALTlME"
        }
        rt, proxy_2 = getHtml(s, self.url, self.headers, data, 100, proxy_2)
        if rt is None:
            return None, None
        # A reply without 'translateResult' is treated like any other failure
        # rather than raising KeyError.
        paragraphs = rt.get('translateResult') if isinstance(rt, dict) else None
        if paragraphs is None:
            return None, None
        # NOTE(review): only the first entry of each group is used; confirm
        # whether a group can carry more than one translated segment.
        return '\n'.join(c[0]['tgt'] for c in paragraphs), proxy_2
def parse_trans(query, has_return, from_lang, to_lang, proxy_2=None, is_file=False):
    """Translate ``query`` with Youdao, optionally translating it back again.

    Args:
        query: text to translate.
        has_return: when true, the ``from_lang`` -> ``to_lang`` translation
            is printed and returned.  When false, that translation is fed
            back through this function in the opposite direction and the
            round-trip result is returned.
        from_lang: source language code.
        to_lang: target language code.
        proxy_2: a proxy known to work, tried first; ``None`` to pick one.
        is_file: whether proxies are read from the ips.txt file; stored in
            the global ``is_file_ip``.

    Returns:
        The translated text as ``str``, or ``None`` if translation failed.
    """
    global is_file_ip
    is_file_ip = is_file
    try:
        dic = Dict()
        result, proxy_2 = dic.translate(query, from_lang, to_lang, proxy_2)
        if result is None:
            return None
        if has_return:
            print('youdao----', result)
            return str(result)
        # Translate back. ``is_file`` is forwarded so the second pass keeps
        # using the same proxy source instead of silently resetting it.
        return parse_trans(str(result), True, to_lang, from_lang, proxy_2, is_file)
    except Exception:
        # Best effort: any failure yields None. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return None
def write_file_ips(duration=60, path='ips.txt'):
    """Collect proxies from ``get_proxy()`` and write them to a file.

    Proxies are fetched repeatedly until more than ``duration`` seconds have
    passed; at least one proxy is always fetched.  Every fetched proxy is
    printed, but each distinct proxy is written only once, one per line.

    Args:
        duration: how long to keep collecting, in seconds (default 60).
        path: file to (over)write (default ``'ips.txt'``).
    """
    seen = set()
    # Monotonic clock: unaffected by system clock adjustments.
    deadline = time.monotonic() + duration
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(path, 'w', encoding='utf-8') as f:
        while True:
            proxy = str(get_proxy())
            print(proxy)
            if proxy not in seen:
                seen.add(proxy)
                f.write(proxy + '\n')
            if time.monotonic() > deadline:
                break
import threading
if __name__ == '__main__':
    # Demo: has_return=False, so the sample text is translated zh-CN -> en
    # and the English result is then translated back.
    result = parse_trans('我吃饭了\n大饼\n包子', False, 'zh-CN', 'en')
    # Alternative manual checks, kept disabled:
    # write_file_ips()
    # get_file_ips()
    # a = get_proxy()
    # print('--------------', a)