2019/7/15
发现之前写的有道翻译爬虫不能用了,老是报 {'errorCode': 50},
用Fiddler检查了一下收发包,没发现header或者data有什么问题。从网上找了一下有道翻译反爬虫的答案,思路都差不多,但解决不了问题。
刚才突然想到可能是加密内容变了(毕竟很好实现,改个常量就行),我就查看了有道翻译的js文件,搜索了一下关键字就发现问题所在了,
就这个地方,组成sign签名的常量变了,所以会访问失败,在代码里改了就成功了。
这是之前用的常量(旧值在此处缺失)。
更改为:self.D = "97_3(jkMYg@T[KZQmqjTK"(即下方代码中使用的值)
下面是完整代码,写得很清晰了:
"""
Created on Mon Jul 15 13:38:25 2019
@author: Happyhui
"""
# 有道翻译爬虫
import hashlib
import requests
import json
import time
import random
class Youdao:
    """Minimal client for the Youdao web-translation endpoint.

    An instance wraps one piece of text. The constructor stores the text and
    precomputes the ``salt`` and ``sign`` form fields; ``get_result`` posts
    the form and reports the translation.

    NOTE(review): the signing constant ``D``, the ``ts``/``bv`` form fields
    and the ``Cookie`` header are hard-coded. They look like values captured
    from one browser session and may need refreshing if the service starts
    rejecting requests -- confirm against a live request.
    """

    def __init__(self, msg: str, timeout: float = 10) -> None:
        """Store the text to translate and derive the request signature.

        Args:
            msg: Text to translate.
            timeout: Seconds to wait for the HTTP response before
                ``requests`` gives up. Without a timeout a stalled
                connection would block forever.
        """
        self.msg = msg
        self.timeout = timeout
        self.url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
        # Constant mixed into the MD5 signature (see get_sign).
        self.D = "97_3(jkMYg@T[KZQmqjTK"
        # Order matters: get_sign() reads self.salt.
        self.salt = self.get_salt()
        self.sign = self.get_sign()

    def get_md(self, value: str) -> str:
        """Return the hexadecimal MD5 digest of ``value`` encoded as UTF-8."""
        return hashlib.md5(value.encode('utf-8')).hexdigest()

    def get_salt(self) -> str:
        """Return the ``salt`` field: millisecond timestamp plus a small random offset.

        The offset is an integer in ``[0, 10]`` added arithmetically to the
        current time in milliseconds; the sum is returned as a string.
        """
        millis = int(time.time() * 1000)
        return str(millis + random.randint(0, 10))

    def get_sign(self) -> str:
        """Return the ``sign`` field.

        The signature is the MD5 of the client name, the text, the salt and
        the signing constant concatenated in that order.
        """
        return self.get_md("fanyideskweb" + self.msg + self.salt + self.D)

    def get_result(self):
        """Post the translation request and report the outcome.

        Prints the translated text on success. If the response has no
        ``translateResult`` key the decoded response is printed instead, and
        if the key is present but not shaped as expected ``提取失败`` is
        printed.

        Returns:
            The translated text, or ``None`` when no translation could be
            extracted.

        Raises:
            requests.exceptions.RequestException: on network failure or
                timeout.
            ValueError: if the response body is not valid JSON.
        """
        # Only these three headers are sent. The browser also sends Host,
        # Connection, Content-Length, Accept, Origin, X-Requested-With,
        # Content-Type, Accept-Encoding and Accept-Language, but those can
        # be omitted.
        # NOTE(review): the Cookie is a fixed captured value and appears to
        # embed session/account identifiers -- consider replacing it.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
            "Referer": "http://fanyi.youdao.com/",
            "Cookie": 'OUTFOX_SEARCH_USER_ID=1685905330@112.6.124.172; OUTFOX_SEARCH_USER_ID_NCOO=1489454309.4343596; UM_distinctid=16b302361e00-08950154a71e7c-4d045769-100200-16b302361e11e5; P_INFO=15563729402|1559880798|1|youdaodict|00&99|null&null&null#shd&370200#10#0|&0||15563729402; _ntes_nnid=d30ea1176507c3c2e18519b36f4563b8,1561278665392; JSESSIONID=abc2BYk9QiwEQho0M60Vw; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; ___rl__test__cookies=1563184107229',
        }
        data = {
            "i": self.msg,
            "from": "auto",
            "to": "auto",
            "smartresult": "dict",
            "client": "fanyideskweb",
            "salt": self.salt,
            "sign": self.sign,
            # NOTE(review): ts and bv are fixed values, not derived from the
            # current request -- confirm the server does not validate them.
            "ts": "1563184107237",
            "bv": "f355c521b6e13c15aa35c72a097b7786",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            # Spelled with a lowercase 'l' in "REALTlME"; kept exactly as is.
            # NOTE(review): check a live request before "correcting" it.
            "action": "FY_BY_REALTlME",
        }
        response = requests.post(
            self.url, data=data, headers=headers, timeout=self.timeout
        )
        # Decode the JSON response body.
        infos = json.loads(response.text)

        if "translateResult" not in infos:
            # No translation present: show the raw response for diagnosis.
            print(infos)
            return None

        try:
            # Extract the translated text of the first segment.
            result = infos['translateResult'][0][0]['tgt']
        except (KeyError, IndexError, TypeError):
            # The key exists but the payload is not shaped as expected.
            print("提取失败")
            return None

        print(result)
        return result
if __name__ == "__main__":
    # Read the text to translate from stdin, then send it and print the outcome.
    translator = Youdao(input("请输入要查询的内容:"))
    translator.get_result()