# Scrape Youdao Translate: send POST data with requests and extract the translation from the response
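'''
Scraping plan
New URL: http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
Old URL: http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule
The old URL (the one without "_o") has no anti-scraping checks; the "_o" URL does.
Request method: POST
Form data parameters: doctype: json ; i: 颜 (the text to translate)
Response type: application/json
'''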
# Part 1: using the old URL (no anti-scraping checks)
from urllib import request
from urllib import error
import re
import requests
import json
import time,random,hashlib
def translate(keyword):
    """Translate *keyword* via the legacy Youdao endpoint and print the result.

    Posts ``i=<keyword>`` and ``doctype=json`` as form data, then prints the
    ``tgt`` field of the first segment in ``translateResult``.  Any failure
    (connection error or timeout, non-200 status, a body that is not JSON,
    or a JSON payload without the expected fields) prints '请求失败'
    instead of raising, so an interactive caller keeps running.

    Args:
        keyword: Text to translate.

    Returns:
        None. The outcome is written to stdout.
    """
    url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
    data = {'i': keyword, 'doctype': 'json'}

    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        res = requests.post(url, data=data, timeout=10)
    except requests.RequestException:
        print('请求失败')
        return

    if res.status_code != 200:
        print('请求失败')
        return

    try:
        # res.json() raises a ValueError subclass on a non-JSON body; the
        # lookups raise KeyError/IndexError/TypeError when the payload does
        # not have the translateResult[0][0]['tgt'] shape.
        target = res.json()['translateResult'][0][0]['tgt']
    except (ValueError, KeyError, IndexError, TypeError):
        print('请求失败')
    else:
        print(target)
# Script entry point: the prompt loop runs only when executed directly.
if __name__ == '__main__':
    # Read once up front, then keep translating until the user enters 'q'.
    keyword = input('请输入翻译的单词:')
    while keyword != 'q':
        translate(keyword)
        keyword = input('请输入翻译的单词:')
# Part 2: the new URL has anti-scraping checks; the code below shows how to satisfy them
from urllib import request
from urllib import error
import re
import requests
import json
import time,random,hashlib
def salt_sign(keyword):
    """Compute the per-request ``salt`` and ``sign`` values for ``translate_o``.

    Args:
        keyword: Text to translate (str); it is part of the signed string.

    Returns:
        A ``(salt, sign, now_time)`` tuple:
        ``now_time`` is the current Unix time in whole milliseconds,
        ``salt`` is ``now_time`` plus a random integer from 1 to 10, and
        ``sign`` is the hex MD5 digest of the UTF-8 encoding of
        ``'fanyideskweb' + keyword + str(salt) + ']BjuETDhU)zqSxf-=B#7m'``.
    """
    client_prefix = 'fanyideskweb'
    secret_suffix = ']BjuETDhU)zqSxf-=B#7m'

    timestamp_ms = int(time.time() * 1000)
    nonce = timestamp_ms + random.randint(1, 10)

    # Same string the original built by concatenation, hashed in one update.
    plain_text = ''.join((client_prefix, keyword, str(nonce), secret_suffix))
    digest = hashlib.md5()
    digest.update(plain_text.encode('utf-8'))

    return (nonce, digest.hexdigest(), timestamp_ms)
# When the URL contains "_o", the site applies anti-scraping checks to the request.
def translate(keyword):
    """Translate *keyword* via the signed ``translate_o`` endpoint and print the reply.

    Builds the form data and browser-like headers, posts them, and prints the
    decoded JSON payload.  Connection errors, timeouts, non-200 responses and
    bodies that are not JSON print '请求失败' instead of raising.

    Args:
        keyword: Text to translate.

    Returns:
        None. The outcome is written to stdout.
    """
    salt, sign, now_time = salt_sign(keyword)
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    data = {
        'i': keyword,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,  # changes on every request
        'sign': sign,  # changes on every request
        # Use the same millisecond timestamp that salt and the cookie are
        # built from, rather than a frozen value from an old capture.
        # NOTE(review): assumes the server expects lts to match salt's
        # timestamp — confirm against a live browser request.
        'lts': str(now_time),
        'bv': '0785986963146aebf8c240a24088d066',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTlME',
    }
    # Headers copied from a captured browser request.  Content-Length is
    # intentionally not set here: the body size depends on the keyword, and
    # requests computes the correct value for the encoded form body.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'OUTFOX_SEARCH_USER_ID=-202005280@10.169.0.83; OUTFOX_SEARCH_USER_ID_NCOO=673265493.3751627; _ntes_nnid=3099b0d9c57c5e886c29f7d76744a2df,1598710309022; JSESSIONID=aaa4id-xpauiPgctt7Yxx; ___rl__test__cookies=' + str(now_time),
        'Host': 'fanyi.youdao.com',
        'Origin': 'http://fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        res = requests.post(url, data=data, headers=headers, timeout=10)
    except requests.RequestException:
        print('请求失败')
        return

    if res.status_code != 200:
        print('请求失败')
        return

    try:
        json_data = res.json()
    except ValueError:
        # The server answered 200 with a body that is not JSON.
        print('请求失败')
        return
    print(json_data)
# Script entry point: the prompt loop runs only when executed directly.
if __name__ == '__main__':
    # iter(callable, sentinel) keeps prompting until the input equals 'q'.
    for keyword in iter(lambda: input('请输入翻译的单词:'), 'q'):
        translate(keyword)