一、爬取逻辑
基础路径:http://fanyi.youdao.com/
根据浏览器开发者工具 Network 面板中的 XHR 请求,可以判断翻译接口采用 ajax 请求方式
二、代码实现
import random
import time,hashlib
import requests
def get_md5(value):
    """Return the hexadecimal MD5 digest of a text string.

    :param value: text to hash; it is encoded as UTF-8 before hashing
    :return: 32-character lowercase hexadecimal digest
    """
    return hashlib.md5(value.encode('utf-8')).hexdigest()
def _extract_translation(payload):
    """Pull the translated text out of a decoded response dictionary.

    The ``smartResult.entries`` list is preferred. When it is missing or
    empty, the ``tgt`` fields found under ``translateResult`` are used instead.

    :param payload: decoded JSON response (a dict)
    :return: concatenated translation text; empty string if nothing is found
    """
    smart_result = payload.get('smartResult') or {}
    entries = smart_result.get('entries') or []
    if entries:
        return ''.join(entries)

    # NOTE(review): this fallback assumes ``translateResult`` is a list of
    # paragraphs, each a list of dicts carrying a ``tgt`` key -- confirm
    # against a live response.
    pieces = []
    for paragraph in payload.get('translateResult') or []:
        for segment in paragraph:
            if isinstance(segment, dict):
                pieces.append(segment.get('tgt') or '')
    return ''.join(pieces)


def fanyi(kw, timeout=10):
    """
    Translate a word by posting it to the Youdao web translation endpoint.

    :param kw: the text to translate
    :param timeout: seconds to wait for the HTTP request before giving up
    :return: the translation as a str, or None when the response is not
        valid JSON or reports a non-zero ``errorCode`` (an error message is
        printed in that case)
    """
    base_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    # Millisecond timestamp; the salt is that timestamp plus one random digit.
    ts = str(int(time.time() * 1000))
    salt = ts + str(random.randint(0, 9))
    # Signature: MD5 over client id + text + salt + a fixed secret string.
    sign = get_md5("fanyideskweb" + kw + salt + "Nw(nmmbP%A-r6U3EUn]Aj")

    data = {
        'i': kw,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': sign,
        'ts': ts,
        'bv': '324a37ffcab402e0b2c0d6b645f8543b',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTlME',
    }

    # Content-Length is deliberately not set here: requests derives it from
    # the encoded form body, so a hard-coded value would only be misleading.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        # Client identification
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
        # Form body type
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # Marks the request as an ajax call
        'X-Requested-With': 'XMLHttpRequest',
        # Where the request claims to come from (anti-hotlinking check);
        # an important header for crawlers.
        'Referer': 'http://fanyi.youdao.com/',
        'Host': 'fanyi.youdao.com',
        'Origin': 'http://fanyi.youdao.com',
        # NOTE(review): these cookie values look like they were captured from
        # a browser session -- they may need refreshing if requests start failing.
        'Cookie': 'OUTFOX_SEARCH_USER_ID=299610334@10.169.0.83; OUTFOX_SEARCH_USER_ID_NCOO=1705126783.3297095; _ga=GA1.2.834458747.1587627882; JSESSIONID=aaaBwJH4pSvsj4Samx6ix; ___rl__test__cookies=1590118618294',
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(base_url, headers=headers, data=data, timeout=timeout)

    try:
        json_data = response.json()
    except ValueError:
        # The body was not JSON (for example an HTML error page).
        print('请求有误!')
        return None

    if not isinstance(json_data, dict) or json_data.get('errorCode') != 0:
        print('请求有误!')
        return None

    return _extract_translation(json_data)
if __name__ == '__main__':
    # Interactive entry point: read one word from stdin and show its translation.
    kw = input('请输入你要查询的单词:')
    result = fanyi(kw)
    # fanyi() yields None after it has already printed an error message;
    # skip the print so the literal text "None" is not shown to the user.
    if result is not None:
        print(result)