# 里面有几个问题,请大神讲讲
'''
日期:2021年1月10日
百度翻译:利用第三方库 execjs 执行 js,计算 form data 里面的 sign 值
js里找到 i ,u 参数
这个脚本仅供学习交流,小白一枚大神勿喷
缺点:过于简单
'''
import requests
import jsonpath
import execjs
from fake_useragent import UserAgent
class BaiDu:
    """Minimal client for the Baidu Translate web endpoints.

    Creating an instance immediately sends a language-detection request for
    ``word`` and computes the ``sign`` form field by executing a local
    JavaScript file; ``get_html`` then sends the translation request itself.

    About ``self``: a value stored as ``self.name`` lives on the instance, so
    every method of that instance can read it. A bare name is only a local
    variable or parameter of the method it appears in (e.g. ``word`` inside
    ``__init__``) or, failing that, whatever module-level global happens to
    carry that name. Only ``self.name`` is reliably tied to this object.

    NOTE(security): ``DEFAULT_COOKIE`` and ``DEFAULT_TOKEN`` are hard-coded
    session values. They look like they were copied from a logged-in browser
    session and presumably expire -- pass your own ``cookie`` / ``token`` to
    the constructor instead of relying on them.
    """

    LANG_DETECT_URL = 'https://fanyi.baidu.com/langdetect'
    TRANSLATE_URL = 'https://fanyi.baidu.com/v2transapi'
    # JavaScript file that defines the signing function ``e``; it is read
    # from the current working directory.
    SIGN_SCRIPT = 'baidu-test.js'
    # Seconds to wait for each HTTP request before giving up; without a
    # timeout ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 10
    DEFAULT_TOKEN = '08a031823bb6373a2cbc56171ca5e03a'
    DEFAULT_COOKIE = 'PSTM=1592717775; BIDUPSID=BE1B387567083C74F8D5CED3C828E0B8; BAIDUID=F7CE4B6E6C66296A3614B1CD6F6BF553:FG=1; BDUSS=d6WVVYTHhNfm8zbkJwbks5QTVvSX5EamZmaE1SRGtyRWxtTzRQMW5SR2poZ0ZnRVFBQUFBJCQAAAAAAAAAAAEAAAAqyXQMc2EzMzY2AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKP52V-j-dlfam; BDUSS_BFESS=d6WVVYTHhNfm8zbkJwbks5QTVvSX5EamZmaE1SRGtyRWxtTzRQMW5SR2poZ0ZnRVFBQUFBJCQAAAAAAAAAAAEAAAAqyXQMc2EzMzY2AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKP52V-j-dlfam; __yjs_duid=1_73118e33f40aaaa3deddfd28764729e71608950605967; H_PS_PSSID=33423_33419_33261_31254_33284_33287_33350_33460_22160_33370; delPer=0; PSINO=2; BA_HECTOR=aga50l042l20a400jc1fvi7p20q; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BAIDUID_BFESS=F7CE4B6E6C66296A3614B1CD6F6BF553:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1610161957; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; yjs_js_security_passport=5315d7c053acd0a3c1511051724e32c095fcd098_1610161956_js; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1610162461; ab_sr=1.0.0_MzAzZDQ3NWI5ZTI3NTUwMWRjMjliZmVkZTNjNTcxZWNlNDdjYmViZWJiNWY1NmZjZTlmZjVmOWJiZTMzNDc5ZTJkYzMwZDM2NzlhNGRhOWE4N2I0OTczZjE3NTVlY2Vm; __yjsv5_shitong=1.0_7_8551d242d1db0166b8486cb44d581e04fbbc_300_1610162460533_60.1.128.76_305b5505'

    def __init__(self, word, cookie=None, token=None):
        """Prepare everything needed to translate ``word``.

        :param word: text to translate.
        :param cookie: value for the ``cookie`` request header; defaults to
            ``DEFAULT_COOKIE``.
        :param token: value for the ``token`` form field; defaults to
            ``DEFAULT_TOKEN``.
        """
        self.word = word
        self.token = self.DEFAULT_TOKEN if token is None else token
        self.headers = {
            "User-Agent": UserAgent().random,
            'cookie': self.DEFAULT_COOKIE if cookie is None else cookie,
        }
        # get_lang() reads self.word and self.headers, so it must run after
        # both have been assigned.
        self.From, self.To = self.get_lang()
        self.fromdata = self._build_form_data(self.get_sign())

    def _build_form_data(self, sign):
        """Return the form fields posted to the translation endpoint."""
        return {
            'from': str(self.From),
            'to': str(self.To),
            'query': str(self.word),
            'simple_means_flag': '3',
            'sign': sign,
            'token': self.token,
            # The original value was ' common' with a stray leading space.
            'domain': 'common',
        }

    @staticmethod
    def _target_lang(detected):
        """Pick the target language: anything other than zh/nor goes to zh."""
        return 'en' if detected in ('zh', 'nor') else 'zh'

    def get_sign(self):
        """Compute the ``sign`` form field for ``self.word``.

        Loads ``SIGN_SCRIPT`` and calls its JavaScript function ``e`` through
        execjs. The JS file is not shipped with this script (per the original
        author's note, copies can be found online).

        :return: whatever the JavaScript function ``e`` returns.
        """
        # Explicit encoding so the file is not decoded with the platform's
        # locale default.
        with open(self.SIGN_SCRIPT, 'r', encoding='utf-8') as script_file:
            script = script_file.read()
        return execjs.compile(script).call('e', str(self.word))

    def get_lang(self):
        """Detect the source language and choose the translation direction.

        Only Chinese -> English and other -> Chinese are handled.

        :return: ``(source_lang, target_lang)`` tuple.
        """
        response = requests.post(
            self.LANG_DETECT_URL,
            headers=self.headers,
            data={'query': str(self.word)},
            timeout=self.REQUEST_TIMEOUT,
        )
        detected = response.json()['lan']
        # Per the original author's note the endpoint answers 'nor' for
        # mistyped input; this is reported but the flow continues.
        if detected == 'nor':
            print('输入错误')
        return detected, self._target_lang(detected)

    def get_html(self):
        """Send the translation request.

        :return: the decoded JSON body of the response.
        """
        response = requests.post(
            f'{self.TRANSLATE_URL}?from={self.From}&to={self.To}',
            headers=self.headers,
            data=self.fromdata,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.json()
def main():
    """Prompt for a word, translate it and print the result."""
    word = input('请输入你的单词:')
    baidu = BaiDu(word)
    # "$..dst" collects every ``dst`` value at any depth of the response.
    fanyi = jsonpath.jsonpath(baidu.get_html(), "$..dst")
    # jsonpath.jsonpath returns False (not an empty list) when nothing
    # matches, so report that explicitly instead of printing a bare "False".
    if fanyi:
        print(fanyi)
    else:
        print('未获取到翻译结果')


if __name__ == '__main__':
    main()