这篇文章用来写 JS 逆向百度翻译的爬虫部分,希望过审。看完上一篇文章,我们就可以获得最难得到的 sign 值了,而其他部分也比较容易实现。
分析
首先我们打开百度翻译的页面(看过我上一篇的可以跳过)然后f12查看里面的接口地址,并且查看我们需要上传的参数。
OK,然后我们再加上请求头(headers)。这里只需要 Cookie 和 User-Agent 就行;大多数网站都是这些,也有些带反爬的网站需要加 Referer,甚至更多字段。
有了这些之后就可以开始写代码了。
导库
import requests
import TestBaidu
import zhEn
这里我将几个方法写在了别的文件里,所以需要对它们进行导入。
TestBaidu
import execjs
def signGet(Query):
    """Compute the ``sign`` value for a query using the bundled JavaScript.

    Reads ``translate_js.js`` (a path relative to the working directory),
    compiles it with execjs, and invokes its JavaScript function ``e`` with
    the query text.

    Args:
        Query: The text for which the sign is requested.

    Returns:
        Whatever the JavaScript function ``e`` returns for ``Query``.
    """
    with open('translate_js.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    # The script is compiled afresh on every call.
    js_context = execjs.compile(js_source)
    return js_context.call('e', Query)
zhEn
本来我只想写中英转换的,但是我发现了一个模块(langid),详细了解请看这篇文章。它主要是一个语种识别模块,这样我们只需对它的识别结果做特定的更改,就可以实现自动语种识别。不过目前我只对中、英、韩、日四种语言进行了优化,所以只能在这几种语言之间互翻,但也可以满足普通需求了。
import langid
def is_lang(you_input):
    """Detect the language of ``you_input`` and return its language code.

    The code is the first element of ``langid.classify(you_input)``. Two
    codes are renamed ('ko' -> 'kor', 'ja' -> 'jp'); every other code is
    returned unchanged.

    Args:
        you_input: The text whose language should be detected.

    Returns:
        The (possibly renamed) language code string.
    """
    detected = langid.classify(you_input)[0]
    # Presumably these are the spellings the translation endpoint expects
    # for Korean and Japanese -- confirm against the API.
    renamed = {'ko': 'kor', 'ja': 'jp'}
    return renamed.get(detected, detected)
def to_lang(you_input):
    """Map a menu choice to a target-language code.

    Args:
        you_input: The menu choice; '1', '2', '3' and '4' are recognised.

    Returns:
        'zh', 'en', 'kor' or 'jp' for choices '1'-'4' respectively, and
        'zh' for any other value.
    """
    choices = (
        ('1', 'zh'),
        ('2', 'en'),
        ('3', 'kor'),
        ('4', 'jp'),
    )
    # Plain equality scan rather than a dict lookup, so any value (even an
    # unhashable one) falls through to the default instead of raising.
    for choice, lang_code in choices:
        if you_input == choice:
            return lang_code
    return 'zh'
main代码
def demobaiduE_Z(need, to_lang):
    """Translate ``need`` via Baidu's ``v2transapi`` endpoint and print it.

    The source language is detected with ``zhEn.is_lang`` and the target
    language is derived from the menu choice with ``zhEn.to_lang``. The
    request is signed with ``TestBaidu.signGet``.

    Args:
        need: The text to translate.
        to_lang: The menu choice passed to ``zhEn.to_lang``.

    Returns:
        The translated text (all returned segments joined with newlines),
        or None when the request fails or the response carries no
        translation. A message is printed in the failure cases instead of
        raising, so an interactive caller can keep running.
    """
    url = "https://fanyi.baidu.com/v2transapi"
    # NOTE(review): the Cookie below embeds a logged-in session (BDUSS) and
    # is paired with the hard-coded "token" in the form data. Both should be
    # rotated and loaded from configuration rather than kept in source.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36",
        # The cookie is one header value, written as two adjacent literals.
        "Cookie": (
            "BIDUPSID=BE836E9EFEAC25FF0819736738B564FE; PSTM=1634205747; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; HISTORY_SWITCH=1; __yjs_duid=1_e6b1f1bc7d1116ffdf4b11449d5dee1a1638085737645; BAIDUID=BE836E9EFEAC25FF23BD9F047BD2EA4C:SL=0:NR=10:FG=1; BDUSS=jJtTUF1MHdHTVJqRHlYajVoVlVUNXdPTnQ1UENPMXRGNW1Lc1Vhbm95THZ2UHRoRVFBQUFBJCQAAAAAAAAAAAEAAACV24pgZGlkamtza3MAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAO8v1GHvL9RhM; BDUSS_BFESS=jJtTUF1MHdHTVJqRHlYajVoVlVUNXdPTnQ1UENPMXRGNW1Lc1Vhbm95THZ2UHRoRVFBQUFBJCQAAAAAAAAAAAEAAACV24pgZGlkamtza3MAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAO8v1GHvL9RhM; APPGUIDE_10_0_2=1; BDSFRCVID=Lf0OJeC62w0Se_6Hgd1PulYMrX2MQPjTH6aoeCZ7og8PDWTRKukAEG0PhU8g0K4bGxQJogKK3gOTH4CF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tbIOoILhJDD3jP-k245KM6vbqxbXq-7i02OZ0l8KtD3Y8JogXU7JMlkt5fvkWRDt-gvC-UjmWIQHDp3Db-7d5JI-MUADLtKfbI64KKJxWhCWeIJo5fc13JT-hUJiB5JMBan7_pOIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbCPljTLajTvMbeTDbC62aKDsKpK2BhcqEIL4jn5H-n-gyMJBXPbtaN5uQJohtIoMfxbSj4Qojt4X5MLLWhOeBDbR3hne2p5nhMJF3j7JDMP0qec9qxTy523iob3vQpPMEpQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0D6jyjGDHJj_sKC7JB4QVbPI_j-5p5PoE-PAe5f8X5-RLfKv0al7F5l8-hCQ-2JDM0xksbfOd-47kB6ri2561MncxOKQphUuaj60fyfvGWfQ-B5vqQ-oN3KJmMqC9bT3vj-D-Mb3M2-biWb7M2MbdJpOP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe6-5D530eHKtq-cJHjrtWJoS-5rMDPtry6brXUI8LNDHhUJBMJ7Ban3TKDbhDMTEK4QcDJ80XnO7ttoyMJbp2nRq-lbZqMjJLxT_MML1Db3iW6vMtg3C3foga4OoepvoDPJc3Mv3LPjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEtbCq_KLMtCt3fP36q45Eh-FVMfv0etJyaR3r0DJvWJ5TMCo6DhbqBTKnbU8DQnQQymjHhIozLxJ8ShPC-tnqLpTWhUtOQU3UQNriLRvp3l02Vbnae-t2yT0VQp7IBtRMW20e0h7mWIbTsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjj6jK4JKDNAJJToP; BDSFRCVID_BFESS=Lf0OJeC62w0Se_6Hgd1PulYMrX2MQPjTH6aoeCZ7og8PDWTRKukAEG0PhU8g0K4bGxQJogKK3gOTH4CF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; "
            "H_BDCLCKID_SF_BFESS=tbIOoILhJDD3jP-k245KM6vbqxbXq-7i02OZ0l8KtD3Y8JogXU7JMlkt5fvkWRDt-gvC-UjmWIQHDp3Db-7d5JI-MUADLtKfbI64KKJxWhCWeIJo5fc13JT-hUJiB5JMBan7_pOIXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbCPljTLajTvMbeTDbC62aKDsKpK2BhcqEIL4jn5H-n-gyMJBXPbtaN5uQJohtIoMfxbSj4Qojt4X5MLLWhOeBDbR3hne2p5nhMJF3j7JDMP0qec9qxTy523iob3vQpPMEpQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0D6jyjGDHJj_sKC7JB4QVbPI_j-5p5PoE-PAe5f8X5-RLfKv0al7F5l8-hCQ-2JDM0xksbfOd-47kB6ri2561MncxOKQphUuaj60fyfvGWfQ-B5vqQ-oN3KJmMqC9bT3vj-D-Mb3M2-biWb7M2MbdJpOP_IoG2Mn8M4bb3qOpBtQmJeTxoUJ25DnJhbLGe6-5D530eHKtq-cJHjrtWJoS-5rMDPtry6brXUI8LNDHhUJBMJ7Ban3TKDbhDMTEK4QcDJ80XnO7ttoyMJbp2nRq-lbZqMjJLxT_MML1Db3iW6vMtg3C3foga4OoepvoDPJc3Mv3LPjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEtbCq_KLMtCt3fP36q45Eh-FVMfv0etJyaR3r0DJvWJ5TMCo6DhbqBTKnbU8DQnQQymjHhIozLxJ8ShPC-tnqLpTWhUtOQU3UQNriLRvp3l02Vbnae-t2yT0VQp7IBtRMW20e0h7mWIbTsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJIjj6jK4JKDNAJJToP; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; delPer=0; PSINO=7; BDRCVFR[qx2AoFthGvY]=pHe4utUwz0tuAFWUhwCQhPEUf; BAIDUID_BFESS=BE836E9EFEAC25FF23BD9F047BD2EA4C:SL=0:NR=10:FG=1; BDRCVFR[M7BGybOYvDY]=mbxnW11j9Dfmh7GuZR8mvqV; H_PS_PSSID=26350; BA_HECTOR=8gak8k000l840h005g1gutfce0r; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1642693465,1642743352,1643036051,1643036058; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1643039300; ab_sr=1.0.1_YTRjZDdiMmI1MzUzZThhMGIxMDU1YTU3OWIzYWFjNTllNWU4MWRlY2RmNjFiYzU5M2M4YjE1YTcwNDY2YzMwOTQ4NDE3OWFiNDI5OGJjNWMzNDQ5Zjc0ODU3Y2EwYWY3ZGJkMjg5ZjE5ZmJmNmI0MThkNjkwYWU0MTRkMzVhNzc="
        ),
    }
    dic = {
        "from": zhEn.is_lang(need),
        "to": zhEn.to_lang(to_lang),
        "query": need,
        "transtype": "realtime",
        "simple_means_flag": "3",
        "sign": TestBaidu.signGet(need),
        "token": "b18f8e806097af8e2d1db55c7ddefca4",
        "domain": "common",
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller.
        res = requests.post(url, data=dic, headers=header, timeout=10)
        res.raise_for_status()
        payload = res.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        print("翻译请求失败:", exc)
        return None

    try:
        segments = payload["trans_result"]["data"]
        # Join every returned segment rather than keeping only the first.
        translated = "\n".join(segment["dst"] for segment in segments)
    except (KeyError, TypeError):
        # The payload has no translation (e.g. an error reply); show it.
        print("翻译失败,接口返回:", payload)
        return None
    if not segments:
        print("翻译失败,接口返回:", payload)
        return None

    print(translated)
    return translated
# Interactive prompt: ask for a target-language number, then a sentence, and
# translate it. An empty language choice re-prompts; the sentence "q" quits.
while True:
    target_choice = input("你需要翻译成什么语言(支持中文(1),英文(2),韩文(3),日文(4),输入编号即可):")
    if target_choice:
        sentence = input("请输入你需要翻译的语句:")
        if sentence == "q":
            break
        demobaiduE_Z(sentence, to_lang=target_choice)
代码也很好理解,如果哪里不对请多多指正,哪里不懂也可以问我。
效果展示
下章预告,有道翻译!