1. 导入爬虫需要使用的包
import urllib.request
import urllib.parse
2.定义url
在浏览器开发者工具中查看该翻译请求的url(原文此处有配图):
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
3.定义请求头
查看Preview中带有翻译数据的那个请求接口,复制其Headers里Request Headers的全部内容,并整理成如下字典格式(Accept-Encoding一行需要注释掉,否则服务器可能返回压缩内容,无法直接用utf-8解码):
# Request headers copied from the browser's "Request Headers" panel for the
# translation request.
# NOTE(review): Cookie and Acs-Token look like values captured from one logged-in
# browser session; confirm they are meant to be published and expect them to expire.
headers = {
    'Accept': '*/*',
    # Accept-Encoding ('gzip, deflate, br') is deliberately not sent: the later
    # steps decode the raw response body as UTF-8 without decompressing it, so a
    # compressed reply would presumably fail to decode.
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Acs-Token': '1706008100941_1706056600478_s/kAnbkVP4JWoKi1gFmf1ekE7I6UKHyDA4KqU52A9sjwI5drQR9injn1c/sgUxmu5j23eyfJTT2UZWFMGcjexGEzFe//pv6K+OiVHY1mazHyglRRDPyGvo/s4BhOtHidCMPkrGiaIUfCMQ+zbCfo2Do44gGmJ5XgyOw9yy1k+gv40yo1Lh0N7KrbSpskvV9dHaYyyd9vYBUMhbdmQ/uSuVd2lGxyQ021SQHlqvJIC1FH1XlP3z+tkJhMQ5DpTX4AMNKktXOsCIwv/q3KTQiupRfgCxYJu86+uqxxk5The1ookuogUIkD43ZJhF6cRIaweXCkzAqJTdPY+VzBXMYLXuyGIPFXPekWfbuKhGZog2uILE/782XoJQtX4PndfrU+6DyBkJ/V2VmrVMjbIYYhxiwtCSejeKrnqYqUJKeKu3CCGVgylTPQN7z0Y0XWqMWBX3lCcewLvv/1zKYqi+LmoNsWQ0jIMQqva/A8PM1KRwo=',
    'Connection': 'keep-alive',
    # Content-Length ('150' in the browser capture) is intentionally omitted:
    # urllib fills it in from the actual request body, whereas a hard-coded value
    # becomes wrong as soon as the form data changes.
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'BAIDUID=82EBB200977DA3EF21EC75BD07BDC355:FG=1; PSTM=1695974617; BIDUPSID=AE210E91D4157F53D214A3C1028951E7; BDUSS=3UtMW95MS01RzQ2R3Ita1dWTERDM2VrdllFRG80fkJ0ei16d2F6dXJwdHVlc1JsRVFBQUFBJCQAAAAAAAAAAAEAAAAc4~pFuMPE48HLvvwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG7tnGVu7ZxlN; BDUSS_BFESS=3UtMW95MS01RzQ2R3Ita1dWTERDM2VrdllFRG80fkJ0ei16d2F6dXJwdHVlc1JsRVFBQUFBJCQAAAAAAAAAAAEAAAAc4~pFuMPE48HLvvwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG7tnGVu7ZxlN; H_WISE_SIDS=39998_40042_40073_40117; BAIDUID_BFESS=82EBB200977DA3EF21EC75BD07BDC355:FG=1; ZFY=0GwYZXZtwC7e4iezsEuWlux8uo5zpYzcB:AShMnlm0PM:C; __bid_n=18bb2c4acc32016a6f44bd; RT="z=1&dm=baidu.com&si=72413b5a-c654-4d95-8395-9521de69c470&ss=lro7y1sx&sl=b&tt=6nr&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=2y5s&ul=309z&hd=30a1"; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_WISE_SIDS_BFESS=39998_40042_40073_40117; H_PS_PSSID=39998_40042_40123; BA_HECTOR=0185012hal0h0g8h200h008h65km6e1iquq4q1s; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; PSINO=1; delPer=0; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1705996999,1706056556; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1706056556; ab_sr=1.0.1_ODA1MzU4M2Y4MzljZmYxOWJmNWJjOWUyYWRlNjMxMTUwNzM1MGMxNzY2ZWFhZjNlODQwNDdhODdkMTQ2ODVmMjdkZGNhNjgxNjQ4NmNkMjg0YjU2YWJhMjdjZTAwYTM2N2FjY2I5NjdiYzUzOTZiOTkxZDY1NTEyMzEwNjY2ODBlZTNlN2Q0ZjE0YzJkYjBiYTU1NjE3NTFmMjFkMjNkODYwMmNkMmMyODlmMzA0ZGRmMGE5MWFhMTY4YzhiN2QyMTMyNTI3MGZkM2NlYTc3Nzc0YWJhYWYxZGQxMWJhMTBiODk5YzJmNTMyN2M0MTA3OWNmZDc1NzIyMTk1ZWQ0YQ==',
    'Host': 'fanyi.baidu.com',
    'Origin': 'https://fanyi.baidu.com',
    'Referer': 'https://fanyi.baidu.com/?aldtype=16047',
    'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
4.定义data
# Form fields sent in the body of the translation request.
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'spider',
    'transtype': 'enter',
    'simple_means_flag': '3',
    'sign': '63766.268839',
    'token': '05a9c5109f188a54564f31a44b45d480',
    'domain': 'common',
    'ts': '1706056600466',
}
# POST parameters must be URL-encoded and then turned into bytes.
data = bytes(urllib.parse.urlencode(data), 'utf-8')
5.请求对象的定制
request = urllib.request.Request(url=url, data=data, headers=headers)
6.模拟浏览器向服务器发送请求
response = urllib.request.urlopen(request)
7.读取响应内容:read()返回的是字节(bytes),需使用decode('utf-8')解码为字符串
content = response.read().decode('utf-8')
8.将json字符串解析为Python对象
import json

# Parse the JSON text held in `content` into Python objects and display the result.
obj = json.loads(content)
print(obj)
9.展示
10.源码
import urllib.request
import urllib.parse
# Endpoint of the translation request; the query string carries the same
# language pair ('en' -> 'zh') as the form data.
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'

# Request headers copied from the browser's "Request Headers" panel for the
# translation request.
# NOTE(review): Cookie and Acs-Token look like values captured from one logged-in
# browser session; confirm they are meant to be committed and expect them to expire.
headers = {
    'Accept': '*/*',
    # Accept-Encoding ('gzip, deflate, br') is deliberately not sent: the script
    # decodes the raw response body as UTF-8 without decompressing it, so a
    # compressed reply would presumably fail to decode.
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Acs-Token': '1706008100941_1706056600478_s/kAnbkVP4JWoKi1gFmf1ekE7I6UKHyDA4KqU52A9sjwI5drQR9injn1c/sgUxmu5j23eyfJTT2UZWFMGcjexGEzFe//pv6K+OiVHY1mazHyglRRDPyGvo/s4BhOtHidCMPkrGiaIUfCMQ+zbCfo2Do44gGmJ5XgyOw9yy1k+gv40yo1Lh0N7KrbSpskvV9dHaYyyd9vYBUMhbdmQ/uSuVd2lGxyQ021SQHlqvJIC1FH1XlP3z+tkJhMQ5DpTX4AMNKktXOsCIwv/q3KTQiupRfgCxYJu86+uqxxk5The1ookuogUIkD43ZJhF6cRIaweXCkzAqJTdPY+VzBXMYLXuyGIPFXPekWfbuKhGZog2uILE/782XoJQtX4PndfrU+6DyBkJ/V2VmrVMjbIYYhxiwtCSejeKrnqYqUJKeKu3CCGVgylTPQN7z0Y0XWqMWBX3lCcewLvv/1zKYqi+LmoNsWQ0jIMQqva/A8PM1KRwo=',
    'Connection': 'keep-alive',
    # Content-Length ('150' in the browser capture) is intentionally omitted:
    # urllib fills it in from the actual request body, whereas a hard-coded value
    # becomes wrong as soon as the form data changes.
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'BAIDUID=82EBB200977DA3EF21EC75BD07BDC355:FG=1; PSTM=1695974617; BIDUPSID=AE210E91D4157F53D214A3C1028951E7; BDUSS=3UtMW95MS01RzQ2R3Ita1dWTERDM2VrdllFRG80fkJ0ei16d2F6dXJwdHVlc1JsRVFBQUFBJCQAAAAAAAAAAAEAAAAc4~pFuMPE48HLvvwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG7tnGVu7ZxlN; BDUSS_BFESS=3UtMW95MS01RzQ2R3Ita1dWTERDM2VrdllFRG80fkJ0ei16d2F6dXJwdHVlc1JsRVFBQUFBJCQAAAAAAAAAAAEAAAAc4~pFuMPE48HLvvwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG7tnGVu7ZxlN; H_WISE_SIDS=39998_40042_40073_40117; BAIDUID_BFESS=82EBB200977DA3EF21EC75BD07BDC355:FG=1; ZFY=0GwYZXZtwC7e4iezsEuWlux8uo5zpYzcB:AShMnlm0PM:C; __bid_n=18bb2c4acc32016a6f44bd; RT="z=1&dm=baidu.com&si=72413b5a-c654-4d95-8395-9521de69c470&ss=lro7y1sx&sl=b&tt=6nr&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=2y5s&ul=309z&hd=30a1"; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_WISE_SIDS_BFESS=39998_40042_40073_40117; H_PS_PSSID=39998_40042_40123; BA_HECTOR=0185012hal0h0g8h200h008h65km6e1iquq4q1s; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; PSINO=1; delPer=0; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1705996999,1706056556; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1706056556; ab_sr=1.0.1_ODA1MzU4M2Y4MzljZmYxOWJmNWJjOWUyYWRlNjMxMTUwNzM1MGMxNzY2ZWFhZjNlODQwNDdhODdkMTQ2ODVmMjdkZGNhNjgxNjQ4NmNkMjg0YjU2YWJhMjdjZTAwYTM2N2FjY2I5NjdiYzUzOTZiOTkxZDY1NTEyMzEwNjY2ODBlZTNlN2Q0ZjE0YzJkYjBiYTU1NjE3NTFmMjFkMjNkODYwMmNkMmMyODlmMzA0ZGRmMGE5MWFhMTY4YzhiN2QyMTMyNTI3MGZkM2NlYTc3Nzc0YWJhYWYxZGQxMWJhMTBiODk5YzJmNTMyN2M0MTA3OWNmZDc1NzIyMTk1ZWQ0YQ==',
    'Host': 'fanyi.baidu.com',
    'Origin': 'https://fanyi.baidu.com',
    'Referer': 'https://fanyi.baidu.com/?aldtype=16047',
    'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
# Form fields sent in the body of the translation request.
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'spider',
    'transtype': 'enter',
    'simple_means_flag': '3',
    'sign': '63766.268839',
    'token': '05a9c5109f188a54564f31a44b45d480',
    'domain': 'common',
    'ts': '1706056600466',
}
# POST parameters must be URL-encoded and then turned into bytes.
data = bytes(urllib.parse.urlencode(data), 'utf-8')
import json

# Build the customised request object; supplying `data` makes urllib send it
# as a POST with the encoded form body.
request = urllib.request.Request(url=url, data=data, headers=headers)

# Use the response as a context manager so the underlying connection is
# closed even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    # read() returns bytes; decode them to text.
    content = response.read().decode('utf-8')

# Parse the JSON text into Python objects and display the result.
obj = json.loads(content)
print(obj)