各种接口
get请求:
# coding: utf-8
import urllib.request
import urllib.parse
# https://www.baidu.com/s?ie=UTF-8&wd=%E6%97%A5%E6%9C%AC
# GET request: ask for a search keyword, fetch the Baidu result page for it
# and save the raw HTML to "<keyword>.html" in the current directory.
keyword = input("请输入关键字:")
url = "https://www.baidu.com/s?"
data = {
    "ie": "UTF-8",
    "wd": keyword,
}
# urlencode() percent-encodes the (possibly non-ASCII) keyword so that it can
# be appended to the URL as a query string.
query_string = urllib.parse.urlencode(data)
query_url = url + query_string
print(query_url)  # https://www.baidu.com/s?ie=UTF-8&wd=%E6%97%A5%E6%9C%AC

# Build the request object; the User-Agent header imitates a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
request = urllib.request.Request(url=query_url, headers=headers)

# Send the request and write the response body to disk. Both the HTTP
# response and the file are context-managed so the socket and the file handle
# are released even if reading or writing fails.
fileName = keyword + ".html"
with urllib.request.urlopen(request) as response, open(fileName, "wb") as fp:
    fp.write(response.read())
post请求:
# coding: utf-8
import urllib.request
import urllib.parse
# POST request: look up a word through the Baidu Translate suggestion
# endpoint and print the response body decoded as UTF-8.
url = "https://fanyi.baidu.com/sug"
word = "tomato"
formdata = {
    "kw": word,
}

# The User-Agent header imitates a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
}

# A POST body must be bytes: url-encode the form fields, then encode the
# resulting string as UTF-8.
formdata = urllib.parse.urlencode(formdata).encode('utf-8')

# GET and POST both go through urlopen(); the only difference is that a POST
# request carries a data payload. The payload is attached when the request
# object is built.
request = urllib.request.Request(url=url, data=formdata, headers=headers)

# The context manager closes the HTTP response (and its socket) after reading.
with urllib.request.urlopen(request) as response:
    print(response.read().decode('utf-8'))
js-post请求:
#coding: utf-8
import urllib.request
import urllib.parse
# Word to translate and the translation endpoint it is posted to.
word = "tomato"
url = "https://fanyi.baidu.com/v2transapi?"

# Form fields of the translation request, url-encoded and converted to the
# UTF-8 bytes that a POST body requires.
# "sign" and "token" are the anti-scraping measure: the API only returns a
# result when both are correct. Reproduce the site's JavaScript that computes
# them and put the resulting values here.
formdata = urllib.parse.urlencode(
    [
        ("from", "en"),
        ("to", "zh"),
        ("query", word),
        ("transtype", "realtime"),
        ("simple_means_flag", 3),
        ("sign", 530563.850866),
        ("token", "423e69fd2de5b349890ab1f66f08cf11"),
    ]
).encode("utf-8")
# Request headers sent with the translation POST request.
# NOTE(review): these look like they were copied from a browser's network
# panel; the cookie is presumably tied to one browser session and may have
# expired - confirm before relying on it.
headers = {
"accept": "*/*",
# Left disabled on purpose: when gzip is advertised the response body comes
# back compressed, and decoding it directly as UTF-8 fails with
# "UnicodeDecodeError: ... can't decode byte 0x8b in position 1".
# "accept-encoding": "gzip, deflate, br",
"accept-language": "zh-CN,zh;q=0.9,am;q=0.8,zh-TW;q=0.7,jv;q=0.6",
# The content length may be left out as well; a hard-coded value would be
# wrong as soon as the form data changes.
#"content-length": "118",
"content-type": "application/x-www-form-urlencoded; charset=UTF-8",
"cookie": "REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; PSTM=1557482984; BIDUPSID=C7D1EB1A06204824459A9277CA7DCFFF; BAIDUID=0936574F060A41E8C4612AC9979270FE:SL=0:NR=10:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; delPer=0; pgv_pvi=2019398656; pgv_si=s6800575488; BDRCVFR[tox4WRQ4-Km]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; BDRCVFR[CLK3Lyfkr9D]=mk3SLVN4HKm; ZD_ENTRY=baidu; H_PS_PSSID=1444_21110_29568_29220_26350; BCLID=7981879505648115283; BDSFRCVID=KskOJeC62w3DGNJw6Mc6EMFW6YFPRTjTH6aIKCdiNJ_38EuV_1l0EG0P_f8g0Ku-jgOsogKK0mOTHv8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJFJoDtMJI-3fP36q47HMt-8KU70etJyaR3v245vWJ5TMCoqjTJCKMIWjG5laJjl2J50QlnvbljCShPC-tn0hfLiQx7yQq523mrQoDLE3l02Vbc9e-t2yU_VWHj9WtRMW20jWl7mWPLVsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJEjjCKjjvWeaDDJ5nfb5kXXJnV-nT_KROvhjRS0p4gyxomtjjHMH7KBh75Jq6EVh5EQlt-hR_i5RttLUkqKCO7BKJM5M7KqJ5s5q7n34tJQttjQpRhfIkjahjtbJOkjJ7TyU42hf47yhDO0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OuJRLD_KIMJIIhbP365IT0M-_eKMrXetJyaROeBR-BtC_ahDDRe505JRL3hJne2bQHKKI_3JjV5P52HPJuJRoMhq4DbfO0-4u_K5RJVb3yH4OkeqOJ2Mt5M68IjptqW4tO-25ihqA5BJT8ExjRetLabjtpeGKft6-JJbks3br2HtcsHJ7xMtI_-P4DePReaMRZ5mAqot0b3p3FO40CXt--Qfuz3Njk0qONQIQnaIQqWlvTebT8ylP5eML03RPD2TO43bRTMbLy5KJvfj6FbUn5hP-UyN3LWh37bJblMKoaMp78jR093JO4y4Ldj4oxJpOJ5JbMopCafJOKHIC4j6uhjf5; locale=zh; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1570962537,1571056727; yjs_js_security_passport=2b407ebb845d0df85c7bbedcc1eefcc79cfc3923_1571056731_js; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1571098416; __yjsv5_shitong=1.0_7_6b6995477b82cdd1872429eadc58baef473b_300_1571098423998_218.241.81.222_29670bfe; PSINO=2",
"origin": "https://fanyi.baidu.com",
"referer": "https://fanyi.baidu.com/?aldtype=16047",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
# Marks the request as an AJAX (XMLHttpRequest) call.
"x-requested-with": "XMLHttpRequest"
}
# Attach the already-encoded form data to the request (which makes it a POST),
# send it and print the response body decoded as UTF-8.
request = urllib.request.Request(url=url, data=formdata, headers=headers)
# The context manager closes the HTTP response (and its socket) after reading.
with urllib.request.urlopen(request) as response:
    print(response.read().decode("utf-8"))
# html = response.read()
# print(html)
# html = html.decode("utf-8")
报错:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte
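原因:请求头里带了 Accept-Encoding(gzip),服务器返回的是 gzip 压缩后的数据(0x8b 是 gzip 文件头的第二个字节),而 urllib 不会自动解压,直接按 utf-8 解码就会报错。
解决:
注释掉 Accept-Encoding 这个 Header;
Content-Length 这个 Header 也可以注释掉。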
参考:https://blog.csdn.net/AnYeZhiYin/article/details/83421650
报错(接口返回的内容,没有翻译结果):
{"error":998,"from":"en","to":"zh","query":"tomato"}
解决:百度翻译接口破解(sign 和 token 两个参数必须正确,需要模拟 js 代码计算出来)
https://www.jianshu.com/p/38a65d8d3e80