def baidu(
    keyword,
    js_path='/Users/lys/PycharmProjects/MCdata/spider/parsing_data_function.js',
    timeout=10,
):
    """Fetch the real-time Baidu Index series for ``keyword`` and print it.

    The function performs two HTTP requests: the first returns the
    obfuscated hourly series together with a ``uniqid``; the second
    exchanges that ``uniqid`` for the substitution key. Both strings are
    handed to the JavaScript ``decrypt`` function loaded from ``js_path``,
    and the decoded comma-separated values are printed one line per hour as
    ``<epoch milliseconds> <value> <keyword>``.

    Args:
        keyword: Search term to query.
        js_path: Path of the JavaScript file that defines ``decrypt(t, e)``.
            Defaults to the location the original script used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Results are written to stdout.
    """
    # Local imports keep this block self-contained; the rest of the file is
    # expected to provide requests, pandas (pd), execjs and time.
    import ast
    import json

    # NOTE(security): the Cookie below appears to embed a logged-in Baidu
    # session (BDUSS). It is kept verbatim so behaviour does not change, but
    # credentials should be loaded from configuration, not committed to source.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
        'Referer': 'http://index.baidu.com/v2/main/index.html',
        'Cookie': 'BAIDUID=6229B79E28BFECE4250939A302FCE55D:FG=1; PSTM=1568271960; BIDUPSID=8A924DAC3545706903AD37123D127E9C; MCITY=-317%3A; BDUSS=I1NGNES3FTVnh6MlNCZGZYaFNPYmdFTElyZG9LUDBZd1laZjV6V1A2Yi04RzFlSVFBQUFBJCQAAAAAAAAAAAEAAAASGdwuwdZfsMtfveQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP5jRl7-Y0ZeQ3; bdshare_firstime=1583388817527; BDSFRCVID=GIFOJeC629jnl9RuAf1iM2x1PVK1Lt5TH6aoobmkUJmKfkZguPwgEG0P_U8g0Ku-S2EqogKK3mOTHR8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJAfoDDXJKL3jP5pMDTEht0_bMLX5-CsQgQW2hcH0KLKsPL9Wlro5tKJ5-5uJJvWb5Q73xQJWfb1MRjv3jovy4u4hajR2hJ-yjIO0p5TtUJaSDnTDMRh-6LTLx7yKMnitKj9-pn4fpQrh459XP68bTkA5bjZKxtq3mkjbPbDfn028DKuDj0aejv0ea_s-bbfHj73B4TqaJRofjrnhPF3WtC0XP6-35KHbmQpbCjFbRQ5On7EDR7O5PtDyPv8-q37JD6yaqDEyUJrf-oIBTooh5tHK-oxJpOy5JbMopvaHROEfq6vbURvDPug3-7NqU5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE_D-XJI02bDv65nt_MtD8Mfj-2t-XKKOLVKOybp7keq8CD6D506L9DpohLUJUBRrOB-3jL-j2eCQ2y5jHhp0getnhLUCe3Ru80nOD3PJpsIJMQ-DWbT8U5ecK2x6UaKviaKOEBMb1VCnDBT5h2M4qMxtOLR3pWDTm_q5TtUJMeCnTDMFhe6jLDHLeJ6kff5vfL5rb2nO5jJrIhITjhPrMyGjCWMT-05vubfb2Wq5kftjdLtRYDRkf5nrUKpQTbHn7_JjObULVfqoj2trNqtDlyRJDBMQxtNRR-CnjtpvhHlo23hnobUPUDUc9LUvNfgcdot5yBbc8eIna5hjkbfJBQttjQn3hfIkj2CKLtC8WhD8GD6Rb5nbHKMQbKP7--I6b0nRHHJ7KeTrnh6RnyfKgyxomtjD82DoQVxoa0RLajqC425Qm3xnLXh8tLUkqKCOy3J6oQlcVOnQcW4Jh0b-sQttjQpRPfIkja-KELKopqJ7TyU45hf47ybKO0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OuJRQ2QJ8BtCPhhCQP; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; delPer=0; PSINO=7; BDRCVFR[VXHUG3ZuJnT]=mk3SLVN4HKm; bdindexid=cl0an1bubpm0bt34oijcdr0ir6; RT="sl=0&ss=k8qp2pjw&tt=0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=n2j8kigaif"; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1586259791,1586308431,1586311780,1586311832; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1586311832',
        'Host': 'index.baidu.com',
        'X-Requested-With': 'XMLHttpRequest'
    }

    # Search index (real time). A different time granularity requires a
    # different endpoint.
    # The word payload is serialised with json.dumps and sent via ``params``
    # so keywords containing quotes, '&' or '#' cannot corrupt the JSON or the
    # query string. For plain keywords the JSON text is identical to the
    # original hand-formatted one.
    word = json.dumps(
        [[{'name': keyword, 'wordType': 1}]],
        ensure_ascii=False,
        separators=(',', ':'),
    )
    live = requests.get(
        'https://index.baidu.com/api/LiveApi/getLive',
        params={'region': 0, 'word': word},
        headers=headers,
        timeout=timeout,
    )
    data = live.json()['data']

    # Exchange the uniqid for the substitution key used to decode the series.
    key_reply = requests.get(
        url='http://index.baidu.com/Interface/ptbk',
        params={'uniqid': data['uniqid']},
        headers=headers,
        timeout=timeout,
    ).json()
    t = key_reply['data']

    series = data['result'][0]['index'][0]
    e = series['_all']
    # 'period' is "<start>|<end>"; build one timestamp per hour in between.
    start, end = series['period'].split('|')[:2]
    date_index = pd.date_range(start=str(start), end=str(end), freq='H')

    with open(js_path, encoding='utf-8') as f:
        js = f.read()
    res = execjs.compile(js).call('decrypt', t, e)

    # The decoded text is a comma-separated list of literals. literal_eval
    # parses the same syntax as the previous eval() but cannot execute code
    # that arrives in the remote response.
    datas = ast.literal_eval('[' + res + ']')

    # zip stops at the shorter sequence, so a series shorter than the date
    # range no longer ends in an IndexError after the last available value.
    for stamp, value in zip(date_index, datas):
        # Interpreted as local time, converted to epoch milliseconds.
        timestamp = int(time.mktime(stamp.timetuple())) * 1000
        print(timestamp, value, keyword)
解析 data 的 JS 函数(保存为 parsing_data_function.js,供上面的 Python 代码通过 execjs 调用):
// Decode the obfuscated series `e` using the substitution key `t`.
// The first half of `t` lists the cipher characters and the second half
// lists, position by position, the plain character each one stands for.
function decrypt(t, e) {
    var keyChars = t.split("");
    var cipherChars = e.split("");
    var half = keyChars.length / 2;
    var table = {};

    // Pair every character of the first half with its counterpart in the
    // second half.
    for (var k = 0; k < half; k++) {
        table[keyChars[k]] = keyChars[half + k];
    }

    // Characters without a mapping become undefined, which join() renders
    // as an empty string.
    return cipherChars.map(function (ch) {
        return table[ch];
    }).join("");
}
调用示例:print(baidu('NBA'))
输出结果: