Python 爬取百度指数

def baidu(keyword):
  """Fetch the live Baidu Index series for ``keyword`` and print it.

  Requests the live search-index API, fetches the matching key from the
  ``ptbk`` endpoint, decodes the obfuscated series with the ``decrypt``
  function of a local JavaScript helper file, and prints one
  ``timestamp value keyword`` line per hourly data point.

  Relies on ``requests``, ``pd`` (pandas), ``execjs`` and ``time`` being
  available as module-level names.

  Args:
    keyword: Search term to look up; interpolated into the request URL as-is.

  Returns:
    None. Results are written to stdout.
  """
  import ast  # local import: only needed to parse the decoded series safely

  # NOTE(review): the Cookie below is a hard-coded logged-in session; it
  # should be supplied from configuration rather than kept in source.
  headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Referer': 'http://index.baidu.com/v2/main/index.html',
    'Cookie': 'BAIDUID=6229B79E28BFECE4250939A302FCE55D:FG=1; PSTM=1568271960; BIDUPSID=8A924DAC3545706903AD37123D127E9C; MCITY=-317%3A; BDUSS=I1NGNES3FTVnh6MlNCZGZYaFNPYmdFTElyZG9LUDBZd1laZjV6V1A2Yi04RzFlSVFBQUFBJCQAAAAAAAAAAAEAAAASGdwuwdZfsMtfveQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP5jRl7-Y0ZeQ3; bdshare_firstime=1583388817527; BDSFRCVID=GIFOJeC629jnl9RuAf1iM2x1PVK1Lt5TH6aoobmkUJmKfkZguPwgEG0P_U8g0Ku-S2EqogKK3mOTHR8F_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJAfoDDXJKL3jP5pMDTEht0_bMLX5-CsQgQW2hcH0KLKsPL9Wlro5tKJ5-5uJJvWb5Q73xQJWfb1MRjv3jovy4u4hajR2hJ-yjIO0p5TtUJaSDnTDMRh-6LTLx7yKMnitKj9-pn4fpQrh459XP68bTkA5bjZKxtq3mkjbPbDfn028DKuDj0aejv0ea_s-bbfHj73B4TqaJRofjrnhPF3WtC0XP6-35KHbmQpbCjFbRQ5On7EDR7O5PtDyPv8-q37JD6yaqDEyUJrf-oIBTooh5tHK-oxJpOy5JbMopvaHROEfq6vbURvDPug3-7NqU5dtjTO2bc_5KnlfMQ_bf--QfbQ0hOhqP-jBRIE_D-XJI02bDv65nt_MtD8Mfj-2t-XKKOLVKOybp7keq8CD6D506L9DpohLUJUBRrOB-3jL-j2eCQ2y5jHhp0getnhLUCe3Ru80nOD3PJpsIJMQ-DWbT8U5ecK2x6UaKviaKOEBMb1VCnDBT5h2M4qMxtOLR3pWDTm_q5TtUJMeCnTDMFhe6jLDHLeJ6kff5vfL5rb2nO5jJrIhITjhPrMyGjCWMT-05vubfb2Wq5kftjdLtRYDRkf5nrUKpQTbHn7_JjObULVfqoj2trNqtDlyRJDBMQxtNRR-CnjtpvhHlo23hnobUPUDUc9LUvNfgcdot5yBbc8eIna5hjkbfJBQttjQn3hfIkj2CKLtC8WhD8GD6Rb5nbHKMQbKP7--I6b0nRHHJ7KeTrnh6RnyfKgyxomtjD82DoQVxoa0RLajqC425Qm3xnLXh8tLUkqKCOy3J6oQlcVOnQcW4Jh0b-sQttjQpRPfIkja-KELKopqJ7TyU45hf47ybKO0q4Hb6b9BJcjfU5MSlcNLTjpQT8r5MDOK5OuJRQ2QJ8BtCPhhCQP; H_PS_PSSID=; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; delPer=0; PSINO=7; BDRCVFR[VXHUG3ZuJnT]=mk3SLVN4HKm; bdindexid=cl0an1bubpm0bt34oijcdr0ir6; RT="sl=0&ss=k8qp2pjw&tt=0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=n2j8kigaif"; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1586259791,1586308431,1586311780,1586311832; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1586311832',
    'Host': 'index.baidu.com',
    'X-Requested-With': 'XMLHttpRequest'
  }
  # Search index (live). To change the time granularity, modify the URL.
  url = 'https://index.baidu.com/api/LiveApi/getLive?region=0&word=[[{{"name":"{}","wordType":1}}]]'.format(keyword)
  # A timeout keeps a stalled connection from blocking the caller forever.
  r = requests.get(url, headers=headers, timeout=10)
  data = r.json()['data']
  uniqid = data['uniqid']

  # The ptbk endpoint returns the key string passed to decrypt() below.
  t_url = "http://index.baidu.com/Interface/ptbk?uniqid={}".format(uniqid)
  rep = requests.get(url=t_url, headers=headers, timeout=10).json()
  t = rep["data"]

  series = data['result'][0]['index'][0]
  e = series['_all']
  # 'period' holds "start|end"; expand it into one entry per hour.
  date = series['period'].split('|')
  date_index = pd.date_range(start=str(date[0]), end=str(date[1]), freq='H')

  # NOTE(review): machine-specific absolute path to the JS helper.
  with open('/Users/lys/PycharmProjects/MCdata/spider/parsing_data_function.js', encoding='utf-8') as f:
    js = f.read()
  docjs = execjs.compile(js)
  res = docjs.call('decrypt', t, e)
  # The decoded text is a comma-separated list of literals; literal_eval
  # parses it without executing arbitrary code from the remote response.
  datas = ast.literal_eval('[' + res + ']')

  for position, moment in enumerate(date_index):
    time_struct = time.strptime(str(moment), "%Y-%m-%d %H:%M:%S")
    # Local-time epoch in milliseconds.
    timestamp = int(time.mktime(time_struct)) * 1000
    print(timestamp, datas[position], keyword)

解析data的js函数:

// 解析data的js函数

/**
 * Decode a string with a substitution table built from a key string.
 *
 * The first half of `t` lists the source characters and the second half
 * lists their replacements, position by position. Each character of `e`
 * is replaced through that table; characters with no mapping contribute
 * nothing to the output.
 *
 * @param {string} t Key string (source characters followed by replacements).
 * @param {string} e Encoded text.
 * @returns {string} Decoded text.
 */
function decrypt(t, e) {
    var keyChars = t.split("");
    var half = keyChars.length / 2;
    var table = {};
    for (var k = 0; k < half; k++) {
        table[keyChars[k]] = keyChars[half + k];
    }
    return e.split("").map(function (ch) {
        return table[ch];
    }).join("");
}

调用示例:print(baidu('NBA'))
输出结果:
在这里插入图片描述

  • 1
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值