获取百度搜索关键字_百度指数关键字获取

import asyncio
import datetime
import pandas as pd
from collections import defaultdict
from urllib.parse import urlencode
import json
from pandas import DataFrame
from Util.RequestUtil import get_html
from config import ALL_CODE


class BaiduIndex:
    def __init__(self, startdate, enddate, city, key_word):
        self.start_date = datetime.datetime.strptime(startdate, '%Y-%m-%d')
        self.end_date = datetime.datetime.strptime(enddate, '%Y-%m-%d')
        self.area = ALL_CODE.get(city, 0)
        self.key_words = key_word
        self.result = {keyword: defaultdict(list) for keyword in self.key_words}
        self._all_cate = ['all', 'pc', 'wise']
        self._headers = {
            'Cookie': ''
        }

    async def gen_date(self, start, end):
        """

        :param start:
        :param end:
        :return:
        """
        paramter = {
            'startDate': start,
            'endDate': end,
            'area': self.area,
            'word': self.key_words,

        }
        url = 'http://index.baidu.com/api/SearchApi/index?' + urlencode(paramter)
        html = await get_html(url, self._headers)
        datas = json.loads(html)
        uniqueid = datas['data']['uniqid']
        encrypt_datas = []
        for single_data in datas['data']['userIndexes']:
            encrypt_datas.append(single_data)
        return encrypt_datas, uniqueid

    async def gen_key(self, uniqueid):
        """

        :param uniqueid:
        :return:
        """
        url = 'http://index.baidu.com/Interface/api/ptbk?uniqid=%s' % uniqueid
        html = await get_html(url, header=self._headers)
        datas = json.loads(html)
        return datas['data']

    def trans_data(self, key, data):
        """

        :param key:
        :param data:
        :return:
        """
        a = key
        i = data
        n = {}
        s = []
        for o in range(len(a) // 2):
            n[a[o]] = a[len(a) // 2 + o]
        for r in range(len(data)):
            s.append(n[i[r]])
        return ''.join(s).split(',')

    def parse_save_data(self, data):
        """
        转化保存数据
        """
        data_dic_reshape = {}
        for cate in self._all_cate:
            data_dic_reshape[cate] = data[cate]['data']
        time_series = pd.date_range(start=self.start_date, end=self.end_date)
        data_df = DataFrame(data_dic_reshape, index=time_series)
        csv_name = './{}_指数_{}--{}.csv'.format(self.key_words, self.start_date.date(), self.end_date.date())
        data_df.to_csv(path_or_buf=csv_name, mode='w+')

    async def main(self):
        encrypt_datas, uniqid = await self.gen_date(self.start_date, self.end_date)
        key = await self.gen_key(uniqid)
        for encrypt_data in encrypt_datas:
            for kind in self._all_cate:
                encrypt_data[kind]['data'] = self.trans_data(key, encrypt_data[kind]['data'])
            self.parse_save_data(encrypt_data)


if __name__ == '__main__':
    baidu = BaiduIndex('2018-4-20', '2018-10-20', '', '王俊凯')
    loop = asyncio.get_event_loop()
    loop.run_until_complete(baidu.main())
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值