获取搜狗指数数据

// A highlighted block
import  requests, re ,json
from bs4 import  BeautifulSoup
import random
import json
import time
# Pool of desktop browser User-Agent strings; one is picked at random
# (via random.choice) in shougou.get_main and set as the session's
# 'user-agent' header.
user_agent_list = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0'
]
def load_data_from_dict(o, *keys):
    """Follow ``keys`` through nested dicts starting at ``o``.

    Returns the value stored under the last key, or ``None`` when no keys
    are given, when any container along the path is falsy (``None``, empty
    dict, ...) or is not a dict, or when a key is missing.
    """
    if not keys:
        return None

    current = o
    for key in keys:
        # A falsy or non-dict container cannot be descended into.
        if not current or not isinstance(current, dict):
            return None
        current = current.get(key)
    return current

def write_fun(line):
    """Append ``line`` verbatim to ``shougou2.csv`` in the working directory.

    The file is created if it does not exist. No newline is added; the
    caller is expected to terminate ``line`` itself.
    """
    # The encoding is pinned to UTF-8 because the rows contain Chinese
    # keywords; relying on the platform default makes the output vary
    # between machines. The ``with`` block closes the file, so no explicit
    # close() call is needed.
    with open('shougou2.csv', 'a', encoding='utf-8') as f:
        f.write(line)

class shougou:
    """Scraper for the Sogou Index search-heat page of a keyword.

    For each keyword it downloads the ``searchHeat`` page, extracts the JSON
    object assigned to ``root.SG.wholedata`` in the page source, and appends
    one ``pv,date,keyword`` row per data point via ``write_fun``.
    """

    # Marker that precedes the embedded JSON payload in the page source.
    _WHOLEDATA_PATTERN = re.compile(r'root\.SG\.wholedata =')

    def __init__(self):
        # One session is reused for all requests so headers set on it apply.
        self.session = requests.session()

    def run(self, keyword):
        """Fetch and store the index data for ``keyword``."""
        self.get_main(keyword)

    def get_main(self, keyword):
        """Download the search-heat page for ``keyword`` and write its rows.

        On a chunked-encoding failure, or when the page does not contain a
        usable ``root.SG.wholedata`` payload or ``pvList``, a diagnostic is
        printed and nothing is written for this keyword.
        """
        # Pick a fresh User-Agent for this request.
        self.session.headers.update(
            {'user-agent': random.choice(user_agent_list)}
        )

        params = {
            "kwdNamesStr": keyword,
            "timePeriodType": 'MONTH',
            "dataType": 'SEARCH_ALL',
            "queryType": 'INPUT',
        }
        url = 'http://zhishu.sogou.com/index/searchHeat'
        print(url)
        try:
            # Go through the session; a bare requests.get() would ignore the
            # User-Agent configured on the session above.
            r = self.session.get(url, params=params)
        except requests.exceptions.ChunkedEncodingError:
            print("requests.exceptions.ChunkedEncodingError")
            # There is no response to parse, so stop here.
            return
        print(r)

        data = self._extract_wholedata(r.content)
        if data is None:
            # Payload missing or unparsable: report the keyword and skip it.
            print(keyword)
            return

        series = load_data_from_dict(data, "pvList")
        if not isinstance(series, list) or not series:
            # No data series came back for this keyword.
            print(keyword)
            return

        # Only the first series is used, since one keyword is queried.
        pvList = series[0]
        print(pvList)
        for pv in pvList:
            pv1 = load_data_from_dict(pv, "pv")
            date = load_data_from_dict(pv, "date")
            write_fun(f"{pv1},{date},{keyword}\n")

    @staticmethod
    def _extract_wholedata(content):
        """Return the JSON value assigned to ``root.SG.wholedata``, or None.

        ``content`` is the raw page body. ``None`` is returned when the
        marker is absent or what follows it is not valid JSON.
        """
        page = str(BeautifulSoup(content, 'lxml'))
        match = shougou._WHOLEDATA_PATTERN.search(page)
        if match is None:
            return None
        # raw_decode parses exactly one JSON value and ignores what follows
        # (the trailing ';' and the rest of the script), so a ';' inside a
        # string value cannot truncate the payload. It does not skip leading
        # whitespace itself, hence the lstrip().
        try:
            value, _ = json.JSONDecoder().raw_decode(page[match.end():].lstrip())
        except ValueError:
            return None
        return value


if __name__ == '__main__':
    # The instance gets its own name so the ``shougou`` class is not rebound.
    crawler = shougou()
    # Earlier keyword batches, kept for reference:
    #'药用',老龄
    #dicts =['卫生','医疗','营养','心理','健康','防疫','治疗','疫情','健康','养生','疾病','亚健康','疗养','体质','感染','调理','慢性病','癌症','康复','体能','食疗','污染','筛查','压力','饮食','药用','疗效','妇幼','体检','睡眠','熬夜','去火','养老','康养','敬老','退休','老龄','健身','运动','健美','塑形','锻炼','器械','瑜伽','健体','增肌','减脂','减肥','肌肉','户外','跑步','卡路里','心理健康','心理咨询','心理障碍','抑郁','忧郁','自闭','宣泄','自卑','偏执','焦虑','厌世','烦躁','执拗','减压','癔症','失眠','自杀','狂躁','压抑']
    #dicts =['健身','运动','健美','塑形','锻炼','器械','瑜伽','健体','增肌','减脂','减肥','肌肉','户外','跑步','卡路里','心理健康','心理咨询','心理障碍','抑郁','忧郁','自闭','宣泄','自卑','偏执','焦虑','厌世','烦躁','执拗','减压','癔症','失眠','自杀','狂躁','压抑']
    keywords = ['老龄']
    for keyword in keywords:
        crawler.run(keyword)
        # Pause one second between keywords to space out the requests.
        time.sleep(1)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值