# A highlighted block
import requests, re ,json
from bs4 import BeautifulSoup
import random
import json
import time
# Pool of desktop browser User-Agent strings; one is picked at random per
# request by the scraper below.
user_agent_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:73.0) Gecko/20100101 Firefox/73.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0",
]
def load_data_from_dict(o, *keys):
    """Look up a value in nested dictionaries by following ``keys`` in order.

    Args:
        o: The outermost object, expected to be a dict.
        *keys: Keys to follow, outermost first.

    Returns:
        The value stored under the last key, or ``None`` when no keys are
        given, when any object reached along the way is falsy or not a
        dict, or when a key is missing.
    """
    current = o
    for key in keys:
        # A falsy or non-dict value cannot be descended into.
        if not current or not isinstance(current, dict):
            return None
        current = current.get(key)
    # With no keys at all there is nothing to look up.
    return current if keys else None
def write_fun(line):
    """Append ``line`` verbatim to ``shougou2.csv`` in the working directory.

    Args:
        line: Text to append; the caller supplies any trailing newline.
    """
    # Explicit UTF-8 so non-ASCII keywords are written the same way on every
    # platform instead of depending on the locale's default encoding.
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open('shougou2.csv', 'a', encoding='utf-8') as f:
        f.write(line)
class shougou:
    """Scraper that pulls search-heat data for a keyword from zhishu.sogou.com.

    Each data point found is appended to the CSV via ``write_fun`` as
    ``pv,date,keyword``.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # A single session is reused so that headers set in get_main are
        # actually sent with the request.
        self.session = requests.Session()

    def run(self, keyword):
        """Fetch and store the search-heat data for ``keyword``."""
        self.get_main(keyword)

    def get_main(self, keyword):
        """Download the search-heat page for ``keyword`` and append its rows to the CSV.

        The page is requested with ``timePeriodType=MONTH``. The JSON assigned
        to ``root.SG.wholedata`` in the page is parsed and every entry of the
        first ``pvList`` element is written as one ``pv,date,keyword`` line.

        On a request failure, a page without the expected data, or
        unparsable JSON, a message is printed and nothing is written.

        Args:
            keyword: The search term to query.
        """
        # Rotate the User-Agent header on every call.
        self.session.headers.update(
            {
                'user-agent': random.choice(user_agent_list)
            }
        )
        params = {
            "kwdNamesStr": keyword,
            "timePeriodType": 'MONTH',
            "dataType": 'SEARCH_ALL',
            "queryType": 'INPUT'
        }
        url = 'http://zhishu.sogou.com/index/searchHeat'
        print(url)
        try:
            # Go through the session so the User-Agent set above is used;
            # the timeout keeps a stalled connection from hanging forever.
            r = self.session.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as exc:
            # Without a response there is nothing to parse.
            print("request failed:", repr(exc))
            return
        print(r)

        soup = BeautifulSoup(r.content, 'lxml')
        match = re.search('.*root.SG.wholedata =(.*)', str(soup))
        if match is None:
            # The page does not contain the embedded data for this keyword.
            print(keyword)
            return
        # Keep only the text up to the first ';' after the assignment.
        datas = match.group(1).split(";")[0]
        try:
            data = json.loads(datas)
        except ValueError:
            print(keyword)
            return

        # NOTE(review): assumes "pvList" is a list whose first element is a
        # list of {"pv": ..., "date": ...} dicts -- inferred from how it is
        # indexed here; confirm against a live response.
        pv_lists = load_data_from_dict(data, "pvList")
        if not isinstance(pv_lists, list) or not pv_lists:
            print(keyword)
            return
        pvList = pv_lists[0]
        print(pvList)

        lines = []
        for pv in pvList:
            pv1 = load_data_from_dict(pv, "pv")
            date = load_data_from_dict(pv, "date")
            lines.append(str(pv1) + ',' + str(date) + ',' + str(keyword) + '\n')
        # One append for all rows instead of reopening the file per row.
        if lines:
            write_fun("".join(lines))
if __name__ == '__main__':
    # Use a separate name for the instance so the class `shougou` is not
    # rebound to one of its own instances.
    spider = shougou()
    # Disabled keyword lists kept for reference; uncomment one to crawl it.
    #'药用',老龄
    #dicts =['卫生','医疗','营养','心理','健康','防疫','治疗','疫情','健康','养生','疾病','亚健康','疗养','体质','感染','调理','慢性病','癌症','康复','体能','食疗','污染','筛查','压力','饮食','药用','疗效','妇幼','体检','睡眠','熬夜','去火','养老','康养','敬老','退休','老龄','健身','运动','健美','塑形','锻炼','器械','瑜伽','健体','增肌','减脂','减肥','肌肉','户外','跑步','卡路里','心理健康','心理咨询','心理障碍','抑郁','忧郁','自闭','宣泄','自卑','偏执','焦虑','厌世','烦躁','执拗','减压','癔症','失眠','自杀','狂躁','压抑']
    #dicts =['健身','运动','健美','塑形','锻炼','器械','瑜伽','健体','增肌','减脂','减肥','肌肉','户外','跑步','卡路里','心理健康','心理咨询','心理障碍','抑郁','忧郁','自闭','宣泄','自卑','偏执','焦虑','厌世','烦躁','执拗','减压','癔症','失眠','自杀','狂躁','压抑']
    dicts = ['老龄']
    for keyword in dicts:
        spider.run(keyword)
        # Pause one second between keywords to throttle requests.
        time.sleep(1)
# Fetch Sogou Index data
# (Page footer from the source blog: latest recommended article published 2021-06-08 22:08:36)