微博指数爬取

# -*- coding: utf-8 -*-
# @Time:2021/8/11 0011 21:20
# @Author:权倾天下
# @File:微博指数_搜索.py
# @Software:PyCharm
import requests
from jsonpath import jsonpath
import csv
import matplotlib
from matplotlib import pyplot as ply
import numpy as np
from lxml import etree

class Webo_spider():
    """Fetch Weibo index chart data for a keyword and save it to ``weibo.csv``.

    Workflow (see :meth:`main`): the keyword is resolved to a ``wid`` through
    the ``searchword`` endpoint, then the ``getchartdata`` endpoint is queried
    once per date group and every (value, time) pair found in the response is
    appended to the CSV file.

    The instance owns an open file handle. Call :meth:`close` when finished,
    or use the instance as a context manager::

        with Webo_spider() as spider:
            spider.main()
    """

    # Seconds to wait on each HTTP request; without a timeout a stalled
    # connection would block the script forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set up request metadata and create ``weibo.csv`` with its header row."""
        self.url = 'https://data.weibo.com/index/ajax/newindex/getchartdata'
        # NOTE(review): the cookie below is a captured browser session and is
        # presumably short-lived; refresh it if the endpoints start rejecting
        # requests.
        self.headers = {
            "accept": "application/json",
            "content-length": "33",
            "content-type": "application/x-www-form-urlencoded",
            "cookie": "UM_distinctid=178012496bd3c8-04f68b8b1b347c-3a65420e-1fa400-178012496be231; SINAGLOBAL=983041799798.694.1614926878550; SCF=AgTqljDCCmzObHj3SxUmb-G8QE6in6kYdVDhbvPKcfwCHwUYrQEJhxJ1j7DGEiSDrHSXrCikp6D9OJzew__wEcg.; SUB=_2AkMX1uxTf8NxqwJRmf8UxWnhaIl_ygDEieKhih2IJRMxHRl-yT9kqnA_tRB6PFbCvJD89mizBhHaK3w_4aAdSN1v4nev; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WhalFGAxCzCvn45xoDZ7YkK; UOR=,,www.baidu.com; WEB3=7ef756770a4aba698ee9731722a669fa; login_sid_t=453429d52a4b723865d02445c94b085e; cross_origin_proto=SSL; _s_tentry=www.baidu.com; Apache=5777892168773.717.1628681736121; ULV=1628681736134:8:1:1:5777892168773.717.1628681736121:1625319792980",
            "origin": "https://data.weibo.com",
            "referer": "https://data.weibo.com/index/newindex?visit_type=trend&wid=1091324464527",
            "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
        }
        # Form payload for the chart-data endpoint; both fields are filled in
        # before each request (see post_data and main).
        self.data = {
            "wid": "",
            "dateGroup": "",
        }
        # keyword -> wid, so the same keyword is only looked up once.
        self._wid_cache = {}
        # The original author chose gbk (noted as covering more Chinese
        # characters); text that gbk cannot encode will raise
        # UnicodeEncodeError on write.
        self._file = open('weibo.csv', 'w', newline='', encoding='gbk')
        self.csv_w = csv.writer(self._file)
        # Header row: keyword, index value, time.
        self.csv_w.writerow(['关键字', '指数', '时间'])

    def close(self):
        """Close the CSV file if it is still open. Safe to call repeatedly."""
        handle = getattr(self, '_file', None)
        if handle is not None and not handle.closed:
            handle.close()

    def __enter__(self):
        """Return the spider itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the CSV file on leaving the ``with`` block."""
        self.close()

    def serch(self):
        """Resolve ``self.serch_text`` to a Weibo index ``wid``.

        The result is stored on ``self.wid`` and cached per keyword so that
        repeated calls for the same keyword do not hit the network again.

        Returns:
            The ``wid`` string with quote and backslash characters removed.

        Raises:
            IndexError: if the search response contains no ``<li wid=...>``
                element for the keyword.
        """
        keyword = self.serch_text
        cache = self.__dict__.setdefault('_wid_cache', {})
        if keyword in cache:
            self.wid = cache[keyword]
            return self.wid

        url = 'https://data.weibo.com/index/ajax/newindex/searchword'
        data = {
            'word': keyword
        }
        res = requests.post(url=url, headers=self.headers, data=data,
                            timeout=self.REQUEST_TIMEOUT).text
        wids = etree.HTML(res)
        # etree.HTML may yield None for an empty document; treat that the
        # same as "no match".
        matches = wids.xpath('//li/@wid') if wids is not None else []
        if not matches:
            raise IndexError(
                'no wid found in search response for keyword {!r}'.format(keyword)
            )
        # The attribute value arrives wrapped in escaped quotes; strip them.
        self.wid = matches[0].replace('"', '').replace('\\', '')
        cache[keyword] = self.wid
        return self.wid

    def post_data(self, url, data):
        """POST ``data`` to ``url`` and return the decoded JSON response.

        Before the request, the ``wid`` for the current keyword is resolved
        via :meth:`serch` and written into ``self.data['wid']``. Note that it
        is ``self.data`` that is updated, not the ``data`` argument; ``main``
        passes ``self.data`` for both.

        Returns:
            The parsed JSON body, or ``None`` when the status code is not 200
            or the body is not valid JSON.
        """
        wid = self.serch()
        self.data['wid'] = wid
        res = requests.post(url=url, headers=self.headers, data=data,
                            timeout=self.REQUEST_TIMEOUT)
        if res.status_code != 200:
            return None
        try:
            return res.json()
        except ValueError:
            # A 200 response whose body is not JSON is reported the same way
            # as any other failed request.
            return None

    def parse_data(self, data):
        """Extract the time and value series from ``data`` and save them.

        ``self.data_time`` and ``self.data_data`` receive the jsonpath results
        for ``$..x`` and ``$..s``. The first match of each is paired up
        element by element; every pair is printed and written to the CSV.

        Args:
            data: decoded response from :meth:`post_data`; may be ``None``.

        Returns:
            The number of rows written (0 when there was nothing to parse).
        """
        if not data:
            # Mirror jsonpath's "no match" result so the attributes always
            # exist after a call.
            self.data_time = False
            self.data_data = False
            return 0

        self.data_time = jsonpath(data, '$..x')
        self.data_data = jsonpath(data, '$..s')
        # jsonpath returns False when the expression matches nothing.
        if not self.data_time or not self.data_data:
            return 0

        written = 0
        for times, datas in zip(self.data_time[0], self.data_data[0]):
            print(datas)
            print(times)
            print('***' * 20)
            self.save_csv(datas, times)
            written += 1
        return written

    def save_csv(self, data, times):
        """Append one row (keyword, ``data``, ``times``) to the CSV file."""
        self.csv_w.writerow([str(self.serch_text), data, times])

    def show_data(self, xpoint, ypoint):
        """Plot ``ypoint`` against ``xpoint`` as a line chart and display it.

        ``main`` does not call this; after ``parse_data`` it can be invoked as
        ``show_data(self.data_time[0], self.data_data[0])``.

        Args:
            xpoint: sequence of x values (labelled "time").
            ypoint: sequence of y values (labelled "data").
        """
        # fname is the path to a downloaded font file; SourceHanSansSC-Bold.otf
        # must be present at this path for the Chinese title to render.
        zhfont1 = matplotlib.font_manager.FontProperties(fname="SourceHanSansSC-Bold.otf")
        x = np.array(xpoint)
        y = np.array(ypoint)
        # Title the chart with the keyword that was searched; fall back to the
        # original fixed text when no keyword has been set.
        keyword = getattr(self, 'serch_text', None) or '范冰冰'
        ply.title("微博指数分析----{}".format(keyword), fontproperties=zhfont1)
        # Label the x axis.
        ply.xlabel("time")
        # Label the y axis.
        ply.ylabel("data")
        ply.plot(x, y)
        # Only show every fifth x value as a tick to keep the axis readable.
        ply.xticks(x[::5])
        ply.show()

    def main(self):
        """Prompt for a keyword and save its index data for each date group.

        The CSV file is flushed at the end (also on error) so that rows
        already written reach the disk; the file stays open until
        :meth:`close` is called.
        """
        self.serch_text = input('请输入要搜索的关键词')
        list_data = ['1hour', '1day', '1month', '3month']
        try:
            for i in list_data:
                self.data['dateGroup'] = i
                res = self.post_data(self.url, self.data)
                if not self.parse_data(res):
                    # Report the gap instead of failing on a missing payload.
                    print('未获取到 {} 的数据'.format(i))
        finally:
            handle = getattr(self, '_file', None)
            if handle is not None and not handle.closed:
                handle.flush()



if __name__ == '__main__':
    # Script entry point: build the spider (which creates weibo.csv) and run
    # the interactive keyword workflow.
    spider = Webo_spider()
    spider.main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

权倾天下_code

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值