从新浪微博上获取疫情最新信息

# -*- coding:utf-8 -*-
"""
Author: Edgar
Created time:2/1/2020 12:06 PM
爬取新浪微博中的相关信息
"""
import os
import json
import requests
import pymysql


class Virus:
    """Scrape Sina's 2020 epidemic feed and mirror it into a local MySQL database.

    The class fetches a JSON document describing per-province, per-city and
    worldwide case counts, plus a paginated news timeline, and stores each
    record in one of four tables: ``Virus_province``, ``Virus_city``,
    ``Virus_world`` and ``Virus_timeline``.

    All SQL values are passed as bound parameters, so text coming from the
    remote feed (for example a headline containing a quote character) can
    neither break nor alter a statement. A field missing from a record is
    stored as SQL NULL.
    """

    # Seconds to wait on any HTTP request, so a stalled connection cannot
    # hang the scraper forever.
    REQUEST_TIMEOUT = 10

    # Settings handed to pymysql.connect(). They are keyword arguments
    # because PyMySQL 1.0 made connect() keyword-only.
    DB_CONFIG = {
        "host": "localhost",
        "user": "root",
        "password": "Edgar",
        "database": "virus",
    }

    # Table names are spelled exactly as in create_table(): MySQL table
    # names are case-sensitive on case-sensitive file systems.
    PROVINCE_TABLE = "Virus_province"
    CITY_TABLE = "Virus_city"
    WORLD_TABLE = "Virus_world"
    TIMELINE_TABLE = "Virus_timeline"

    PROVINCE_FIELDS = ("name", "ename", "value", "susNum", "deathNum", "cureNum")
    CITY_FIELDS = ("province", "name", "conNum", "susNum", "cureNum", "deathNum")
    WORLD_FIELDS = ("name", "value", "susNum", "deathNum", "cureNum")
    TIMELINE_FIELDS = ("url", "title", "media", "date")

    def __init__(self):
        super().__init__()
        self.url = "https://interface.sina.cn/news/wap/fymap2020_data.d.json"
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}

    # ------------------------------------------------------------------
    # HTTP
    # ------------------------------------------------------------------
    def get_json(self):
        """Fetch the epidemic feed and return it decoded from JSON.

        Returns:
            The decoded payload, or ``None`` when the request could not be
            made, the server answered with an error status, or the body was
            not valid JSON. A failure message is printed in those cases.
        """
        try:
            # ``headers=`` must be passed by keyword: the second positional
            # argument of requests.get() is ``params`` (the query string).
            response = requests.get(
                self.url, headers=self.header, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError):
            print("获取json文件失败")
            return None

    def download_json(self, filename='data.json'):
        """Save the epidemic feed to *filename* as JSON.

        If the file already exists the user is asked on stdin whether it may
        be overwritten. Nothing is written when the download failed or the
        user declines.

        Args:
            filename: Path of the file to write.
        """
        json_ = self.get_json()
        if json_ is None:
            # Download failed (already reported); do not write "null".
            return
        print(json_)

        if os.path.exists(filename) and not self._confirm_overwrite(filename):
            return
        with open(filename, "w") as file:
            json.dump(json_, file)

    @staticmethod
    def _confirm_overwrite(filename):
        """Ask until the user answers y/n; return True when overwriting is allowed."""
        while True:
            answer = input("该目录已经存在文件 %s,是否删除该文件(y/n):  " % filename)
            if answer in ('y', 'Y'):
                return True
            if answer in ('n', 'N'):
                return False
            print("输入错误,请重新输入: ")

    # ------------------------------------------------------------------
    # Database plumbing
    # ------------------------------------------------------------------
    @classmethod
    def _connect(cls):
        """Open a new MySQL connection using ``DB_CONFIG``."""
        return pymysql.connect(**cls.DB_CONFIG)

    @classmethod
    def _execute(cls, statements):
        """Run statements on one connection, commit, and always clean up.

        Args:
            statements: Iterable of ``(sql, params)`` pairs. ``params`` is a
                tuple bound to the ``%s`` placeholders in ``sql``, or
                ``None`` when the statement has no placeholders.
        """
        connect = cls._connect()
        try:
            cursor = connect.cursor()
            try:
                for sql, params in statements:
                    cursor.execute(sql, params)
            finally:
                cursor.close()
            connect.commit()
        finally:
            # Closed even when a statement raises, so no connection leaks.
            connect.close()

    @classmethod
    def _insert(cls, table, columns, values):
        """Insert one row; *table* and *columns* are trusted class constants."""
        sql = "INSERT INTO %s(%s) VALUES (%s)" % (
            table, ", ".join(columns), ", ".join(["%s"] * len(columns)))
        cls._execute([(sql, tuple(values))])

    @staticmethod
    def _pick(record, fields):
        """Return the values of *fields* from *record* as a tuple (None if absent)."""
        return tuple(record.get(field) for field in fields)

    @staticmethod
    def _city_names(province):
        """Return the province's city names joined by commas ('' if it has none)."""
        cities = province.get("city") or []
        return ",".join(str(city.get("name") or "") for city in cities)

    # ------------------------------------------------------------------
    # Schema and inserts
    # ------------------------------------------------------------------
    @staticmethod
    def create_table():
        """Create the four storage tables if they do not exist yet."""
        Virus._execute([
            # One row per province / region.
            ("CREATE TABLE IF NOT EXISTS Virus_province(name VARCHAR(60) NOT NULL, ename varchar(20), value varchar(20), susNum varchar(20), deathNum varchar(20), cureNum varchar(20), city TEXT)", None),
            # One row per city, tagged with its province.
            ("CREATE TABLE IF NOT EXISTS Virus_city(province VARCHAR(20),name VARCHAR(20) NOT NULL ,conNum VARCHAR(20), susNum VARCHAR(20), cureNum VARCHAR(20), deathNum VARCHAR(20))", None),
            # One row per country.
            ("CREATE TABLE IF NOT EXISTS Virus_world(name VARCHAR(20), value VARCHAR(20), susNum VARCHAR(20), deathNum VARCHAR(20), cureNum VARCHAR(20))", None),
            # One row per news timeline entry.
            ("CREATE TABLE IF NOT EXISTS Virus_timeline(url varchar(100), title varchar(200), media varchar(40), date varchar(30));", None),
        ])

    @staticmethod
    def insert_city(city):
        """Insert one city record (a dict with the ``CITY_FIELDS`` keys)."""
        Virus._insert(Virus.CITY_TABLE, Virus.CITY_FIELDS,
                      Virus._pick(city, Virus.CITY_FIELDS))

    @staticmethod
    def insert_province(province):
        """Insert one province record.

        Besides the ``PROVINCE_FIELDS`` keys, the names of the entries under
        the ``city`` key are stored comma-separated in the ``city`` column.
        """
        values = Virus._pick(province, Virus.PROVINCE_FIELDS)
        Virus._insert(Virus.PROVINCE_TABLE,
                      Virus.PROVINCE_FIELDS + ("city",),
                      values + (Virus._city_names(province),))

    @staticmethod
    def insert_world(world):
        """Insert one country record (a dict with the ``WORLD_FIELDS`` keys)."""
        Virus._insert(Virus.WORLD_TABLE, Virus.WORLD_FIELDS,
                      Virus._pick(world, Virus.WORLD_FIELDS))

    @staticmethod
    def insert_timeline(data):
        """Insert one timeline entry (a dict with the ``TIMELINE_FIELDS`` keys)."""
        Virus._insert(Virus.TIMELINE_TABLE, Virus.TIMELINE_FIELDS,
                      Virus._pick(data, Virus.TIMELINE_FIELDS))

    # ------------------------------------------------------------------
    # High-level operations
    # ------------------------------------------------------------------
    def _store(self, data_json):
        """Write an already-fetched feed to the database, then the timeline."""
        payload = data_json.get("data") or {}
        for province in payload.get("list") or []:
            self.insert_province(province)
            for city in province.get("city") or []:
                # City records do not carry their province; add it here.
                city["province"] = province.get("name")
                self.insert_city(city)

        for world in payload.get("worldlist") or []:
            self.insert_world(world)
        self.get_timeline()

    def upload_data(self):
        """Fetch the feed and append its records to the database.

        Does nothing when the feed could not be fetched.
        """
        data_json = self.get_json()
        if data_json is None:
            return
        self._store(data_json)

    def refresh_data(self):
        """Replace the database contents with freshly fetched data.

        The feed is fetched *before* the tables are emptied, so a failed
        download leaves the existing data untouched.
        """
        data_json = self.get_json()
        if data_json is None:
            return
        self._execute([
            ("TRUNCATE TABLE %s;" % table, None)
            for table in (self.PROVINCE_TABLE, self.CITY_TABLE,
                          self.WORLD_TABLE, self.TIMELINE_TABLE)
        ])
        self._store(data_json)

    def get_timeline(self):
        """Fetch the news timeline page by page and store every entry.

        Pages are requested starting from 0 until a request fails or a page
        comes back without entries.
        """
        url = "https://interface.sina.cn/wap_api/wap_std_subject_feed_list.d.json?component_id=_conf_13|wap_zt_std_theme_timeline|http://news.sina.cn/zt_d/yiqing0121&page={}"
        page = 0
        while True:
            try:
                response = requests.get(
                    url.format(page), headers=self.header,
                    timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
                body = response.json()
            except (requests.RequestException, ValueError):
                return
            result = (body or {}).get("result") or {}
            data = (result.get("data") or {}).get("data")
            if not data:
                return
            for entry in data:
                self.insert_timeline(entry)
            page += 1


if __name__ == '__main__':
    # Default action when run as a script: empty the tables and reload
    # them with freshly scraped data.
    #
    # Other entry points, handy for one-off runs:
    #   Virus.create_table()      create the MySQL tables (first run only)
    #   Virus().download_json()   dump the raw feed to data.json
    #   Virus().upload_data()     append data without truncating first
    #   Virus().get_timeline()    fetch and store only the news timeline
    Virus().refresh_data()

爬取部分内容如下:
virus_world:

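| name | value | susNum | deathNum | cureNum |
| --- | --- | --- | --- | --- |
| 中国 | 14411 | 19544 | 304 | 328 |
| 德国 | 8 | 3 | 0 | 0 |
| 西班牙 | 1 | 0 | 0 | 0 |
| 俄罗斯 | 2 | 0 | 0 | 0 |
| 柬埔寨 | 1 | 0 | 0 | 0 |
| 印度 | 2 | 0 | 0 | 0 |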

virus_city:

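| province | name | conNum | susNum | cureNum | deathNum |
| --- | --- | --- | --- | --- | --- |
| 北京 | 海淀区 | 41 | 0 | 0 | 0 |
| 北京 | 怀柔区 | 1 | 0 | 0 | 0 |
| 北京 | 丰台区 | 16 | 0 | 0 | 0 |
| 北京 | 大兴区 | 22 | 0 | 0 | 0 |
| 北京 | 东城区 | 3 | 0 | 0 | 0 |
| 北京 | 昌平区 | 12 | 0 | 0 | 0 |
| 北京 | 西城区 | 22 | 0 | 0 | 0 |
| 北京 | 朝阳区 | 35 | 0 | 0 | 0 |
| 北京 | 石景山区 | 5 | 0 | 0 | 0 |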

最新代码见 GitHub

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值