疫情信息获取。

爬取目标:腾讯新闻,疫情数据。

确定信息。

在目标网页按 F12 打开开发者工具,先在全局数据中寻找,确认其中包含目标数据;接下来在 Network 面板中找到对应的请求,并在 Headers 中获得其链接地址。也可以直接点击搜索进行查找,较为方便(省份数据和全球数据也用同样的方法查找,注意图二)。

最终确定国内数据的接口地址为 https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list? (注意:下文代码实际请求的是 view.inews.qq.com/g2/getOnsInfo 系列接口。)

编写代码

导入库

import json
import requests#爬虫模块
import pandas as pd #数据处理模块

爬取程序。

# All three resources are served by the same endpoint and differ only in the
# value of the ``name`` query parameter.
_ONS_INFO_ENDPOINT = "https://view.inews.qq.com/g2/getOnsInfo?name="

# National epidemic data.
chinatotal_url = _ONS_INFO_ENDPOINT + "disease_other"
chinatotal_h5_url = _ONS_INFO_ENDPOINT + "disease_h5"
# Request URL for province / region information.
province_city_url = _ONS_INFO_ENDPOINT + "wuwei_ww_city_list_order"

def getChinaTotalData(chinatotal_url, timeout=10):
    """Request *chinatotal_url* and return its decoded ``data`` payload.

    The response body is parsed as JSON, and its ``data`` entry is then
    decoded a second time with ``json.loads`` (the code treats it as a
    JSON-encoded string).

    Args:
        chinatotal_url: URL to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The object obtained by decoding the response's ``data`` entry.

    Raises:
        requests.exceptions.RequestException: On timeout, connection
            failure, or a 4xx/5xx HTTP status.
        KeyError: If the JSON body has no ``data`` entry.
    """
    response = requests.get(chinatotal_url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # Original author's caution: do not fetch repeatedly; overly frequent
    # access may get the client detected.
    return json.loads(response.json()['data'])

def getChinaAbsData(chinatotal_h5_url, timeout=10):
    """Request *chinatotal_h5_url* and return the ``chinaTotal`` entry.

    The response body is parsed as JSON, its ``data`` entry is decoded a
    second time with ``json.loads``, and the ``chinaTotal`` value of the
    result is returned.

    Args:
        chinatotal_h5_url: URL to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The value stored under ``chinaTotal`` in the decoded payload.

    Raises:
        requests.exceptions.RequestException: On timeout, connection
            failure, or a 4xx/5xx HTTP status.
        KeyError: If ``data`` or ``chinaTotal`` is missing.
    """
    response = requests.get(chinatotal_h5_url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    payload = json.loads(response.json()['data'])
    return payload['chinaTotal']

def getProvinceTotalData(chinatotal_h5_url, timeout=10):
    """Request *chinatotal_h5_url* and return the first area's children.

    The response body is parsed as JSON, its ``data`` entry is decoded a
    second time with ``json.loads``, and ``areaTree[0]['children']`` of the
    result is returned.

    Args:
        chinatotal_h5_url: URL to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The ``children`` value of the first ``areaTree`` element
        (presumably one record per province -- verify against the API).

    Raises:
        requests.exceptions.RequestException: On timeout, connection
            failure, or a 4xx/5xx HTTP status.
        KeyError: If ``data``, ``areaTree`` or ``children`` is missing.
        IndexError: If ``areaTree`` is empty.
    """
    response = requests.get(chinatotal_h5_url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    payload = json.loads(response.json()['data'])
    return payload['areaTree'][0]['children']


# Each call below performs an HTTP request.
all_data = getChinaTotalData(chinatotal_url)
abs_data = getChinaAbsData(chinatotal_h5_url)

# Cumulative nationwide series. Original author's note: the series starts
# from Jan 03, and the site only provides the most recent thirty days of data.
chinaDayList = all_data["chinaDayList"]
date_list1 = []  # stays empty: nothing below appends to it
year_day_list1 = []
total_confirm = []
total_suspect = []
total_dead = []
total_heal = []
total_importedCase = []

# Record key -> destination list, listed in the order the values are read.
cumulative_series = {
    'confirm': total_confirm,
    'suspect': total_suspect,
    'dead': total_dead,
    'heal': total_heal,
    'importedCase': total_importedCase,
}
for total in chinaDayList:
    # Label is "<y>/<date[:2]>/<date[3:]>"; this assumes "date" looks like
    # "MM.DD" -- TODO confirm against the API response.
    year_day_list1.append(
        "/".join((total["y"], total['date'][:2], total['date'][3:]))
    )
    for field, series in cumulative_series.items():
        series.append(int(total[field]))



# Per-day (newly added) nationwide series; per the original author's note it
# starts from Jan 03.
chinaDayAddList = all_data["chinaDayAddList"]
date_list2 = []
everyday_confirm = []
everyday_suspect = []
everyday_dead = []
everyday_heal = []
everyday_importedCase = []

# Record key -> destination list, listed in the order the values are read.
daily_series = {
    'confirm': everyday_confirm,
    'suspect': everyday_suspect,
    'dead': everyday_dead,
    'heal': everyday_heal,
    'importedCase': everyday_importedCase,
}
for everyday in chinaDayAddList:
    # Label is "<y>/<date[:2]>/<date[3:]>"; this assumes "date" looks like
    # "MM.DD" -- TODO confirm against the API response.
    date_list2.append(
        "/".join((everyday["y"], everyday['date'][:2], everyday['date'][3:]))
    )
    for field, series in daily_series.items():
        series.append(int(everyday[field]))


# Per-area records returned by getProvinceTotalData (another HTTP request).
areaTree = getProvinceTotalData(chinatotal_h5_url)
province_name = []
province_total_confirm = []
province_total_suspect = []  # stays empty: suspect counts are not collected
province_total_dead = []
province_total_heal = []
province_today_confirm = []

for province in areaTree:
    province_name.append(province['name'])
    # Cumulative figures sit under 'total'; the same-day figure under 'today'.
    totals = province['total']
    province_total_confirm.append(totals['confirm'])
    province_today_confirm.append(province['today']['confirm'])
    province_total_dead.append(totals['dead'])
    province_total_heal.append(totals['heal'])


将数据转化为表格(pandas)。

# Nationwide cumulative table: one column per series collected above.
china_total_columns = {
    '日期': year_day_list1,
    '累计确诊': total_confirm,
    '疑似病例': total_suspect,
    '累计死亡': total_dead,
    '累计治愈': total_heal,
    '累计境外输入病例': total_importedCase,
}
china_total_data = pd.DataFrame(china_total_columns)
# Bare expression: shows the last 10 rows in an interactive/notebook session;
# when run as a plain script the result is discarded.
china_total_data.tail(10)


# Nationwide per-day table: one column per daily series collected above.
# Column labels are kept exactly as originally written.
china_daily_columns = {
    '日期': date_list2,
    '今日确诊': everyday_confirm,
    '疑似病例': everyday_suspect,
    '今日死亡': everyday_dead,
    '今日_治愈': everyday_heal,
    '今日_境外输入病例': everyday_importedCase,
}
china_daily_data = pd.DataFrame(china_daily_columns)
# Bare expression: shows the last rows in an interactive/notebook session;
# when run as a plain script the result is discarded.
china_daily_data.tail()

# Per-province table. No suspect column is included: province_total_suspect
# is never populated.
province_columns = {
    '省份': province_name,
    '今日新增': province_today_confirm,
    '累计确诊': province_total_confirm,
    '死亡': province_total_dead,
    '治愈': province_total_heal,
}
province_total_data = pd.DataFrame(province_columns)
# Bare expression: shows the first rows in an interactive/notebook session;
# when run as a plain script the result is discarded.
province_total_data.head()

文件展示

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

大数据白白

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值