# Three major websites, four articles in total
# Remember to change the URL address
# Experiment results
# Epidemic data _ 163 (NetEase)
# -*- coding: utf-8 -*-
"""
爬取疫情数据:城市、确诊人数、治愈人数、死亡人数、日期
网易:https://news.163.com/special/epidemic/
"""
#台湾、香港、澳门城市为空,需要单独提取
import requests
import pandas as pd
import os
def get_page(url):
    """Fetch *url* and return its body parsed as JSON.

    Parameters
    ----------
    url : str
        Address of the epidemic-data API endpoint.

    Returns
    -------
    dict on success; the empty string ``""`` on any failure
    (kept for backward compatibility with existing callers).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }
    try:
        # timeout prevents the scraper from hanging forever on a stalled server
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        # normalize the declared encoding to the detected one before decoding
        r.encoding = r.apparent_encoding
        return r.json()
    except Exception as e:
        # best-effort scraper: report and signal failure with the sentinel ""
        print("Error", e)
        return ""
def parse_page(data_json):
data = data_json['data']['areaTree'][0]['children']
all_data = []
for i in data: #遍历每一个省份
if len(i['children']) != 0:
province = i['name']
date = i['lastUpdateTime'].split(' ')[0]
for j in i['children']: #遍历每一个省内城市
data_one = {
} #存放每一个城市信息
#省份
data_one['province'] = province
#城市
data_one['city'] = j['name']
#确诊人数
data_one['sureNum'] = j['total']['confirm']
#治愈人数
data_one['cureNum'] = j['total']['heal']
#死亡人数
data_one['deathNum'] = j['total']['dead']
#日期
data_one['date'] = date
all_data.append(data_one)
else:
#台湾、香港、澳门城市为空,需要单独提取
data_one = {
}
data_one['province'] = i['name']
data_one['city'] = i['name']
data_one['sureNum'] = i['total']['confirm']
data_one['deathNum'] = i['total']['dead']
data_one['cureNum'] = i['total']['heal']
data_one['date'] = i['lastUpdateTime'].split(' ')[0