数据分析师工作中常规流程一般是:数据获取、数据处理、数据分析展示等。
本篇通过国内疫情数据实现数据从爬取到展示的过程。
介绍
py版本:python 3.8
目标:绘制全国疫情图。
思路
通过以下三个方法实现:
- 爬取国内疫情数据:data_download(),引用包 requests、json。
  1)访问网站获取数据;
  2)将数据保存为 json 文件。
- 将数据转存到 Excel:cpdata_toexcel(),引用包 openpyxl、pandas、json。
  1)从 json 文件中抽取所需数据,字段需求:省份、地市、总确诊人数、总疑似病例、总死亡人数。
  2)创建 Excel 表并保存数据。
- 读取文件数据画疫情地图:show_data(),引用包 pandas、pyecharts。
脚本
引用包
import requests
import json
import pandas as pd
import openpyxl
from pyecharts.charts import Bar
from pyecharts.charts import Map
from pyecharts import options as opts
方法一:爬取国内疫情数据
def data_download():
    """Download the domestic epidemic data and save it to ``./国内疫情.json``.

    Sends a GET request to the Tencent news endpoint, parses the JSON
    envelope, decodes the JSON string stored under its ``data`` key, and
    writes the decoded object back out as pretty-printed JSON.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: if the response envelope has no ``data`` key.
    """
    china_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36',
        'referer': 'https://news.qq.com/zt2020/page/feiyan.htm'
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=china_url, headers=headers, timeout=10)
    # Fail with a clear HTTP error instead of an obscure JSON parsing error.
    response.raise_for_status()
    envelope = response.json()

    # The 'data' field is itself a JSON-encoded string; decode it to a dict.
    data = json.loads(envelope['data'])

    # cpdata_toexcel() reads this file as GBK, so write it as GBK explicitly
    # rather than relying on the platform's default encoding (which is only
    # GBK on Chinese-locale Windows).
    with open('./国内疫情.json', 'w', encoding='GBK') as f:
        f.write(json.dumps(data, ensure_ascii=False, indent=2))
方法二:将数据转存到excel
def cpdata_toexcel():
    """Flatten the saved epidemic JSON into a per-city table and save it to Excel.

    Reads ``./国内疫情.json`` (GBK-encoded), takes the first entry of
    ``areaTree`` as the national node, and produces one row per city with the
    columns ``province``, ``city``, ``confirm``, ``suspect`` and ``dead``;
    the three counts come from each city's ``total`` record. The table is
    printed and then written to the ``国内疫情`` sheet of ``国内疫情.xlsx``,
    replacing any existing file of that name.

    Raises:
        FileNotFoundError: if the JSON file does not exist.
        KeyError: if an expected key is missing from the JSON structure.
    """
    with open('./国内疫情.json', 'r', encoding='GBK') as f:
        data = json.load(f)

    # areaTree[0] is the national node; its children are the provinces.
    province_nodes = data['areaTree'][0]['children']

    # One flat record per city, e.g. {'province': 湖北, 'city': 武汉, ...}.
    rows = []
    for province_node in province_nodes:
        province = province_node['name']
        for city_node in province_node['children']:
            total = city_node['total']
            rows.append({
                'province': province,
                'city': city_node['name'],
                'confirm': total['confirm'],
                'suspect': total['suspect'],
                'dead': total['dead'],
            })

    # Passing the columns explicitly keeps the header row intact even when
    # no city rows were found.
    chinaTotalData = pd.DataFrame(
        rows, columns=['province', 'city', 'confirm', 'suspect', 'dead']
    )
    print(chinaTotalData)

    # The context manager saves and closes the workbook on exit. This replaces
    # the manual writer.book / writer.save() sequence, which no longer works
    # on current pandas releases.
    with pd.ExcelWriter('国内疫情.xlsx', engine='openpyxl') as writer:
        chinaTotalData.to_excel(writer, index=False, sheet_name='国内疫情')
方法三:读取文件数据画疫情地图
def show_data():
    """Render a map of confirmed cases per province to ``d_map.html``.

    Reads the per-city table from the ``国内疫情`` sheet of
    ``./国内疫情.xlsx``, sums the ``confirm`` column per province, and plots
    the totals on a pyecharts map of China with a piecewise colour legend.
    """
    # cpdata_toexcel() writes the table to the '国内疫情' sheet; a sheet named
    # 'Sheet1' is never created, so reading it would fail.
    df = pd.read_excel('./国内疫情.xlsx', sheet_name='国内疫情')

    # Total confirmed cases per province. Only 'confirm' is aggregated so the
    # text 'city' column is never passed to sum().
    data = df.groupby(by='province', as_index=False)['confirm'].sum()
    data_list = list(zip(data['province'].values.tolist(),
                         data['confirm'].values.tolist()))

    # Colour buckets for the legend, from lightest (fewest cases) to darkest.
    pieces = [
        {"max": 9, "min": 0, "label": "0-9", "color": "#FFE4E1"},
        {"max": 99, "min": 10, "label": "10-99", "color": "#FF7F50"},
        {"max": 499, "min": 100, "label": "100-499", "color": "#F08080"},
        {"max": 999, "min": 500, "label": "500-999", "color": "#CD5C5C"},
        {"max": 99999, "min": 1000, "label": ">=1000", "color": "#990000"},
    ]

    # ------------------------ map of China
    (
        Map()
        .add(series_name="确诊病例", data_pair=data_list, maptype='china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title="疫情地图"),
            visualmap_opts=opts.VisualMapOpts(is_piecewise=True, pieces=pieces),
        )
        .render("d_map.html")
    )
效果图:
相关文章:【python】自动化办公之excel常用操作实战详解(xlwings)