成品如下
思路
- 明确需求
采集确诊人数 新增人数
数据所在位置:https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner
- 代码流程:爬虫四大步骤,外加数据可视化分析
1.发送请求 https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner
2.获取数据 网页源代码
3.解析数据 筛选需要数据
4.保存数据
5.做数据可视化分析
具体步骤
导入所需库
import json
import re
import requests # 发送网络请求
import csv
from pyecharts.charts import Map
from pyecharts import options as opts
import pandas as pd
发起请求获取数据
url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner'
# 1. Send the request.
#    - A browser-like User-Agent avoids trivial bot filtering.
#    - A timeout keeps the script from hanging forever on a dead connection
#      (requests.get with no timeout waits indefinitely).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
}
response = requests.get(url=url, headers=headers, timeout=10)
# Fail fast with a clear HTTPError instead of silently parsing an error page.
response.raise_for_status()
# 2. Grab the page source (HTML text) for the parsing step below.
html_data = response.text
解析数据
# 3. Parse: the page embeds its data as a JSON blob inside the HTML, so pull
#    the "component" array out with a regex, then decode it with json.
#    The pattern is a raw string -- '"component":\[' with a plain string is an
#    invalid escape sequence and warns on modern Python.
matches = re.findall(r'"component":\[(.*)\],', html_data)
if not matches:
    # Give a clear error instead of an opaque IndexError when the page layout changes.
    raise ValueError('could not locate the "component" JSON blob in the page source')
# Convert the JSON text into Python objects (dict / list).
json_data = json.loads(matches[0])
# caseList holds one record (dict) per region.
caseList = json_data['caseList']
存储数据
# 4. Save: one CSV row per region.
#    The file is opened ONCE before the loop (the original re-opened it for
#    every single row) and with an explicit 'gbk' encoding so it matches the
#    encoding used when the file is read back for the visualization step
#    (the original relied on the locale default, which is not 'gbk' outside
#    of Chinese-locale Windows).
with open('data.csv', mode='a', newline='', encoding='gbk') as f:
    csv_writer = csv.writer(f)
    for case in caseList:
        area = case['area']                            # region name
        confirmedRelative = case['confirmedRelative']  # newly added cases (字段名提示 新增)
        curConfirm = case['curConfirm']                # currently confirmed
        confirmed = case['confirmed']                  # cumulative confirmed
        crued = case['crued']                          # cured
        died = case['died']                            # deaths
        csv_writer.writerow([area, confirmedRelative, curConfirm, confirmed, crued, died])
数据可视化
# 5. Visualize: choropleth map of current confirmed cases per region.
#    The CSV written above has no header row, so supply the column names
#    explicitly -- the original read it with an implicit header, which made
#    pandas silently consume the first data row as column names and then
#    fail on df['area'].
columns = ['area', 'confirmedRelative', 'curConfirm', 'confirmed', 'crued', 'died']
df = pd.read_csv('data.csv', encoding='gbk', header=None, names=columns)
x = df['area'].values.tolist()        # region names
y = df['curConfirm'].values.tolist()  # current confirmed counts
china_map = (
    Map()
    .add('现有确诊', [list(pair) for pair in zip(x, y)], 'china')
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各地区确诊人数'),
        # Piecewise legend with the color scale capped at 200.
        visualmap_opts=opts.VisualMapOpts(max_=200, is_piecewise=True),
    )
)
china_map.render("yang.html")
完成