import json
import re
import openpyxl as ply
import requests
# Destination of the generated workbook.
path = r'E:\Datas\data-办公自动化\疫情数据采集.xlsx'
# Page whose HTML carries the per-area figures inside a "component" JSON array.
url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner'
# Present a browser user agent so the request is not served a stripped page.
headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, '
                         'like Gecko) Chrome/92.0.4515.131 Mobile Safari/537.36'}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely.
REQUEST_TIMEOUT = 10

# Title of the worksheet that receives the data.
SHEET_TITLE = '各省疫情数据'

# (column header written to the sheet, key read from each case record).
# 'crued' is the key exactly as the original script read it from the feed.
COLUMNS = (
    ('地点', 'area'),
    ('累计确诊人数', 'confirmed'),
    ('现有确诊', 'curConfirm'),
    ('新增确诊人数', 'confirmedRelative'),
    ('累计死亡人数', 'died'),
    ('累计治愈人数', 'crued'),
    ('新增本土无症状', 'asymptomaticLocalRelative'),
    ('新增本土', 'nativeRelative'),
    ('新增境外输入', 'overseasInputRelative'),
)

# Raw string: '\[' in a normal string literal is an invalid escape sequence.
# The match is greedy on purpose (same pattern as before): it runs to the
# last '],' on the line so nested arrays stay inside the captured group.
COMPONENT_PATTERN = re.compile(r'"component":\[(.*)\],')


def fetch_page(page_url=url, request_headers=headers, timeout=REQUEST_TIMEOUT):
    """Download the page and return its body as text.

    Args:
        page_url: Address to request.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait before the request is abandoned.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url=page_url, headers=request_headers, timeout=timeout)
    # Fail here rather than feeding an error page to the regex below.
    response.raise_for_status()
    return response.text


def extract_case_list(html):
    """Return the ``caseList`` records embedded in the page source.

    Args:
        html: Page source containing ``"component":[ ... ],``.

    Returns:
        The value stored under ``caseList`` in the embedded JSON object.

    Raises:
        ValueError: If the ``"component"`` array is not present in *html*.
        json.JSONDecodeError: If the captured text is not valid JSON.
        KeyError: If the decoded object has no ``caseList`` entry.
    """
    match = COMPONENT_PATTERN.search(html)
    if match is None:
        raise ValueError('"component" data not found in page; the page layout may have changed')
    component = json.loads(match.group(1))
    return component['caseList']


def build_row(case):
    """Map one case record to a worksheet row ordered like ``COLUMNS``.

    A key missing from *case* yields ``None`` (an empty cell) so that one
    incomplete record does not abort the whole export.
    """
    return [case.get(key) for _, key in COLUMNS]


def write_workbook(case_list, output_path=path):
    """Write the header row and one row per case to a new workbook file.

    Args:
        case_list: Iterable of case records (mappings keyed as in ``COLUMNS``).
        output_path: Where the workbook is saved.
    """
    wk = ply.Workbook()
    # NOTE(review): Workbook() presumably also carries its default empty
    # sheet, so the saved file has that sheet before this one. Left as-is to
    # keep the output layout unchanged - confirm whether it is wanted.
    sheet = wk.create_sheet(SHEET_TITLE)
    sheet.append([header for header, _ in COLUMNS])
    for case in case_list:
        sheet.append(build_row(case))
    wk.save(output_path)


def main():
    """Fetch the page, extract the case records, and save them to ``path``."""
    write_workbook(extract_case_list(fetch_page()))


if __name__ == '__main__':
    main()