此文件为一个脚本,需要通过调用才能使用。
用法:传入具体省份的简称,例如
txt = "广东"
然后调用 getDetailCityInfo(txt) 即可。
#爬取具体的县数据需要丁香医生,但是丁香医生是需要手机微信看,只能通过手机下载fiddler证书然后设置代理本机WiFi,才能在电脑上面抓包
#后来发现了 pc端的页面,可以抓取到丁香医生的数据,数据在js里面,我们直接urllib.request整个网页,然后用bs4进行获取具体细节
import json
import time
import urllib.request
import urllib.parse
import json
import csv
import time
from bs4 import BeautifulSoup
def getDetailCityInfo(txt):
    """Return the cumulative confirmed count for each city of one province.

    Scrapes the DXY (丁香医生) epidemic page: the per-province data is
    embedded in a ``<script id="getAreaStat">`` tag as a JS assignment
    wrapping a JSON array, so we fetch the page, strip the JS wrapper
    and parse the JSON.

    Args:
        txt: Province short name, e.g. '广东' or '天津'.

    Returns:
        A list of ``(cityName, confirmedCount)`` tuples for the matching
        province. Cumulative ``confirmedCount`` is used because the
        current-confirmed figure is often 0. Returns an empty list when
        the province is not found or the page no longer carries the
        expected script tag (instead of crashing with IndexError).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'
    request = urllib.request.Request(url=url, headers=headers)
    # Context manager closes the HTTP connection instead of leaking it.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode()

    soup = BeautifulSoup(html, 'lxml')
    node = soup.find('script', id="getAreaStat")
    if node is None or node.string is None:
        # Page layout changed — nothing parseable; fail soft.
        return []

    # The script body looks like:
    #   try { window.getAreaStat = [ ...json... ]}catch(e){}
    # Strip the JS wrapper to leave the bare JSON array.
    payload = str(node.string)
    payload = payload.replace('try { window.getAreaStat = ', ' ')
    payload = payload.replace('}catch(e){}', ' ')
    cityData = json.loads(payload)

    # Collect (cityName, confirmedCount) for the requested province only.
    # Province short names are unique, so stop after the first match.
    finalDetailData = []
    for province in cityData:
        if province['provinceShortName'] == txt:
            for city in province['cities']:
                finalDetailData.append((city['cityName'], city['confirmedCount']))
            break
    return finalDetailData