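# Prerequisite: Python 3
# Third-party libraries: pip3 install urllib3; pip3 install BeautifulSoup4; pip3 install bottle
# (Note: the script itself imports the standard-library urllib.request, not urllib3.)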
import json
import urllib.request

from bottle import route, run, request
from bs4 import BeautifulSoup
def _parse_city_air_quality(html_text):
    """Extract one air-quality record per city from the ranking page HTML.

    Args:
        html_text: Decoded HTML of the ranking page.

    Returns:
        A list of dicts with the keys ``'city'``, ``'pm25'`` and ``'aqi'``
        (all text taken from the page). Only the first entry seen for each
        city name is kept.
    """
    # Python 3 requires the parser name to be passed explicitly.
    soup = BeautifulSoup(html_text, 'html.parser')

    records = []
    seen_cities = set()  # O(1) duplicate check instead of rescanning records
    for item in soup.find_all('li', 'pj_area_data_item'):
        name_tag = item.find('a', 'pjadt_location')
        pm25_tag = item.find('span', 'pjadt_pm25')
        aqi_tag = item.find('span', 'pjadt_aqi')
        # find() returns None when a tag is missing; skip such incomplete
        # entries instead of failing the whole request with AttributeError.
        if name_tag is None or pm25_tag is None or aqi_tag is None:
            continue

        city = name_tag.get_text()
        if city in seen_cities:
            continue
        seen_cities.add(city)

        records.append({
            'city': city,
            # Drop the unit suffix, then trim surrounding whitespace.
            'pm25': pm25_tag.get_text().replace('μg/m³', '').strip(),
            'aqi': aqi_tag.get_text(),
        })
    return records


@route('/getCitysAirQualityData')
def getCitysAirQualityData():
    """Return the air-quality data of all cities as a JSON string.

    Downloads the ranking page, parses the per-city entries and wraps them
    in a response envelope with the fields ``status``, ``msg`` and ``data``.

    Returns:
        A JSON-formatted string whose ``data`` field is a list of objects
        with the keys ``city``, ``pm25`` and ``aqi``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched (raised by
            ``urlopen``; a timeout may also surface as ``socket.timeout``).
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    url = "http://www.pm25.com/rank.html"
    # Bound the wait so a stalled upstream cannot hang this handler forever.
    timeout_seconds = 30

    page_request = urllib.request.Request(url)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(page_request, timeout=timeout_seconds) as page:
        html_text = page.read().decode('utf-8')

    data_string = json.dumps(_parse_city_air_quality(html_text))
    return "{\"status\":\"101\",\"msg\":\"操作成功\",\"data\":" + data_string + "}"
# Start the bottle server, listening on all interfaces at port 8080
# with debug mode switched on.
run(
    host='0.0.0.0',
    port=8080,
    debug=True,
)