博客推荐:
http://www.pianshen.com/article/7251318624/#70_AQI_312
功能1.0 AQI计算
""""
auther:Susan
function:AQI Calculation
version:v1.0
data:2019/4/27
"""
def cal_linear(iaqi_lo, iaqi_hi, bp_lo, bp_hi, cp):
    """Linearly map concentration cp from the breakpoint interval
    [bp_lo, bp_hi] onto the IAQI interval [iaqi_lo, iaqi_hi]."""
    fraction = (cp - bp_lo) / (bp_hi - bp_lo)
    return iaqi_lo + fraction * (iaqi_hi - iaqi_lo)
def cal_pm_iaqi(pm_val):
    """Return the IAQI (Individual Air Quality Index) for a PM2.5
    concentration in ug/m^3, or None when pm_val is out of range.

    Breakpoint table follows the Chinese AQI standard (HJ 633-2012).
    """
    if 0 <= pm_val < 35:
        iaqi = cal_linear(0, 50, 0, 35, pm_val)
    elif 35 <= pm_val < 75:
        iaqi = cal_linear(50, 100, 35, 75, pm_val)
    elif 75 <= pm_val < 115:
        iaqi = cal_linear(100, 150, 75, 115, pm_val)
    elif 115 <= pm_val < 150:
        iaqi = cal_linear(150, 200, 115, 150, pm_val)
    else:
        # Concentration outside the supported range.
        iaqi = None
    # BUG FIX: the original never returned, so every call yielded None
    # and cal_api's max() compared None values.
    return iaqi
def cal_co_iapi(co_val):
    """Return the IAQI for a CO concentration in mg/m^3, or None when
    co_val is out of range."""
    if 0 <= co_val < 3:
        iaqi = cal_linear(0, 50, 0, 3, co_val)
    elif 3 <= co_val < 5:
        # BUG FIX: breakpoints were (2, 4), which did not match the
        # 3 <= co_val < 5 condition and made the scale discontinuous.
        iaqi = cal_linear(50, 100, 3, 5, co_val)
    elif 5 <= co_val < 15:
        # BUG FIX: upper breakpoint was 14 while the condition allows
        # values up to (but excluding) 15.
        iaqi = cal_linear(100, 150, 5, 15, co_val)
    else:
        # Concentration outside the supported range.
        iaqi = None
    # BUG FIX: the original never returned the computed value.
    return iaqi
def cal_api(param_list):
    """Compute the overall AQI as the maximum of the pollutant IAQIs.

    param_list -- two-element list: [pm2.5 value, co value]
    """
    pm_val, co_val = param_list[0], param_list[1]
    # The AQI is defined as the worst (largest) individual index.
    iaqi_values = [cal_pm_iaqi(pm_val), cal_co_iapi(co_val)]
    return max(iaqi_values)
def main():
    """Prompt for PM2.5 and CO readings and print the resulting AQI."""
    print('Please enter this information,and separate by spaces.')
    input_str = input('(1)PM2.5: (2)CO:')
    fields = input_str.split(' ')
    # Convert both readings to floats in input order: PM2.5 first, CO second.
    readings = [float(fields[0]), float(fields[1])]
    # Delegate the actual AQI computation.
    aqi_val = cal_api(readings)
    print('Air quality index value: {} '.format(aqi_val))


if __name__ == '__main__':
    main()
功能2.0 JSON读取
# -*- coding:utf-8 -*-
""""
auther:Susan
function:JSON reading
version:v2.0
data:2019/4/27
"""
import json
def process_json_file(filepath):
    """Load a JSON file and return the parsed city list.

    BUG FIX: the original opened the file and never closed it; the
    with-block guarantees the handle is released even if json.load raises.
    """
    with open(filepath, mode='r', encoding='utf-8') as f:
        return json.load(f)
def main():
    """Read a city JSON file, keep the five lowest-AQI cities, and save
    them to top5_aqi.json."""
    # BUG FIX: prompt said 'filemane' instead of 'filename'.
    filepath = input('Please input a json filename:')
    city_list = process_json_file(filepath)
    # Sort ascending by AQI so the cleanest cities come first.
    city_list.sort(key=lambda city: city['aqi'])
    top5_list = city_list[:5]
    # with-block replaces the manual open()/close() pair so the file is
    # closed even if json.dump raises.
    with open('top5_aqi.json', mode='w', encoding='utf-8') as f:
        # ensure_ascii=False keeps Chinese city names readable in the output.
        json.dump(top5_list, f, ensure_ascii=False)
    print(city_list)


if __name__ == '__main__':
    main()
功能3.0 CSV读取
# -*- coding:utf-8 -*-
""""
auther:Susan
function:CSV reading
version:v3.0
data:2019/4/27
"""
import json
import csv
def process_json_file(filepath):
    """Load a JSON file and return the parsed city list.

    BUG FIX: the original opened the file and never closed it; the
    with-block guarantees the handle is released even if json.load raises.
    """
    with open(filepath, mode='r', encoding='utf-8') as f:
        return json.load(f)
def main():
    """Read a city JSON file, sort by AQI, and write every city as a row
    of aqi1.csv (header row taken from the first record's keys)."""
    # BUG FIX: prompt said 'filemane' instead of 'filename'.
    filepath = input('Please input a json filename:')
    city_list = process_json_file(filepath)
    # Ascending AQI order determines the row order in the CSV.
    city_list.sort(key=lambda city: city['aqi'])
    lines = []
    # Header row: the field names of the first record.
    lines.append(city_list[0].keys())
    for city in city_list:
        lines.append(list(city.values()))
    # Removed dead code: a top5 slice was computed but never used.
    # newline='' stops the csv module from emitting blank lines on Windows;
    # with-block ensures the output file is closed.
    with open('aqi1.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(lines)


if __name__ == '__main__':
    main()
newline=''
新行不加入任何字符,不指定则会在新行末尾加入空行
根据输入的文件判断是JSON格式还是CSV格式,并进行相应的操作
功能4.0 判断文件格式
# -*- coding:utf-8 -*-
""""
auther:Susan
function:Judge file format
version:v4.0
data:2019/5/8
CSV:comma separated values
"""
import json
import csv
import os
def process_json_file(filepath):
    """Load a JSON file and print the parsed city list.

    NOTE: unlike earlier versions, this one prints rather than returns
    the data (it returns None).  Commented-out legacy open/close code
    was removed.
    """
    with open(filepath, mode='r', encoding='utf-8') as f:
        city_list = json.load(f)
    print(city_list)
def process_csv_file(filepath):
    """Print each row of a CSV file as one comma-joined line."""
    # newline='' lets the csv module handle line endings itself.
    with open(filepath, mode='r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f):
            # Re-join the parsed fields with commas for display.
            print(','.join(row))
def main():
    """Dispatch a file to the JSON or CSV processor based on its extension."""
    filepath = input('Please input a filemane:')
    # splitext yields (root, extension); only the extension matters here.
    _, file_ext = os.path.splitext(filepath)
    if file_ext == '.json':
        process_json_file(filepath)
    elif file_ext == '.csv':
        process_csv_file(filepath)
    else:
        print('Unsupported file format!')


if __name__ == '__main__':
    main()
功能5.0 利用爬虫做实时计算
# -*- coding:utf-8 -*-
""""
auther:Susan
function:Use crawlers for real-time calculations
version:v5.0
data:2019/5/8
Access the webpage through the crawler and display it to the user
"""
import requests
def get_html_text(url):
    """Fetch url over HTTP and return the response body as text.

    BUG FIX: removed a leftover debug print of the status code and
    added raise_for_status() so a non-2xx response raises
    requests.HTTPError instead of silently returning an error page.
    """
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    return r.text
def main():
    # Prompt for a city name in pinyin and build the pm25.in detail URL.
    city_pinyin = input('Please enter the city pinyin:')
    url = 'http://pm25.in/'+city_pinyin
    url_text = get_html_text(url)
    # Literal HTML fragment that immediately precedes the AQI value on the
    # page, found by manual inspection of pm25.in's markup.
    # NOTE(review): extremely brittle — any whitespace or markup change on
    # the site breaks the find() below.
    aqi_div = ''' <div class="span12 data">
<div class="span1">
<div class="value">
'''
    # Locate the marker; the AQI number starts right after it.
    index = url_text.find(aqi_div)
    begin_index = index + len(aqi_div)
    # Assumes the AQI is exactly two characters wide — TODO confirm for
    # one- and three-digit AQI values.
    end_index = begin_index + 2
    aqi_val = url_text[begin_index:end_index]
    print('Air quality:{}'.format(aqi_val))
    # print(url_text)
if __name__ == '__main__':
    main()
• 为了能有效地提取并利用网络信息并工作提高效率,出现了网络爬虫
• 利用网络爬虫实时获取城市的空气质量
• 高效地解析和处理HTML,beautifulsoup4
功能6.0 利用网络爬虫实时获取城市的空气质量
# -*- coding:utf-8 -*-
""""
auther:Susan
function:Parse HTML content more efficiently by BeautifilSoup
version:v8.0
data:2019/5/8
"""
import requests
from bs4 import BeautifulSoup
def get_city_aqi(city_pinyin):
    """Scrape pm25.in for one city and return its pollutant readings as
    a list of (caption, value) string pairs."""
    url = 'http://pm25.in/' + city_pinyin
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')
    span_divs = soup.find_all('div', {'class': 'span1'})
    readings = []
    # Only the first eight span1 blocks hold pollutant readings.
    for idx in range(8):
        block = span_divs[idx]
        caption = block.find('div', {'class': 'caption'}).text.strip()
        value = block.find('div', {'class': 'value'}).text.strip()
        readings.append((caption, value))
    return readings
def main():
    """Ask for a city's pinyin name and print its pollutant readings."""
    city_pinyin = input('Please enter the city pinyin:')
    readings = get_city_aqi(city_pinyin)
    print('Air quality:{}'.format(readings))


if __name__ == '__main__':
    main()
Python strip() 方法用于移除字符串头尾指定的字符(默认为空格或换行符)或字符序列。
注意:该方法只能删除开头或是结尾的字符,不能删除中间部分的字符。
功能7.0 利用beautifulsoup4获取所有城市的空气质量
# -*- coding:utf-8 -*-
""""
auther:Susan
function:BeautifilSoup
version:v7.0
data:2019/5/10
"""
import requests
from bs4 import BeautifulSoup
def get_city_aqi(city_pinyin):
    """Fetch the pm25.in page for city_pinyin and return the first eight
    (caption, value) pollutant pairs shown on it."""
    page = requests.get('http://pm25.in/' + city_pinyin, timeout=30)
    soup = BeautifulSoup(page.text, 'lxml')
    divs = soup.find_all('div', {'class': 'span1'})

    def _pair(div):
        # Each span1 block contains a caption div and a value div.
        caption = div.find('div', {'class': 'caption'}).text.strip()
        value = div.find('div', {'class': 'value'}).text.strip()
        return (caption, value)

    # Only the first eight blocks hold pollutant readings.
    return [_pair(divs[i]) for i in range(8)]
def get_all_cities():
    """Return (city_name, city_pinyin) tuples scraped from the pm25.in
    front page."""
    response = requests.get('http://pm25.in/', timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')
    # The second div.bottom section holds the full list of city links.
    city_div = soup.find_all('div', {'class': 'bottom'})[1]
    cities = []
    for link in city_div.find_all('a'):
        # href looks like '/beijing'; drop the leading slash for the pinyin.
        cities.append((link.text, link['href'][1:]))
    return cities
def main():
    """Print the air-quality readings for every city listed on pm25.in."""
    for city_name, city_pinyin in get_all_cities():
        city_aqi = get_city_aqi(city_pinyin)
        print(city_name, city_aqi)


if __name__ == '__main__':
    main()
功能8.0 将获取的所有城市空气质量保存成CSV数据文件
# -*- coding:utf-8 -*-
""""
auther:Susan
function:
1.Get the website of all cities AQI:BeautifilSoup
2.Real-time AQI preservation
version:v6.0
data:2019/5/8
"""
import requests
from bs4 import BeautifulSoup
import csv
def get_city_aqi(city_pinyin):
    """Fetch the pm25.in page for a city and return its eight pollutant
    values as strings, in page order.

    Unlike earlier versions this keeps only the values (no captions) so
    each result lines up with a fixed CSV header row.  Removed the
    commented-out caption code and its now-dead caption lookup.
    """
    url = 'http://pm25.in/' + city_pinyin
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    div_list = soup.find_all('div', {'class': 'span1'})
    city_aqi = []
    # Only the first eight span1 blocks hold pollutant readings.
    for i in range(8):
        div_content = div_list[i]
        value = div_content.find('div', {'class': 'value'}).text.strip()
        city_aqi.append(value)
    return city_aqi
def get_all_cities():
    """Scrape the pm25.in front page and return a list of
    (city_name, city_pinyin) tuples for every listed city."""
    r = requests.get('http://pm25.in/', timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    # The second div.bottom section holds the full city link list.
    city_div = soup.find_all('div', {'class': 'bottom'})[1]
    # Anchor text is the city name; the href ('/pinyin') yields the
    # pinyin once the leading slash is stripped.
    return [(a.text, a['href'][1:]) for a in city_div.find_all('a')]
def main():
    """Scrape AQI data for every city and save it to China_city_aqi.csv,
    printing a progress message every ten cities."""
    city_list = get_all_cities()
    # BUG FIX: the ozone column was spelled '03/8h' (zero-three); the
    # pollutant is O3 (capital letter O).
    header = ['city', 'AQI', 'PM2.5/1h', 'PM10/1h', 'CO/1h', 'NO2/1h', 'O3/8h', 'SO2/1h']
    # newline='' keeps the csv module from writing blank lines on Windows.
    with open('China_city_aqi.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for i, city in enumerate(city_list):
            if (i + 1) % 10 == 0:
                # Periodic progress report.
                print('Several records have been processed:{},a total of several records:{}.'.format(i + 1, len(city_list)))
            city_name, city_pinyin = city
            city_aqi = get_city_aqi(city_pinyin)
            writer.writerow([city_name] + city_aqi)


if __name__ == '__main__':
    main()
什么是Pandas
Pandas的数据结构
Pandas的数据操作
Pandas统计计算和描述
功能9.0 简单的数据处理和分析
结构化数据:CSV,JSON
非结构化数据:视频,图片,声音
aqi_data.sort_values(by=['AQI'])  # ascending by default (smallest first)
aqi_data.sort_values(by=['AQI'], ascending=False)  # descending (largest first); note: ascending is a keyword argument, not part of the by list
# -*- coding:utf-8 -*-
""""
auther:Susan
function:
1.Get the website of all cities AQI:BeautifilSoup
2.Real-time AQI preservation
version:v10.0
data:2019/5/8
note:python2.7
"""
import pandas as pd
def main():
    """Load China_city_aqi.csv with pandas, print summary statistics, and
    save the ten best / ten worst cities by AQI to CSV files."""
    aqi_data = pd.read_csv('China_city_aqi.csv')
    print('Basic Information:')
    print(aqi_data.info())
    print('Data preview:')
    print(aqi_data.head())
    # Basic statistics.
    # BUG FIX: .max / .min were missing call parentheses (printing the
    # bound method object), and the 'AQI min' line actually called max.
    print('AQI max:', aqi_data['AQI'].max())
    print('AQI min:', aqi_data['AQI'].min())
    print('AQI mean:', aqi_data['AQI'].mean())
    # Ten cities with the lowest AQI (best air quality).
    top10_cities = aqi_data.sort_values(by=['AQI']).head(10)
    print('Ten cities with the best air quality:')
    print(top10_cities)
    # Ten cities with the highest AQI (worst air quality); tail of the
    # ascending sort keeps them in ascending order, matching the original.
    bottom_cities = aqi_data.sort_values(by=['AQI']).tail(10)
    print('Ten cities with the worst air quality:')
    print(bottom_cities)
    # Save both selections as CSV.
    top10_cities.to_csv('top10_aqi.csv')
    bottom_cities.to_csv('bottom10_aqi.csv')


if __name__ == '__main__':
    main()
功能10.0 数据清洗和可视化
• 数据清洗;利用Pandas进行数据可视化
数据获取(网络爬虫)--->数据清洗(只保留AQI>0的数据)
plot(kind, x, y, title, figsize) #kind指定绘制图像类型
https://blog.csdn.net/claroja/article/details/73872066 plot属性设置
https://www.jianshu.com/p/33f843a7cef5 plot教程
https://blog.csdn.net/qq_37904945/article/details/79818719 无法显示中文字体的问题
终端输入(系统中的中文字体所在的位置):fc-list :lang=zh 在python用绝对路径来引用字体:
import matplotlib.pyplot as plt
import matplotlib as mpl
zhfont= mpl.font_manager.FontProperties(fname='/usr/share/fonts/truetype/arphic/ukai.ttc')
plt.plot([1, 2, 3])
plt.xlabel('x轴标签', fontproperties=zhfont)
plt.ylabel('y轴标签',fontproperties=zhfont)
plt.show()
# -*- coding:utf-8 -*-
"""
author: Susan
function:
    1. Get the AQI website data for all cities (BeautifulSoup)
    2. Real-time AQI preservation
    3. Plot the top-50 cities
version: v10.0
date: 2019/5/8
note: originally targeted python2.7
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl


def main():
    """Load the scraped AQI CSV, drop invalid rows, print statistics, and
    plot/save the 50 cities with the best (lowest) AQI."""
    aqi_data = pd.read_csv('China_city_aqi.csv')
    print('Basic Information:')
    print(aqi_data.info())
    print('Data preview:')
    print(aqi_data.head())
    # Data cleaning: keep only rows with a positive AQI.
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]
    # Basic statistics.
    # BUG FIX: .max/.min were missing call parentheses, and the 'min'
    # line actually called max.
    print('AQI max:', clean_aqi_data['AQI'].max())
    print('AQI min:', clean_aqi_data['AQI'].min())
    print('AQI mean:', clean_aqi_data['AQI'].mean())
    # CJK-capable font referenced by absolute path so Chinese axis labels
    # render on Linux (matplotlib's default fonts lack the glyphs).
    font = mpl.font_manager.FontProperties(fname='/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc')
    top50_cities = clean_aqi_data.sort_values(by=['AQI']).head(50)
    # BUG FIX: the CSV written by v8.0 names the column 'city', not 'City' —
    # plotting with x='City' raised a KeyError.
    top50_cities.plot(kind='bar', x='city', y='AQI',
                      title='Fifty cities with the best air quality',
                      figsize=(20, 10))
    plt.xticks(fontproperties=font)
    plt.xlabel(u"城市", fontproperties=font)
    # BUG FIX: the second label was applied with xlabel, clobbering the
    # first; the air-quality label belongs on the y-axis.
    plt.ylabel(u"空气质量", fontproperties=font)
    plt.savefig('Top50_api.png')
    plt.show()
    top50_cities.to_csv('top50_aqi.csv')


if __name__ == '__main__':
    main()