# 使用JupyterLab进行的数据分析
# 数据集下载
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import matplotlib as mpl
import matplotlib.pyplot as plt
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Rebuild matplotlib's font cache (presumably so that a freshly installed
# SimHei font is found). `_rebuild` is a private helper that newer matplotlib
# releases no longer ship; the guard keeps the script running there instead
# of aborting with ImportError.
try:
    from matplotlib.font_manager import _rebuild
except ImportError:
    pass
else:
    _rebuild()
# Chinese text support
mpl.rcParams['font.sans-serif'] = [u'SimHei']  # render Chinese labels correctly
# Render the minus sign correctly; avoids '-' showing up as a box in saved images.
mpl.rcParams['axes.unicode_minus'] = False
# Load the air-quality records; the code below reads the `AQI` column.
data = pd.read_csv('air.csv')
# Bare expressions like these only echo their value inside a notebook cell.
data.shape
data.columns
data.dtypes
# Number of records falling into each AQI range of the current data set.
AQI_min = data.AQI.min()
AQI_max = data.AQI.max()
display(AQI_min, AQI_max)
# The first bin edge is AQI_min itself. pd.cut builds right-closed bins, so
# without include_lowest=True every row whose AQI equals AQI_min would fall
# outside all bins (NaN) and silently disappear from the counts.
AQI_cut = pd.cut(
    data.AQI,
    bins=[AQI_min, 50, 100, 150, 200, 300, AQI_max],
    include_lowest=True,
)
# value_counts() orders the bins by frequency, most common first.
AQI_count = AQI_cut.value_counts()
AQI_count
def func1(save_path='C:\\Users\\Administrator\\Desktop\\数据分析图\\1.png',
          y_max=320000):
    """Draw the AQI distribution as a bar chart and save it to disk.

    Reads the module-level ``AQI_count`` series (record count per AQI bin)
    and draws one bar per bin, annotated with that bin's share of all
    records.

    Args:
        save_path: Where the figure is written. Defaults to the location the
            original notebook used.
        y_max: Upper limit of the y axis. Defaults to the original fixed
            value; raise it if a bin holds more records than that.
    """
    X = np.arange(len(AQI_count))
    Y = AQI_count
    plt.figure(figsize=(8, 6))
    plt.bar(X, Y, color='steelblue', alpha=0.8)
    plt.title('AQI分布图')
    plt.xlabel('AQI区间')
    plt.ylabel('2014-2018年AQI天数')
    # Label each bar with the interval it represents.
    plt.xticks(X, AQI_count.index, rotation=30)
    plt.ylim([0, y_max])
    # Share of each bin as a percentage string, rounded to two decimals.
    percents = [str(round(share * 100, 2)) + '%'
                for share in AQI_count / AQI_count.sum()]
    # Put the percentage slightly above the top of its bar.
    for x, y, z in zip(X, Y, percents):
        plt.text(x - 0.2, y + 5000, z)
    plt.savefig(save_path)


func1()
# Nationwide pollution-level pie chart
def func2(save_path='C:\\Users\\Administrator\\Desktop\\数据分析图\\2.png'):
    """Draw the share of each AQI pollution level as a pie chart and save it.

    Reads the module-level ``AQI_count`` series, whose index is expected to
    hold the intervals produced by ``pd.cut``. Each slice is labelled from
    the upper edge of its interval, so the labels stay correct whatever
    order ``value_counts()`` returned the bins in.

    Args:
        save_path: Where the figure is written. Defaults to the location the
            original notebook used.
    """
    # Pollution level keyed by the upper edge of its AQI interval.
    labels_by_upper = {
        50: '优(0,50]',
        100: '良(50,100]',
        150: '轻度污染(100,150]',
        200: '中度污染(150,200]',
        300: '重度污染(200,300]',
    }
    # Anything above 300 is the top level; the label text carries a fixed
    # upper bound of 1210 regardless of the actual AQI_max.
    top_label = '严重污染(300,1210]'

    uppers = [interval.right for interval in AQI_count.index]
    labels = [labels_by_upper.get(upper, top_label) for upper in uppers]
    x = list(AQI_count / AQI_count.sum())
    colors = ['#32CD32', '#FFDAB9', '#8A2BE2', '#2442aa', '#dd5555', '#FFFF00']
    # Pull out the "优" (excellent) slice, wherever it ended up.
    explode = [0.1 if upper == 50 else 0 for upper in uppers]

    # Start a fresh figure so the pie is not drawn over an earlier plot
    # (e.g. the bar chart from func1) when run outside a notebook.
    plt.figure()
    plt.pie(x=x,  # data to plot
            labels=labels,  # slice labels
            colors=colors,  # slice colours
            autopct='%.1f%%',  # percentage format
            startangle=180,  # starting angle
            explode=explode,  # highlighted slice
            radius=2  # pie radius
            )
    # radius=2 makes the pie larger than its axes; a tight bounding box
    # should keep the saved image from cropping it.
    plt.savefig(save_path, bbox_inches='tight')


func2()
# AQI与PM2.5的关系
def func3