import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# 1. Load the data
# One CSV per city; the file names indicate coverage from 2010-01-01 to 2015-12-31.
# Forward slashes are used so the relative paths resolve on both Windows and
# POSIX systems (a backslash is an ordinary file-name character on POSIX).
bj = pd.read_csv("PM2.5/BeijingPM20100101_20151231.csv")
cd = pd.read_csv("PM2.5/ChengduPM20100101_20151231.csv")
gz = pd.read_csv("PM2.5/GuangzhouPM20100101_20151231.csv")
sh = pd.read_csv("PM2.5/ShanghaiPM20100101_20151231.csv")
sy = pd.read_csv("PM2.5/ShenyangPM20100101_20151231.csv")

# The order matters: the plotting code labels each series by its position
# in this list (beijing, chengdu, guangzhou, shanghai, shenyang).
lst = [bj, cd, gz, sh, sy]
# 2. Combine the separate year/month/day/hour columns into a pandas time index using PeriodIndex
# Give every city frame an hourly PeriodIndex built from its "year", "month",
# "day" and "hour" columns. The frames are modified in place, so the objects
# held in `lst` are updated as well.
for df in lst:
    # NOTE(review): newer pandas releases (2.2+) appear to deprecate passing
    # field keywords to the PeriodIndex constructor in favour of
    # PeriodIndex.from_fields -- confirm against the pandas version in use.
    period = pd.PeriodIndex(
        year=df["year"],
        month=df["month"],
        day=df["day"],
        hour=df["hour"],
        freq="H",
    )
    df["datatime"] = period
    df.set_index("datatime", inplace=True)
# 3. Downsample
# Replace each frame in `lst` (same list object, updated slot by slot) with
# the mean of its values over consecutive 14-day bins.
for i, frame in enumerate(lst):
    # numeric_only=True keeps the aggregation from raising if a frame carries
    # a non-numeric column; averaging is only meaningful for numeric columns.
    lst[i] = frame.resample("14D").mean(numeric_only=True)
# 4. Plot
# Draw one line per city on a single large figure, using the "PM_US Post"
# column of each downsampled frame.
plt.figure(figsize=(40, 20), dpi=80)

# Legend labels, in the same order as the frames in `lst`.
city = ["beijing", "chengdu", "guangzhou", "shanghai", "shenyang"]

for name, df in zip(city, lst):
    data = df["PM_US Post"]
    _x = data.index
    _y = data.values
    # Plot against integer positions; the period labels are attached to the
    # axis afterwards via xticks.
    plt.plot(range(len(_x)), _y, label=name)

# Tick labels are set once, after the loop, from the last plotted series
# (each xticks call overrides the previous one, so only the last matters).
# NOTE(review): assumes all cities share the same binned index -- confirm.
plt.xticks(range(len(_x)), list(_x), rotation=45)

plt.title("PM2.5")
plt.legend(loc="best")
plt.show()