%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import calendar
## Plot the 2005-2014 record daily high/low temperatures as two lines with a
## shaded band between them, and overlay the days in 2015 on which that
## ten-year record high or record low was broken.

## Read the file.
num = 400
fileid = "fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89"
# parse_dates makes pandas convert the 'Date' column to datetimes on load.
df = pd.read_csv(
    'data/C2A2_data/BinnedCsvs_d{}/{}.csv'.format(num, fileid),
    parse_dates=['Date'],
)

# Split the date into calendar parts so rows can be grouped by day of year.
df['year'] = df.Date.dt.year
df['month'] = df.Date.dt.month
df['day'] = df.Date.dt.day

# Rescale the raw readings by 1/10.
# NOTE(review): presumably Data_Value is in tenths of a degree C (the y-axis
# label below says degrees C) -- confirm against the dataset description.
df['temp'] = df['Data_Value'] / 10

## Drop every Feb 29 row so that each year contributes the same 365 days.
leapday = df[(df['month'] == 2) & (df['day'] == 29)]
leapday_index = list(leapday.index)
df = df.drop(leapday_index, axis=0)

## 2005-2014: record high and record low for each (month, day).
df_2005_2014 = df[df['year'] < 2015]
# String aggregator names are used instead of the builtins max/min, which
# pandas deprecates as aggregation arguments.
max_df = (
    df_2005_2014.groupby(['month', 'day'])
    .agg({'temp': 'max'})
    .rename(columns={'temp': 'Max_Temp'})
)
min_df = (
    df_2005_2014.groupby(['month', 'day'])
    .agg({'temp': 'min'})
    .rename(columns={'temp': 'Min_Temp'})
)
# Combine both records into one frame indexed by (month, day).
merge_df = pd.merge(max_df, min_df, how='inner', left_index=True, right_index=True)

## 2015: daily high and low, compared against the 2005-2014 records.
df_15 = df[df['year'] == 2015]
max_df_15 = (
    df_15.groupby(['month', 'day'])
    .agg({'temp': 'max'})
    .rename(columns={'temp': 'Max_Temp'})
)
min_df_15 = (
    df_15.groupby(['month', 'day'])
    .agg({'temp': 'min'})
    .rename(columns={'temp': 'Min_Temp'})
)
# Align the 2015 series to the historical (month, day) index before comparing:
# Series comparison raises on non-identical labels, and a day missing from
# 2015 becomes NaN, which compares False and is therefore never flagged.
max_15 = max_df_15['Max_Temp'].reindex(max_df.index)
min_15 = min_df_15['Min_Temp'].reindex(min_df.index)
# np.where(...)[0] yields the 0-based row positions where the condition holds.
broken_max = np.where(max_15 > max_df['Max_Temp'])[0]
broken_min = np.where(min_15 < min_df['Min_Temp'])[0]

## Set x and y for the record lines.
merge_df = merge_df.reset_index()
# Day-of-year axis, 1-based. Assumes all 365 calendar days are present in the
# 2005-2014 data; otherwise the plot call fails on mismatched x/y lengths.
x = range(1, 366)
y1 = list(merge_df['Max_Temp'])
y2 = list(merge_df['Min_Temp'])

## Create the canvas and its single axes up front, so that every later call
## (plotting and spine styling) targets the same axes.
fig = plt.figure()
ax = fig.add_subplot(111)

## Record lines and the shaded band between them.
ax.plot(x, y1, linewidth=1, label='Max_Temp 2005-2014', alpha=0.6, color='salmon')
ax.plot(x, y2, linewidth=1, label='Min_Temp 2005-2014', alpha=0.6, color='royalblue')
ax.fill_between(x, y1, y2, color='lightblue', alpha=0.25)

## Scatter the 2015 days that broke a record. The positions are 0-based while
## x starts at 1, so shift by one to land on the matching day of the lines.
ax.scatter(broken_max + 1, max_15.iloc[broken_max], s=10, color='red',
           label='High Temp Broken 2015')
ax.scatter(broken_min + 1, min_15.iloc[broken_min], s=10, color='blueviolet',
           label='Low Temp Broken 2015')

## X ticks: one per month, labelled with names from the calendar module.
# month[i] is the length of month i in a non-leap year; month[0] is padding.
month = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# The running total of the preceding month lengths gives the boundary at which
# each month begins: 0, 31, 59, ..., 334. That is exactly 12 positions, matching
# the 12 month labels.
ticks = np.cumsum(month[:12]).tolist()
plt.xticks(ticks, calendar.month_name[1:13], rotation=20, fontsize=5, alpha=0.7)

## The y axis does not pass through the origin by default, so move it.
ax.yaxis.set_ticks_position('left')  # which side carries the y ticks
# ('data', 0) places the left spine at x == 0 in data coordinates.
ax.spines['left'].set_position(('data', 0))
# Set y tick label styling.
plt.yticks(fontsize=5, alpha=0.7)

# Add a legend; with no loc given, matplotlib chooses the placement.
ax.legend(frameon=False, title='Legend', fontsize=5)
# Axis labels and title.
ax.set_xlabel('Months', alpha=0.7)
ax.set_ylabel('Temperature(degrees C)', alpha=0.7)
ax.set_title('Temperature records across the globe (2005 and 2015)', alpha=0.7)
# Limit the x axis to one year.
ax.set_xlim(0, 365)

# Remove the top/right frame lines (unnecessary chart junk) and soften the rest.
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_alpha(0.5)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_alpha(0.6)