import pandas as pd
import os
# Make the sample-data folder the working directory so the CSV can be opened
# by its bare file name, then load it and preview the first five rows.
os.chdir(
    r"C:\Users\Hans\Desktop\data_analysis\test_data"
)
df = pd.read_csv(filepath_or_buffer="Beijing_2014-02.csv")
df.head(n=5)
| | Date | Temperature(Celsius)(avg) | Dew Point(Celsius)(avg) | Humidity(%)(avg) | Sea Level Press(hPa's)(avg) | Precipitation(mm)(high) | Precipitation(mm)(sum) |
|---|---|---|---|---|---|---|---|
| 0 | 2014/1/1 | 4 | -17 | 19 | 1015 | 47 | 0.0 |
| 1 | 2014/1/2 | 0 | -9 | 50 | 1018 | - | 0.0 |
| 2 | 2014/1/3 | 3 | -13 | 32 | 1022 | - | 0.0 |
| 3 | 2014/1/4 | -2 | -7 | 68 | 1023 | - | 0.0 |
| 4 | 2014/1/5 | 0 | -11 | 51 | 1025 | - | 0.0 |
# Index the rows by the parsed 'Date' values so they can be selected with date
# strings. The original text 'Date' column is left in the frame.
df.index = pd.to_datetime(df['Date'])
df.head(n=5)
| | Date | Temperature(Celsius)(avg) | Dew Point(Celsius)(avg) | Humidity(%)(avg) | Sea Level Press(hPa's)(avg) | Precipitation(mm)(high) | Precipitation(mm)(sum) |
|---|---|---|---|---|---|---|---|
| Date | | | | | | | |
| 2014-01-01 | 2014/1/1 | 4 | -17 | 19 | 1015 | 47 | 0.0 |
| 2014-01-02 | 2014/1/2 | 0 | -9 | 50 | 1018 | - | 0.0 |
| 2014-01-03 | 2014/1/3 | 3 | -13 | 32 | 1022 | - | 0.0 |
| 2014-01-04 | 2014/1/4 | -2 | -7 | 68 | 1023 | - | 0.0 |
| 2014-01-05 | 2014/1/5 | 0 | -11 | 51 | 1025 | - | 0.0 |
# Partial-string lookup on the datetime index: every row dated in March 2014,
# trimmed to the first five for display.
df.loc['2014-03'].iloc[:5]
| | Date | Temperature(Celsius)(avg) | Dew Point(Celsius)(avg) | Humidity(%)(avg) | Sea Level Press(hPa's)(avg) | Precipitation(mm)(high) | Precipitation(mm)(sum) |
|---|---|---|---|---|---|---|---|
| Date | | | | | | | |
| 2014-03-01 | 2014/3/1 | 4 | -15 | 31 | 1023 | 55 | 0.0 |
| 2014-03-02 | 2014/3/2 | 4 | -8 | 46 | 1022 | - | 0.0 |
| 2014-03-03 | 2014/3/3 | 0 | -6 | 60 | 1022 | - | 0.0 |
| 2014-03-04 | 2014/3/4 | 4 | -19 | 23 | 1026 | 50 | 0.0 |
| 2014-03-05 | 2014/3/5 | 1 | -21 | 18 | 1028 | 35 | 0.0 |
# Label-based date range on the datetime index; both end dates are included
# in the result.
df.loc[slice('2014-03-01', '2014-03-02')]
| | Date | Temperature(Celsius)(avg) | Dew Point(Celsius)(avg) | Humidity(%)(avg) | Sea Level Press(hPa's)(avg) | Precipitation(mm)(high) | Precipitation(mm)(sum) |
|---|---|---|---|---|---|---|---|
| Date | | | | | | | |
| 2014-03-01 | 2014/3/1 | 4 | -15 | 31 | 1023 | 55 | 0.0 |
| 2014-03-02 | 2014/3/2 | 4 | -8 | 46 | 1022 | - | 0.0 |
# ISO week number of every row's date.
# `DatetimeIndex.week` was deprecated in pandas 1.1 and removed in pandas 2.0;
# `isocalendar().week` is the documented replacement. Its result is a Series,
# so it is cast to int64 and wrapped in an Index carrying the original index
# name to keep the shape the old accessor returned.
# NOTE: ISO weeks do not follow calendar years -- the last days of December
# can be numbered week 1 (of the following ISO year).
# NOTE(review): the int64 cast assumes no missing dates (NaT) in the index.
pd.Index(df.index.isocalendar().week.astype('int64'), name=df.index.name)
Int64Index([ 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
...
52, 52, 52, 52, 52, 52, 52, 1, 1, 1],
dtype='int64', name='Date', length=365)
# Calendar month number of every row's date, read from the datetime index.
df.index.month
Int64Index([ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
...
12, 12, 12, 12, 12, 12, 12, 12, 12, 12],
dtype='int64', name='Date', length=365)
# Calendar quarter number of every row's date, read from the datetime index.
df.index.quarter
Int64Index([1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
...
4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
dtype='int64', name='Date', length=365)
# Highest daily-average humidity per ISO week, first ten weeks shown.
# `DatetimeIndex.week` was deprecated in pandas 1.1 and removed in pandas 2.0,
# so the week numbers come from `isocalendar().week` instead. The key is cast
# to int64 and renamed to the index name so the result keeps the same group
# label and dtype as before.
# NOTE: grouping by ISO week number alone merges any late-December days that
# are numbered week 1 with the first week of January.
# NOTE(review): the int64 cast assumes no missing dates (NaT) in the index.
df.groupby(
    df.index.isocalendar().week.astype('int64').rename(df.index.name)
)['Humidity(%)(avg)'].max().head(10)
Date
1 68
2 72
3 61
4 50
5 67
6 76
7 65
8 73
9 66
10 60
Name: Humidity(%)(avg), dtype: int64
# Highest daily-average humidity within each calendar month: pick the humidity
# column first, then bucket its values by the month of the row's date.
df['Humidity(%)(avg)'].groupby(df.index.month).max()
Date
1 72
2 76
3 60
4 66
5 85
6 80
7 85
8 86
9 93
10 92
11 88
12 64
Name: Humidity(%)(avg), dtype: int64
# Highest daily-average humidity within each calendar quarter: pick the
# humidity column first, then bucket its values by the quarter of the row's date.
df['Humidity(%)(avg)'].groupby(df.index.quarter).max()
Date
1 76
2 85
3 93
4 92
Name: Humidity(%)(avg), dtype: int64
%matplotlib inline
df.groupby(df.index.quarter)['Humidity(%)(avg)'].max().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x117acb4b3c8>
![Line plot of the maximum average humidity per quarter](output_10_1.png)
# Line chart of the per-month maximum of the daily-average humidity.
df['Humidity(%)(avg)'].groupby(df.index.month).max().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x117acc580f0>
![Line plot of the maximum average humidity per month](output_11_1.png)