第四课 Pandas时序型数据分析
第一节 时序型数据的创建和表示
import pandas as pd
通过日期型数据列表创建
from datetime import datetime
# Three consecutive days starting 2019-01-01, built from stdlib datetime objects.
date_list1 = [datetime(2019, 1, day) for day in range(1, 4)]
# Wrapping the list in a Series gives a datetime-typed column.
pd.Series(date_list1)
0 2019-01-01
1 2019-01-02
2 2019-01-03
dtype: datetime64[ns]
# The same three days, this time parsed by pd.Timestamp from compact
# YYYYMMDD strings.
date_list2 = [
    pd.Timestamp(compact_day)
    for compact_day in ('20190101', '20190102', '20190103')
]
pd.Series(date_list2)
0 2019-01-01
1 2019-01-02
2 2019-01-03
dtype: datetime64[ns]
# pd.Timestamp also accepts slash-separated date strings.
date_list3 = list(map(pd.Timestamp, ['2019/01/01', '2019/01/02', '2019/01/03']))
pd.Series(date_list3)
0 2019-01-01
1 2019-01-02
2 2019-01-03
dtype: datetime64[ns]
date_range
# With only start and end given, date_range steps one day at a time and
# includes both endpoints: 2019-01-01 through 2019-02-01 is 32 rows.
pd.Series(pd.date_range(start='2019-01-01', end='2019-02-01'))
0 2019-01-01
1 2019-01-02
2 2019-01-03
3 2019-01-04
4 2019-01-05
5 2019-01-06
6 2019-01-07
7 2019-01-08
8 2019-01-09
9 2019-01-10
10 2019-01-11
11 2019-01-12
12 2019-01-13
13 2019-01-14
14 2019-01-15
15 2019-01-16
16 2019-01-17
17 2019-01-18
18 2019-01-19
19 2019-01-20
20 2019-01-21
21 2019-01-22
22 2019-01-23
23 2019-01-24
24 2019-01-25
25 2019-01-26
26 2019-01-27
27 2019-01-28
28 2019-01-29
29 2019-01-30
30 2019-01-31
31 2019-02-01
dtype: datetime64[ns]
# Give a start and a count instead of an end: periods=10 produces ten
# consecutive days beginning at the start date.
pd.Series(pd.date_range(start='2019/01/01', periods=10))
0 2019-01-01
1 2019-01-02
2 2019-01-03
3 2019-01-04
4 2019-01-05
5 2019-01-06
6 2019-01-07
7 2019-01-08
8 2019-01-09
9 2019-01-10
dtype: datetime64[ns]
# freq='M' places every timestamp on a month end, so six periods starting
# from 2019-01-01 run from 2019-01-31 through 2019-06-30.
# NOTE(review): recent pandas releases (2.2+) reportedly deprecate the 'M'
# alias in favour of 'ME' — confirm against the installed pandas version.
pd.Series(pd.date_range(start='20190101', periods=6, freq='M'))
0 2019-01-31
1 2019-02-28
2 2019-03-31
3 2019-04-30
4 2019-05-31
5 2019-06-30
dtype: datetime64[ns]
to_datetime()
# Convert date strings
date_val_list = ['2019-01-01', '20190102', None]
# The strings use two different layouts. pandas 2.x infers a single format
# from the first string and applies it to the whole list, so a direct
# pd.to_datetime(date_val_list) can reject '20190102'. Parsing each value on
# its own works across pandas versions; the missing value comes out as NaT.
date_val_series = pd.Series(
    pd.to_datetime([pd.to_datetime(raw) for raw in date_val_list])
)
date_val_series
0 2019-01-01
1 2019-01-02
2 NaT
dtype: datetime64[ns]
# Combine and convert dates
# Load the Beijing PM dataset; its date is split across the separate
# year, month, day and hour columns.
df = pd.read_csv(filepath_or_buffer='./datasets/BeijingPM20100101_20151231.csv')
# Preview the first five rows.
df.head(n=5)
No year month day hour season PM_Dongsi PM_Dongsihuan PM_Nongzhanguan PM_US Post DEWP HUMI PRES TEMP cbwd Iws precipitation Iprec
0 1 2010 1 1 0 4 NaN NaN NaN NaN -21.0 43.0 1021.0 -11.0 NW 1.79 0.0 0.0
1 2 2010 1 1 1 4 NaN NaN NaN NaN -21.0 47.0 1020.0 -12.0 NW 4.92 0.0 0.0
2 3 2010 1 1 2 4 NaN NaN NaN NaN -21.0 43.0 1019.0 -11.0 NW 6.71 0.0 0.0
3 4 2010 1 1 3 4 NaN NaN NaN NaN -21.0 55.0 1019.0 -14.0 NW 9.84 0.0 0.0
4 5 2010 1 1 4 4 NaN NaN NaN NaN -20.0 51.0 1018.0 -12.0 NW 12.97 0.0 0.0
# Passing a DataFrame holding the year, month and day columns makes
# to_datetime assemble one date per row from those three columns.
pd.to_datetime(df[['year', 'month', 'day']])
0 2010-01-01
1 2010-01-01
2 2010-01-01
3 2010-01-01
4 2010-01-01
5 2010-01-01
6 2010-01-01
7 2010-01-01
8 2010-01-01
9 2010-01-01
10 2010-01-01
11 2010-01-01
12 2010-01-01
13 2010-01-01
14 2010-01-01
15 2010-01-01
...
52567 2015-12-31
52568 2015-12-31
52569 2015-12-31
52570 2015-12-31
52571 2015-12-31
52572 2015-12-31
52573 2015-12-31
52574 2015-12-31
52575 2015-12-31
52576 2015-12-31
52577 2015-12-31
52578 2015-12-31
52579 2015-12-31
52580 2015-12-31
52581 2015-12-31
52582 2015-12-31
52583 2015-12-31
dtype: datetime64[ns]