# 第6章 金融时间序列
# 6.1 pandas基础
# 6.1.1 使用DataFrame类的第一步
import pandas as pd
import numpy as np
# Build a one-column frame from a {column name: values} mapping and label
# the four rows 'a'..'d'.
df = pd.DataFrame({'numbers': [10, 20, 30, 40]}, index=list('abcd'))
df
#    numbers
# a       10
# b       20
# c       30
# d       40

# Row labels and column labels are both exposed as Index objects.
df.index
# Index(['a', 'b', 'c', 'd'], dtype='object')
df.columns
# Index(['numbers'], dtype='object')

# Label-based selection: a single label, a list of labels, and labels
# obtained by slicing the index itself.
df.loc['c']
# numbers    30
# Name: c, dtype: int64
df.loc[['a', 'b']]
#    numbers
# a       10
# b       20
df.loc[df.index[1:3]]
#    numbers
# b       20
# c       30

# Column-wise sum.
df.sum()
# numbers    100
# dtype: int64

# Squaring every element: once through apply(), once with the vectorized
# operator. Both give the same result.
df.apply(lambda x: x ** 2)
#    numbers
# a      100
# b      400
# c      900
# d     1600
df ** 2
#    numbers
# a      100
# b      400
# c      900
# d     1600

# Adding a column from a plain sequence: values are assigned by position.
df['floats'] = [1.5, 2.5, 3.5, 4.5]
df
#    numbers  floats
# a       10     1.5
# b       20     2.5
# c       30     3.5
# d       40     4.5

# A column can be read with item access or attribute access.
df['floats']
# a    1.5
# b    2.5
# c    3.5
# d    4.5
# Name: floats, dtype: float64
df.floats
# a    1.5
# b    2.5
# c    3.5
# d    4.5
# Name: floats, dtype: float64

# Adding a column from a labelled object: the labels are given out of order
# on purpose, and pandas aligns the values to df's index rather than
# assigning them by position.
df['names'] = pd.Series({'d': 'Yves', 'a': 'Guido', 'b': 'Felix', 'c': 'Francesc'})
df
#    numbers  floats     names
# a       10     1.5     Guido
# b       20     2.5     Felix
# c       30     3.5  Francesc
# d       40     4.5      Yves
# NOTE: DataFrame.append() was deprecated in pandas 1.4 and removed in
# pandas 2.0; pd.concat() is the supported way to add rows.

# Add one record and renumber the rows 0..n-1 (the letter labels are
# dropped). The result is only displayed; df itself is not modified.
pd.concat(
    [df, pd.DataFrame([{'numbers': 100, 'floats': 5.75, 'names': 'Henry'}])],
    ignore_index=True,
)
#    numbers  floats     names
# 0       10    1.50     Guido
# 1       20    2.50     Felix
# 2       30    3.50  Francesc
# 3       40    4.50      Yves
# 4      100    5.75     Henry

# Add the same record as a one-row frame labelled 'z', which keeps the
# existing row labels, and rebind df to the extended frame.
df = pd.concat([
    df,
    pd.DataFrame({'numbers': 100, 'floats': 5.75, 'names': 'Henry'}, index=['z']),
])
df
#    numbers  floats     names
# a       10    1.50     Guido
# b       20    2.50     Felix
# c       30    3.50  Francesc
# d       40    4.50      Yves
# z      100    5.75     Henry

# Default (left) join: only df's row labels are kept, so 'y' from the other
# frame is dropped and 'z', which has no match, gets NaN.
df.join(pd.DataFrame([1, 4, 9, 16, 25],
                     index=['a', 'b', 'c', 'd', 'y'],
                     columns=['squares']))
#    numbers  floats     names  squares
# a       10    1.50     Guido      1.0
# b       20    2.50     Felix      4.0
# c       30    3.50  Francesc      9.0
# d       40    4.50      Yves     16.0
# z      100    5.75     Henry      NaN

# Outer join: the union of both indexes is kept and missing cells are filled
# with NaN, which turns the integer 'numbers' column into floats.
df = df.join(pd.DataFrame([1, 4, 9, 16, 25],
                          index=['a', 'b', 'c', 'd', 'y'],
                          columns=['squares']),
             how='outer')
df
#    numbers  floats     names  squares
# a     10.0    1.50     Guido      1.0
# b     20.0    2.50     Felix      4.0
# c     30.0    3.50  Francesc      9.0
# d     40.0    4.50      Yves     16.0
# y      NaN     NaN       NaN     25.0
# z    100.0    5.75     Henry      NaN

# Column statistics skip NaN entries by default.
df[['numbers', 'squares']].mean()
# numbers    40.0
# squares    11.0
# dtype: float64
df[['numbers', 'squares']].std()
# numbers    35.355339
# squares     9.669540
# dtype: float64
# 6.1.2 使用DataFrame类的第二步
# Draw a 9x4 array of standard-normal pseudo-random numbers. No seed is set,
# so the sample values shown in the output comments differ from run to run.
a = np.random.standard_normal((9, 4))
a.round(6)
# array([[ 0.109076, -1.05275 ,  1.253471,  0.39846 ],
#        [-1.561175, -1.997425,  1.158739, -2.030734],
#        [ 0.764723,  0.760368,  0.864103, -0.174079],
#        [ 2.429043,  0.281962, -0.496606,  0.009445],
#        [-1.679758, -1.02374 , -1.135922,  0.077649],
#        [-0.247692,  0.301198,  2.156474,  1.537902],
#        [ 1.162934,  2.102327, -0.4501  ,  0.812529],
#        [-0.374749, -0.818229, -1.013962, -0.476855],
#        [ 0.626347,  2.294829, -1.29531 , -0.031501]])

# Wrapping the array in a DataFrame gives default integer row and column
# labels.
df = pd.DataFrame(a)
df
#           0         1         2         3
# 0  0.109076 -1.052750  1.253471  0.398460
# 1 -1.561175 -1.997425  1.158739 -2.030734
# 2  0.764723  0.760368  0.864103 -0.174079
# 3  2.429043  0.281962 -0.496606  0.009445
# 4 -1.679758 -1.023740 -1.135922  0.077649
# 5 -0.247692  0.301198  2.156474  1.537902
# 6  1.162934  2.102327 -0.450100  0.812529
# 7 -0.374749 -0.818229 -1.013962 -0.476855
# 8  0.626347  2.294829 -1.295310 -0.031501

# The labels must be a flat list. A nested list ([[...]]) makes current
# pandas build a one-level MultiIndex, so df['No2'] would return a DataFrame
# instead of a Series.
df.columns = ['No1', 'No2', 'No3', 'No4']
df
# No1 No2 No3 No4
# 0 0.109076 -1.052750 1.253471 0.398460
# 1 -1.561175 -1.997425 1.158739 -2.030734
# 2 0.764723 0.760368 0.864103 -0.174079
# 3 2.429043 0.281962 -0.496606 0.009445
#