1.量化投资理论基础
2.量化投资发展趋势
3.量化主要策略
4.量化研究的流程
    1.一个策略包含:
        1.数据
        2.回测
        3.评价
5.常见数据类型的操作
6.Pandas 基础工具
#-*-coding:utf-8-*-
import numpy as np
import pandas as pd
# ----------------------------------------------------------------------
# Creating Series objects
# ----------------------------------------------------------------------
_SEPARATOR = "<<<--->>>"  # printed after each example to delimit its output

# A list of floats paired with explicit string labels.
s = pd.Series(data=[-1.1556677, -1.277655], index=list("ab"))
print(s)
print(_SEPARATOR)

# No dtype is passed here: the list mixes a str with floats, so pandas
# stores the values with the generic object dtype.
mixed_values = ["a", -1.1556677, -1.277655]
s2 = pd.Series(data=mixed_values, index=list("abc"))
print(s2)
print(_SEPARATOR)

# An ndarray of five random floats, labelled explicitly.
labelled_randoms = np.random.random(5)
s3 = pd.Series(labelled_randoms, index=list("abcde"))
print(s3)
print(_SEPARATOR)

# Another random ndarray, this time without an index argument, so the
# labels default to 0..4.
unlabelled_randoms = np.random.random(5)
s4 = pd.Series(unlabelled_randoms)
print(s4)
print(_SEPARATOR)

# A dict as the data source. The requested index contains "d", which is
# not a key of the dict, so that entry comes out as NaN.
lookup = dict(a=0, b=1, c=2)
s5 = pd.Series(lookup, index=list("bcda"))
print(s5)
print(_SEPARATOR)

# A scalar constant is repeated for every label in the index.
s6 = pd.Series(5., index=list("bcda"))
print(s6)
print(_SEPARATOR)

# ----------------------------------------------------------------------
# Accessing Series data
# ----------------------------------------------------------------------
s = pd.Series(dict(a=0, b=1., c=2.), index=list("bcda"))
underlying_array = s.values   # the data as an ndarray
print(underlying_array)
print(s.index)                # the labels
print(s['a'])                 # a single label
print(s[['a', 'b']])          # a list of labels
print(s[:2])                  # a slice of the first two entries
print(_SEPARATOR)
# ----------------------------------------------------------------------
# Creating DataFrame objects
# ----------------------------------------------------------------------
# From a dict of equal-length lists: each key becomes a column name.
df = pd.DataFrame({'one': [1., 2., 3., 5], 'two': [1., 2., 3., 4.]})
print(df)
print("<<<--->>>")

# From a nested list: each inner list is one row; row and column labels
# are supplied explicitly.
df2 = pd.DataFrame(
    [[1., 2, 3., 5], [1., 2, 3., 4]],
    index=['a', 'b'],
    columns=['one', 'two', 'three', 'four'],
)
print(df2)
print("<<<--->>>")

# From a 1-D structured ndarray: each named field (A: int32, B: float32,
# C: 10-byte string) becomes a column.
# 'S10' is used instead of the original 'a10' because NumPy 2.0 deprecated
# the 'a' alias for the bytes dtype; the resulting dtype is the same.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
data[:] = [(1, 2., 'hello'), (2, 3., 'world')]
df3 = pd.DataFrame(data)
# Same data with custom row labels and a different column order.
df4 = pd.DataFrame(data, index=['first', 'second'], columns=['C', 'A', 'B'])
print(df3)
print(df4)
print("<<<--->>>")

# From a dict of Series: rows are aligned on the Series labels, so the
# shorter Series ('one' has no label 'd') is padded with NaN.
data = {
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}
df5 = pd.DataFrame(data)
# Passing index= selects and orders the rows.
df6 = pd.DataFrame(data, index=['d', 'b', 'a'])
# Passing columns= selects columns; 'three' is not in the dict, so it is
# created as an all-NaN column.
df7 = pd.DataFrame(data, index=['d', 'b', 'a'], columns=['two', 'three'])
print(df5)
print(df6)
print(df7)
print("<<<--->>>")

# From a list of dicts: each dict is one row; a key missing from a row
# (here 'c' in the first dict) yields NaN.
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
df8 = pd.DataFrame(data)
print(df8)
# ----------------------------------------------------------------------
# Accessing DataFrame data
# ----------------------------------------------------------------------
print("<<<--->>>")

# Row labels, column labels, and the underlying values.
row_labels = df8.index
print(row_labels)
column_labels = df8.columns
print(column_labels)
raw_values = df8.values
print(raw_values)

# One column, by key and by attribute.
column_by_key = df8['a']
print(column_by_key)
column_by_attribute = df8.a
print(column_by_attribute)

# A row slice taken directly on the frame.
first_row = df8[0:1]
print(first_row)

# Label-based selection: every row, columns 'a' and 'b'.
by_label = df8.loc[:, ['a', 'b']]
print(by_label)

# Position-based selection: rows 0-1, column 0.
by_position = df8.iloc[0:2, 0:1]
print(by_position)

# The examples below are left disabled, as in the original notes.
# NOTE(review): df8 is built from two dicts, so row position 2 used by the
# iloc example does not exist; the `.ix` indexer was removed in pandas 1.0.
# print(df8.iloc[[0,2],[0,1]])
# print(df8.ix['a',['one','two']])
# print(df8.ix[['a','b'],[0,1]])
# print(df8.ix[[0,1],[0,1]])
7.股票数据处理
#-*-coding:utf-8-*-
import numpy as np
import pandas as pd
# ----------------------------------------------------------------------
# Creating Series objects
# ----------------------------------------------------------------------
_SEPARATOR = "<<<--->>>"  # printed after each example to delimit its output

# A list of floats paired with explicit string labels.
s = pd.Series(data=[-1.1556677, -1.277655], index=list("ab"))
print(s)
print(_SEPARATOR)

# No dtype is passed here: the list mixes a str with floats, so pandas
# stores the values with the generic object dtype.
mixed_values = ["a", -1.1556677, -1.277655]
s2 = pd.Series(data=mixed_values, index=list("abc"))
print(s2)
print(_SEPARATOR)

# An ndarray of five random floats, labelled explicitly.
labelled_randoms = np.random.random(5)
s3 = pd.Series(labelled_randoms, index=list("abcde"))
print(s3)
print(_SEPARATOR)

# Another random ndarray, this time without an index argument, so the
# labels default to 0..4.
unlabelled_randoms = np.random.random(5)
s4 = pd.Series(unlabelled_randoms)
print(s4)
print(_SEPARATOR)

# A dict as the data source. The requested index contains "d", which is
# not a key of the dict, so that entry comes out as NaN.
lookup = dict(a=0, b=1, c=2)
s5 = pd.Series(lookup, index=list("bcda"))
print(s5)
print(_SEPARATOR)

# A scalar constant is repeated for every label in the index.
s6 = pd.Series(5., index=list("bcda"))
print(s6)
print(_SEPARATOR)

# ----------------------------------------------------------------------
# Accessing Series data
# ----------------------------------------------------------------------
s = pd.Series(dict(a=0, b=1., c=2.), index=list("bcda"))
underlying_array = s.values   # the data as an ndarray
print(underlying_array)
print(s.index)                # the labels
print(s['a'])                 # a single label
print(s[['a', 'b']])          # a list of labels
print(s[:2])                  # a slice of the first two entries
print(_SEPARATOR)
# ----------------------------------------------------------------------
# Creating DataFrame objects
# ----------------------------------------------------------------------
# From a dict of equal-length lists: each key becomes a column name.
df = pd.DataFrame({'one': [1., 2., 3., 5], 'two': [1., 2., 3., 4.]})
print(df)
print("<<<--->>>")

# From a nested list: each inner list is one row; row and column labels
# are supplied explicitly.
df2 = pd.DataFrame(
    [[1., 2, 3., 5], [1., 2, 3., 4]],
    index=['a', 'b'],
    columns=['one', 'two', 'three', 'four'],
)
print(df2)
print("<<<--->>>")

# From a 1-D structured ndarray: each named field (A: int32, B: float32,
# C: 10-byte string) becomes a column.
# 'S10' is used instead of the original 'a10' because NumPy 2.0 deprecated
# the 'a' alias for the bytes dtype; the resulting dtype is the same.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
data[:] = [(1, 2., 'hello'), (2, 3., 'world')]
df3 = pd.DataFrame(data)
# Same data with custom row labels and a different column order.
df4 = pd.DataFrame(data, index=['first', 'second'], columns=['C', 'A', 'B'])
print(df3)
print(df4)
print("<<<--->>>")

# From a dict of Series: rows are aligned on the Series labels, so the
# shorter Series ('one' has no label 'd') is padded with NaN.
data = {
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}
df5 = pd.DataFrame(data)
# Passing index= selects and orders the rows.
df6 = pd.DataFrame(data, index=['d', 'b', 'a'])
# Passing columns= selects columns; 'three' is not in the dict, so it is
# created as an all-NaN column.
df7 = pd.DataFrame(data, index=['d', 'b', 'a'], columns=['two', 'three'])
print(df5)
print(df6)
print(df7)
print("<<<--->>>")

# From a list of dicts: each dict is one row; a key missing from a row
# (here 'c' in the first dict) yields NaN.
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
df8 = pd.DataFrame(data)
print(df8)
# ----------------------------------------------------------------------
# Accessing DataFrame data
# ----------------------------------------------------------------------
print("<<<--->>>")

# Row labels, column labels, and the underlying values.
row_labels = df8.index
print(row_labels)
column_labels = df8.columns
print(column_labels)
raw_values = df8.values
print(raw_values)

# One column, by key and by attribute.
column_by_key = df8['a']
print(column_by_key)
column_by_attribute = df8.a
print(column_by_attribute)

# A row slice taken directly on the frame.
first_row = df8[0:1]
print(first_row)

# Label-based selection: every row, columns 'a' and 'b'.
by_label = df8.loc[:, ['a', 'b']]
print(by_label)

# Position-based selection: rows 0-1, column 0.
by_position = df8.iloc[0:2, 0:1]
print(by_position)

# The examples below are left disabled, as in the original notes.
# NOTE(review): df8 is built from two dicts, so row position 2 used by the
# iloc example does not exist; the `.ix` indexer was removed in pandas 1.0.
# print(df8.iloc[[0,2],[0,1]])
# print(df8.ix['a',['one','two']])
# print(df8.ix[['a','b'],[0,1]])
# print(df8.ix[[0,1],[0,1]])
# ----------------------------------------------------------------------
# Acquiring stock data
# ----------------------------------------------------------------------
# --- From a CSV file ---
# '路径' ("path") is a placeholder for the real file location.
# First variant: row 0 of the file is the header, column 0 is the index.
df_csvload = pd.read_csv(
    '路径',
    header=0,
    index_col=0,
    encoding='gb2312',
)
# Second variant: the file is read as having no header row, and the
# columns are named 2..7 instead.
df_csvload1 = pd.read_csv(
    '路径',
    header=None,
    names=range(2, 8),
    index_col=0,
    encoding='gb2312',
)
print(df_csvload)

# --- From a web API ---
import pandas_datareader.data as web
import datetime

# "601233.SS" is <stock code>.<exchange>; quotes are requested from the
# "yahoo" source for 2018-01-01 through today.
range_start = datetime.datetime(2018, 1, 1)
range_end = datetime.date.today()
df_csvsave = web.DataReader("601233.SS", "yahoo", range_start, range_end)
print(df_csvsave)

# Write every column, plus the index, back out to CSV.
df_csvsave.to_csv('路径', columns=df_csvsave.columns, index=True)
# ----------------------------------------------------------------------
# Processing stock data: inspection and missing-value handling
# ----------------------------------------------------------------------
# '路径' ("path") is a placeholder for the real file location.
df_csvload2 = pd.read_csv('路径', parse_dates=False, index_col=0, encoding='gb2312')

# First three and last three rows.
print(df_csvload2.head(3))
print(df_csvload2.tail(3))
print(df_csvload2.columns)
print(df_csvload2.index)

# (rows, columns)
print(df_csvload2.shape)

# Descriptive statistics for each column.
print(df_csvload2.describe())

# Per-column non-null counts and dtypes. info() prints its report itself
# and returns None, so it is not wrapped in print() (that would emit a
# stray "None" line).
df_csvload2.info()

# Cell-by-cell missing-value mask, then one flag per row that is True when
# the row contains at least one missing value.
print(df_csvload2.isnull())
print(df_csvload2.isnull().any(axis=1))

# Drop rows (axis=0) containing missing values; axis=1 would drop columns.
print(df_csvload2.dropna(axis=0))

# how='all' drops a row only when every value in it is missing;
# how='any' (the default) drops it as soon as one value is missing.
print(df_csvload2.dropna(axis=0, how='all'))

# Forward-fill: each missing value takes the previous row's value in the
# same column (axis=0); bfill() would take the next row's value instead.
# The original call misspelled the keyword as `akis`, which raises
# TypeError, and printed the return value of an inplace operation, which is
# always None. ffill() is used because fillna(method=...) is deprecated.
df_csvload2.ffill(axis=0, inplace=True)
print(df_csvload2)
# ----------------------------------------------------------------------
# Handling special values
# ----------------------------------------------------------------------
# NOTE(review): assumes df_csvload holds numeric quote data with 'High' and
# 'Volume' columns and date-string row labels -- confirm against the CSV.

# Keep two decimals by rounding numerically. The original formatted every
# cell with '%0.2f', which turned the whole frame into strings, so the zero
# checks below could never match and median() had no numbers to work on.
df_csvload3 = df_csvload.round(2)

# Volume is a share count: store it as whole integers.
df_csvload3['Volume'] = df_csvload3['Volume'].round(0).astype(int)

# Rows that contain a zero in any column.
print(df_csvload3[(df_csvload3 == 0).any(axis=1)])

# Rows whose High price is zero.
print(df_csvload3[df_csvload3.High.isin([0])])

# Replace zero High prices with the median High.
df_csvload3.loc[df_csvload3['High'] == 0, 'High'] = df_csvload3.High.median()
print(df_csvload3)

# Look up one trading day by its row label. `.loc` replaces the `.ix`
# indexer, which was removed in pandas 1.0.
print(df_csvload3.loc['2018-02-01'])