# Anaconda 实现
# 工具包:pandas、numpy、matplotlib、statsmodels
# DataFrame基础
# DataFrame基本结构
import pandas as pd
# First pass: load the CSV with pandas' default integer row index and preview
# the first five rows.
df = pd.read_csv('E:/python量化投资data/china_index.csv')
df.head()

# Second pass: reload the same file, this time using the first column
# (position 0) as the row index and asking pandas to parse that index as dates
# (the original note refers to it as the Date column).
df = pd.read_csv(
    'E:/python量化投资data/china_index.csv',
    index_col=0,
    parse_dates=True,
)
df.head()

# Draw every column as a line chart against the row index.
df.plot(kind='line')
# 属性
# Basic structural attributes of the DataFrame (each bare expression is shown
# as cell output when run interactively).
df.index  # row labels
df.index[0]  # first row label
df.index[-1]  # last row label
df.columns  # column labels
df.shape  # tuple of (number of rows, number of columns)
# 方法
# Last five rows (the default for tail()).
df.tail()
# Descriptive statistics per column: count, mean, standard deviation, minimum,
# quartiles and maximum.
df.describe()
# Mean of every column.
df.mean()
# Median of every column.
df.median()
# 切片
# Label-based selection with .loc: a single cell addressed by row label and
# column name.
df.loc['2022-2-28', 'HS300']
# Label-based row slice (both end labels are included), keeping all columns.
df.loc['2020-1-3':'2020-2-28', :]

# Position-based selection with .iloc (0-based integer positions).
df.iloc[3, 0]    # row at position 3, first column
df.iloc[:, 1]    # every row of the second column
df.iloc[3:6, :]  # rows at positions 3, 4 and 5, all columns

# Plot HS300 for each calendar year in turn.
for year in ('2020', '2021', '2022'):
    df.loc[year + '-01-01':year + '-12-31', 'HS300'].plot()
# 操作
# Previous row's HS300 value: shift(1) moves the column down by one row, so
# the first row has no predecessor and becomes NaN.
df['HS_0'] = df['HS300'].shift(1)
df

# Absolute change of HS300 versus the previous row.
df['different'] = df['HS300'] - df['HS_0']
# Relative change (simple return) versus the previous row.
df['rets'] = df['different'] / df['HS_0']
df.head()
# Exercise: a first trading strategy (moving-average crossover on HS300).
import matplotlib.pyplot as plt

# Fast (40-row) and slow (200-row) rolling means of HS300.  Rows before a
# window is full (the first 39 / 199 rows) are NaN.
df['MA40'] = df['HS300'].rolling(40).mean()
df['MA200'] = df['HS300'].rolling(200).mean()

df['MA40'].plot.line(legend=True)
df['MA200'].plot.line(legend=True)
df['HS300'].plot.line(legend=True)

# Position rule: hold 3000 shares while the fast average is above the slow
# one, otherwise hold nothing.  A comparison involving NaN is False, so the
# warm-up rows get 0.  Computed column-wise instead of one df.loc lookup per
# row, which is slow and fails when the index contains duplicate labels.
df['Shares'] = (df['MA40'] > df['MA200']).astype('int64') * 3000
df['Shares']

df['MA40'].plot.line(legend=True)
df['MA200'].plot.line(legend=True)
df['Shares'].plot.line(legend=True)
df.head()

# NOTE: from here on HS_0 holds the NEXT row's HS300 value (shift(-1)).  This
# overwrites the previous-row values (shift(1)) stored under the same column
# name earlier in the file.
df['HS_0'] = df['HS300'].shift(-1)

# Simple return from this row to the next, counted only on rows where the
# position is held; every other row contributes 0.
next_row_return = (df['HS_0'] - df['HS300']) / df['HS300']
df['Profit'] = next_row_return.where(df['Shares'] == 3000, 0)

# Running sum of the per-row returns.
df['Wealth'] = df['Profit'].cumsum()

df['Profit'].plot()
df['Wealth'].plot()
plt.axhline(y=0, color='red')  # zero line for reference

df['Wealth'].plot(legend=True)
df['HS300'].plot(secondary_y=True, legend=True)

# The original referenced the method without calling it; call it so the last
# rows are actually shown.
df['Wealth'].tail()
# 随机变量
# A six-sided die modelled as a one-column DataFrame (column label 0).
dice = pd.DataFrame([1, 2, 3, 4, 5, 6])

# One experiment: draw two faces with replacement, i.e. two rolls.
sum_of_dices = dice.sample(2, replace=True)
sum_of_dices

# Number of repetitions, shared by the loop and the comprehension below.
n_trials = 50

# Repeat the experiment and print the total of the two faces each time.
# NOTE(review): the loop body's indentation was lost in the source; the print
# is assumed to belong inside the loop (one line per trial) -- confirm.
for _ in range(n_trials):
    # .sum() adds the two drawn faces per column; .loc[0] picks the value for
    # the single column, whose label is 0.
    sum_of_dices = dice.sample(2, replace=True).sum().loc[0]
    print(sum_of_dices)

# Run the same experiment again, this time collecting the totals in a list.
result = [dice.sample(2, replace=True).sum().loc[0] for _ in range(n_trials)]
result
# 随机变量的频率
# Count how often each total occurred, then order the counts by the total
# itself (the index) rather than by how frequent it was.
freq = pd.DataFrame(result)[0].value_counts()
freq = freq.sort_index()
freq

# Absolute frequencies.
freq.plot.bar()

# Relative frequencies: divide by the number of trials actually recorded
# rather than a hard-coded 50, so the bars still sum to 1 if the number of
# trials changes.
(freq / freq.sum()).plot.bar()
# 沪深300日收益率历史分布
# Histogram of the 'rets' column (the row-over-row relative change of HS300
# computed earlier in the file), grouped into 10 bins.
df['rets'].hist(bins=10)