代码如下:
####### step1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.optimize import minimize
# Read the closing-price data
def read_data(file):
    """Load daily closing prices from Excel and reduce them to month-end rows.

    Args:
        file: File name appended verbatim to the hard-coded desktop
            directory, so it must begin with its own path separator
            (for example ``"/prices.xlsx"``).

    Returns:
        A DataFrame indexed by date whose first row is the first daily
        observation and whose remaining rows are the last observation of
        each calendar month.
    """
    # Raw string: "\A" and "\D" are not valid escape sequences, so a plain
    # literal triggers a warning on recent Python versions. The resulting
    # characters are identical to the original literal.
    s = r"C:/Users\Anita\Desktop"
    file_address = s + file
    # `sheet_name` is the keyword pandas supports; the older `sheetname`
    # spelling is rejected by current releases.
    df = pd.read_excel(file_address, sheet_name="data")
    # Per the original author's notes: row labels 0 and 1 are description
    # rows, and label 1497 is the final row holding the March 2018 data.
    df.drop([0, 1, 1497], inplace=True)
    # Use the date column as the index.
    df.set_index('Date', inplace=True)
    # NOTE(review): resample() needs a DatetimeIndex; the description rows may
    # have left the Date column as object dtype, so convert explicitly. This
    # is a no-op when the index is already datetime-typed.
    df.index = pd.to_datetime(df.index)
    # Downsample the daily data to monthly data (last observation per month).
    period_type = 'M'
    monthly_last = df.resample(period_type).last()
    # Prepend the first daily row so the first month has a starting price.
    monthly_df = pd.concat([df.iloc[0], monthly_last.T], axis=1).T
    return monthly_df
# Compute log returns
def calculate_log_return(file):
    """Append a log-return column for every instrument to the monthly prices.

    Args:
        file: File name forwarded unchanged to ``read_data``.

    Returns:
        The DataFrame produced by ``read_data`` with one extra
        ``return_<ticker>`` column per instrument, holding
        ``log(price[t] / price[t-1])``. The first row is dropped because it
        has no preceding price.
    """
    data = read_data(file)
    variable_list = ['IF00.CFE', 'IC00.CFE', 'TF00.CFE', 'AU00.SHF', '159920.OF', '511880.SH']
    for j in variable_list:
        # Cast to float first, mirroring the float() calls of the original
        # element-wise loop (the price columns may not be numeric dtype).
        prices = data[j].astype(float)
        # Whole-column assignment replaces the chained `data[col][i] = ...`
        # writes, which are not guaranteed to modify `data` and rely on
        # deprecated positional integer lookups on a labelled Series.
        data['return_' + j] = np.log(prices / prices.shift(1))
    # Row 0 has no predecessor (its return is undefined), so skip it.
    return data.iloc[1:, :]
# 将收益率进行标准化处理,形成新的表格std_data
def standardlize_process(file):
data=calculate_log_return(file)
scaler=StandardScaler()
scaler.fit(data)
column_list=['IF00.CFE','IC00.CFE','TF00.CFE','AU00.SHF','159920.OF','511880.SH','return_IF00.CFE','return_IC00.CFE','return_TF00.CFE','return_AU00.SHF','return_159920.OF','return_511880.SH']
std_data=pd.Dat