import pandas as pd
import numpy as np
from pandas_datareader import data as web
import fix_yahoo_finance as yf
import tushare as ts
# Download the adjusted close prices of Bank of China (601988.SS) and
# Shanghai Pudong Development Bank (600000.SS) for 2014.
# Alternative data source kept from the original notes:
#   ts.get_k_data(code='601988', start='2014-01-01', end='2015-01-01')
PAf = web.get_data_yahoo('601988.SS', start='2014-01-01', end='2015-01-01')['Adj Close']
PBf = web.get_data_yahoo('600000.SS', start='2014-01-01', end='2015-01-01')['Adj Close']
# Put the two price series side by side in one DataFrame.
pairf = pd.concat([PAf, PBf], axis=1)
# Length of the formation period (only displayed in an interactive session).
len(pairf)
# Minimum-distance method
# Sum of squared differences (SSD) between two standardised price paths
def SSD(priceX, priceY):
    """Return the sum of squared differences between two standardised prices.

    Each price series is converted to simple returns, then to a cumulative
    product of ``1 + return`` (the "standardised price"); the squared gaps
    between the two standardised paths are summed.  The first observation has
    no return and is therefore skipped by the pandas reductions.

    Args:
        priceX: pandas Series of prices for the first asset.
        priceY: pandas Series of prices for the second asset.

    Returns:
        The sum of squared differences as a float.

    Raises:
        ValueError: if either price series is None.
    """
    if priceX is None or priceY is None:
        # Printing and carrying on only led to an AttributeError further down.
        raise ValueError('缺少价格序列')
    returnX = (priceX - priceX.shift(1)) / priceX.shift(1)
    returnY = (priceY - priceY.shift(1)) / priceY.shift(1)
    standardX = (returnX + 1).cumprod()
    standardY = (returnY + 1).cumprod()
    return np.sum((standardX - standardY) ** 2)
# Cointegration model
from arch.unitroot import ADF
# Log prices of both stocks; the tests below check whether they are
# integrated of order one.
PAflog, PBflog = np.log(PAf), np.log(PBf)
# ADF unit-root tests on the log price levels.
adfA = ADF(PAflog)
print(adfA.summary().as_text())
adfB = ADF(PBflog)
print(adfB.summary().as_text())
# First differences of the log prices; the leading NaN is dropped.
retA = PAflog.diff().iloc[1:]
retB = PBflog.diff().iloc[1:]
# ADF unit-root tests on the differenced series.
adfretA = ADF(retA)
print(adfretA.summary().as_text())
adfretB = ADF(retB)
print(adfretB.summary().as_text())
# Plot the log-price time series of Bank of China and SPDB.
import matplotlib.pyplot as plt
# SimHei is selected as the sans-serif font, presumably so that the Chinese
# titles below render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
PAflog.plot(label='601988ZGYH', style='--')
# NOTE(review): 600000 is SPDB, yet the legend label ends in "ZGLT" - confirm.
PBflog.plot(label='600000ZGLT', style='-')
plt.legend(loc='upper left')
plt.title('中国银行与浦发银行的对数价格时序图')
# Fixed: the original called `plot.show()`, an undefined name (NameError).
plt.show()
# Plot the differenced log prices of both stocks.
retA.plot(label='601988ZGYH')
retB.plot(label='600000ZGLT')
plt.legend(loc='lower left')
plt.title('中国银行与浦发银行的对数价格差分(收益率)')
plt.show()
# Regression analysis
# Dependent variable: log price of SPDB (B).
# Independent variable: log price of Bank of China (A).
# (The original comment had the two roles swapped relative to the code.)
import statsmodels.api as sm
model = sm.OLS(PBflog, sm.add_constant(PAflog))
results = model.fit()
print(results.summary())
# After regressing SPDB's log price on Bank of China's log price, the
# residuals are tested for stationarity.
# Extract intercept and slope by position; `.iloc` avoids the deprecated
# integer-key fallback on the label-indexed parameter Series.
alpha = results.params.iloc[0]
beta = results.params.iloc[1]
# Residual (spread) series
spread = PBflog - beta * PAflog - alpha
spread.plot()
plt.title('价差序列')
plt.show()
# Unit-root test on the spread without a deterministic term.
# NOTE(review): newer `arch` releases spell this trend 'n' instead of 'nc' -
# confirm against the installed version.
adfspread = ADF(spread, trend='nc')
print(adfspread.summary().as_text())
# Minimum-distance trading strategy
# (The original heading was a string in typographic quotes, which is a
# SyntaxError; it is now a comment.)
# Standardised prices of Bank of China and SPDB: cumulative product of
# (1 + retA) and (1 + retB), where retA/retB are the differenced log prices.
standardA = (1 + retA).cumprod()
standardB = (1 + retB).cumprod()
# Spread between the two standardised price series
SSD_pair = standardB - standardA
SSD_pair.head()
# Trading thresholds: mean +/- 1.2 standard deviations of the spread.
meanSSD_pair = np.mean(SSD_pair)
sdSSD_pair = np.std(SSD_pair)
thresholdUp = meanSSD_pair + 1.2 * sdSSD_pair
thresholdDown = meanSSD_pair - 1.2 * sdSSD_pair
SSD_pair.plot()
plt.title('中国银行与浦发银行标准化价差序列(形成期)')
plt.axhline(y=meanSSD_pair, color='black')
plt.axhline(y=thresholdUp, color='green')
plt.axhline(y=thresholdDown, color='green')
plt.show()
#构建pairtrading 类
import re
import pandas as pd
import numpy as np
from arch.unitroot import ADF
import statsmodels.api as sm
class PairTrading:
    """Helpers for pairs trading on two price series.

    Two ways of measuring the relationship between the series are offered:
    the sum of squared differences of standardised prices ("SSD") and an OLS
    regression of log prices followed by an ADF test on the residuals
    ("Cointegration").  Periods are passed as ``'YYYY-MM-DD:YYYY-MM-DD'``
    strings and are used as label slices on the price series.
    """

    # Raw string: '\d' in a normal string literal is an invalid escape.
    _PERIOD_PATTERN = re.compile(r'\d{4}-\d{2}-\d{2}:\d{4}-\d{2}-\d{2}')

    @staticmethod
    def _requirePrices(priceX, priceY):
        """Raise ValueError when either price series is missing."""
        if priceX is None or priceY is None:
            raise ValueError('缺少价格序列')

    @classmethod
    def _splitPeriod(cls, period, errorMessage):
        """Validate a 'start:end' period string and return (start, end)."""
        if not isinstance(period, str) or cls._PERIOD_PATTERN.fullmatch(period) is None:
            raise ValueError(errorMessage)
        start, end = period.split(':')
        return start, end

    @staticmethod
    def _standardize(price):
        """Return the cumulative product of (1 + simple return) of a price series."""
        simpleReturn = (price - price.shift(1)) / price.shift(1)
        return (1 + simpleReturn).cumprod()

    def SSD(self, priceX, priceY):
        """Return the sum of squared differences of the standardised prices.

        Raises:
            ValueError: if either price series is None.
        """
        self._requirePrices(priceX, priceY)
        gap = self._standardize(priceX) - self._standardize(priceY)
        return np.sum(gap ** 2)

    def SSDSpread(self, priceX, priceY):
        """Return standardised priceY minus standardised priceX.

        The first element is NaN because the first observation has no return.

        Raises:
            ValueError: if either price series is None.
        """
        self._requirePrices(priceX, priceY)
        return self._standardize(priceY) - self._standardize(priceX)

    def cointegration(self, priceX, priceY):
        """Regress log(priceY) on log(priceX) and ADF-test the residuals.

        A report with the ADF p-value and the regression coefficients is
        printed in both cases.

        Returns:
            ``(intercept, beta)`` when the ADF p-value is below 0.05,
            otherwise ``None``.

        Raises:
            ValueError: if either price series is None.
        """
        self._requirePrices(priceX, priceY)
        logX = np.log(priceX)
        logY = np.log(priceY)
        results = sm.OLS(logY, sm.add_constant(logX)).fit()
        adfSpread = ADF(results.resid)
        # Positional access through an array; integer keys on the
        # label-indexed parameter Series are deprecated in pandas.
        intercept, slope = np.asarray(results.params)[:2]
        report = ('%s\n'
                  'P-value of ADF test: %f\n'
                  'Coefficients of regression:\n'
                  'Intercept: %f\n'
                  'Beta: %f\n')
        if adfSpread.pvalue >= 0.05:
            print(report % ('交易价格不具有协整关系,', adfSpread.pvalue,
                            intercept, slope))
            return None
        print(report % ('交易价格具有协整关系,', adfSpread.pvalue,
                        intercept, slope))
        return (intercept, slope)

    def CointegrationSpread(self, priceX, priceY, formPeriod, tradePeriod):
        """Return the cointegration spread over ``tradePeriod``.

        The coefficients are estimated on ``formPeriod`` and applied to the
        log prices of ``tradePeriod``:
        ``log(priceY) - intercept - beta * log(priceX)``.

        Returns:
            The spread series, or ``None`` (after printing a message) when
            the formation period shows no cointegration.

        Raises:
            ValueError: if a price series is None or either period string is
                not of the form 'YYYY-MM-DD:YYYY-MM-DD'.
        """
        self._requirePrices(priceX, priceY)
        # Both periods must be valid; the original `not (A or B)` test only
        # complained when both were malformed.
        formStart, formEnd = self._splitPeriod(formPeriod, '形成期交易期格式错误.')
        tradeStart, tradeEnd = self._splitPeriod(tradePeriod, '形成期交易期格式错误.')
        coefficients = self.cointegration(priceX.loc[formStart:formEnd],
                                          priceY.loc[formStart:formEnd])
        if coefficients is None:
            print('未形成协整关系,无法配对.')
            return None
        intercept, slope = coefficients
        return (np.log(priceY.loc[tradeStart:tradeEnd])
                - intercept
                - slope * np.log(priceX.loc[tradeStart:tradeEnd]))

    def calBound(self, priceX, priceY, method, formPeriod, width=1.5):
        """Return ``(UpperBound, LowerBound)`` of the formation-period spread.

        The bounds are ``mean +/- width * std`` of the spread computed with
        ``method`` ('SSD' or 'Cointegration') over ``formPeriod``.

        Raises:
            ValueError: if a price series is None, ``formPeriod`` is
                malformed, ``method`` is unknown, or the 'Cointegration'
                method finds no cointegration (so no spread exists).
        """
        self._requirePrices(priceX, priceY)
        # Only formPeriod is validated; the original also referenced a
        # `tradePeriod` name that is not a parameter of this method.
        formStart, formEnd = self._splitPeriod(formPeriod, '形成期格式错误.')
        if method == 'SSD':
            spread = self.SSDSpread(priceX.loc[formStart:formEnd],
                                    priceY.loc[formStart:formEnd])
        elif method == 'Cointegration':
            spread = self.CointegrationSpread(priceX, priceY, formPeriod, formPeriod)
            if spread is None:
                raise ValueError('未形成协整关系,无法配对.')
        else:
            raise ValueError('不存在该方法,请选择“SSD"或是”Cointegration".')
        mu = np.mean(spread)
        sd = np.std(spread)
        UpperBound = mu + width * sd
        LowerBound = mu - width * sd
        return (UpperBound, LowerBound)
# Testing: exercise the PairTrading class on both stocks
import pandas as pd
import numpy as np
from pandas_datareader import data as web
import fix_yahoo_finance as yf
import tushare as ts
# Formation and trading periods as 'start:end' strings.
formPeriod = '2014-01-01:2015-01-01'
tradePeriod = '2015-01-01:2015-06-30'
# Adjusted close prices covering both periods.
priceA = web.get_data_yahoo('601988.SS', start='2014-01-01', end='2015-06-30')['Adj Close']
priceB = web.get_data_yahoo('600000.SS', start='2014-01-01', end='2015-06-30')['Adj Close']
# Label-slice each series into its formation and trading parts.
priceAf = priceA[slice(*formPeriod.split(':'))]
priceBf = priceB[slice(*formPeriod.split(':'))]
priceAt = priceA[slice(*tradePeriod.split(':'))]
priceBt = priceB[slice(*tradePeriod.split(':'))]
pt = PairTrading()
# Minimum-distance measures on the formation period.
SSD = pt.SSD(priceAf, priceBf)
SSDspread = pt.SSDSpread(priceAf, priceBf)
SSDspread.describe()
SSDspread.head()
# Cointegration coefficients and the resulting spreads for both periods.
coefficients = pt.cointegration(priceAf, priceBf)
CoSpreadF = pt.CointegrationSpread(priceA, priceB, formPeriod, formPeriod)
CoSpreadTr = pt.CointegrationSpread(priceA, priceB, formPeriod, tradePeriod)
# Bounds from the formation period, drawn over the trading-period spread.
bound = pt.calBound(priceA, priceB, 'Cointegration', formPeriod, width=1.2)
CoSpreadTr.plot()
for boundValue in bound:
    plt.axhline(boundValue, color='black')
plt.show()
# Formation-period log prices and the spread built from the regression
# intercept (alpha) and slope (beta) estimated earlier in the script.
logPBf = np.log(priceBf)
logPAf = np.log(priceAf)
spreadf = logPBf - beta * logPAf - alpha
adfSpread = ADF(spreadf)
print(adfSpread.summary().as_text())
# Trading-period spread: the same coefficients applied to the trading-period
# prices.  (The original repeated the formation-period expression here.)
CoSpreadT = np.log(priceBt) - beta * np.log(priceAt) - alpha
# Mean and standard deviation of the formation-period spread.
mu = np.mean(spreadf)
sd = np.std(spreadf)
# Trading period
tradeStart = '2015-01-01'
tradeEnd = '2015-06-30'
# Plot the trading-period spread with the mean and the 0.2 / 1.5 / 2.5
# standard-deviation bands on both sides.
CoSpreadTr.plot()
plt.title('交易期价差序列(协整配对)')
plt.axhline(y=mu, color='black')
plt.axhline(y=mu + 0.2 * sd, color='blue', ls='-', lw=2)
plt.axhline(y=mu - 0.2 * sd, color='blue', ls='-', lw=2)
plt.axhline(y=mu + 1.5 * sd, color='green', ls='--', lw=2.5)
plt.axhline(y=mu - 1.5 * sd, color='green', ls='--', lw=2.5)
plt.axhline(y=mu + 2.5 * sd, color='red', ls='-.', lw=3)
# Lower 2.5-sd line was missing although it is used as a band edge below.
plt.axhline(y=mu - 2.5 * sd, color='red', ls='-.', lw=3)
plt.show()
# Build the trading strategy from the open/close thresholds and simulate a
# trading account.
# Band edges: -inf, mu -/+ 2.5, 1.5 and 0.2 standard deviations, +inf.
level = (
    (float('-inf'),)
    + tuple(mu + k * sd for k in (-2.5, -1.5, -0.2, 0.2, 1.5, 2.5))
    + (float('inf'),)
)
# pd.cut numbers the seven bands 0..6; subtracting 3 recentres them to -3..3.
prcLevel = pd.cut(CoSpreadTr, level, labels=False) - 3
prcLevel.head()
# Build the trading-signal function
def TradeSig(prcLevel):
    """Translate consecutive band levels into trading signals.

    A non-zero signal is emitted at position ``i`` only for these
    (previous level, current level) transitions:

        (1, 2) -> -2      (1, 0) -> 2      (2, 3) -> 3
        (-1, -2) -> 1     (-1, 0) -> -1    (-2, -3) -> -3

    Every other transition, and the first element, yields 0.

    Args:
        prcLevel: sequence (list, array or pandas Series) of band levels.

    Returns:
        numpy array of floats with the same length as ``prcLevel``.
    """
    transitions = {
        (1, 2): -2,
        (1, 0): 2,
        (2, 3): 3,
        (-1, -2): 1,
        (-1, 0): -1,
        (-2, -3): -3,
    }
    # Work on plain values: integer keys on a Series are label lookups (or a
    # deprecated positional fallback), which breaks for date-indexed input.
    levels = np.asarray(prcLevel).tolist()
    signal = np.zeros(len(levels))
    for i in range(1, len(levels)):
        signal[i] = transitions.get((levels[i - 1], levels[i]), 0)
    return signal
signal = TradeSig(prcLevel)
# Turn the signals into a position path (1, -1 or 0); when no rule applies
# the previous position is carried forward.
position = [signal[0]]
ns = len(signal)
for i in range(1, ns):
    position.append(position[-1])
    if signal[i] == 1:
        position[i] = 1
    elif signal[i] == -2:
        # Fixed: the original used `==` here, so position -1 was never set.
        position[i] = -1
    elif signal[i] == -1 and position[i - 1] == 1:
        # Fixed: the original tested `signal[i]==1` (already handled above)
        # against the misspelled name `postion`; signal -1 closes position 1,
        # mirroring the signal 2 / position -1 rule below.
        position[i] = 0
    elif signal[i] == 2 and position[i - 1] == -1:
        position[i] = 0
    elif signal[i] == 3:
        position[i] = 0
    elif signal[i] == -3:
        position[i] = 0
position = pd.Series(position, index=CoSpreadTr.index)
position.tail()
def TradeSim(priceX, priceY, position, hedgeRatio=None, size=1000, initCash=2000):
    """Simulate a trading account for a position path on a pair of assets.

    ``size * position`` shares of Y are held; the X holding is set when a
    position is opened to ``-hedgeRatio * shareY * priceY / priceX``.  Cash
    is reduced by the cost of both legs when a position is opened and
    increased by their value at the current prices when it is closed.  A
    direct flip between non-zero positions is handled as a close followed by
    an open at the same prices.  All inputs are paired up by order, not by
    index label.

    Args:
        priceX: prices of asset X (list, array or Series).
        priceY: prices of asset Y, same length as ``priceX``.
        position: position path, expected to hold 1, 0 or -1; if it is a
            pandas Series its index is used for the result.
        hedgeRatio: shares-of-X multiplier; defaults to the module-level
            ``beta`` when omitted.
        size: number of Y shares per unit of position.
        initCash: starting cash.

    Returns:
        DataFrame with columns Position, ShareY, ShareX, Cash and Asset.

    Raises:
        ValueError: if the three inputs do not have the same length.
    """
    if hedgeRatio is None:
        # Backward compatible default: the regression slope defined at
        # module level.
        hedgeRatio = beta
    index = position.index if isinstance(position, pd.Series) else None
    # Plain arrays give true positional access; integer keys on a
    # date-indexed Series rely on a deprecated fallback.
    pos = np.asarray(position, dtype=float)
    px = np.asarray(priceX, dtype=float)
    py = np.asarray(priceY, dtype=float)
    n = len(pos)
    if len(px) != n or len(py) != n:
        raise ValueError('priceX, priceY and position must have the same length')
    if index is None:
        index = pd.RangeIndex(n)

    shareY = size * pos
    shareX = np.zeros(n)
    cash = np.zeros(n)
    if n:
        shareX[0] = -hedgeRatio * shareY[0] * py[0] / px[0]
        cash[0] = initCash
    for i in range(1, n):
        # Default: carry yesterday's X holding and cash forward.
        shareX[i] = shareX[i - 1]
        cash[i] = cash[i - 1]
        if pos[i] == pos[i - 1]:
            continue
        if pos[i - 1] != 0:
            # Close: liquidate yesterday's holdings at today's prices.
            cash[i] += shareY[i - 1] * py[i] + shareX[i - 1] * px[i]
            shareX[i] = 0.0
        if pos[i] != 0:
            # Open: size the X leg, then pay for both legs.
            shareX[i] = -hedgeRatio * shareY[i] * py[i] / px[i]
            cash[i] -= shareY[i] * py[i] + shareX[i] * px[i]

    asset = cash + shareY * py + shareX * px
    account = pd.DataFrame(
        {
            'Position': np.asarray(position),
            'ShareY': shareY,
            'ShareX': shareX,
            'Cash': cash,
            'Asset': asset,
        },
        index=index,
    )
    return account


# Backward-compatible alias for the original (misspelled) function name.
TraddeSim = TradeSim
# Simulate the account on the trading-period prices and plot its first three
# columns (selected by position) on one figure.
account = TradeSim(priceAt, priceBt, position)
for columnPos in range(3):
    account.iloc[:, columnPos].plot()
plt.show()