配对交易策略 Pair Trading
0. 引库
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
plt. style. use( 'seaborn' )
% matplotlib inline
# Read the price table and index it by its 'date' column.
data = pd.read_csv('pair-trade-data.csv').set_index('date')
data.head()
000568 000858 date 2010/1/4 27.488118 26.117536 2010/1/5 27.335123 26.391583 2010/1/6 26.941707 25.694008 2010/1/7 26.388011 24.913389 2010/1/8 26.825140 24.863562
# Line chart of every column in `data` against the date index.
data. plot( figsize= ( 8 , 6 ) ) ;
2. 策略开发思路
# Raw price spread between the two legs: 000568 minus 000858, row by row.
leg_a, leg_b = data['000568'], data['000858']
data['priceDelta'] = leg_a - leg_b
data.head()
000568 000858 priceDelta date 2010/1/4 27.488118 26.117536 1.370582 2010/1/5 27.335123 26.391583 0.943540 2010/1/6 26.941707 25.694008 1.247699 2010/1/7 26.388011 24.913389 1.474622 2010/1/8 26.825140 24.863562 1.961578
# Plot the spread and mark its full-sample mean with a horizontal line.
spread_ax = data['priceDelta'].plot(figsize=(8, 6))
spread_ax.set_ylabel('Spread')
spread_ax.axhline(data['priceDelta'].mean());
# Standardise the spread with its full-sample mean and population standard
# deviation (ddof=0, which is what np.std applies).
spread = data['priceDelta']
data['zscore'] = (spread - spread.mean()) / spread.std(ddof=0)
data.head()
000568 000858 priceDelta zscore date 2010/1/4 27.488118 26.117536 1.370582 0.569895 2010/1/5 27.335123 26.391583 0.943540 0.500520 2010/1/6 26.941707 25.694008 1.247699 0.549932 2010/1/7 26.388011 24.913389 1.474622 0.586796 2010/1/8 26.825140 24.863562 1.961578 0.665903
# Number of rows whose z-score exceeds the +1.5 entry threshold.
int((data['zscore'] > 1.5).sum())
17
# Raw signal for the 000568 leg:
#   z >  1.5  -> -1 (short 000568)
#   z < -1.5  -> +1 (long 000568)
#   |z| < 0.5 ->  0 (flat)
# Anything else stays NaN for now. The three conditions are mutually
# exclusive, so their order does not matter.
z = data['zscore']
data['position_1'] = np.select(
    [z.abs() < 0.5, z < -1.5, z > 1.5],
    [0, 1, -1],
    default=np.nan,
)
data.head()
000568 000858 priceDelta zscore position_1 date 2010/1/4 27.488118 26.117536 1.370582 0.569895 NaN 2010/1/5 27.335123 26.391583 0.943540 0.500520 NaN 2010/1/6 26.941707 25.694008 1.247699 0.549932 NaN 2010/1/7 26.388011 24.913389 1.474622 0.586796 NaN 2010/1/8 26.825140 24.863562 1.961578 0.665903 NaN
产生交易信号
# Carry the last signal forward across NaN gaps; rows before the first
# signal become 0 (flat).
signal_plot_kwargs = dict(ylim=[-1.1, 1.1], figsize=(10, 6))
carried = data['position_1'].ffill()
data['position_1'] = carried.fillna(0)
data['position_1'].plot(**signal_plot_kwargs);
# The second leg always takes the opposite side of the first.
data['position_2'] = -np.sign(data['position_1'])
data['position_2'].plot(**signal_plot_kwargs);
3. 计算策略年化收益并可视化
# Log return of each leg against the previous row; the first row has no
# predecessor and is set to 0.
for ret_col, ticker in (('returns_1', '000568'), ('returns_2', '000858')):
    price = data[ticker]
    data[ret_col] = np.log(price / price.shift(1)).fillna(0)
data.head(10)
000568 000858 priceDelta zscore position_1 position_2 returns_1 returns_2 date 2010/1/4 27.488118 26.117536 1.370582 0.569895 0.0 -0.0 0.000000 0.000000 2010/1/5 27.335123 26.391583 0.943540 0.500520 0.0 -0.0 -0.005581 0.010438 2010/1/6 26.941707 25.694008 1.247699 0.549932 0.0 -0.0 -0.014497 -0.026787 2010/1/7 26.388011 24.913389 1.474622 0.586796 0.0 -0.0 -0.020766 -0.030852 2010/1/8 26.825140 24.863562 1.961578 0.665903 0.0 -0.0 0.016430 -0.002002 2010/1/11 25.936311 24.631037 1.305274 0.559285 0.0 -0.0 -0.033696 -0.009396 2010/1/12 26.409867 25.336916 1.072951 0.521543 0.0 -0.0 0.018094 0.028255 2010/1/13 26.577433 25.137609 1.439824 0.581143 0.0 -0.0 0.006325 -0.007897 2010/1/14 28.420660 26.109231 2.311428 0.722738 0.0 -0.0 0.067054 0.037924 2010/1/15 28.253094 26.208885 2.044209 0.679327 0.0 -0.0 -0.005913 0.003810
# Each leg earns the current row's return on the position held in the
# previous row (hence shift(1)); the two legs are weighted equally.
leg_1 = data['position_1'].shift(1) * data['returns_1']
leg_2 = data['position_2'].shift(1) * data['returns_2']
data['strategy'] = 0.5 * leg_1 + 0.5 * leg_2
# Final cumulative growth factor: exp of the summed log returns.
performance = data[['returns_1', 'returns_2', 'strategy']].dropna()
performance.cumsum().apply(np.exp).tail(1)
returns_1 returns_2 strategy date 2019/4/8 2.470158 3.837651 0.986754
# Cumulative growth curves: cumulative sum of the log-return columns,
# exponentiated, for both stocks and the strategy (rows with NaN dropped).
data[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . cumsum( ) . apply ( np. exp) . plot( figsize= ( 10 , 6 ) ) ;
Pair trading 策略 - 小范围时间(2013.6-2014.12)
# Read the shorter sample and index it by its 'date' column.
data2 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data2.head()
000568 000858 date 2013/6/3 20.719056 20.343053 2013/6/4 20.357220 20.060867 2013/6/5 20.514540 20.274644 2013/6/6 20.113374 20.172031 2013/6/7 19.704342 19.667508
# Line chart of every column in `data2` against the date index.
data2. plot( figsize= ( 8 , 6 ) ) ;
# Raw price spread for this sample: 000568 minus 000858.
# Computed from data2's own columns. The original read the prices from
# `data`, which only produced values through index alignment on matching
# date labels and would silently give NaN for any date missing from (or
# wrong numbers for any price differing in) that other frame.
data2['priceDelta'] = data2['000568'] - data2['000858']
data2.head()
000568 000858 priceDelta date 2013/6/3 20.719056 20.343053 0.376004 2013/6/4 20.357220 20.060867 0.296353 2013/6/5 20.514540 20.274644 0.239896 2013/6/6 20.113374 20.172031 -0.058657 2013/6/7 19.704342 19.667508 0.036833
# Plot the spread and mark its sample mean with a horizontal line.
spread_ax = data2['priceDelta'].plot(figsize=(8, 6))
spread_ax.set_ylabel('Spread')
spread_ax.axhline(data2['priceDelta'].mean());
# Standardise the spread with its sample mean and population standard
# deviation (ddof=0, which is what np.std applies).
spread = data2['priceDelta']
data2['zscore'] = (spread - spread.mean()) / spread.std(ddof=0)
data2.head()
000568 000858 priceDelta zscore date 2013/6/3 20.719056 20.343053 0.376004 0.048513 2013/6/4 20.357220 20.060867 0.296353 0.000596 2013/6/5 20.514540 20.274644 0.239896 -0.033369 2013/6/6 20.113374 20.172031 -0.058657 -0.212979 2013/6/7 19.704342 19.667508 0.036833 -0.155532
# Number of rows whose z-score exceeds the +1.5 entry threshold.
int((data2['zscore'] > 1.5).sum())
40
# Number of rows whose z-score is below the -1.5 entry threshold.
int((data2['zscore'] < -1.5).sum())
16
# Raw signal for the 000568 leg:
#   z >  1.5  -> -1 (short 000568)
#   z < -1.5  -> +1 (long 000568)
#   |z| < 0.5 ->  0 (flat)
# Anything else stays NaN for now. The three conditions are mutually
# exclusive, so their order does not matter.
z = data2['zscore']
data2['position_1'] = np.select(
    [z.abs() < 0.5, z < -1.5, z > 1.5],
    [0, 1, -1],
    default=np.nan,
)
data2.head()
000568 000858 priceDelta zscore position_1 date 2013/6/3 20.719056 20.343053 0.376004 0.048513 0.0 2013/6/4 20.357220 20.060867 0.296353 0.000596 0.0 2013/6/5 20.514540 20.274644 0.239896 -0.033369 0.0 2013/6/6 20.113374 20.172031 -0.058657 -0.212979 0.0 2013/6/7 19.704342 19.667508 0.036833 -0.155532 0.0
# Carry the last signal forward across NaN gaps; rows before the first
# signal become 0 (flat).
signal_plot_kwargs = dict(ylim=[-1.1, 1.1], figsize=(10, 6))
carried = data2['position_1'].ffill()
data2['position_1'] = carried.fillna(0)
data2['position_1'].plot(**signal_plot_kwargs);
# The second leg always takes the opposite side of the first.
data2['position_2'] = -np.sign(data2['position_1'])
data2['position_2'].plot(**signal_plot_kwargs);
# Log return of each leg against the previous row; the first row has no
# predecessor and is set to 0.
for ret_col, ticker in (('returns_1', '000568'), ('returns_2', '000858')):
    price = data2[ticker]
    data2[ret_col] = np.log(price / price.shift(1)).fillna(0)
data2.head(10)
000568 000858 priceDelta zscore position_1 position_2 returns_1 returns_2 date 2013/6/3 20.719056 20.343053 0.376004 0.048513 0.0 -0.0 0.000000 0.000000 2013/6/4 20.357220 20.060867 0.296353 0.000596 0.0 -0.0 -0.017618 -0.013968 2013/6/5 20.514540 20.274644 0.239896 -0.033369 0.0 -0.0 0.007698 0.010600 2013/6/6 20.113374 20.172031 -0.058657 -0.212979 0.0 -0.0 -0.019749 -0.005074 2013/6/7 19.704342 19.667508 0.036833 -0.155532 0.0 -0.0 -0.020546 -0.025329 2013/6/13 19.562754 19.012515 0.550239 0.153334 0.0 -0.0 -0.007212 -0.033871 2013/6/14 19.617816 19.012515 0.605301 0.186459 0.0 -0.0 0.002811 0.000000 2013/6/17 19.255979 18.720423 0.535556 0.144501 0.0 -0.0 -0.018616 -0.015482 2013/6/18 19.405434 18.853192 0.552241 0.154539 0.0 -0.0 0.007731 0.007067 2013/6/19 19.956054 19.269202 0.686852 0.235521 0.0 -0.0 0.027979 0.021826
# Each leg earns the current row's return on the position held in the
# previous row (hence shift(1)); the two legs are weighted equally.
leg_1 = data2['position_1'].shift(1) * data2['returns_1']
leg_2 = data2['position_2'].shift(1) * data2['returns_2']
data2['strategy'] = 0.5 * leg_1 + 0.5 * leg_2
# Final cumulative growth factor: exp of the summed log returns.
performance = data2[['returns_1', 'returns_2', 'strategy']].dropna()
performance.cumsum().apply(np.exp).tail(1)
returns_1 returns_2 strategy date 2014/12/31 0.892955 0.97347 1.12623
# Cumulative growth curves: cumulative sum of the log-return columns,
# exponentiated, for both stocks and the strategy (rows with NaN dropped).
data2[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . cumsum( ) . apply ( np. exp) . plot( figsize= ( 10 , 6 ) ) ;
# Mean per-row log return scaled by 252 (presumably the number of trading
# days per year, i.e. an annualised figure).
data2[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . mean( ) * 252
returns_1 -0.073915
returns_2 -0.017554
strategy 0.077608
dtype: float64
# Standard deviation of the per-row log returns scaled by sqrt(252)
# (presumably annualised volatility with 252 trading days per year).
data2[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . std( ) * 252 ** 0.5
returns_1 0.300306
returns_2 0.280425
strategy 0.057016
dtype: float64
# Strategy equity curve: exp of the cumulative log return.
equity = data2['strategy'].dropna().cumsum().apply(np.exp)
data2['cumret'] = equity
# Running peak of the equity curve.
data2['cummax'] = data2['cumret'].cummax()
# Drawdown is the absolute gap between the running peak and the current
# equity (not divided by the peak); report its largest value.
drawdown = data2['cummax'] - data2['cumret']
drawdown.max()
0.03645280148896235
Pair trading 策略 - 考虑时间序列平稳性
import pandas as pd
import numpy as np
import tushare as ts
import seaborn
from matplotlib import pyplot as plt
plt. style. use( 'seaborn' )
% matplotlib inline
1. 数据准备
# Read the shorter sample again into its own frame, indexed by 'date'.
data3 = pd.read_csv('pair-trade-data2.csv').set_index('date')
data3.head()
000568 000858 date 2013/6/3 20.719056 20.343053 2013/6/4 20.357220 20.060867 2013/6/5 20.514540 20.274644 2013/6/6 20.113374 20.172031 2013/6/7 19.704342 19.667508
# Line chart of every column in `data3` against the date index.
data3. plot( figsize= ( 8 , 6 ) ) ;
2. 策略开发思路
# Pairwise correlation matrix of the columns of `data3`.
data3. corr( )
000568 000858 000568 1.000000 0.552409 000858 0.552409 1.000000
# Scatter plot of one leg's price against the other for the sample that is
# actually analysed in this section. The original plotted and cleaned `data`
# (the first section's frame) instead of `data3`.
plt.figure(figsize=(10, 8))
plt.title('Stock Correlation')
plt.plot(data3['000568'], data3['000858'], '.');
plt.xlabel('000568')
plt.ylabel('000858')
# Drop incomplete rows from data3 so the regression fit that follows sees no
# NaN. The original called this on `data`, which left data3 unguarded and
# removed rows from the other frame as a side effect.
data3.dropna(inplace=True)
# Degree-1 least-squares fit of the second column on the first column of
# data3; both coefficients are rounded to two decimals.
x_prices = data3.iloc[:, 0]
y_prices = data3.iloc[:, 1]
slope, intercept = np.polyfit(x_prices, y_prices, 1).round(2)
slope, intercept
(0.51, 7.82)
# Spread is the regression residual: second column minus the value the
# fitted line predicts from the first column.
fitted = data3.iloc[:, 0] * slope + intercept
data3['spread'] = data3.iloc[:, 1] - fitted
data3.head()
000568 000858 spread date 2013/6/3 20.719056 20.343053 1.956334 2013/6/4 20.357220 20.060867 1.858684 2013/6/5 20.514540 20.274644 1.992228 2013/6/6 20.113374 20.172031 2.094210 2013/6/7 19.704342 19.667508 1.798294
data3['spread'].plot(figsize=(10, 8), title='Price Spread');
# Standardise the spread with its sample mean and pandas' default standard
# deviation (ddof=1).
resid = data3['spread']
data3['zscore'] = (resid - resid.mean()) / resid.std()
data3.head()
000568 000858 spread zscore date 2013/6/3 20.719056 20.343053 1.956334 1.452385 2013/6/4 20.357220 20.060867 1.858684 1.382488 2013/6/5 20.514540 20.274644 1.992228 1.478078 2013/6/6 20.113374 20.172031 2.094210 1.551075 2013/6/7 19.704342 19.667508 1.798294 1.339261
# Z-score over time with reference lines at the +/-1.5 entry thresholds and
# at zero.
z_ax = data3['zscore'].plot(figsize=(10, 8), title='Z-score')
z_ax.axhline(1.5)
z_ax.axhline(0)
z_ax.axhline(-1.5)
<matplotlib.lines.Line2D at 0xcb62632e8>
产生交易信号
# Raw signal for the first leg:
#   z >  1.5  -> +1
#   z < -1.5  -> -1
#   |z| < 0.5 ->  0 (flat)
# Anything else is NaN until forward-filled. The three conditions are
# mutually exclusive, so their order does not matter.
z = data3['zscore']
raw_signal = np.select(
    [z.abs() < 0.5, z < -1.5, z > 1.5],
    [0, -1, 1],
    default=np.nan,
)
data3['position_1'] = raw_signal
# Carry the last signal forward; rows before the first signal become 0.
data3['position_1'] = data3['position_1'].ffill().fillna(0)
signal_plot_kwargs = dict(ylim=[-1.1, 1.1], figsize=(10, 6))
data3['position_1'].plot(title='Trading Signal_Uptrade', **signal_plot_kwargs);
# The second leg always takes the opposite side of the first.
data3['position_2'] = -np.sign(data3['position_1'])
data3['position_2'].plot(title='Trading Signal_Downtrade', **signal_plot_kwargs);
3. 计算策略年化收益并可视化
# Log return of each leg against the previous row (first row stays NaN).
for ret_col, ticker in (('returns_1', '000568'), ('returns_2', '000858')):
    price = data3[ticker]
    data3[ret_col] = np.log(price / price.shift(1))
# Each leg earns the current row's return on the position held in the
# previous row (hence shift(1)); the two legs are weighted equally.
leg_1 = data3['position_1'].shift(1) * data3['returns_1']
leg_2 = data3['position_2'].shift(1) * data3['returns_2']
data3['strategy'] = 0.5 * leg_1 + 0.5 * leg_2
# Final cumulative growth factor: exp of the summed log returns.
performance = data3[['returns_1', 'returns_2', 'strategy']].dropna()
performance.cumsum().apply(np.exp).tail(1)
returns_1 returns_2 strategy date 2014/12/31 0.892955 0.97347 1.174494
# Cumulative growth curves: cumulative sum of the log-return columns,
# exponentiated, for both stocks and the strategy (rows with NaN dropped).
data3[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . cumsum( ) . apply ( np. exp) . plot( figsize= ( 10 , 8 ) , title = 'Strategy_Backtesting' ) ;
# Mean per-row log return scaled by 252 (presumably the number of trading
# days per year, i.e. an annualised figure).
data3[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . mean( ) * 252
returns_1 -0.073915
returns_2 -0.017554
strategy 0.105002
dtype: float64
# Standard deviation of the per-row log returns scaled by sqrt(252)
# (presumably annualised volatility with 252 trading days per year).
data3[ [ 'returns_1' , 'returns_2' , 'strategy' ] ] . dropna( ) . std( ) * 252 ** 0.5
returns_1 0.300306
returns_2 0.280425
strategy 0.068639
dtype: float64
# Strategy equity curve: exp of the cumulative log return.
equity = data3['strategy'].dropna().cumsum().apply(np.exp)
data3['cumret'] = equity
# Running peak of the equity curve.
data3['cummax'] = data3['cumret'].cummax()
# Drawdown is the absolute gap between the running peak and the current
# equity (not divided by the peak); report its largest value.
drawdown = data3['cummax'] - data3['cumret']
drawdown.max()
0.038159777097367176
策略的思考
对多只ETF进行配对交易,是很多实盘量化基金的交易策略;
策略的风险和问题:
Spread 不回归的风险:当市场结构发生重大改变时,基于历史数据回归得到的 Spread 可能不再向均值回归;
中国市场做空受到限制,策略中有部分做空的收益是无法获得的;
回归系数(对冲比率)会随时间变化,需要定期重新估计并再平衡(Rebalancing);
策略没有考虑交易成本和其他成本;