Mean-Reverting Strategy Design

http://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing

Trend following/momentum and mean reversion are the two most basic approaches to strategy design. For the latter, the first question is whether the time series is actually mean-reverting. Mathematically, the canonical continuous-time mean-reverting process is the Ornstein-Uhlenbeck (OU) process, dx_t = θ(μ − x_t)dt + σ dW_t, as opposed to Brownian motion (a random walk). Using pandas and statsmodels we can run an Augmented Dickey-Fuller (ADF) test to check for mean reversion (mean reversion being a necessary condition for stationarity). If the test statistic is larger than the critical value, we cannot reject the null hypothesis: the series is not mean-reverting but a random walk. If the data are fetched from an online source, the procedure can be written as:

# Import the Time Series library
import statsmodels.tsa.stattools as ts

# Import Datetime and the Pandas DataReader
from datetime import datetime
from pandas.io.data import DataReader  # in newer pandas, use: from pandas_datareader.data import DataReader

# Download the Google OHLCV data from 1/1/2000 to 1/1/2013
goog = DataReader("GOOG", "yahoo", datetime(2000,1,1), datetime(2013,1,1))
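
As a quick sanity check on the OU intuition above, we can also simulate a discretised OU process and verify that the same test flags it as mean-reverting; this is only an illustrative sketch, with theta, mu and sigma chosen arbitrarily:

import numpy as np

# Euler-Maruyama discretisation of dx = theta*(mu - x) dt + sigma dW
theta, mu, sigma, dt = 5.0, 0.0, 0.5, 1.0 / 252
x = np.zeros(5000)
for t in range(1, len(x)):
    x[t] = x[t-1] + theta * (mu - x[t-1]) * dt + sigma * np.sqrt(dt) * np.random.randn()

# A strongly negative ADF statistic rejects the unit-root (random-walk) null
print(ts.adfuller(x, 1))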

First, run a unit-root test on the time series:

ts.adfuller(goog['Adj Close'], 1)
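
A minimal sketch of interpreting the output, assuming goog was downloaded as above (adfuller returns, among other things, the test statistic, the p-value and a dictionary of critical values):

result = ts.adfuller(goog['Adj Close'], 1)
test_stat, p_value, crit_values = result[0], result[1], result[4]
print("ADF statistic: %s, p-value: %s" % (test_stat, p_value))
# If the statistic is greater (less negative) than the 5% critical value,
# we cannot reject the null hypothesis of a unit root, i.e. a random walk
print("5%% critical value: %s" % crit_values['5%'])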

We can also use the Hurst exponent to characterise the behaviour of the series. The idea is that for a series with Hurst exponent H the variance of the lagged differences scales as Var(x(t+τ) − x(t)) ∝ τ^(2H), so a log-log fit of the dispersion of the lagged differences against the lag recovers H (the code below fits the square root of the standard deviation, whose slope is H/2, hence the final multiplication by 2):

H<0.5 - The time series is mean reverting
H=0.5 - The time series is a Geometric Brownian Motion
H>0.5 - The time series is trending

from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn

def hurst(ts):
	"""Returns the Hurst Exponent of the time series vector ts"""
	# Create the range of lag values
	lags = range(2, 100)

	# Calculate the dispersion (sqrt of the standard deviation) of the lagged differences
	tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]

	# Use a linear fit to estimate the Hurst Exponent
	poly = polyfit(log(lags), log(tau), 1)

	# Return the Hurst exponent from the polyfit output
	return poly[0]*2.0

# Create a Geometric Brownian Motion, Mean-Reverting and Trending Series
gbm = log(cumsum(randn(100000))+1000)
mr = log(randn(100000)+1000)
tr = log(cumsum(randn(100000)+1)+1000)

# Output the Hurst Exponent for each of the above series
# and the price of Google (the Adjusted Close price) for 
# the ADF test given above in the article
print("Hurst(GBM):   %s" % hurst(gbm))
print("Hurst(MR):    %s" % hurst(mr))
print("Hurst(TR):    %s" % hurst(tr))

# Assuming you have run the above code to obtain 'goog'!
print("Hurst(GOOG):  %s" % hurst(goog['Adj Close']))

Next we turn to testing for cointegration via the Cointegrated Augmented Dickey-Fuller (CADF) test. The essence of cointegration is that some linear combination of two (individually non-stationary) series is stationary, so the spread between them is mean-reverting and can be traded.
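
Before working with real data, a quick synthetic check makes the idea concrete: construct two series that share a random-walk component, so a linear combination of them is stationary by construction, and confirm that the ADF test on the spread rejects the unit-root null. This is only an illustrative sketch; the series names and noise scales are made up:

import numpy as np
import statsmodels.tsa.stattools as ts

np.random.seed(42)
w = np.cumsum(np.random.randn(1000))        # shared random-walk component
x = w + 0.5 * np.random.randn(1000)         # first "price" series
y = 2.0 * w + 0.5 * np.random.randn(1000)   # cointegrated with x (true beta = 2)

spread = y - 2.0 * x                        # stationary linear combination
print(ts.adfuller(spread))                  # strongly negative statistic expected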


# For statistical arbitrage we usually start with a scatter plot of the pair and then run a cointegration test

# cadf.py


import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import pandas.io.data as web  # in newer pandas, use the pandas_datareader package instead
import pprint
import statsmodels.tsa.stattools as ts


from pandas.stats.api import ols  # removed in newer pandas; see the statsmodels version at the end




def plot_price_series(df, ts1, ts2):
    months = mdates.MonthLocator()  # could also be DayLocator, HourLocator, etc.
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_xlim(datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1))
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('%s and %s Daily Prices' % (ts1, ts2))
    plt.legend()
    plt.show()




# The version below is adapted to our own intraday data: the DataFrame in main() is d, and the df produced after processing corresponds to results such as mergeDF and Pnl
def plot_price_series(df, ts1, ts2):
    hours = mdates.HourLocator()  # could also be DayLocator, MinuteLocator, etc.
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)  # the index must be reset first, since the original index is pTime and sym
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(hours)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H:%M'))  # hourly ticks need an intraday format
    ax.set_xlim(datetime.datetime(2014, 1, 1), datetime.datetime(2014, 1, 2))  # start and end must match the sample (start before end)
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Time')
    plt.ylabel('TrdPriceLast ($)')
    plt.title('%s and %s Prices' % (ts1, ts2))
    plt.legend()
    plt.show()


def plot_scatter_series(df, ts1, ts2):
    plt.xlabel('%s TrdPriceLast ($)' % ts1)
    plt.ylabel('%s TrdPriceLast ($)' % ts2)
    plt.title('%s and %s Price Scatterplot' % (ts1, ts2))
    plt.scatter(df[ts1], df[ts2])
    plt.show()


def plot_residuals(df):
    hours = mdates.HourLocator() 
    fig, ax = plt.subplots()
    ax.plot(df.index, df["res"], label="Residuals")
    ax.xaxis.set_major_locator(hours)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H:%M'))
    ax.set_xlim(datetime.datetime(2014, 1, 1), datetime.datetime(2014, 1, 2))
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Time')
    plt.ylabel('TrdPriceLast ($)')
    plt.title('Residual Plot')
    plt.legend()


    plt.plot(df["res"])
    plt.show()




if __name__ == "__main__":
    # Sample date range (start must precede end)
    start = datetime.datetime(2014, 1, 1)
    end = datetime.datetime(2014, 1, 2)

    # Original QuantStart example: download two energy stocks from Yahoo Finance
    # arex = web.DataReader("AREX", "yahoo", start, end)
    # wll = web.DataReader("WLL", "yahoo", start, end)

    # Adapted version: d is assumed to be our pre-loaded intraday DataFrame,
    # originally indexed by pTime and sym, with a TrdPriceLast column
    d = d.reset_index()
    d = d.set_index(["pTime"])
    df = pd.DataFrame(index=d.index.unique())
    df["D"] = d[d["sym"] == "D"]["TrdPriceLast"]
    df["E"] = d[d["sym"] == "E"]["TrdPriceLast"]
    
    # Plot the two time series
    plot_price_series(df, "D", "E")


    # Display a scatter plot of the two time series
    plot_scatter_series(df, "D", "E")


    # Calculate optimal hedge ratio "beta"
    res = ols(y=df['D'], x=df["E"])
    beta_hr = res.beta.x


    # Calculate the residuals of the linear combination
    df["res"] = df["D"] - beta_hr*df["E"]


    # Plot the residuals
    plot_residuals(df)


    # Calculate and output the CADF test on the residuals
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)
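
The pandas.stats.api ols call used above was removed in later pandas releases. A minimal sketch of the same hedge-ratio and residual test using statsmodels instead, assuming df already holds the two price columns "D" and "E" built in main():

import statsmodels.api as sm

# Regress D on E (with an intercept) to estimate the hedge ratio
X = sm.add_constant(df["E"])
model = sm.OLS(df["D"], X, missing='drop').fit()
beta_hr = model.params["E"]

# Residuals of the linear combination, then the CADF (ADF on the residuals)
df["res"] = df["D"] - beta_hr * df["E"]
print(ts.adfuller(df["res"].dropna()))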





http://www.quantstart.com/articles/Forecasting-Financial-Time-Series-Part-1




http://matplotlib.org/api/dates_api.html
MinuteLocator
HourLocator
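
For intraday plots like the adapted functions above, these locators pair naturally with an intraday tick format; a small illustrative sketch (the column name "D" is just the one used earlier):

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

fig, ax = plt.subplots()
ax.plot(df.index, df["D"], label="D")
ax.xaxis.set_major_locator(mdates.HourLocator())               # major tick every hour
ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=15))  # minor tick every 15 minutes
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate()
plt.legend()
plt.show()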






http://blog.sina.com.cn/s/blog_02cf67f00101iuuh.html

