Mean-Reverting Strategy Design

http://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing

Trend following/momentum and mean reversion are the two most basic approaches to strategy design. For the latter, the first question is whether the time series is actually mean-reverting. Mathematically, the canonical continuous-time mean-reverting process is the Ornstein-Uhlenbeck (OU) process, dx_t = θ(μ − x_t)dt + σ dW_t, as opposed to Brownian motion (a random walk). Using pandas and statsmodels we can run an Augmented Dickey-Fuller (ADF) test to check for mean reversion (mean reversion being a necessary condition for stationarity). If the test statistic is larger than the critical value, we cannot reject the null hypothesis: the series is not mean-reverting but a random walk. If the data are fetched from an online source, the procedure can be written as:

# Import the Time Series library
import statsmodels.tsa.stattools as ts

# Import Datetime and the Pandas DataReader
from datetime import datetime
from pandas.io.data import DataReader  # in newer pandas, use: from pandas_datareader.data import DataReader

# Download the Google OHLCV data from 1/1/2000 to 1/1/2013
goog = DataReader("GOOG", "yahoo", datetime(2000,1,1), datetime(2013,1,1))
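
As a quick sanity check on the OU intuition above, we can also simulate a discretised OU process and verify that the same test flags it as mean-reverting; this is only an illustrative sketch, with theta, mu and sigma chosen arbitrarily:

import numpy as np

# Euler-Maruyama discretisation of dx = theta*(mu - x) dt + sigma dW
theta, mu, sigma, dt = 5.0, 0.0, 0.5, 1.0 / 252
x = np.zeros(5000)
for t in range(1, len(x)):
    x[t] = x[t-1] + theta * (mu - x[t-1]) * dt + sigma * np.sqrt(dt) * np.random.randn()

# A strongly negative ADF statistic rejects the unit-root (random-walk) null
print(ts.adfuller(x, 1))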

First, run a unit-root test on the time series:

ts.adfuller(goog['Adj Close'], 1)
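
A minimal sketch of interpreting the output, assuming goog was downloaded as above (adfuller returns, among other things, the test statistic, the p-value and a dictionary of critical values):

result = ts.adfuller(goog['Adj Close'], 1)
test_stat, p_value, crit_values = result[0], result[1], result[4]
print("ADF statistic: %s, p-value: %s" % (test_stat, p_value))
# If the statistic is greater (less negative) than the 5% critical value,
# we cannot reject the null hypothesis of a unit root, i.e. a random walk
print("5%% critical value: %s" % crit_values['5%'])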

We can also use the Hurst exponent to characterise the behaviour of the series. The idea is that for a series with Hurst exponent H the variance of the lagged differences scales as Var(x(t+τ) − x(t)) ∝ τ^(2H), so a log-log fit of the dispersion of the lagged differences against the lag recovers H (the code below fits the square root of the standard deviation, whose slope is H/2, hence the final multiplication by 2):

H<0.5 - The time series is mean reverting
H=0.5 - The time series is a Geometric Brownian Motion
H>0.5 - The time series is trending

from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn

def hurst(ts):
	"""Returns the Hurst Exponent of the time series vector ts"""
	# Create the range of lag values
	lags = range(2, 100)

	# Calculate the dispersion (sqrt of the standard deviation) of the lagged differences
	tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]

	# Use a linear fit to estimate the Hurst Exponent
	poly = polyfit(log(lags), log(tau), 1)

	# Return the Hurst exponent from the polyfit output
	return poly[0]*2.0

# Create a Geometric Brownian Motion, Mean-Reverting and Trending Series
gbm = log(cumsum(randn(100000))+1000)
mr = log(randn(100000)+1000)
tr = log(cumsum(randn(100000)+1)+1000)

# Output the Hurst Exponent for each of the above series
# and the price of Google (the Adjusted Close price) for 
# the ADF test given above in the article
print("Hurst(GBM):   %s" % hurst(gbm))
print("Hurst(MR):    %s" % hurst(mr))
print("Hurst(TR):    %s" % hurst(tr))

# Assuming you have run the above code to obtain 'goog'!
print("Hurst(GOOG):  %s" % hurst(goog['Adj Close']))

Next we turn to testing for cointegration via the Cointegrated Augmented Dickey-Fuller (CADF) test. The essence of cointegration is that some linear combination of two (individually non-stationary) series is stationary, so the spread between them is mean-reverting and can be traded.
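
Before working with real data, a quick synthetic check makes the idea concrete: construct two series that share a random-walk component, so a linear combination of them is stationary by construction, and confirm that the ADF test on the spread rejects the unit-root null. This is only an illustrative sketch; the series names and noise scales are made up:

import numpy as np
import statsmodels.tsa.stattools as ts

np.random.seed(42)
w = np.cumsum(np.random.randn(1000))        # shared random-walk component
x = w + 0.5 * np.random.randn(1000)         # first "price" series
y = 2.0 * w + 0.5 * np.random.randn(1000)   # cointegrated with x (true beta = 2)

spread = y - 2.0 * x                        # stationary linear combination
print(ts.adfuller(spread))                  # strongly negative statistic expected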


# For statistical arbitrage we usually start with a scatter plot of the pair and then run a cointegration test

# cadf.py


import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import pandas.io.data as web  # in newer pandas, use the pandas_datareader package instead
import pprint
import statsmodels.tsa.stattools as ts


from pandas.stats.api import ols  # removed in newer pandas; see the statsmodels version at the end




def plot_price_series(df, ts1, ts2):
    months = mdates.MonthLocator()  # could also be DayLocator, HourLocator, etc.
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.set_xlim(datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1))
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('%s and %s Daily Prices' % (ts1, ts2))
    plt.legend()
    plt.show()




# The version below is adapted to our own intraday data: the DataFrame in main() is d, and the df produced after processing corresponds to results such as mergeDF and Pnl
def plot_price_series(df, ts1, ts2):
    hours = mdates.HourLocator()  # could also be DayLocator, MinuteLocator, etc.
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)  # the index must be reset first, since the original index is pTime and sym
    ax.plot(df.index, df[ts2], label=ts2)
    ax.xaxis.set_major_locator(hours)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H:%M'))  # hourly ticks need an intraday format
    ax.set_xlim(datetime.datetime(2014, 1, 1), datetime.datetime(2014, 1, 2))  # start and end must match the sample (start before end)
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Time')
    plt.ylabel('TrdPriceLast ($)')
    plt.title('%s and %s Prices' % (ts1, ts2))
    plt.legend()
    plt.show()


def plot_scatter_series(df, ts1, ts2):
    plt.xlabel('%s TrdPriceLast ($)' % ts1)
    plt.ylabel('%s TrdPriceLast ($)' % ts2)
    plt.title('%s and %s Price Scatterplot' % (ts1, ts2))
    plt.scatter(df[ts1], df[ts2])
    plt.show()


def plot_residuals(df):
    hours = mdates.HourLocator() 
    fig, ax = plt.subplots()
    ax.plot(df.index, df["res"], label="Residuals")
    ax.xaxis.set_major_locator(hours)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %H:%M'))
    ax.set_xlim(datetime.datetime(2014, 1, 1), datetime.datetime(2014, 1, 2))
    ax.grid(True)
    fig.autofmt_xdate()


    plt.xlabel('Time')
    plt.ylabel('TrdPriceLast ($)')
    plt.title('Residual Plot')
    plt.legend()


    plt.plot(df["res"])
    plt.show()




if __name__ == "__main__":
    # Sample date range (start must precede end)
    start = datetime.datetime(2014, 1, 1)
    end = datetime.datetime(2014, 1, 2)

    # Original QuantStart example: download two energy stocks from Yahoo Finance
    # arex = web.DataReader("AREX", "yahoo", start, end)
    # wll = web.DataReader("WLL", "yahoo", start, end)

    # Adapted version: d is assumed to be our pre-loaded intraday DataFrame,
    # originally indexed by pTime and sym, with a TrdPriceLast column
    d = d.reset_index()
    d = d.set_index(["pTime"])
    df = pd.DataFrame(index=d.index.unique())
    df["D"] = d[d["sym"] == "D"]["TrdPriceLast"]
    df["E"] = d[d["sym"] == "E"]["TrdPriceLast"]
    
    # Plot the two time series
    plot_price_series(df, "D", "E")


    # Display a scatter plot of the two time series
    plot_scatter_series(df, "D", "E")


    # Calculate optimal hedge ratio "beta"
    res = ols(y=df['D'], x=df["E"])
    beta_hr = res.beta.x


    # Calculate the residuals of the linear combination
    df["res"] = df["D"] - beta_hr*df["E"]


    # Plot the residuals
    plot_residuals(df)


    # Calculate and output the CADF test on the residuals
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)
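
The pandas.stats.api ols call used above was removed in later pandas releases. A minimal sketch of the same hedge-ratio and residual test using statsmodels instead, assuming df already holds the two price columns "D" and "E" built in main():

import statsmodels.api as sm

# Regress D on E (with an intercept) to estimate the hedge ratio
X = sm.add_constant(df["E"])
model = sm.OLS(df["D"], X, missing='drop').fit()
beta_hr = model.params["E"]

# Residuals of the linear combination, then the CADF (ADF on the residuals)
df["res"] = df["D"] - beta_hr * df["E"]
print(ts.adfuller(df["res"].dropna()))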





http://www.quantstart.com/articles/Forecasting-Financial-Time-Series-Part-1




http://matplotlib.org/api/dates_api.html
MinuteLocator
HourLocator
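
For intraday plots like the adapted functions above, these locators pair naturally with an intraday tick format; a small illustrative sketch (the column name "D" is just the one used earlier):

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

fig, ax = plt.subplots()
ax.plot(df.index, df["D"], label="D")
ax.xaxis.set_major_locator(mdates.HourLocator())               # major tick every hour
ax.xaxis.set_minor_locator(mdates.MinuteLocator(interval=15))  # minor tick every 15 minutes
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate()
plt.legend()
plt.show()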






http://blog.sina.com.cn/s/blog_02cf67f00101iuuh.html

