如何利用 Python 对资产收益率进行正态性检验?
代码如下。根据已知资产的日价格序列,检验其价格是否服从几何布朗运动(即日对数收益率服从正态分布,且各日收益率相互独立)。
import pandas as pd
import numpy as np
from pylab import mpl,plt
import scipy.stats as scs
import statsmodels.api as sm
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases removed the old names, so use whichever one exists.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
mpl.rcParams['font.family'] = 'serif'
pd.set_option('display.width', None)  # display width in characters
pd.set_option('display.max_rows', 50)  # maximum number of rows shown

# Load end-of-day prices; the first CSV column is parsed as the date index.
filename = 'tr_eikon_eod_data.csv'
data = pd.read_csv(filename, index_col=0, parse_dates=True)

symbols = ["AAPL.O", "MSFT.O", "AMZN.O", "GDX", "GLD"]
noa = len(symbols)
# Keep only the selected instruments and the dates where all of them have data.
data = data[symbols].dropna()
price = data / data.iloc[0] * 100  # prices rebased to 100 at the first date
rets = np.log(data / data.shift(1))  # daily log returns (first row is NaN)

# Step 1: inspect the return distribution of every asset.
rets.plot.hist(bins=50, subplots=True)
plt.show()
def print_statistics(array):
    """Print selected descriptive statistics of a sample.

    The values are taken from ``scipy.stats.describe``: sample size, minimum,
    maximum, mean, standard deviation (square root of the reported variance),
    skewness and kurtosis.

    :param array: array-like sample to generate statistics on
    :return: None; the table is written to standard output
    """
    sta = scs.describe(array)
    print("%14s %15s" % ('statistic', 'value'))
    print(30 * "-")
    print("%14s %15.5f" % ('size', sta.nobs))
    print("%14s %15.5f" % ('min', sta.minmax[0]))
    print("%14s %15.5f" % ('max', sta.minmax[1]))
    print("%14s %15.5f" % ('mean', sta.mean))
    print("%14s %15.5f" % ('std', np.sqrt(sta.variance)))
    # Skewness: 0 for a normal distribution; > 0 means a longer right tail
    # (right-skewed), < 0 a longer left tail (left-skewed).
    print("%14s %15.5f" % ('skew', sta.skewness))
    # Excess kurtosis: 0 for a normal distribution; > 0 means a sharper peak
    # with heavier tails, < 0 a flatter distribution.
    print("%14s %15.5f" % ('kurtosis', sta.kurtosis))
# Step 2: summary statistics for every asset, including skewness and kurtosis.
for sym in symbols:
    header = "\nResults for symbol %s" % sym
    print(header)
    print(30 * "-")
    sym_returns = rets[sym].dropna()  # missing values removed before analysis
    log_data = np.array(sym_returns)
    print_statistics(log_data)
# Use a Q-Q plot to check how closely the data follow a normal distribution
# (only one asset is shown here as an example).
qq_sample = rets[symbols[1]].dropna()
sm.qqplot(qq_sample, line='s')
plt.grid(True)
plt.xlabel('theoretical quantiles')
plt.ylabel('sample quantiles')
plt.show()
# ——————————————————————————————第三步,正式的正态分析
def normality_tests(arr):
    """Run normality tests on a data set and print the results.

    Prints the sample skewness and excess kurtosis together with the p-values
    of ``scipy.stats.skewtest``, ``scipy.stats.kurtosistest`` and
    ``scipy.stats.normaltest``. A small p-value is evidence against the
    hypothesis that the sample comes from a normal distribution.

    :param arr: ndarray
        object to generate statistics on
    :return: None; the results are written to standard output
    """
    print("Skew of data set %14.3f" % scs.skew(arr))
    print("Skew test p-value %14.6f" % scs.skewtest(arr)[1])
    print("Kurt of data set %14.3f" % scs.kurtosis(arr))
    print("Kurt test p-value %14.6f" % scs.kurtosistest(arr)[1])
    # normaltest combines the skewness and kurtosis tests into one omnibus
    # test, so it gets its own label rather than reusing the kurtosis one.
    print("Norm test p-value %14.6f" % scs.normaltest(arr)[1])
# Run the formal normality tests on the log returns of every asset.
separator = 30 * "-"
for sym in symbols:
    print("\nResults for symbol %s" % sym)
    print(separator)
    log_data = np.array(rets[sym].dropna())  # missing values removed
    normality_tests(log_data)
关于 Q-Q 图的解读
红线是按样本均值和标准差标准化后的参考线(对应 line='s'),并非 y=x;若样本服从正态分布,散点应大致落在这条线上。如图所示,在分布两端的尾部,样本分位数明显偏离红线,呈现出厚尾(fat tails)的特征,因此认为该收益率序列不服从正态分布。