日历效应(二):月份效应
完整代码和数据可关注gzh’finance褪黑素’回复关键词【2002】免费+无套路 获取!
1. 获取数据
import baostock as bs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, date
# 登陆系统
import baostock as bs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, date
lg = bs.login()
code = 'sz.399006'
start = '2018-01-01'
end = '2023-1-01'
# 获取指数基金指数历史数据
# 沪深300指数
hs300_price = bs.query_history_k_data_plus(code, "date,code,open,high,low,close,preclose,pctChg",
start_date=start, end_date=end, frequency="d")
# 整合为DataFrame格式
data_list = []
while (hs300_price.error_code == '0') & hs300_price.next():
data_list.append(hs300_price.get_row_data())
hs300 = pd.DataFrame(data_list, columns=hs300_price.fields)
hs300.to_csv('hs30.csv')
hs300.to_excel('hs30.xlsx')
hs300=pd.read_csv('hs30.csv',index_col='date')
del hs300['code']
hs300.head()
2.描述性统计分析
# from pandas_datareader import data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# 导入数据
hs300 =pd.read_csv("hs300.csv")
del hs300['code']
# 标准化数据格式
hs300["dates"]=pd.to_datetime(hs300["date"])
# 获取月份的数据
hs300["month"] = hs300["dates"].dt.month_name()
# 月份字符串转换成数值
days ={'January':1,'February':2,'March':3,'April':4,'May':5,
'June':6,'July':7,'August':8,'September':9,'October':10,
'November':11,'December':12}
hs300["month"] = hs300["month"].apply(lambda x: days[x])
# 计算对数收益率
hs300_close=hs300['close']
hs300_lgreturn=np.log(hs300_close)-np.log(hs300_close.shift(1))
# 计算收益率
hs300_return=hs300_close/hs300_close.shift(1)
# 分组描述
df1=pd.DataFrame(hs300_lgreturn.groupby(hs300["month"]).describe())
df2=pd.DataFrame(hs300_lgreturn.describe())
df3=pd.DataFrame(np.array(df2).reshape(1,8))
df3.columns=['count','mean','std','min','25%','50%','75%','max']
df3.index=['All']
hh=pd.concat([df1,df3],axis=0)
# 偏度、峰度、标准差、J-B统计量
import numpy as np
import scipy.stats as stats
def self_JBtest(y):
# 样本规模n
n = y.size
y_ = y - y.mean()
"""
M2:二阶中心钜
skew 偏度 = 三阶中心矩 与 M2^1.5的比
krut 峰值 = 四阶中心钜 与 M2^2 的比
"""
M2 = np.mean(y_**2)
skew = np.mean(y_**3)/M2**1.5
krut = np.mean(y_**4)/M2**2
"""
计算JB统计量,以及建立假设检验
"""
JB = n*(skew**2/6 + (krut-3 )**2/24)
pvalue = 1 - stats.chi2.cdf(JB,df=2)
data={'偏度':skew,'峰值':krut,'JB检验':JB,'P值':pvalue}
data1=pd.DataFrame(data,index=[1])
return data1
a1=pd.DataFrame()
a2=self_JBtest(hs300_lgreturn)
a2.index=['All']
for i in range(1,13):
data=hs300[hs300['month']==i]
w_close=data["close"]
ret = np.log(w_close) - np.log(w_close.shift(1)) # 这里用对数收益率,因为对数收益率的统计特性比较好
a=self_JBtest(ret)
a1=pd.concat([a,a1],axis=0)
a1.index=[1,2,3,4,5,6,7,8,9,10,11,12]
result=pd.concat([a1,a2])
result
偏度 | 峰值 | JB检验 | P值 | |
---|---|---|---|---|
1 | 1.639012 | 36.181934 | 4169.198151 | 0.000000e+00 |
2 | 1.679215 | 33.155897 | 3299.021498 | 0.000000e+00 |
3 | 1.083555 | 26.000192 | 1467.689222 | 0.000000e+00 |
4 | 4.808498 | 40.821891 | 5266.979476 | 0.000000e+00 |
5 | 6.327881 | 50.633985 | 8906.937585 | 0.000000e+00 |
6 | 4.094596 | 34.286906 | 3878.669472 | 0.000000e+00 |
7 | 0.746986 | 24.804115 | 1612.073478 | 0.000000e+00 |
8 | 1.155379 | 22.347403 | 1186.442555 | 0.000000e+00 |
9 | 3.695878 | 29.039659 | 2503.398098 | 0.000000e+00 |
10 | 5.440282 | 40.069552 | 5597.018945 | 0.000000e+00 |
11 | 5.579888 | 43.347194 | 6279.564813 | 0.000000e+00 |
12 | 4.809296 | 33.516392 | 3967.098890 | 0.000000e+00 |
All | -0.289843 | 4.182104 | 72.875276 | 1.110223e-16 |
ds=pd.concat([result,hh],axis=1)
del ds['25%']
del ds['50%']
del ds['75%']
ds1=pd.DataFrame()
for i in range(13):
ds0=pd.DataFrame(np.array(ds[i:i+1]).reshape(9,1))
ds1=pd.concat([ds1,ds0],axis=1)
ds1.columns=['January','February','March','April','May',
'June','July','August','September','October',
'November','December','All']
ds1.index=ds.columns
ds1
January | February | March | April | May | June | July | August | September | October | November | December | All | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
偏度 | 1.639012 | 1.679215 | 1.083555 | 4.808498 | 6.327881 | 4.094596 | 0.746986 | 1.155379 | 3.695878 | 5.440282 | 5.579888 | 4.809296 | -2.898430e-01 |
峰值 | 36.181934 | 33.155897 | 26.000192 | 40.821891 | 50.633985 | 34.286906 | 24.804115 | 22.347403 | 29.039659 | 40.069552 | 43.347194 | 33.516392 | 4.182104e+00 |
JB检验 | 4169.198151 | 3299.021498 | 1467.689222 | 5266.979476 | 8906.937585 | 3878.669472 | 1612.073478 | 1186.442555 | 2503.398098 | 5597.018945 | 6279.564813 | 3967.098890 | 7.287528e+01 |
P值 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.110223e-16 |
count | 92.000000 | 86.000000 | 90.000000 | 82.000000 | 75.000000 | 81.000000 | 89.000000 | 88.000000 | 83.000000 | 66.000000 | 86.000000 | 90.000000 | 1.008000e+03 |
mean | 0.000918 | 0.001794 | -0.001471 | 0.000430 | 0.000300 | 0.004687 | 0.001271 | -0.001193 | -0.001857 | 0.001200 | 0.000747 | 0.001627 | 6.821462e-04 |
std | 0.016128 | 0.022321 | 0.024197 | 0.019176 | 0.019264 | 0.015136 | 0.020374 | 0.015235 | 0.015437 | 0.015616 | 0.012688 | 0.012278 | 1.770710e-02 |
min | -0.036996 | -0.070913 | -0.060817 | -0.057160 | -0.082707 | -0.042739 | -0.063340 | -0.037100 | -0.049191 | -0.037819 | -0.033623 | -0.030070 | -8.270743e-02 |
max | 0.038305 | 0.053526 | 0.050713 | 0.053708 | 0.042889 | 0.038466 | 0.051846 | 0.034384 | 0.039796 | 0.038334 | 0.031538 | 0.035925 | 5.370838e-02 |
ds
偏度 | 峰值 | JB检验 | P值 | count | mean | std | min | max | |
---|---|---|---|---|---|---|---|---|---|
1 | 1.639012 | 36.181934 | 4169.198151 | 0.000000e+00 | 92.0 | 0.000918 | 0.016128 | -0.036996 | 0.038305 |
2 | 1.679215 | 33.155897 | 3299.021498 | 0.000000e+00 | 86.0 | 0.001794 | 0.022321 | -0.070913 | 0.053526 |
3 | 1.083555 | 26.000192 | 1467.689222 | 0.000000e+00 | 90.0 | -0.001471 | 0.024197 | -0.060817 | 0.050713 |
4 | 4.808498 | 40.821891 | 5266.979476 | 0.000000e+00 | 82.0 | 0.000430 | 0.019176 | -0.057160 | 0.053708 |
5 | 6.327881 | 50.633985 | 8906.937585 | 0.000000e+00 | 75.0 | 0.000300 | 0.019264 | -0.082707 | 0.042889 |
6 | 4.094596 | 34.286906 | 3878.669472 | 0.000000e+00 | 81.0 | 0.004687 | 0.015136 | -0.042739 | 0.038466 |
7 | 0.746986 | 24.804115 | 1612.073478 | 0.000000e+00 | 89.0 | 0.001271 | 0.020374 | -0.063340 | 0.051846 |
8 | 1.155379 | 22.347403 | 1186.442555 | 0.000000e+00 | 88.0 | -0.001193 | 0.015235 | -0.037100 | 0.034384 |
9 | 3.695878 | 29.039659 | 2503.398098 | 0.000000e+00 | 83.0 | -0.001857 | 0.015437 | -0.049191 | 0.039796 |
10 | 5.440282 | 40.069552 | 5597.018945 | 0.000000e+00 | 66.0 | 0.001200 | 0.015616 | -0.037819 | 0.038334 |
11 | 5.579888 | 43.347194 | 6279.564813 | 0.000000e+00 | 86.0 | 0.000747 | 0.012688 | -0.033623 | 0.031538 |
12 | 4.809296 | 33.516392 | 3967.098890 | 0.000000e+00 | 90.0 | 0.001627 | 0.012278 | -0.030070 | 0.035925 |
All | -0.289843 | 4.182104 | 72.875276 | 1.110223e-16 | 1008.0 | 0.000682 | 0.017707 | -0.082707 | 0.053708 |
for i in range(1,13):
bel=['January','February','March','April','May',
'June','July','August','September','October',
'November','December']
fig, ax = plt.subplots()
plt.axhline(y=0,c="orange",ls="--",lw=2)
data=hs300[hs300['month']==i]
w_close=data["close"]
ret = np.log(w_close) - np.log(w_close.shift(1)) # 这里用对数收益率,因为对数收益率的统计特性比较好
y=ret
ax.plot(y, linewidth=2.0,label=bel[i-1])
ax.legend(loc=2)
save=['m1.svg','m2.svg','m3.svg','m4.svg','m5.svg','m6.svg','m7.svg','m8.svg',
'm9.svg','m10.svg','m11.svg','m12.svg']
plt.grid()
plt.savefig(save[i-1])
plt.show()
3.虚拟变量回归
import statsmodels.api as sm
# 设置虚拟变量
all_data = []
for x in hs300["month"]:
item={
"Jan": 1 if x==1 else 0,
"Feb": 1 if x==2 else 0,
"Mar": 1 if x==3 else 0,
"Apr": 1 if x==4 else 0,
"May": 1 if x==5 else 0,
"June": 1 if x==6 else 0,
"July": 1 if x==7 else 0,
"Aug": 1 if x==8 else 0,
"Sep": 1 if x==9 else 0,
"Oct": 1 if x==10 else 0,
"Nov": 1 if x==11 else 0,
"Dec": 1 if x==12 else 0
}
all_data.append(item)
all_data = pd.DataFrame(data=all_data)
r=pd.DataFrame(hs300_lgreturn)
X=all_data[['Jan','Feb','Mar','Apr','May',
'June','July','Aug','Sep','Oct',
'Nov','Dec']]
X.index = r.index
df=pd.concat([r,X],axis=1)
# 处理空值
import missingno
# missingno.matrix(df)
# 直接剔除
df = df.dropna()
missingno.matrix(df)
plt.show()
X=df[['Jan','Feb','Mar','Apr','May',
'June','July','Aug','Sep','Oct',
'Nov','Dec']]
Y=df[["close"]]
from statsmodels.compat import scipy
import statsmodels.api as sm
X2 = sm.add_constant(X)
est = sm.OLS(Y*100, X2).fit()
print(est.summary())
OLS Regression Results
==============================================================================
Dep. Variable: close R-squared: 0.009
Model: OLS Adj. R-squared: -0.002
Method: Least Squares F-statistic: 0.8154
Date: Sat, 27 May 2023 Prob (F-statistic): 0.625
Time: 03:07:54 Log-Likelihood: -2001.2
No. Observations: 1008 AIC: 4026.
Df Residuals: 996 BIC: 4085.
Df Model: 11
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0650 0.052 1.257 0.209 -0.037 0.167
Jan 0.0268 0.178 0.151 0.880 -0.322 0.375
Feb 0.1144 0.183 0.624 0.533 -0.245 0.474
Mar -0.2121 0.179 -1.182 0.238 -0.564 0.140
Apr -0.0220 0.187 -0.118 0.906 -0.390 0.346
May -0.0350 0.195 -0.179 0.858 -0.418 0.348
June 0.4037 0.188 2.143 0.032 0.034 0.773
July 0.0620 0.180 0.344 0.731 -0.292 0.416
Aug -0.1843 0.181 -1.016 0.310 -0.540 0.172
Sep -0.2508 0.186 -1.346 0.179 -0.616 0.115
Oct 0.0550 0.207 0.265 0.791 -0.352 0.462
Nov 0.0097 0.183 0.053 0.958 -0.350 0.369
Dec 0.0977 0.179 0.544 0.586 -0.254 0.450
==============================================================================
Omnibus: 38.999 Durbin-Watson: 1.998
Prob(Omnibus): 0.000 Jarque-Bera (JB): 73.064
Skew: -0.268 Prob(JB): 1.36e-16
Kurtosis: 4.205 Cond. No. 1.48e+15
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 5.01e-28. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
4.arch-garch模型拟合波动率
import arch
am = arch.arch_model(y=Y*100,x=X,mean='LS',lags=0,vol='GARCH')
res1 = am.fit()
res1.summary()
Dep. Variable: | close | R-squared: | 0.006 |
---|---|---|---|
Mean Model: | Least Squares | Adj. R-squared: | -0.006 |
Vol Model: | GARCH | Log-Likelihood: | -1966.25 |
Distribution: | Normal | AIC: | 3964.50 |
Method: | Maximum Likelihood | BIC: | 4043.15 |
No. Observations: | 1008 | ||
Date: | Sat, May 27 2023 | Df Residuals: | 995 |
Time: | 03:07:57 | Df Model: | 13 |
coef | std err | t | P>|t| | 95.0% Conf. Int. | |
---|---|---|---|---|---|
Const | 0.0797 | 0.115 | 0.691 | 0.490 | [ -0.146, 0.306] |
Jan | 0.1310 | 0.241 | 0.542 | 0.588 | [ -0.342, 0.604] |
Feb | -0.1013 | 0.272 | -0.373 | 0.709 | [ -0.634, 0.432] |
Mar | -0.2149 | 0.278 | -0.773 | 0.440 | [ -0.760, 0.330] |
Apr | 0.2005 | 0.255 | 0.786 | 0.432 | [ -0.300, 0.701] |
May | -0.0408 | 0.149 | -0.274 | 0.784 | [ -0.333, 0.251] |
June | 0.4049 | 0.200 | 2.025 | 4.291e-02 | [1.292e-02, 0.797] |
July | -0.0220 | 0.164 | -0.134 | 0.893 | [ -0.344, 0.300] |
Aug | -0.1780 | 0.199 | -0.897 | 0.370 | [ -0.567, 0.211] |
Sep | -0.2264 | 0.217 | -1.043 | 0.297 | [ -0.652, 0.199] |
Oct | 0.0848 | 0.184 | 0.462 | 0.644 | [ -0.275, 0.445] |
Nov | -9.8481e-03 | 0.147 | -6.683e-02 | 0.947 | [ -0.299, 0.279] |
Dec | 0.0521 | 0.167 | 0.312 | 0.755 | [ -0.275, 0.379] |
coef | std err | t | P>|t| | 95.0% Conf. Int. | |
---|---|---|---|---|---|
omega | 0.0841 | 3.683e-02 | 2.284 | 2.240e-02 | [1.192e-02, 0.156] |
alpha[1] | 0.0813 | 2.055e-02 | 3.956 | 7.629e-05 | [4.102e-02, 0.122] |
beta[1] | 0.8936 | 2.287e-02 | 39.081 | 0.000 | [ 0.849, 0.938] |
Covariance estimator: robust
res1.plot()
plt.plot(Y*100)
plt.savefig("residuals.svg")
am = arch.arch_model(y=Y*100,x=X,mean='AR',lags=0,vol='GARCH')
res1 = am.fit()
res1.summary()
res1.hedgehog_plot()
plt.savefig("heg2.svg")
5.使用arch-garch进行波动性消除以进一步验证星期效应的稳健性
Y=res1.conditional_volatility
df=pd.concat([Y,X2],axis=1)
missingno.matrix(df)
plt.show()
df = df.dropna()
df=df.values
X=df[:,1:14]
Y=df[:,0]
from statsmodels.compat import scipy
import statsmodels.api as sm
est2 = sm.OLS(Y, X).fit()
print(est2.summary())
P值部分小于0.05 说明模型部分参数是显著的 存在影响,当前三、四、五、十、十一月份效应是初步存在的。但R2依然不高 后续可考虑使用滚动样本检验进一步进行稳健性分析。
参考文献:
[1]张兵.中国股市日历效应研究:基于滚动样本检验的方法[J].金融研究,2005(07):33-44.
[2]焦璇琨,李从欣.中国股票市场周内效应的实证研究[J].统计与管理,2020,35(02):64-68.DOI:10.16722/j.issn.1674-537x.2020.02.012.
完整代码和数据可关注gzh’finance褪黑素’回复关键词【2002】免费+无套路 获取!