The Leading Premium（2023）因子在上证A股市场的复现

fermat_2019

已于 2023-11-03 15:22:51 修改

阅读量62

点赞数 1

文章标签： python 金融

于 2023-10-31 22:47:22 首次发布

本文链接：https://blog.csdn.net/m0_59962218/article/details/134150926

版权

引言

本文尝试在A股市场复现 The Leading Premium（2023）一文的结果，最终效果并不理想。数据来源：国泰安（CSMAR）数据库。

原文按行业构造 LL 因子：某行业的产出增长与 t 期之后的 GDP 增长的相关系数越大、且 t 越大，该行业的 LL 因子就越大；LL 因子最高的 20% 组合相对于最低的 20% 组合有显著的超额收益率。本文改为对个股计算：将个股的 operating income growth 分别取滞后 1-4 期与领先 1-4 期，与 GDP 增长率计算相关系数，再以相关系数的绝对值为权重对期数加权，得到因子值：

$LL_{t}^{i}=\sum_{j=-J}^{J} \frac{\left | \rho_{t,j}^{i} \right | \cdot j}{\sum_{j=-J}^J \left | \rho_{t,j}^{i} \right |}$

对于 t 期的因子，使用此前的经营性现金流量净额增长率与中国 GDP 增长率（未剔除通胀）计算相关系数，并以相关系数的绝对值为权重，对领先/滞后期数（前后各 4 期，共 8 个季度）加权，得到加权领先期数。在 12 个季度的检验期内，对个股在横截面上分别采用分组计算 spread 和回归两种方法进行检验，t 检验均不显著。

一、导入数据并处理

import numpy as np
import pandas as pd
import scipy.stats as stats

# Load the three Excel exports. `.loc[2:,:]` discards the rows labelled 0 and 1
# (presumably descriptive rows that sit under the header in the export -- TODO confirm).
# `.copy()` keeps the later column assignments from writing into slices.
GDP = pd.read_excel('CME_Qqgdp.xlsx').loc[2:,:].copy()
OPI = pd.read_excel('FS_Comscfd.xlsx').loc[2:,:].copy()
ret = pd.read_excel('TRD_Mnth.xlsx').loc[2:,:].copy()


def _yyyymm(text):
    """Return the integer YYYYMM built from characters 0-3 and 5-6 of *text*."""
    return int(text[0:4] + text[5:7])


def _accper_to_quarter(accper):
    """Return the YYYYMM quarter for an accounting-period string.

    Periods whose month field is '01' are moved to December of the previous
    year (YYYY01 - 89 == (YYYY-1)12); every other period keeps its own month.
    """
    period = _yyyymm(accper)
    return period - 89 if accper[5:7] == '01' else period


# GDP from 2005-03 onwards; stock codes in ['600000', '700000') only.
GDP = GDP[GDP['Quarter']>='2005-03'].copy()
OPI = OPI[(OPI['Stkcd']<'700000')&(OPI['Stkcd']>='600000')].copy()
GDP['Quarter'] = GDP['Quarter'].apply(_yyyymm)
OPI['Quarter'] = OPI['Accper'].apply(_accper_to_quarter)

# Drop every name that ever reports C001000000 == 0, since that column is later
# used as a denominator.
# NOTE(review): the filter keys on ShortName rather than Stkcd; if a stock's short
# name changes over time only part of its history is removed -- confirm intent.
zero_flow_names = OPI.loc[OPI['C001000000']==0, 'ShortName'].tolist()
OPI = OPI[~OPI['ShortName'].isin(zero_flow_names)]
OPI = OPI.drop_duplicates(subset=['Stkcd','Quarter'])

ret['Newmnt'] = ret['Trdmnt'].apply(lambda x: int(x.replace('-','')))

# Three-month compounded return ending in the current month.
# The lagged returns are taken per stock: an ungrouped shift would compound the
# first two months of each stock with the last months of the stock listed above it.
# Assumes each stock's rows are in chronological order -- TODO confirm.
gross_ret = 1 + ret['Mretwd'].astype('float64')
gross_by_stock = gross_ret.groupby(ret['Stkcd'])
ret['NewMretwd'] = (gross_ret*gross_by_stock.shift(1)*gross_by_stock.shift(2)-1).astype('float64')

二、计算12个季度的LL因子值

# Build the quarter calendar as YYYYMM integers: 20 quarters feed each correlation,
# 8 quarters cover the -4..+4 shifts used for weighting, 12 quarters are for testing.
start_date = 201003  # first quarter of the sample; this date can be changed
formation_dates=[start_date]
for _ in range(1,40):
    last_quarter = formation_dates[-1]
    # December rolls over to March of the next year (+91); otherwise add 3 months.
    formation_dates.append(last_quarter+91 if last_quarter%100==12 else last_quarter+3)

# Keep only the stocks that have a row for every quarter in the calendar.
OPI['flag'] = OPI['Quarter'].isin(formation_dates).astype(int)
temp1 = OPI.groupby('Stkcd')['flag'].sum()
stocks_list = temp1[temp1==len(formation_dates)].index.tolist()

OPI = OPI[OPI['Stkcd'].isin(stocks_list)].copy()

# Ratio of C001000000 to its value four rows earlier *within the same stock*.
# Shifting per stock leaves the first four rows of each stock as NaN instead of
# dividing by another stock's figures.
# Assumes each stock's rows are consecutive quarters in chronological order -- TODO confirm.
cash_flow = OPI['C001000000'].astype('float64')
OPI['OPI_gr'] = cash_flow/cash_flow.groupby(OPI['Stkcd']).shift(4)

# Marks the first four rows of each stock, whose ratio is undefined.
OPI['flag1'] = OPI.groupby('Stkcd').cumcount()<4

# Require a non-missing ratio at the first calendar quarter; sorted so the
# stock order is the same on every run.
valid_at_start = OPI.loc[(OPI['Quarter']==start_date)&OPI['OPI_gr'].notnull(), 'Stkcd']
stocks_list = sorted(set(stocks_list).intersection(valid_at_start))

# Per-stock (Quarter, OPI_gr) tables are built once rather than re-filtered
# for every test quarter.
opi_by_stock = {code: frame[['Quarter','OPI_gr']] for code, frame in OPI.groupby('Stkcd')}
shifts = np.array(range(-4,5))

factor_frames = []
for j in range(0,12):
    print('T0='+str(formation_dates[23+j]))
    # The GDP window does not depend on the stock or the shift.
    gdp_window = GDP[GDP['Quarter'].isin(formation_dates[4+j:24+j])]['Gdpcurrperiod'].astype('float64')

    # Per-stock factor for this test quarter.
    LL_factor = {}
    for stock in stocks_list:
        temp_opi = opi_by_stock[stock]
        # |Pearson correlation| between the 20-quarter GDP window and the stock's
        # 20-quarter window moved i quarters later (i < 0: earlier) than the GDP one.
        corr = np.array([
            abs(stats.pearsonr(
                temp_opi[temp_opi['Quarter'].isin(formation_dates[4+i+j:24+i+j])]['OPI_gr'],
                gdp_window,
            )[0])
            for i in range(-4,5)
        ])
        # Correlation-weighted average shift; the sign flip gives a higher value to
        # stocks whose earlier data correlates with GDP.
        LL_factor[stock] = -shifts.dot(corr)/corr.sum()

    LL_factor = pd.DataFrame(LL_factor,index=['LL_factor']).T
    LL_factor['Stkcd'] = LL_factor.index
    # Date the factor with the quarter after the last one used above (index 27+j).
    LL_factor['Newmnt'] = formation_dates[28+j]
    factor_frames.append(LL_factor)

# Single concat instead of growing a frame that starts out empty.
allLL_factor = pd.concat(factor_frames,axis=0)

## Merge the factor with the return row that carries the same Stkcd and Newmnt
testdata=pd.merge(ret,allLL_factor.reset_index(),how='inner',on=['Stkcd','Newmnt'])
testdata

三、分位数组合LL因子有效性检验

import statsmodels.api as sm

def getgroup(x, n_groups=10):
    """Assign each value of *x* to a quantile bucket.

    Args:
        x: one-dimensional numeric data (e.g. a pandas Series of factor values).
        n_groups: number of equal-frequency buckets; defaults to 10 (deciles).

    Returns:
        Integer bucket codes from ``pd.qcut`` with ``labels=False``:
        0 for the lowest bucket up to ``n_groups - 1`` for the highest.
    """
    return pd.qcut(x, n_groups, labels=False)

# Decile portfolios: bucket stocks by LL_factor within each Newmnt.
# `transform` returns a result aligned to testdata's own index; `apply` prepends the
# group key to the index on pandas >= 2.0, which breaks the column assignment.
testdata['group'] = testdata.groupby('Newmnt')['LL_factor'].transform(getgroup)

# Mean NewMretwd per (Newmnt, bucket), reshaped to one row per Newmnt and one
# column per bucket.
gret = testdata.groupby(['Newmnt','group'])['NewMretwd'].mean()
gretnew = gret.reset_index().pivot_table(index='Newmnt',columns='group',values='NewMretwd')
# Top bucket (9) minus bottom bucket (0).
gretnew.loc[:,'spread']=gretnew.loc[:,9]-gretnew.loc[:,0]

from scipy import stats
# One-sample t-test of the mean spread against zero.
print('单因素t检验：')
print(stats.ttest_1samp(gretnew['spread'], 0))

# add_constant puts a 'const' column in front of 'spread'.
tmpdata=sm.add_constant(gretnew['spread'])
tmpdata

# Regress the spread on the constant only, so the intercept is the mean spread,
# with HAC standard errors.
mod = sm.OLS(tmpdata['spread'], tmpdata['const'])
reshac=mod.fit(cov_type='hac',cov_kwds={'maxlags':2}) # Newey-West estimator
print(reshac.summary())