借助alphalens进行财务数据因子分析
引入包:
import pandas as pd
import numpy as np
import akshare as ak
import numpy as np
import pandas as pd
import pyfolio as pf
import matplotlib.pyplot as plt
%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 dropped the old names, so apply whichever spelling this installation
# actually ships. If neither is present the default style is kept.
for _style in ('seaborn-darkgrid', 'seaborn-v0_8-darkgrid'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break
plt.rcParams['figure.figsize'] = (10,7)
import akshare as ak
import warnings
warnings.filterwarnings('ignore')
from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet
获得股价数据
# Download the daily price history of one sample stock and tag every row
# with its symbol.
i = "600004"
df = ak.stock_zh_a_hist(
    symbol=i,
    period="daily",
    start_date="19900301",
    end_date='20210907',
    adjust="",
).assign(code=i)
# Parse the date column into real timestamps.
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d')
获得财务数据
# Fetch the financial-analysis indicator table for the sample stock.
fin = ak.stock_financial_analysis_indicator(symbol="600004")
# '--' is presumably the source's placeholder for a missing figure; it is
# mapped to 0 so that every indicator column can be cast to float.
fin = fin.replace('--', 0)
# Rebuild the frame rather than assigning through ``iloc``: since pandas 2.0,
# ``fin.iloc[:, 1:] = ...`` writes in place and leaves the columns as object
# dtype. The first column is kept untouched; all others become float.
fin = pd.concat([fin.iloc[:, :1], fin.iloc[:, 1:].astype('float')], axis=1)
fin['日期'] = pd.to_datetime(fin['日期'], format='%Y-%m-%d')
获得数据列表:
# Date range ("YYYYMMDD" strings) requested for every stock's price history.
start, end = "19900301", '20230907'
def select(code, start, end):
    """Return one stock's daily quotes joined with its financial indicators.

    Args:
        code: Stock symbol passed to both akshare queries (e.g. "600004").
        start: First date of the price history, as a "YYYYMMDD" string.
        end: Last date of the price history, as a "YYYYMMDD" string.

    Returns:
        A DataFrame indexed by date in ascending order that holds the daily
        price columns, a ``code`` column and the financial indicator columns.
        Gaps are forward-filled along the date axis; cells that have no
        earlier value to copy from are set to 0.
    """
    fin = ak.stock_financial_analysis_indicator(symbol=code)
    # '--' is presumably the source's placeholder for a missing figure.
    fin = fin.replace('--', 0)
    # Rebuild the frame rather than assigning through ``iloc``: since pandas
    # 2.0, ``fin.iloc[:, 1:] = ...`` writes in place and leaves object dtype.
    fin = pd.concat([fin.iloc[:, :1], fin.iloc[:, 1:].astype('float')], axis=1)
    fin['日期'] = pd.to_datetime(fin['日期'], format='%Y-%m-%d')
    fin = fin.set_index('日期')

    df = ak.stock_zh_a_hist(symbol=code, period="daily", start_date=start, end_date=end, adjust="")
    df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d')
    df = df.set_index('日期')
    df['code'] = code

    # Sort chronologically BEFORE forward-filling: filling an unsorted index
    # would copy values between rows that are not adjacent in time.
    last = pd.concat([df, fin], axis=1).sort_index().ffill(axis=0).fillna(0)
    return last
# Constituent list of index 000016.
stock_list = ak.index_stock_cons(symbol="000016")

# DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame on
# every iteration); collect the per-stock frames and concatenate them once.
frames = []
for i in stock_list['品种代码']:
    print(i)  # progress: one line per downloaded symbol
    frames.append(select(i, start, end))
df = pd.concat(frames)

# Index the rows by (date, code). The key is passed as a Series, so the
# 'code' column itself stays in the frame as well.
assets = df.set_index([df.index, df.code], drop=True)
# select() zero-fills cells that have no earlier value, so code == 0 marks
# rows dated before the stock's first price record; drop them.
assets = assets[assets['code'] != 0]
# Rename the leftover column so that 'code' below refers only to the index
# level of that name.
assets = assets.rename(columns={'code': 'code2'})
# Columns are stock codes, the index is the date, values are closing prices.
close = assets.pivot_table(index='日期', columns='code', values='收盘')
close.index = pd.to_datetime(close.index)
close.index.name = None
股价数据:
因子分析:
# Pair the chosen financial indicator with forward returns over the 5-, 10-
# and 20-period horizons, then render the full alphalens tear sheet.
factor_frame = assets[['1-2年以内预付货款(元)']]
ret = get_clean_factor_and_forward_returns(
    factor_frame,
    close,
    max_loss=1.0,
    periods=(5, 10, 20),
)
create_full_tear_sheet(ret, long_short=True)