2020-6-17
最近实习中研究基金经理的风格归因,需要计算因子的 Rank IC(即因子值的截面排名与收益率排名之间的相关系数)。数据集是 MATLAB 的 .mat 文件。
主要涉及的知识点:
- .mat文件的加载
- pandas数据处理
先贴下结果:
mark一下源码:
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 21:10:16 2020
@author: Sun
"""
import scipy.io as sio
from pandas import DataFrame as df
from pandas import Period as prd
# import pandas as pd
# import numpy as np
# from numpy import array as na
# Load the raw data set from the MATLAB .mat file.
raw = sio.loadmat('data_managers_X(1).mat')
# Read the axis labels (manager names and the two date axes) from the raw data.
factors = {}
# str(cell)[2:-2] drops the first two and last two characters of each cell's
# text form -- presumably the "['" / "']" wrapper of a one-element string
# array; confirm against the .mat layout.
manager_cells = df(raw['Company_managers_unique'])[1]
managers = [str(cell)[2:-2] for cell in manager_cells]
# Flatten each date array to one dimension and convert every entry to a Period.
month_stamps = df(raw['Date_M'].reshape(-1))[0]
date_m = [prd(stamp) for stamp in month_stamps]
day_stamps = df(raw['Date_D'].reshape(-1))[0]
date_d = [prd(stamp) for stamp in day_stamps]
def _labelled_frame(values, row_labels, column_labels):
    """Wrap *values* in a DataFrame, then overwrite its row and column labels."""
    frame = df(values)
    frame.index = row_labels
    frame.columns = column_labels
    return frame


# Factor values read from the raw data: rows labelled with date_m, columns
# with the manager names.
factors['sue_scale'] = _labelled_frame(
    raw['vOutput_stock']['sue_scale'][0, 0], date_m, managers
)
# Daily returns read from the raw data: rows labelled with date_d.
returns = {}
returns['sue_scale'] = _labelled_frame(
    raw['vOutput_nav']['ret_managers_scale'][0, 0], date_d, managers
)
# Daily net asset values read from the raw data: rows labelled with date_d.
net_value = {}
net_value['sue_scale'] = _labelled_frame(
    raw['vOutput_nav']['value_managers_scale'][0, 0], date_d, managers
)
def _half_year_return(nav, start, end):
    """Return each column's simple return of *nav* over the closed window [start, end].

    The return is ``last row / first row - 1`` using the first and last rows
    whose index label lies inside the window (both bounds inclusive, so the
    observation dated on the period-end day is part of the window).

    Args:
        nav: DataFrame of net asset values indexed by date, one column per manager.
        start: Lower bound of the window, compared against ``nav.index``.
        end: Upper bound of the window, compared against ``nav.index``.

    Returns:
        A Series indexed like ``nav.columns``, or ``None`` when no row of
        *nav* falls inside the window.
    """
    dates = nav.index
    window = nav[(dates >= start) & (dates <= end)]
    if window.empty:
        return None
    return window.iloc[-1, :] / window.iloc[0, :] - 1


# Half-year windows in chronological order: May-Oct of each year, then
# Nov of that year through Apr of the next. Each window is later labelled
# with its end date.
half_year_windows = []
for year in range(2010, 2020):
    half_year_windows.append(('{}-05-01'.format(year), '{}-10-31'.format(year)))
    half_year_windows.append(('{}-11-01'.format(year), '{}-04-30'.format(year + 1)))

nav_daily = net_value['sue_scale']
period_rows = []
period_list = []
for window_start, window_end in half_year_windows:
    period_return = _half_year_return(nav_daily, window_start, window_end)
    if period_return is None:
        # No observations in this window: stop here, later windows are not tried.
        break
    # NOTE: a window only partially covered by the data still produces a
    # return, computed from whatever rows are present.
    period_rows.append(period_return.to_numpy())
    period_list.append(window_end)

# Returns per half-year period (rows) and manager (columns). The frame is
# built in one step because DataFrame.append no longer exists in pandas >= 2.0.
return_list = df(period_rows, index=period_list, columns=nav_daily.columns)
# Rank the half-year returns across managers within each period (ties share
# the lowest rank).
return_rank = return_list.rank(axis=1, method='min')
# Before ranking the factor, keep only the rows dated in April or November
# from 2010 onwards.
factor_dates = factors['sue_scale'].index
in_sample = factor_dates.month.isin([4, 11]) & (factor_dates.year >= 2010)
selected_factors = factors['sue_scale'][in_sample]
# Missing factor values are replaced by 0.0 before ranking.
selected_factors.fillna(0.0, inplace=True)
factors['sue_scale'] = selected_factors
factors_rank = factors['sue_scale'].rank(axis=1, method='min')
# Rank IC: correlate factor-rank row k with return-rank row k + 1, for every
# k below len(factors_rank) - 2.
# NOTE(review): the one-row offset and the "- 2" bound are kept as written;
# confirm they match the intended factor-date / return-period alignment.
rank_ic = {}
pair_count = len(factors_rank) - 2
rank_ic['sue_scale'] = [
    factors_rank.iloc[k].corr(return_rank.iloc[k + 1])
    for k in range(pair_count)
]