面板数据方法
https://www.solomonegash.com/econometrics/wooldridge_python/iexample14_py.html
代码提示纠错:
在包含多类别虚拟变量(比如行业)的回归中,常常会出现多重共线性问题,导致程序无法运行出结果,程序的提示是:
这个提示可能有点问题,正确的参数写法应该是:
check_rank=False
而不是:rank_check=False
**完整的示例:**
import numpy as np
import pandas as pd
from linearmodels import PanelOLS
from linearmodels import RandomEffects
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Load the Stata panel and keep a categorical copy of `year`, because
# set_index() below moves the original `year` column into the index.
data2 = pd.read_stata("d://data11.dta")
year = pd.Categorical(data2["year"])
data2 = data2.set_index(keys=["code", "year"])
data2["year"] = year

# Two-way fixed-effects model with industry dummies and year-by-industry
# interactions. drop_absorbed=True and check_rank=False are passed so that
# the collinear dummy columns do not stop the estimation (note the keyword
# is `check_rank`, not `rank_check`).
fe_formula2 = "Patent ~ 1+ Fintech+Population+C(industry)+C(year)*C(industry)+ EntityEffects + TimeEffects"
fe_res2 = PanelOLS.from_formula(
    fe_formula2,
    data=data2,
    drop_absorbed=True,
    check_rank=False,
)
fe_fit2 = fe_res2.fit()
print(fe_fit2)
import statsmodels.formula.api as smf
import numpy as np
from scipy.stats import norm, gaussian_kde
import matplotlib.pyplot as plt
import pandas as pd
from linearmodels.panel.data import PanelData
from linearmodels.panel import PanelOLS, PooledOLS, RandomEffects, compare
from collections import OrderedDict
import wooldridge
from statsmodels.formula.api import ols
import warnings
warnings.filterwarnings("ignore")
# Load the `wagepan` data set from the wooldridge package; the second call
# asks the package for the data set's description.
wagepan = wooldridge.data('wagepan')
wooldridge.data('wagepan', description=True)

# Explanatory variables.
exog = ['married', 'union', 'expersq',
        'd81', 'd82', 'd83', 'd84', 'd85', 'd86', 'd87']
# All variables: dependent variable first, then the regressors.
var = ['lwage'] + exog
# DataFrame restricted to the individual id `nr` and the variables in use.
df = wagepan.loc[:, ['nr'] + var]

# Within transformation: subtract each individual's (`nr`) mean from every
# variable in `var`.
df_g = df.groupby('nr')
df_mean = df_g[var].transform('mean')
df_md = df.loc[:, var] - df_mean

# Regressor matrix of the demeaned data.
X = df_md.loc[:, exog].values
# Demeaned dependent-variable vector.
Y = df_md.loc[:, 'lwage'].values

# OLS on the demeaned data: solve the normal equations (X'X) b = X'Y
# directly instead of forming the explicit inverse of X'X.
params = np.linalg.solve(X.T @ X, X.T @ Y)

# Print each coefficient with four significant digits.
for idx, name in enumerate(exog):
    print(f'{name}: {params[idx]:.4}')
# Index the data by (nr, year); drop=False keeps both as ordinary columns
# as well.
panel_keys = ['nr', 'year']
wagepan = wagepan.set_index(keys=panel_keys, drop=False)

# Inspection calls carried over from an interactive session: the bare
# expressions are evaluated and discarded when run as a script.
wagepan.head()
wagepan.info()

# Wrap the indexed frame in linearmodels' PanelData container and evaluate
# its shape and a "no missing values" check.
wagepanp = PanelData(wagepan)
wagepanp.shape
(~wagepanp.isnull).all()
# First, use PanelOLS to estimate entity fixed effects and time fixed effects.
# Model 1: entity effects plus the explicit year dummies d81-d87.
formula_fe = (
    'lwage ~ married + union + expersq '
    '+d81+d82+d83+d84+d85+d86+d87 + EntityEffects'
)
mod_fe = PanelOLS.from_formula(formula=formula_fe, data=wagepan)
est1 = mod_fe.fit()

# Models 1 and 2 are written differently but give the same result.
# Model 2: entity effects plus TimeEffects in place of the year dummies.
formula_fe = (
    'lwage ~ married + union + expersq '
    '+TimeEffects + EntityEffects'
)
mod_fe = PanelOLS.from_formula(formula=formula_fe, data=wagepan)
est2 = mod_fe.fit()

# Time fixed effects only.
formula_fe = (
    'lwage ~ married + union + expersq '
    '+TimeEffects '
)
mod_fe = PanelOLS.from_formula(formula=formula_fe, data=wagepan)
est3 = mod_fe.fit()

# Entity fixed effects only.
formula_fe = (
    'lwage ~ married + union + expersq '
    '+ EntityEffects'
)
mod_fe = PanelOLS.from_formula(formula=formula_fe, data=wagepan)
est5 = mod_fe.fit()
# After a panel regression, summary_col cannot be called on the result
# directly; the result object needs a few adjustments first.
def transfor(resfm):
    """Add the attribute names summary_col reads to a panel result.

    The result is modified in place:

    * ``resfm.bse`` is set to ``resfm.std_errors``
    * ``resfm.tvalues`` is set to ``resfm.tstats``
    * ``resfm.model.exog_names`` is set to the list of column names of
      ``resfm.model.exog.dataframe``
    * ``resfm.model.endog_names`` is set to the list of column names of
      ``resfm.model.dependent.dataframe``

    Args:
        resfm: A fitted panel result exposing ``std_errors``, ``tstats``
            and a ``model`` with ``exog`` and ``dependent`` data.

    Returns:
        The same ``resfm`` object, after the attributes have been added.
    """
    resfm.bse = resfm.std_errors
    resfm.tvalues = resfm.tstats
    resfm.model.exog_names = list(resfm.model.exog.dataframe.columns)
    resfm.model.endog_names = list(resfm.model.dependent.dataframe.columns)
    return resfm
# Adapt every panel result so that summary_col can read it.
est3 = transfor(est3)
est1 = transfor(est1)
est2 = transfor(est2)
est5 = transfor(est5)
from statsmodels.iolib.summary2 import summary_col

# Side-by-side table of the four panel models (columns f1-f4).
kk = summary_col(
    [est1, est2, est3, est5],
    float_format="%.3f",
    model_names=['f1', 'f2', 'f3', 'f4'],
    stars=True,
    regressor_order=['const', 'married', 'union', 'expersq',
                     'd81', 'd82', 'd83', 'd84', 'd85', 'd86', 'd87'],
    info_dict={
        # The original literal listed the '' key twice; a dict keeps only
        # one entry per key, so a single entry is equivalent.
        '': lambda x: '',
        'Observation': lambda x: str(int(x.nobs)),
    },
)

# Write the table as HTML; the context manager makes sure the file handle
# is flushed and closed.
with open('d://ss3.html', 'w') as html_file:
    html_file.write(kk.as_html())  # for html
# Estimate by OLS, adding time and entity dummy variables.
# est4: year dummies d81-d87 entered explicitly.
est4 = smf.ols(
    formula='lwage ~ married + union + expersq '
            '+d81+d82+d83+d84+d85+d86+d87',
    data=wagepan,
).fit()

# Models 4 and 6 are the same.
# est6: year dummies generated with C(year).
est6 = smf.ols(
    formula='lwage ~ married + union + expersq '
            '+C(year)',
    data=wagepan,
).fit()

# est7: entity dummies generated with C(nr).
est7 = smf.ols(
    formula='lwage ~ married + union + expersq '
            '+C(nr)',
    data=wagepan,
).fit()

# est8: both year and entity dummies.
est8 = smf.ols(
    formula='lwage ~ married + union + expersq '
            '+C(year)+C(nr)',
    data=wagepan,
).fit()
from stargazer.stargazer