【STATA】代码自用

该文介绍了使用Stata进行数据预处理、面板数据回归、熵值法构建代理变量、PSM和DID方法评估政策效果、分位数回归、安慰剂检验及中介效应分析的一系列步骤。此外,还涉及了系统GMM和VAR模型的稳健性检验。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

写在前面:这篇本来是写给自己和一个好朋友用的,但是发现很多人看到了。其中有些代码有更好的替代方案但是没有更新。如果大家对代码有疑问的地方,或是有错误要指正等,欢迎探讨,大家共同进步!谢谢!

Panel Data

数据处理

// Descriptive statistics, exported through asdoc.
// $x is a global macro that must already hold the list of variables to summarise.

asdoc sum $x, stat(N mean sd tstat p1 p99) fs(7) dec(2)

// One-to-many merge on id and year with the using file "xx.dta".
// NOTE(review): force lets the merge proceed when a shared variable is string in
// one file and numeric in the other; confirm that is really wanted.

 merge 1:m  id year using"xx.dta",force

一般情况下使用 1:1 合并;使用 1:m 之前,先确认主数据中 id year 能唯一标识每条观测(可用 isid id year 检查),合并后再用 tab _merge 核对匹配结果。

// Extract the date parts from a CSMAR date string //
// Assumes accper is a string such as "2020-12-31" — TODO confirm.
// Stata command names are case-sensitive (split, not Split) and parse() takes
// only the separator. This creates the string variables year1 year2 year3;
// run "destring year1, replace" afterwards if a numeric year is needed.

split accper, parse("-") gen(year)

// Drop an industry by its code //
// Flags observations whose IndustryCode contains "J" and removes them.
// NOTE(review): "J" is presumably the financial sector; utilities, ST firms etc.
// need their own filters built the same way.

gen J = regexm(IndustryCode, "J")

drop if J == 1

// Two-sided winsorising at the 1st and 99th percentiles //

winsor2 $x, cut(1 99)

// Group total of v within each code-year cell //
// total() is the documented egen function; sum() is only an old synonym.

bysort code year: egen sum_v = total(v)

// Linear interpolation of x over year within code, with extrapolation (epolate) //
// bysort code (year) sorts the data itself, so the command no longer fails with
// "not sorted" when the data are not already ordered by code.

bysort code (year): ipolate x year, gen(x1) epolate

// Assign a numeric code from text patterns in name //

gen new = .

replace new = 1 if strmatch(name, "*A*")

// strmatch() accepts exactly one pattern, so alternatives are combined with |
// ("or" is not a Stata operator).
replace new = 0 if strmatch(name, "*B*") | strmatch(name, "*B1*")

// Save the data in memory as an Excel file, variable names in the first row //

export excel ABC.xlsx, firstrow(var) replace

// Entropy weight method //

*- Indicator lists. x1 x2 x3 / x4 are placeholders: put your own variable names here.

// Positive indicators (larger is better)
global positiveVar x1 x2 x3

// Negative indicators (smaller is better)
global negativeVar x4

global allVar $positiveVar $negativeVar

// Min-max standardise the positive indicators.
// Exact zeros are shifted to 0.0001 so that ln(p) below is defined.
foreach v of global positiveVar {
    quietly summarize `v'
    gen z_`v' = (`v' - r(min)) / (r(max) - r(min))
    replace z_`v' = 0.0001 if z_`v' == 0
}

// Min-max standardise the negative indicators (direction reversed).
foreach v of global negativeVar {
    quietly summarize `v'
    gen z_`v' = (r(max) - `v') / (r(max) - r(min))
    replace z_`v' = 0.0001 if z_`v' == 0
}

// Share of each observation in the indicator's column total.
foreach v of global allVar {
    egen sum_`v' = total(z_`v')
    gen p_`v' = z_`v' / sum_`v'
}

// Entropy of each indicator: e = -1/ln(N) * sum(p*ln(p)).
foreach v of global allVar {
    egen sump_`v' = total(p_`v' * ln(p_`v'))
    gen e_`v' = -1 / ln(_N) * sump_`v'
}

// Information utility d = 1 - e. The created names are collected in a list so
// the row total uses exactly these variables and nothing else matching d_*.
local dlist
foreach v of global allVar {
    gen d_`v' = 1 - e_`v'
    local dlist `dlist' d_`v'
}

// Indicator weights w = d / sum(d).
egen sumd = rowtotal(`dlist')
foreach v of global allVar {
    gen w_`v' = d_`v' / sumd
}

// Composite score: weighted sum of the standardised indicators.
local slist
foreach v of global allVar {
    gen score_`v' = w_`v' * z_`v'
    local slist `slist' score_`v'
}
egen score = rowtotal(`slist')

// Remove only the helper variables created above. The weights w_*, the parts
// score_* and score are kept. A "drop sum*" wildcard is avoided because it would
// also delete unrelated user variables whose names start with "sum".
foreach v of global allVar {
    drop z_`v' p_`v' e_`v' d_`v' sum_`v' sump_`v'
}
drop sumd

// CRITIC weighting //

global positive_var x1 x2 x3   // positive indicators (larger is better)
global negative_var x4         // negative indicators (smaller is better)

global all_var $positive_var $negative_var

// Min-max normalise the positive indicators.
foreach i of global positive_var {
    quietly summarize `i'
    gen x_`i' = (`i' - r(min)) / (r(max) - r(min))
}

// Min-max normalise the negative indicators (direction reversed).
foreach i of global negative_var {
    quietly summarize `i'
    gen x_`i' = (r(max) - `i') / (r(max) - r(min))
}

// Variability of each indicator: standard deviation of the normalised series.
foreach i of global all_var {
    egen sd_`i' = sd(x_`i')
}

// Conflict of each indicator: sum over i of (1 - r_ji) = k - sum_i r_ji, where k
// is the number of indicators. The correlations use the normalised x_ series:
// reversing a negative indicator flips the sign of its correlations, so the raw
// variables would give the wrong conflict term. k is counted from the indicator
// list, and the correlations are summed in a scalar, so the result does not
// depend on the indicator names starting with "x".
foreach j of global all_var {
    local k : word count $all_var
    tempname rsum
    scalar `rsum' = 0
    foreach i of global all_var {
        quietly correlate x_`j' x_`i'
        gen corr_`j'`i' = r(rho)        // pairwise Pearson correlation, kept for inspection
        scalar `rsum' = `rsum' + r(rho)
    }
    gen `j'_corr = `k' - `rsum'
}

// Information content c = variability * conflict.
local clist
foreach i of global all_var {
    gen c_`i' = sd_`i' * `i'_corr
    local clist `clist' c_`i'
}
egen sum = rowtotal(`clist')

// CRITIC objective weight of each indicator.
foreach i of global all_var {
    gen w_`i' = c_`i' / sum
}

// Weighted composite score.
local slist
foreach i of global all_var {
    gen score_`i' = x_`i' * w_`i'
    local slist `slist' score_`i'
}

egen score = rowtotal(`slist')
label variable score "CRITIC赋值法score"
  

// Substring: take m characters of Vcode starting at position n //
// n and m are placeholders for integers.

gen Code = substr(Vcode, n, m)

// Handling duplicate observations //

// Keep one observation per Symbol-Year; force is required when a varlist is given.
duplicates drop Symbol Year, force

// Install the user-written unique command (needed once).
ssc install unique

// Count the distinct values of v to see whether any are repeated.
unique v

// Report how many observations are duplicated in terms of v.
duplicates report v

// Tag duplicated observations: tag1 holds the number of other copies.
duplicates tag v, gen(tag1)

// List the duplicated observations.
duplicates list v

// Drop duplicates in terms of v (forced).
duplicates drop v, force


基础回归

// Correlation, exported through asdoc //

asdoc corr ROA RD

// Regression with year and Code dummies, standard errors clustered on Symbol //

reg Y X $control i.year i.Code, vce(cluster Symbol)

// The same kind of model with absorbed fixed effects (reghdfe).
// A space is required between X and $control: "X$control" expands to
// "X<first control>", i.e. one wrong variable name.
// NOTE(review): variable names are case-sensitive; Code/code and Symbol/symbol
// are different variables — use the names that exist in your data.
reghdfe Y X $control, absorb(code year) vce(cluster symbol)

// Leave-one-out mean of RD within each year-Code cell //
// total_RD: cell total; number: non-missing count in the cell;
// deps: cell total minus the firm's own RD; RD_Code: mean over the other firms.
bysort year Code: egen total_RD = total(RD)
bysort year Code: egen number = count(RD)
gen deps = total_RD - RD
gen RD_Code = deps / (number - 1)

// Threshold effect (xthreg) //
// lny1 is the dependent variable, followed by the regressors.
// NOTE(review): per the xthreg help, rx() should be the regime-dependent variable,
// qx() the threshold variable, thnum() the number of thresholds, trim() the
// trimming proportions, grid() the grid size and bs() the bootstrap replications
// per threshold — confirm against the help file.

xthreg  lny1 lngii lnopen lnpopu lnsecond, rx(lnipr) qx(lngii) thnum(2) trim(0.01 0.01) grid(400) bs(300 300)

// Export the last estimates to xxx.doc //
// Reports t statistics, keeps only x, titles the column "y" and adds a row FE = YES.

outreg2 using xxx.doc,replace tstat bdec(3) tdec(2) ctitle(y) keep(x) addtext(FE,YES)

PSM

// Estimate the propensity score. treat is the treatment dummy; control-varlist is
// a placeholder for the covariate list.
// NOTE(review): "logit/probit" is not a valid option. logit is shown here; drop
// it to use the command's default link — confirm in help pscore.
pscore treat control-varlist, logit comsup blockid(block) pscore(myscore)

// Put the observations in a reproducible random order before matching.
set seed 10101

gen ranorder = runiform()

sort ranorder

// psmatch2 matching choices: nearest neighbour n(k); radius/caliper: radius cal(0.01);
// kernel (default kernel and bandwidth): kernel; local linear regression: llr.
// k is a placeholder for the number of neighbours.

psmatch2 treat control-varlist, outcome(y) n(k) ate ties logit common

// Bootstrap ATE / ATT / ATU. The prefix must be separated from the command by a colon.
bootstrap r(ate) r(att) r(atu): psmatch2 treat control-varlist, outcome(y) n(k) ate ties logit common

// Covariate balance before and after matching, with a graph.
pstest control-varlist, both graph

// Propensity-score histogram by treatment status.
psgraph

DID

// Setup: install the user-written commands used below (needed once) //
// Notes go after // — text placed inside replace(...) breaks the command.

ssc install asdoc, replace      // export results to Word

ssc install estout, replace     // three-line regression tables

ssc install parmest, replace    // save estimates and statistics as data (original note: installable on Stata 16)

ssc install coefplot, replace   // coefficient plots (the package name is coefplot, not coeplot)

ssc install dpplot, replace     // density plots

ssc install diff, replace       // difference-in-differences estimation

ssc install ftools              // dependency of reghdfe

ssc install reghdfe             // fixed-effects regression command

// Create the DID dummies (xxx is a placeholder for the cut-off value) //

gen Time = (Year >= xxx) & !missing(Year)   // post-policy period dummy

gen Treat = (ID <= xxx) & !missing(ID)      // treated-group dummy (comments start with //, not ..)

gen DID = Time*Treat                        // interaction term

// Baseline DID estimation //

// DID estimate 1: fixed-effects panel regression, exported through asdoc //
// v v v are placeholders for the control variables.
// NOTE(review): Treat looks time-invariant, in which case the fe estimator will
// drop it — confirm this is expected.

asdoc xtreg Y Time Treat DID v v v,fe

// DID estimate 2: the diff command, with t() the treated dummy and p() the period dummy //

diff Y ,t(Treat) p(Time) cov( v v v ) 

// Parallel trend check //

// Plot the yearly mean of the outcome for each group //

gen Treatment = Y if ID <= 108   // outcome of the treated group (missing otherwise)

gen Control = Y if ID > 108      // outcome of the control group (missing otherwise)

// Yearly group means. The treated series must average Treatment (the outcome),
// not the 0/1 dummy Treat.
bysort Year: egen tTreat = mean(Treatment)

bysort Year: egen cControl = mean(Control)

// Reduce to one row per year for plotting only. preserve/restore brings the full
// panel back afterwards, so later estimation commands still see all observations.
preserve

duplicates drop Year, force

// connect(l) uses the lowercase letter l (lines); c(1) with the digit one is not
// a connect style. sort draws the line in Year order.
twoway (scatter tTreat Year, c(l) sort) (scatter cControl Year, c(l) sort)

restore

// Alternative dependent variable //

// Author's reading: a significant difference in the outcome between the groups,
// with insignificant differences in the controls, suggests the outcome gap is
// due to the policy. (Stata comments use //; # is not a comment marker.)
diff Y1, t(Treat) p(Time) cov(x x x) robust report bs reps(100) test

// Parallel trend test with year-by-treatment interactions //

tab Year, gen(yrdum)   // one dummy per year: yrdum1, yrdum2, ...

// Interactions for years n1..n2 (n1 and n2 are placeholders for integers).
// The loop macro is v, so the body must reference `v' — `A' is undefined and
// would expand to nothing.
forvalues v = n1/n2 {

    gen Treat`v' = yrdum`v' * Treat

}

// Regression with the interaction terms.
xtreg Y Time Treatn1-Treatn2 i.Year, fe

est sto reg

// Plot the interaction coefficients. Confidence intervals that all cross the
// zero line mean the coefficients are insignificant, i.e. no clear difference.
// NOTE(review): coefplot's keep() matches coefficient names and wildcards; if the
// range form is not accepted, use keep(Treat*) instead.
coefplot reg, keep( Treatn1-Treatn2 ) vertical recast(connect) yline(0)

// Quantile regression //

diff Contracts ,t(Treat) p(Time) cov(v v v ) qdid(0.5) report // qdid(0.5) runs the DID as a quantile regression at the 50% quantile; report displays the covariate estimates 

 //安慰剂检验(置换检验)//

cap erase "simulations.dta"   // delete any earlier results file before saving a new one

// Randomly permute DID across observations 500 times (reps) with random-number
// seed 100, re-estimating the model each time and saving the coefficient, its
// standard error and the residual degrees of freedom. Author's note: do not add
// control variables here. (Stata comments use //; # is not a comment marker.)
permute DID beta = _b[DID] se = _se[DID] df = e(df_r), reps(500) seed(100) saving("simulations.dta"): reghdfe Y DID, absorb(ID Year) vce(robust)

use "simulations.dta", clear

gen t_value = beta / se

gen p_value = 2 * ttail(df, abs(beta/se))

// Density of the placebo coefficients. Author's reading: estimates concentrated
// around 0 and roughly normal indicate the placebo effect is negligible, so no
// important factor appears to be omitted from the model.
// caption() and graphregion() belong to this command, so they are joined to it
// with ///. labsize(small) and size(*0.8) are spelled without the stray
// characters found in the original.
dpplot beta, xtitle(" Estimator", size(*0.8)) xlabel(, format(%4.3f) labsize(small)) ///
       ytitle("Density", size(*0.8)) ylabel(, nogrid format(%4.3f) labsize(small)) ///
       note(" ") caption(" ") graphregion(fcolor(white))

// Placebo plot: p-values (left axis) and kernel density (right axis) of the permuted coefficients //
// The p-value variable created after the permutation step is p_value.
// xline(0.13) marks a hard-coded reference coefficient; replace it with your own
// baseline estimate. The last line carries no /// so the command ends there.
twoway (scatter p_value beta, msymbol(smcircle_hollow) mcolor(blue)) ///
       (kdensity beta, yaxis(2) lp(dash)), ///
       title("置换检验") ///
       xlabel(-0.3(0.1)0.3 -0.1 "-0.1" -0.2 "-0.2" -0.3 "-0.3" 0.1 "0.1" 0.2 "0.2" 0.3 "0.3", format(%7.1f) angle(0)) ///
       ylabel(0(0.2)1, format(%7.1f) angle(0) nogrid axis(1)) ///
       ylabel(0(2)8, format(%7.1f) angle(0) nogrid axis(2)) ///
       xtitle("回归系数") ///
       ytitle("P" "值", orientation(horizontal) axis(1)) ///
       ytitle("核" "密" "度", orientation(horizontal) axis(2)) ///
       xline(0, lwidth(0.2) lp(dash)) ///
       xline(0.13, lwidth(0.3) lp(solid)) ///
       yline(0.1, lwidth(0.2) lp(dash)) ///
       legend(label(1 "P值") label(2 "核密度")) ///
       plotregion(style(none)) ///
       graphregion(color(white))

// Counterfactual test: move the policy date //

xtset ID Year

// Time and Treat already exist from the baseline step, and gen refuses to
// overwrite a variable, so remove them first (capture ignores "not found").
capture drop Time
capture drop Treat

gen Time = (Year >= xxx) & !missing(Year)   // xxx: the earlier, placebo policy year

gen Treat = (ID <= xxx) & !missing(ID)

diff Y, t(Treat) p(Time) cov(v v v) robust report bs reps(100)

机制检验

// Mediation effect //
// NOTE(review): per the sgmediation2 help, GTFP should be the outcome, mv() the
// mediator, iv() the independent variable and cv() the covariates (here $control
// plus year and id dummies) — confirm against the help file.

sgmediation2 GTFP, mv(lnGP) iv(DEI) cv($control i.year i.id)

// Fix the seed so the bootstrap below is reproducible.
set seed 10101

// Bootstrap r(ind_eff) and r(dir_eff) with 500 replications.
bootstrap r(ind_eff) r(dir_eff), reps(500) : sgmediation2 GTFP, mv( lnGP ) iv(DEI) cv($control i.year i.id)

// Percentile and bias-corrected bootstrap confidence intervals.
estat bootstrap, percentile bc

稳健性检验

// System GMM //

// Robust fit, followed by the Arellano-Bond serial-correlation test.
// The postestimation command is estat abond (not abon).
xtdpdsys GTFP INVEST FDI PGDP RD PD SO2I, lags(1) maxldep(3) pre(GFE, lag(1,2)) endogenous(DEI, lag(0,3)) vce(robust)
estat abond
// Same model without vce(robust): the Sargan over-identification test is only
// available after a non-robust fit.
xtdpdsys GTFP INVEST FDI PGDP RD PD SO2I, lags(1) maxldep(3) pre(GFE, lag(1,2)) endogenous(DEI, lag(0,3))
estat sargan


工具变量回归

  • //输出两阶段结果//

// 2SLS with robust standard errors. first displays the first stage; savefirst
// with savefp(first) stores it as "first" + the endogenous variable name, i.e.
// firstX, which is the name the export below refers to.
ivreg2 Y (X=IV) x1 x2 i.year, r first savefirst savefp(first)
eststo second

// Export the first and second stage side by side; options are separated by spaces.
outreg2 [firstX second] using "xxx.doc", tstat bdec(3) tdec(2) replace

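(判断标准:Underid 检验 p<0.01,拒绝"工具变量识别不足";Weakid 的 Cragg-Donald Wald F 统计量大于 Stock-Yogo 10% 临界值,拒绝"弱工具变量";Hansen J 检验 p>0.1,不能拒绝 H0"所有工具变量均外生",即模型设定正常。工具变量个数等于内生变量个数(恰好识别)时不报告 Hansen J。)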

Time Series

VAR模型

// Lag-order selection //
// V is a placeholder for the list of series in the VAR.

varsoc V

// ADF unit-root test //
// notrend is not a dfuller option: the test regression has no trend term unless
// trend is specified, so the option is simply removed.

dfuller V, lags(0)

// Fit the VAR with lags 1 to 4 //

var V, lags(1/4)

// Stability check (eigenvalues inside the unit circle), with graph //

varstable, graph

// Granger causality tests //

vargranger

// Impulse-response analysis //
// n is a placeholder for the number of steps.

irf create model, step(n) set(myirf) replace

irf graph oirf, impulse(V) response(V) yline(0,lcolor(black)) byopts(yrescale)

// Forecast-error variance decomposition //

irf graph fevd, irf(model) impulse(V) response(V) yline(0,lcolor(black)) byopts(yrescale)

// irf table produces a table, so the graph options yline() and byopts() are dropped.
irf table fevd, irf(model) impulse(V) response(V)

// Forecast //
// Syntax is "fcast compute prefix, options": p is the prefix given to the
// forecast variables, so the forecast of V is stored as pV.

fcast compute p, step(n)

fcast graph pV, observed

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值