stata备忘录

Stata备忘录

1. 画图

(1)时间趋势图
label var year "年份"
label var per "制造业增加值比重[左轴]"
label var tjj "工业增加值比重[右轴]"
graph twoway (connect per year ,yaxis(1) color(black) )   ///
(connect tjj year ,yaxis(2) color(black) lpattern(dash)  ) ///
, graphregion(color(white))  xlabel(2003(2)2019) ///
ytitle("世界银行制造业增加值比重(%)",axis(1) height(5))  ///
ytitle("国家统计局工业增加值比重(%)",axis(2) height(5))  ///
note(数据来源:World Bank Open Data、国家统计局) xline(2011) 

时间趋势图1
等价命令

tw (connect value1819 season , /// 
	lcolor(black) lpattern(dash) msymbol(O) mlcolor(gs5) mfcolor(gs12)) /// 
(connect value2020 season , ///
	lcolor(black) lpattern(solid) msymbol(S) mlcolor(gs5) mfcolor(gs12)) ///
,graphregion(color(white)) ///
legend(label(1 "18-19年平均") label(2 "2020年") ) ///
xlabel(1 "第一季度"  2 "第二季度"  3 "第三季度"  4 "第四季度"  ,labsize(small) )
label var year "年份"

tw bar mR1 year,yaxis(2) bc(balck) sort barwidth(0.9) fintensity(inten0) ///
ylabel(0(2000)6000, axis(2)) /// 
xlabel(2014(1)2021)|| /// 
connect percent_R year,yaxis(1) lc(black) lp(dash) mc(blace) ///
ylabel(0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" ,axis(1))  ||, ///
graphregion(color(white) ) ///
bgcolor(white) ///
title("中国数字内容企业(游戏)收入金额及占全球市场比重", c(black) size(*0.8)) ///
ytitle("占比(%)",axis(1) height(7))  ///
ytitle("收入额(百万美元)",axis(2) height(5))  /// 
legend(label(1 "中国数字内容企业(游戏)收入占全球市场比重") label(2 "中国数字内容企业(游戏)收入金额") ) ///
legend(size(small) col(1)) ///
note("数据源自:app annie")

graph save "Graph" "$path\output\playdata_1_percent_and_value_of_Chinese_Apps_Export.gph",replace

时间趋势图2

use hs_adj_year_PQV_2000_2015.dta,clear
use hs_adj_year_PQV_2000_2015_cregime10,clear

merge m:1 hs_adj using equipment
replace BEC=4 if BEC==1 & equipment!=1

destring hs_adj,replace

reghdfe lnV i.year if year!=2006 & BEC==2, a(hs)
est store result_accessories

reghdfe lnV i.year if year!=2006 & BEC==4, a(hs)
est store result_equipment

reghdfe lnV i.year if year!=2006 & BEC==0, a(hs)
est store result_noncapital



#d ;
coefplot 
  (result_accessories,c(l) label("accessories") lp(dash) lc(black) mc(black) ms(smcircle_hollow) offset(-0.07))  
  (result_equipment ,c(l)  label("equipment") lp(solid) lc(black) mc(black) ms(smcircle_hollow))
  (result_noncapital,c(l)  label("noncapital") lp(dot) lc(black) mc(black) ms(smcircle_hollow) offset(0.07))
  , vertical 
  drop(_cons) byopts(xrescale) 
  xlabel(1 "2001"  3"2003"  5"2005" 6"2007"  8"2009"  10"2011" 12"2013" 14"2015")
  graphregion(color(white))
  legend(size(small) col(3))
  ;
#d cr

时间趋势图3

字体大小 option
字体大小optiondescription
zerono size whatsoever, vanishingly small
minusculesmallest
quarter_tiny
third_tiny
half_tiny
tiny
vsmall
small
medsmall
medium
medlarge
large
vlarge
huge
vhugelargest
tenthone-tenth the size of the graph
quarterone-fourth the size of the graph
thirdone-third the size of the graph
halfone-half the size of the graph
fulltext the size of the graph
sizeany size you want
节点样式 eg: msymbol(O) mlcolor(gs5) mfcolor(gs12)
symbolstyleSynonym(if any)Description
circleOsolid
diamondDsolid
triangleTsolid
squareSsolid
plus+
XX
arrowfAfilled arrow head
arrowa
pipe
VV
smcircleosolid
smdiamonddsolid
smsquaressolid
smtriangletsolid
smplus
smxx
smvv
circle_hollowOhhollow
diamond_hollowDhhollow
triangle_hollowThhollow
square_hollowShhollow
smcircle_hollowohhollow
smdiamond_hollowdhhollow
smtriangle_hollowthhollow
smsquare_hollowshhollow
pointpa small dot
noneia symbol that is invisible
线样式
linepatternstyleDescription
solidsolid line
dashdashed line
dotdotted line
dash_dot
shortdash
shortdash_dot
longdash
longdash_dot
blankinvisible line
formulae.g.,-. or --… etc.
A formula is composed of any combination of
lsolid line
_(underscore) a long dash
-(hyphen) a medium dash
.short dash (almost a dot)
#small amount of blank space
颜色
blackedkbluegs12limeorange
blueeggshellgs13ltblueorange_red
bluishgrayeltbluegs14ltbluishgraypink
bluishgray8eltgreengs15ltbluishgray8purple
brownemeraldgs16ltkhakired
chocolateemidbluegs2magentasand
cranberryerosegs3maroonsandb
cyanforest_greengs4midbluesienna
dimgraygoldgs5midgreenstone
dkgreengraygs6mintsunflowerlime
dknavygreengs7navyteal
dkorangegs0gs8navy8white
ebbluegs1gs9noneyellow
ebggs10khakiolive
edkbggs11lavenderolive_teal
(2)柱状图
#delimit ;
graph bar cn_wzje_80 cn_wzje_81 if wzlx==0, over(sec) bargap(-30)
  ytitle("吸引外资金额")
  legend( label(1 "08年前") label(2 "08年后") )
  title("各行业吸引外资2008年前后对比")
  subtitle("中国中西部服务业")
  note("中国中西部服务业") ;
#delimit cr

在这里插入图片描述

(3)散点图
tw (scatter delta_v2_v3 delta_v1_v2 if delta_v1_v2>=-0.3& delta_v1_v2 <=2.3 ///
&delta_v2_v3>= -0.3&delta_v2_v3<=2.3, ///
mlabel(hy4) mlc(black) mlabc(black) ms(x) mlabs(tiny)) ///
(fun y=x,range(-0.3 2.3)) , ///
xlab(-0.3(0.5)2.3) ylab(-0.3(0.5)2.3)  ///
graphregion(color(white)) ///
xline(0,lp(dash) lc(gs10)) ///
yline(0,lp(dash) lc(gs10)) ///
legend(ring(0) pos(5) order(2 "45°线")) ///
ytitle("11-15时段的增速") ///
xtitle("07-11时段的增速")

在这里插入图片描述

graph twoway (scatter wzje_CE0308 wzje_CE1419, mlabel(sec) mlabv(sec) ) (function y=x, range(0 0.11)) , ///
title("中国东部地区03-08对比中国东部地区14-19[金额]")  ///
ytitle("中国东部03-08") ///
xtitle("中国东部14-19") ///
legend(ring(0) pos(5) order(2 "45°线")) ///
graphregion(color(white))

在这里插入图片描述

graph twoway (scatter c_AS c_WD, mlabel(cic03)  ) (lfit c_AS c_WD) , ///
title("东盟增速放缓 vs 世界增速放缓")  ///
ytitle("东盟")  ///
xtitle("世界") ///
legend(ring(0) pos(5) order(2 "拟合")) ///
graphregion(color(white))

(4)bgshade
bgshade ks, shaders(uu9)  ///
   twoway(connect lamda22 ks if treat==1&ks>=6&ks<=11 ||             ///  
          connect lamda22 ks if treat==0&ks>=6&ks<=11 , xlab(6(1)11) ///
   title("新冠疫情冲击下企业平均收入变化趋势"))

在这里插入图片描述

(5)coefplot
coefplot,  levels(90) vertical lcolor(black)mcolor(black) ///
 msymbol(circle_hollow) ytitle(估计系数, size(small)) ///
 ylabel(, labsize(small) angle(horizontal) nogrid) ///
 yline(0, lwidth(vthin)lpattern(solid) lcolor(black)) ///
 xtitle(事件发生时间, size(small)) ///
  title("(B)企业缴税的平行趋势检验") ///
 xlabel(0"." 1"2019s2" 2"2019s3" 3"2019s4" 4"2020s1" 5"2020s2" 6"2020s3" 7"2020s4") 

在这里插入图片描述

reghdfe lnQ i.Year if elec == 1,a(i.citycode) vce(r)
est store elec_Q_1 

reghdfe lnV i.Year if elec == 1,a(i.citycode) vce(r)
est store elec_V_1 

reghdfe lnQ i.Year if elec == 0,a(i.citycode) vce(r)
est store elec_Q_0

reghdfe lnV i.Year if elec == 0,a(i.citycode) vce(r)
est store elec_V_0

coefplot (elec_Q_1,label("半导体电子元件相关企业进口数量") offset(0.05)  pstyle(p3)) ///
(elec_Q_0 ,label("非半导体电子元件相关企业进口数量") offset(-0.05)  pstyle(p4) ), ///
vertical drop(_cons) xline(0) ///
graphregion(color(white)) /// 
yline(0) ///
addplot(line @b @at,lp(dash) lwidth(*0.5)) /// 
legend(label(1 "半导体电子元件相关企业进口数量") label(2 "非半导体电子元件相关企业进口数量") ) 

在这里插入图片描述

(6)画系数和置信区间
twoway (scatter coef week) /// 
(rcap ci_lower ci_upper week, /// 
 lcolor(black) /// 
 mcolor(black) /// 
 lwidth(vthin) /// 
 lpattern(dash) ///
 msymbol(circle_hollow) ///
 legend(label(2 "99% CI"))) , ///
yline(0) ///
xtitle("")  ///
graphregion(fcolor(white)) ///
title("第X周的系数", size(medium)) /// 
name("Coef_all_I", replace)

(7)画直方图

一般使用kdensity

hist year if year>=1400 & year<=2010, freq bin(200) ylabel(0(500)2500) xtitle("Year") xline(1950 1980,lw(thin)) ///
     text(1500 1950 "Year=1950", place(w)) text(2000 1980 "Year=1980", place(w)) 

在这里插入图片描述

(8)画桑基图
cd $path\appdata
use Data_games.dta,clear
merge m:1 ParentCompanyName using "$path\data\company_city"
keep if _m == 3
drop _m
gen from = city_code
encode iso3_j,gen(to)
bys from to :egen tR = total(Revenue)
bys from to :egen tD = total(Downloads)
duplicates drop  from to ,force
gen x0 = 1
gen x1 = 2
tostring city_code ,gen(city2)
drop if dest == "CHN"

sankey_plot x0 from x1 to, ///
width0(tR) extra xlabel(1 "Source" 2 "Destination", nogrid labsize(small)) ///
colorpalette(economist, opacity(30)) ///
label0(city) label1(iso3_j) ///
labsize(*0.6) labcolor(black) ///
graphregion(color(white))  gap(0.1) ///
title("地级市层面Apps出海流向(按收入额)",color(black) size(*0.8))

graph save "Graph" "$path\output\sankey_R_0228.gph",replace


sankey_plot x0 from x1 to, ///
width0(tD) extra xlabel(1 "Source" 2 "Destination", nogrid labsize(small)) ///
colorpalette(economist, opacity(30)) ///
label0(city) label1(iso3_j) ///
labsize(*0.6) labcolor(black) ///
graphregion(color(white))  gap(0.1) ///
title("地级市层面Apps出海流向(按下载量)",color(black) size(*0.8))

graph save "Graph" "$path\output\sankey_D_0228.gph",replace

在这里插入图片描述

(9)气泡图
twoway(scatter mv T_gap_05_00 [fweight=N] if BEC == 0&T_gap_05_00!=0&N !=0,msymbol(Oh) mc(ebblue%40)) ///
(scatter mv T_gap_05_00 [fweight=N] if BEC == 4&T_gap_05_00!=0&N !=0,msymbol(Oh) mc(orange_red%40)) ///
(scatter mv T_gap_05_00 [fweight=N] if BEC == 2&T_gap_05_00!=0&N !=0,msymbol(Oh) mc(green%20)) ///
, legend(label(1 "非资本品") label(2 "equipment") label(3 "accessories") ) 

在这里插入图片描述

2. 处理数据

(1)拓展expand数据

例如:当前数据中 有9523、9524、9525、9526、9527、9528共计6个样本,现在想把这6个样本根据freq进行扩充,变为9523、9524_1、9524_2、9525_1、9525_2、9526_1、9526_2、9527_1、9527_2、9528_1、9528_2这11个样本

freqcountvalue
195234845.1143
29524969.66498
29525129.53349
2952671284.508
295271038.127
29528445877.09

count是id的唯一识别码,expandcl函数可以生成freq行相同的样本,并生成一个新的id识别码freq_count

egen count=group(id hs02_6)
expandcl freq,gen(freq_count) cluster(count)
drop freq_count
(2)时间数据
gen R= mdy(month_r,day_r,year_r)
gen week_r = week(R)
gen day_r = day(R)
gen dow_r = dow(R)  //返回周几
gen doy_r = doy(R)  //返回年内日期
gen yw_r = yw(year_r,week_r)
gen ed = yw - yw_r
//yw ym yq yh分别为年周、年月、年季、年半年
gen period_kb= date(date_u,"YMD")-date(date_kb,"YMD")
(3)常见函数
int(x) //取整,不论后面的小数是什么,只取小数点前的数值
round(x) // 四舍五入取整
round(x, .01) //保留两位小数四舍五入
gen y = sum(x)  //求列累积和
egen y = sum(x) //求列总和
egen y = rsum(x y z) //求x+y+z总和
egen y = rowmean(x y z) //求(x+y+z)/3
egen y = rowsd(x y z) //求x y z的方差
egen y = rowmim(x y z) //求x y z的最小值
egen y = rowmax(x y z) //求x y z的最大值
egen y = mean(x)      //求列均值
egen y = median(x)    //求列中位数
egen y = std(x)       //求变异系数,与方差不同

bysort x(y): gen z = y[1] //按照x分组,分组后按照y排序,生成一个新变量z=y的第一个观察值

(4)缩尾处理
foreach v of var DexpoAS4- DlnexpoWD2{
gen `v'_w=`v'
qui su `v',det
replace `v'_w=r(p99) if `v'>r(p99) & `v'<.
replace `v'_w=r(p1) if `v'<r(p1)
}

winsor2 wage, replace cuts(1 99) trim

summary 一个变量之后,可以返回的结果有

r(N)           //number of observations
r(mean)        //mean
r(skewness)    //skewness (detail only)
r(min)         //minimum
r(max)         //maximum
r(sum_w)       //sum of the weights
r(p1)          //1st percentile (detail only)
r(p5)          //5th percentile (detail only)
r(p10)         //10th percentile (detail only)
r(p25)         //25th percentile (detail only)
r(p50)         //50th percentile (detail only)
r(p75)         //75th percentile (detail only)
r(p90)         //90th percentile (detail only)
r(p95)         //95th percentile (detail only)
r(p99)         //99th percentile (detail only)
r(Var)         //variance
r(kurtosis)    //kurtosis (detail only)
r(sum)         //sum of variable
r(sd)          //standard deviation

(5)创建文件夹

在project路径下生成一个workspace文件夹
再生成子文件夹储存数据(data)、控制变量(controlvars )、临时数据(tempdata)、回归结果(outreg)

efolder, cd(D:\stata15\project\workspace)
efolder, cd(D:\stata15\project\workspace sub(data controlvars tempdata outreg)
(6)分组处理数据(bysort的替代方案)
*展示根据highzupu50(族谱)和year分组后的变量drqianfen(死亡率)均值;
collapse (mean) drqianfen, by(highzupu50 year)
(7)定义无缺失的样本
g rsample = !mi(avggrain_fyr) & !mi(nograin_fyr) & !mi(urban_fyr)& !mi(dis_bj_fyr) & !mi(dis_pc_fyr) & !mi(migrants_fyr)& !mi(rice_fyr) & !mi(minor_fyr) & !mi(edu_fyr)
(8)定义Dummy的新替代式(时间range)
*如果yob满足1825≤yob≤1899则pre取值为1,否则pre取值为0。mid、post生成过程类似。
gen pre = inrange(yob, 1825, 1899)
gen mid = inrange(yob, 1899, 1919)
gen post = inrange(yob, 1920, 1960)
(9)快速替换
recode treatyear (1969 = 1) (1979 = 2) (1989 = 3) (1999 = 4) (2009 = 5)

3. 处理字符

(1)替换字符
replace 候选人姓名=subinstr( 候选人姓名, " ", "",. )
(2)捕捉字符中的某些特征
keep if strmatch(city, "*山东*")
gen temp = 1 if strmatch(reporteriso3, "A*")
(3)提取字符,检索特定字符
//从enddate字符1开始取,取4个字符赋给year
gen year = substr(enddate,1,4)  

//strpos(s1, s2)返回字符s2在s1中的位置,如果s1中找不到s2,则返回0,将该判断再赋给y
gen y = strpos(s1, s2) != 0    

4. 输出结果

(1)常规输出
outreg2 using "E:\mfg\outreg\r2", word append addtext(CountryFE, YES,YearFE, YES)
(2)iv回归输出第一阶段
eststo: xtivreg p_a_w (DexpoCN4_w=dexpo44) /// 
 i.t c.expr0#t c.Lshare0#t /// 
 c.lnGDP0#t c.lngdp0#t, fe first vce(cluster c)  
 
 eststo: xtreg DexpoCN4_w dexpo44 /// 
 i.t c.expr0#t c.Lshare0#t /// 
 c.lnGDP0#t c.lngdp0#t if e(sample)==1 ,fe  
 cd $path\outreg 
 outreg2 using "table3", word replace addtext(CityFE, YES,YearFE, YES) keep(dexpo44) 
(3)变量描述性统计
*列出inv等变量的样本数、均值、标准差、最小值和最大值。
tabstat inv loginv log_levies ///
     logpopl logincome logasset hhsize landpc logmigration logtax logtransfer share_admin  ///
	 postcont postopen secret_ballot proxy_voting moving_ballot ///
	 , s(N mean sd min max) c(s)

5.矩阵保存结果

mat T1 = J(3,3,.)

reghdfe temp ib1.season if year == 2018 & treat == 1,noa
forvalues i = 1/3{
	local j = `i' + 1
	mat T1[`i',1] = _b[`j'.season]
}

reghdfe temp ib1.season if year == 2019 & treat == 1,noa
forvalues i = 1/3{
	local j = `i' + 1
	mat T1[`i',2] = _b[`j'.season]
}

reghdfe temp ib1.season if year == 2020 & treat == 1,noa
forvalues i = 1/3{
	mat T1[`i',3] = _b[`=1+`i''.season]
}

svmat T1

6.导入数据(全字符串)

forv i = 2000/2003{
	cd E:\Data\EPS工企海关匹配库\origindata
	import delimited "工企+海关(`i').csv", stringcols(_all) clear 
	cd E:\Data\EPS工企海关匹配库
	save data`i'.dta,replace
}

7.循环

clear all
set obs 1000

**#** 用forvalues循环对单一变量进行处理

gen id = .    
//生成一个变量名为id,代表第几个人,假设一共有50个人
//假设每个人都有20个观测值,代表20年

forvalues i = 1/50 {
	local j = `i' - 1				//暂时定义0~49 方便计算
	local lower = `j' * 20  +1		//定义下限 1、21、41、61 
	local upper = `j' * 20 + 20		//定义上限 20、40、60、80
									//由此就定义了 1~20  21~40 41~60 ……
	replace id = `i' in `lower'/`upper'		//给第1~20行,赋值为第1个人
											//给第21~40行,赋值为第2个人
}

bys id : gen T = _n + 2000					//对于每个人,都生成一个时间序列



**#** 用forvalues循环对多个变量进行处理

forvalues i = 1/5 {
	gen value`i' = .
	cap gen e = rnormal()
	replace value`i' = e * 10 + `i'
	cap drop e
}

// 等价于 
gen value6 = .
cap gen e = rnormal()
replace value6 = e * 10 + 6
cap drop e

gen value7 = .
cap gen e = rnormal()
replace value7 = e * 10 + 7
cap drop e

gen value8 = .
cap gen e = rnormal()
replace value8 = e * 10 + 8
cap drop e

gen value9 = .
cap gen e = rnormal()
replace value9 = e * 10 + 9
cap drop e

gen value10 = .
cap gen e = rnormal()
replace value10 = e * 10 + 10
cap drop e


**#** 用while循环对单一变量进行处理
// 只要时间在T=11和T=20之间,就对value1~value10进行 " 乘0.1"的处理
local i = 2010 
while `i' < 2020 {
	forvalues j = 1/10{
		replace value`j' = value`j' * 0.1 if T == `i'
	}
	local i = `i' + 1
}

**#** 用foreach对变量进行处理
foreach v in value1 value2 value3 value4 value5  {
	su `v' ,d
	replace `v' = (`v' - r(min)) / (r(max) - r(min))
	kdensity `v'
}

// 等价于 
su value6,d 
replace value6 = (value6 - r(min)) / (r(max) - r(min))
kdensity value6

su value7,d 
replace value7 = (value7 - r(min)) / (r(max) - r(min))
kdensity value7

su value8,d 
replace value8 = (value8 - r(min)) / (r(max) - r(min))
kdensity value8

su value9,d 
replace value9 = (value9 - r(min)) / (r(max) - r(min))
kdensity value9

su value10,d 
replace value10 = (value10 - r(min)) / (r(max) - r(min))
kdensity value10

8.将第一行作为 label / varname

* 把第一行作为变量标签
labone,nrow(1) 

* 把第一行作为变量名(不保留第一行)
nrow

* 把第一行作为变量名(保留第一行)
nrow,keep
  • 4
    点赞
  • 27
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

mengke25

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值