* CCM link table expanded to one row per cusip-year.
* Reads ccmlink.dta and writes ccmlink_ibes_year.dta.
* (Plain "*" comment: "///" is Stata's line-continuation comment and would
*  glue the following command onto the comment line.)
use ccmlink.dta, clear

* Keep only the first 8 characters of the CUSIP.
replace cusip = substr(cusip, 1, 8)

* Rows without a CUSIP cannot be linked. If they stay in, egen group() gives
* them a missing panel id, they sort last after tsfill, and the forward fill
* below would stamp the previous firm's CUSIP onto them.
drop if missing(cusip)

* Calendar years of link start/end.
* NOTE(review): assumes LINKDT / LINKENDDT are Stata daily dates - confirm.
gen year1 = year(LINKDT)
gen year2 = year(LINKENDDT)
* A missing end year is capped at the hard-coded year 2020.
replace year2 = 2020 if missing(year2)
duplicates drop cusip year1 year2, force

* Stretch every row of a CUSIP to the widest [min start, max end] window.
bysort cusip: egen max_y = max(year2)
bysort cusip: egen min_y = min(year1)
replace year1 = min_y if year1 > min_y
replace year2 = max_y if year2 < max_y

* One row per CUSIP; the gvkey kept is the one on the first surviving row.
keep cusip gvkey year1 year2
duplicates drop cusip, force

* Turn (year1, year2) into two rows per CUSIP, then fill the years between.
reshape long year, i(cusip) j(j)
drop j
egen cusip1 = group(cusip)
* year1 == year2 yields two identical rows; keep one so xtset accepts the panel.
duplicates drop cusip1 year, force
xtset cusip1 year
tsfill

* tsfill leaves cusip/gvkey empty on the inserted rows: carry the previous
* year's value forward, strictly within each panel.
bysort cusip1 (year): replace cusip = cusip[_n-1] if cusip == ""
* missing() works whether gvkey is stored as numeric or as string.
bysort cusip1 (year): replace gvkey = gvkey[_n-1] if missing(gvkey)

drop cusip1
save ccmlink_ibes_year.dta, replace
* CCM link table without a year dimension: one row per 8-character CUSIP.
* Reads ccmlink.dta and writes ccmlink_ibes.dta.
use ccmlink.dta, clear

* Discard the link-detail and location columns that are not carried forward.
drop cik LINKPRIM LIID LINKTYPE PERMNO LPERMCO LINKDT LINKENDDT COUNTY CITY FYRC

* Shorten the CUSIP to its first 8 characters, then keep the first row
* found for each shortened CUSIP.
replace cusip = substr(cusip, 1, 8)
duplicates drop cusip, force

save ccmlink_ibes.dta, replace
Analyst variables: No. of analyst forecasts and No. of analysts following
firm-year level
* Analyst coverage: number of distinct analysts per cusip-year.
* Reads analysts_origin and writes analysts_count.dta.
use analysts_origin, clear

* Observations without a CUSIP are unusable for the later merges.
drop if missing(CUSIP)
rename CUSIP cusip

* Year taken from FPEDATS.
* NOTE(review): assumes FPEDATS is a Stata daily date - confirm.
generate year = year(FPEDATS)
keep cusip year ANALYS

* Remove exact repeats so each (cusip, year, ANALYS) combination appears once;
* the count below is then the number of distinct nonmissing ANALYS values.
duplicates drop
bysort cusip year: egen no_analysts = count(ANALYS)

* Reduce to a single row per cusip-year (ANALYS on that row is whichever
* row came first and is not meaningful afterwards).
duplicates drop cusip year, force

save analysts_count.dta, replace
* Forecast counts per cusip-year, combined with the analyst counts and the
* CCM link, saved as analysts.dta.
use analysts_origin, clear

* Same preparation as for analysts_count: drop rows without a CUSIP and
* derive the year from FPEDATS.
drop if missing(CUSIP)
rename CUSIP cusip
generate year = year(FPEDATS)
keep cusip year ANALYS

* Number of rows with a nonmissing ANALYS in each cusip-year.
bysort cusip year: egen no_forecasts = count(ANALYS)

* Attach no_analysts; nogenerate skips creating _merge, so there is nothing
* to drop afterwards.
merge m:1 cusip year using analysts_count, nogenerate

* One row per cusip-year.
duplicates drop cusip year, force

* Bring in the link-file columns and keep matched CUSIPs only.
* NOTE(review): gvkey is expected to come from ccmlink_ibes.dta - confirm.
merge m:1 cusip using ccmlink_ibes.dta
drop if _merge != 3
drop _merge

* One row per gvkey-year; when several CUSIPs share a gvkey in a year, the
* first row encountered is the one kept.
duplicates drop gvkey year, force

save analysts.dta, replace