1.利用均值/最小值/最大值等进行插补
/* Mean imputation.
   Input : _temp_ (uses patient, _value_ and time).
   Output: sample = every column of _temp_ plus the imputed column value.
   Each ordinary missing (.) _value_ is replaced by the mean of the same
   patient's non-missing _value_ readings; PROC SQL remerges the per-patient
   summary statistic onto the detail rows. Replace MEAN with MIN or MAX to
   impute with a different statistic.
   A patient with no non-missing reading keeps a missing value, because the
   mean of an all-missing group is itself missing. */
proc sql noprint;
    create table sample as
    select *
         , case
               when _value_ = . then mean(_value_)
               else _value_
           end as value
    from _temp_
    group by patient
    /* The remerge gives no guarantee about row order, so sort explicitly. */
    order by patient, time;
quit;
2.利用最常出现的数据的均值/最小值/最大值等进行插补
/* Mode-based imputation, done as three CREATE TABLE steps in one PROC SQL
   session: FreqVals (value counts), Target (fill value per patient) and
   sample (the imputed result). */
proc sql noprint;

    /* Step 1: count the occurrences of every non-missing value per patient. */
    create table FreqVals as
    select patient
         , freq(_value_) as frq
         , _value_
    from _temp_
    where _value_ is not missing
    group by patient, _value_;

    /* Step 2: keep the value(s) whose count equals the patient's highest
       count, then average them so that ties collapse to a single fill value. */
    create table Target as
    select patient
         , mean(_value_) as _value_
    from (
            select distinct patient, _value_
            from FreqVals
            group by patient
            having frq = max(frq)
         )
    group by patient
    order by patient;

    /* Step 3: substitute the per-patient fill value wherever _value_ is
       missing; rows of patients absent from Target stay missing. */
    create table sample as
    select src.*
         , case
               when src._value_ = . then tgt._value_
               else src._value_
           end as value
    from _temp_ as src
         left join Target as tgt
         on tgt.patient = src.patient
    order by patient, time;
quit;
3.利用趋势进行插补,包括延后/提前/左右临近值插补
/* Backward pass: walk each patient's rows from the latest time to the
   earliest so that BackWard holds the most recently seen non-missing
   _value_, i.e. the nearest later reading. _temp_ is rewritten in place
   with the extra column BackWard. */
proc sort data=_temp_;
    by patient descending time;
run;

data _temp_;
    set _temp_;
    by patient descending time;
    retain BackWard;
    /* Reset at the start of every patient so values never leak across patients. */
    if first.patient then BackWard = .;
    if _value_ ne . then BackWard = _value_;
run;
/* Forward pass: walk each patient's rows in ascending time so that ForWard
   holds the most recently seen non-missing _value_, i.e. the nearest
   earlier reading. _temp_ is rewritten in place with the extra column
   ForWard. */
proc sort data=_temp_;
    by patient time;
run;

data _temp_;
    set _temp_;
    by patient time;
    retain ForWard;
    /* Reset at the start of every patient so values never leak across patients. */
    if first.patient then ForWard = .;
    if _value_ ne . then ForWard = _value_;
run;
/* Build sample from _temp_, which must already carry the helper columns
   ForWard and BackWard (both are dropped from the output).
   value is the observed _value_ when present, otherwise the carried-forward
   value; if that is still missing, MAX (which skips missing arguments)
   yields the carried-backward value. */
data sample (drop=ForWard BackWard);
    set _temp_;
    if _value_ eq . then value = ForWard;
    else value = _value_;
    if value eq . then value = max(ForWard, BackWard);
run;
4.基于样本均值的缺失值的随机插补,类似于多重插补
/* Per-patient summary statistics of _value_.
   BY-group processing in PROC MEANS needs the input sorted by patient,
   hence the in-place sort of _temp_. Target receives one row per patient
   with m = mean and s = standard deviation of _value_. */
proc sort data=_temp_;
    by patient;
run;

proc means data=_temp_ noprint;
    by patient;
    var _value_;
    output out=Target mean(_value_)=m std(_value_)=s;
run;
/* Random imputation around the per-patient mean.
   Input : _temp_ and Target (one row per patient with m = mean and
           s = standard deviation of _value_).
   Output: sample = every column of _temp_ plus the imputed column value.
   Each ordinary missing (.) _value_ is replaced by m + s * z, where z is a
   standard normal draw from RANNOR.
   NOTE(review): a seed of 0 makes RANNOR seed itself from the system clock,
   so results differ between runs; use a positive seed if reproducibility
   is required. */
proc sql noprint;
    create table sample as
    select e.*
         , case
               /* STD is missing for a patient with a single observed
                  reading; treat it as 0 so the gap is filled with the mean
                  instead of staying missing. */
               when e._value_ = . then f.m + coalesce(f.s, 0) * rannor(0)
               else e._value_
           end as value
    from _temp_ as e
         left join Target as f
         on f.patient = e.patient
    order by patient, time;
quit;
代码摘自<A SAS® Macro for Single Imputation>一文