缺失值简单插补方法

1.利用均值/最小值/最大值等进行插补

/* Method 1: replace each missing _value_ with that patient's mean.
   MEAN() can be swapped for MIN()/MAX() to impute with another summary.
   Selecting detail columns together with a summary function makes PROC SQL
   remerge the per-patient statistic onto every row, so SAMPLE keeps all rows
   and columns of _TEMP_ and adds the imputed column VALUE.
   A patient with no observed values has a missing mean and stays missing. */
proc sql noprint;
   create table sample as
   select *
          ,case
              /* IS MISSING also catches special missing values (.A-.Z, ._);
                 the comparison "_value_ = ." would leave them unimputed. */
              when _value_ is missing then mean(_value_)
              else _value_
           end as value
   from _temp_
   group by patient;
quit;

2.利用最常出现的数据的均值/最小值/最大值等进行插补

/* Method 2: replace each missing _value_ with the mean of that patient's
   most frequently observed value(s). When several values tie for the highest
   count they are averaged; MEAN() can be swapped for MIN()/MAX(). */
proc sql noprint;
   /* Step 1: count how often each non-missing value occurs per patient. */
   create table FreqVals as
   select patient
          ,count(_value_) as frq
          ,_value_
      from _temp_
      where _value_ is not missing
      group by patient, _value_;

   /* Step 2: keep the value(s) whose count equals the patient's maximum
      count, then average them into a single target per patient. */
   create table Target as
   select patient
          ,mean(_value_) as _value_
      from (select distinct patient, _value_
              from FreqVals
              group by patient
              having frq eq max(frq))
      group by patient
      order by patient;

   /* Step 3: fill missing values from the per-patient target.
      IS MISSING (rather than "= .") keeps this consistent with step 1,
      which already excludes special missing values (.A-.Z, ._) from the
      counts. Patients with no observed value have no Target row and stay
      missing. */
   create table sample as
   select e.*
          ,case
              when e._value_ is missing then f._value_
              else e._value_
           end as value
      from _temp_ as e
           left join Target as f
           on f.patient eq e.patient
      order by e.patient, e.time;
quit;


3.利用趋势进行插补,包括延后/提前/左右临近值插补

/* Method 3: trend-based imputation within each patient, ordered by TIME.
   A missing _value_ is replaced by the last observed value before it
   (carried forward); if there is none, by the next observed value after it
   (carried backward). A patient with no observed values stays missing.

   The input _TEMP_ is left untouched: intermediate results go to the
   scratch dataset _IMP_WORK_, so the helper columns BackWard/ForWard never
   leak into _TEMP_ (where a later "select e.*" would pick them up).
   _IMP_WORK_ is left behind and can be deleted afterwards. */

/* Pass 1: walk each patient backwards in time and remember the nearest
   later observation in BackWard. */
proc sort data=_temp_ out=_imp_work_;
   by patient descending time;
run;

data _imp_work_;
   set _imp_work_;
   by patient descending time;
   retain BackWard;
   if first.patient then BackWard = .;
   /* MISSING() also rejects special missing values (.A-.Z, ._), which
      "_value_ ne ." would treat as observed and propagate. */
   if not missing(_value_) then BackWard = _value_;
run;

/* Pass 2: walk forwards in time, track the nearest earlier observation in
   ForWard, and build the imputed column VALUE in the same step. */
proc sort data=_imp_work_;
   by patient time;
run;

data sample (drop=ForWard BackWard);
   set _imp_work_;
   by patient time;
   retain ForWard;
   if first.patient then ForWard = .;
   if not missing(_value_) then ForWard = _value_;

   if not missing(_value_) then value = _value_;
   else if not missing(ForWard) then value = ForWard;   /* carry forward */
   else value = BackWard;                                /* carry backward */
run;


4.基于样本均值和标准差的缺失值随机插补,类似于多重插补

/* Method 4: random imputation around the per-patient mean.
   Each missing _value_ is replaced by m + s * z, where m and s are the
   patient's mean and standard deviation of the observed values and z is a
   standard normal draw from RANNOR.
   NOTE: RANNOR(0) seeds from the system clock, so results differ between
   runs; pass a positive seed instead if reproducibility is required. */

/* BY-group processing in PROC MEANS needs the data sorted by patient. */
proc sort data=_temp_;
   by patient;
run;

/* Per-patient mean (m) and standard deviation (s) of the observed values. */
proc means data=_temp_ noprint;
   by patient;
   var _value_;
   output out=Target mean=m std=s;
run;

proc sql noprint;
   create table sample as
   select e.*
          ,case
              /* IS MISSING also catches special missing values (.A-.Z, ._). */
              when e._value_ is missing
                 /* s is missing when a patient has a single observed value;
                    fall back to a spread of 0 so the mean is imputed rather
                    than leaving the value missing. */
                 then f.m + coalesce(f.s, 0) * rannor(0)
              else e._value_
           end as value
      from _temp_ as e
           left join Target as f
           on f.patient eq e.patient
      order by e.patient, e.time;
quit;

代码摘自《A SAS® Macro for Single Imputation》一文。
  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值