缺失值简单插补方法

最新推荐文章于 2023-12-17 20:05:58 发布

qq1323362960

最新推荐文章于 2023-12-17 20:05:58 发布

阅读量3.3k

点赞数 1

分类专栏：抽样、模拟、实验设计文章标签： table join sql output null

本文链接：https://blog.csdn.net/yugao1986/article/details/6759092

版权

抽样、模拟、实验设计专栏收录该内容

14 篇文章 0 订阅

订阅专栏

1.利用均值/最小值/最大值等进行插补

/* Method 1: replace each missing _value_ with a per-patient summary statistic.
   Input : _temp_  (reads patient, _value_, time)
   Output: sample  (all columns of _temp_ plus the imputed column "value")
   MEAN() is aggregated within each GROUP BY patient group and the result is
   merged back onto every row of that patient; swap in MIN()/MAX() for the
   other variants. */
proc sql noprint;
   create table sample as
   select *
          /* IS MISSING also catches special missing values (.A-.Z, ._);
             a plain comparison with "." does not, so those rows would
             otherwise be left unimputed. */
          ,case
              when _value_ is missing then mean(_value_)
              else _value_
           end as value
   from _temp_
   group by patient
   /* Fix the row order of the result; assumes _temp_ carries a time column,
      as the other imputation steps in this file do. */
   order by patient, time;
quit;

2.利用最常出现的数据的均值/最小值/最大值等进行插补

/* Method 2: replace each missing _value_ with a summary of the patient's
   most frequently observed value(s).
   Input : _temp_   (reads patient, _value_, time)
   Output: FreqVals (per patient/value occurrence counts)
           Target   (one imputation value per patient)
           sample   (all columns of _temp_ plus the imputed column "value") */
proc sql noprint;
   /* Count how often each non-missing value occurs within a patient. */
   create table FreqVals as
   select patient, freq(_value_) as frq, _value_
      from _temp_
      where _value_ is not null
      group by patient, _value_;

   /* Keep the value(s) whose count equals the patient's maximum count and
      average them, so ties between equally frequent values collapse to a
      single number per patient. */
   create table Target as
   select patient, mean(_value_) as _value_
      from (select distinct patient, _value_
              from FreqVals
              group by patient
              having frq eq max(frq) )
      group by patient
      order by patient;

   /* Fill the gaps. IS MISSING (rather than a comparison with ".") keeps this
      step consistent with the IS NOT NULL filter above: special missing
      values (.A-.Z, ._) are excluded from the counts and are imputed too.
      Patients with no observed value have no row in Target, so the left join
      leaves their "value" missing. */
   create table sample as
   select e.*
          , case
               when e._value_ is missing then f._value_
               else e._value_
            end as value
      from _temp_ as e left join Target as f
      on f.patient eq e.patient
      order by patient, time;
quit;

3.利用趋势进行插补,包括延后/提前/左右临近值插补

/* Method 3: trend-based imputation within each patient.
   A missing _value_ takes the last earlier observed value (carry forward);
   if there is none, it takes the next later observed value (carry backward).
   Input : _temp_   (reads patient, time, _value_; left unmodified)
   Output: sample   (all columns of _temp_ plus the imputed column "value",
                     sorted by patient, time)
   Scratch: _trend_ (sorted working copy carrying the BackWard helper column)
   The work is done on a copy so the helper columns do not end up in _temp_
   and leak into later "select *" steps that read it. */

/* Pass 1: walk each patient from latest to earliest time so that BackWard
   holds the nearest later observed value. */
proc sort data=_temp_ out=_trend_;
   by patient descending time;
run;

data _trend_;
   set _trend_;
   by patient descending time;
   retain BackWard;
   if first.patient then BackWard = .;
   /* missing() also recognises special missing values (.A-.Z, ._). */
   if not missing(_value_) then BackWard = _value_;
run;

/* Pass 2: walk each patient in time order so that ForWard holds the nearest
   earlier observed value, and build the result in the same step. */
proc sort data=_trend_;
   by patient time;
run;

data sample (drop=ForWard BackWard);
   set _trend_;
   by patient time;
   retain ForWard;
   if first.patient then ForWard = .;
   if not missing(_value_) then ForWard = _value_;
   /* First non-missing of: observed value, carried-forward value,
      carried-backward value. Stays missing only when the patient has no
      observed value at all. */
   value = coalesce(_value_, ForWard, BackWard);
run;

4.基于样本均值的缺失值随机插补（类似于多重插补）

/* Method 4: random imputation around the per-patient sample mean.
   A missing _value_ is replaced by m + s*z, where m and s are the patient's
   mean and standard deviation of the observed values and z is a standard
   normal draw.
   Input : _temp_  (reads patient, time, _value_; sorted in place by patient)
   Output: Target  (PROC MEANS output: one row per patient with m and s)
           sample  (all columns of _temp_ plus the imputed column "value") */
proc sort data=_temp_;
   by patient;
run;

proc means data=_temp_ noprint;
   var _value_;
   by patient;
   output out=Target mean=m std=s;
run;

proc sql noprint;
   create table sample as
   select e.*
          /* IS MISSING also catches special missing values (.A-.Z, ._).
             coalesce(f.s, 0): with a single observed value the standard
             deviation is missing, which would make the whole expression
             missing; fall back to the mean alone in that case.
             NOTE(review): rannor(0) is seeded from the clock, so results
             differ between runs; pass a positive seed if reproducibility
             is required. */
          ,case
              when e._value_ is missing then f.m + coalesce(f.s, 0)*rannor(0)
              else e._value_
           end as value
      from _temp_ as e left join Target as f
   on f.patient eq e.patient
   order by patient, time;
quit;

代码摘自<A SAS® Macro for Single Imputation>一文

qq1323362960

关注

1
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
缺失值简单插补方法

1.利用均值/最小值/最大值等进行插补PROC SQL noprint; create table sample as select * ,CASE _value_ when . then MEAN(_val
复制链接

扫一扫