sas学习笔记

最新推荐文章于 2024-07-16 12:16:46 发布

小白歆呀

最新推荐文章于 2024-07-16 12:16:46 发布

阅读量2.1k

点赞数

分类专栏：语言基础文章标签： sas

本文链接：https://blog.csdn.net/baixinzxl/article/details/88762187

版权

语言基础专栏收录该内容

6 篇文章

订阅专栏

sas个人学习笔记

1.允许中文变量名并启用数据集压缩
sas默认不支持数据集中的中文列名，需要用validvarname=any开启。
sas数据集启用压缩后可以节省很多空间，需要自行开启！

/* Session-wide settings: compress newly created data sets and lift the
   default restrictions on variable names (validvarname=any). */
options compress=yes
        validvarname=any;

2.读取同一逻辑库下名称前缀相同的多个数据集

/* Stack every data set in DATAPATH whose name begins with "trans_all_"
   into datapath.trans_all. The store identifier is taken from the third
   "_"-delimited token of the contributing data set's name, and productname
   is rebuilt as a $32 variable. */
data datapath.trans_all(
        drop   = productname floor
        rename = (pname = productname)
     );
    /* _srcname_ receives the name of the data set that supplied the
       current observation (INDSNAME=); it is not written to the output. */
    set datapath.trans_all_: indsname=_srcname_;

    format store $20.;
    format pname $32.;

    store = scan(_srcname_, 3, "_");
    /* KTRUNCATE is the DBCS-aware truncation function. */
    pname = ktruncate(productname, 1, length(productname));
run;

3.iml模块
sas的矩阵模块（iml）可以和R、Python的矩阵思想对照着一起学习。
参考的文章：https://wenku.baidu.com/view/25bd65896137ee06eef91804.html
用法很详细。

/* Row-wise sample standard deviation written with PROC IML.
   Reads work.answer into a matrix and writes one standard deviation per
   row to work.result. The number of columns is taken from the matrix
   itself instead of being hard-coded, so the code works for any width
   (for a 7-column input it gives the same result as dividing by 7 and 6). */
/*
proc iml;
use A; read all var _NUM_ into A; close A;
use B; read all var _NUM_ into B; close B;
use C; read all var _NUM_ into C; close C;
*/
proc iml;
reset deflib=work;
use answer;
read all into frame;
close answer;

n       = ncol(frame);                 /* number of values per row          */
rowmean = frame[,+] / n;               /* row sums divided by column count  */
sqdev   = (frame - rowmean)##2;        /* squared deviations from row mean  */
result  = sqrt(sqdev[,+] / (n - 1));   /* sample (n-1) standard deviation;
                                          undefined when n = 1              */

create result from result;
append from result;
close result;
quit;

/* Estimate the memory needed by a numeric matrix. This snippet was found
   online; doing the work in IML is still memory-hungry. */
proc iml;
/* HowManyGigaBytes(Rows, Cols)
   Rows, Cols : numbers (or equally shaped vectors) of rows and columns.
   Prints, for each pair, the size in gigabytes of a matrix of 8-byte
   doubles and whether it is at most 2 GB. Returns nothing. */
start HowManyGigaBytes(Rows, Cols);
   GB = 8 # Rows#Cols / 2##30;  /* 8 bytes per element; 2##30 = 1024##3 bytes in a gigabyte */
   Fit2GB = choose(GB <= 2, "Yes", " No");
   print Rows[F=COMMA9.] Cols[F=COMMA9.]
         GB[F=6.2] Fit2GB[L="Fits into 2GB"];
finish;

/* test:   rows   cols */
sizes = {250000   1000,
         6000000  350,
         2000000  350,
         628000   349,
         500000   350};
run HowManyGigaBytes(sizes[,1], sizes[,2]);
quit;  /* end the interactive IML session explicitly */

/* Same memory estimate, run in an IML session started with enlarged
   symbol and work spaces.
   NOTE(review): SYMSIZE= / WORKSIZE= are believed to be in kilobytes,
   which would make these requests very large - confirm for your site. */
proc iml symsize=40000000 worksize=40000000 ;
show space;  /* report the memory actually available to IML */

/* HowManyGigaBytes(Rows, Cols)
   Rows, Cols : numbers (or equally shaped vectors) of rows and columns.
   Prints, for each pair, the size in gigabytes of a matrix of 8-byte
   doubles and whether it is at most 2 GB. Returns nothing. */
start HowManyGigaBytes(Rows, Cols);
   GB = 8 # Rows#Cols / 2##30;  /* 8 bytes per element; 2##30 = 1024##3 bytes in a gigabyte */
   Fit2GB = choose(GB <= 2, "Yes", " No");
   print Rows[F=COMMA9.] Cols[F=COMMA9.]
         GB[F=6.2] Fit2GB[L="Fits into 2GB"];
finish;

/* test:   rows   cols */
sizes = {5000000  676};
run HowManyGigaBytes(sizes[,1], sizes[,2]);
quit;  /* end the interactive IML session explicitly */

4.计算分位数（proc univariate）

/* Deciles of variable AUS, written to data set STATS as p10, p20, ..., p90.
   PCTLPRE= takes one prefix per VAR variable (not one per percentile);
   each output name is the prefix followed by the PCTLPTS= value. With a
   single analysis variable a single prefix is therefore what is needed. */
proc univariate data=aus noprint;
    var aus;
    output out=stats
           pctlpts=10 20 30 40 50 60 70 80 90
           pctlpre=p;
run;

5.缺失值填充
这里就怀念fillna和na.replace了

/* Replace ordinary missing values (.) with 0 in every numeric variable
   of RESULT, rewriting the data set in place. */
data result;
    set result;
    /* The array spans all numeric variables known at this point, i.e.
       those coming from the SET statement. */
    array nums[*] _numeric_;
    do _i = 1 to dim(nums);
        if nums[_i] = . then nums[_i] = 0;
    end;
    drop _i;
run;

6.抽样

/* Simple random sample (5%) of lanzhou.Inactive170830. The input's
   existing Selected variable is renamed to Selected0 on the way in, and
   the sample is written back over the same data set. */
proc surveyselect
        data     = lanzhou.Inactive170830(rename=(Selected=Selected0))
        out      = lanzhou.Inactive170830
        method   = srs
        samprate = .05; /* sampling rate */
    *strata CLUSTER;  /* stratification variable - currently commented out */
run;

7.把大的数据集拆分----自己写的宏例子

/* %split - cut a large data set into consecutive chunks of at most K rows.

   Parameters
     data=    input data set (libref.member).
     k=       maximum number of observations per chunk (positive integer).
     outlib=  libref receiving the chunks (default: test).
     prefix=  chunk name prefix (default: customer); chunk i is written
              to &outlib..&prefix.i.

   Side effects
     Creates/overwrites WORK.TEST as a scratch copy of the input; it is
     empty once all chunks have been written.

   Fixes relative to the earlier version
     - the DATA statement and the OUTPUT statement now name the same data
       set (previously ttt.customer&i vs test.customer&i, a compile error);
     - the chunk count is rounded up, so the last partial chunk is written
       instead of being left behind in WORK.TEST;
     - the observation count is captured even when the input is empty;
     - macro variables are local to the macro. */
%macro split(data=, k=, outlib=test, prefix=customer);
    %local nobs n i;

    /* Scratch copy. NOBS= is filled in at compile time, so the count can be
       stored before the first read and is valid for an empty input too. */
    data test;
        if _n_ = 1 then call symputx('nobs', count);
        set &data. nobs=count;
    run;

    /* Number of chunks, rounded up to include a final partial chunk. */
    %let n = %sysevalf(&nobs / &k, ceil);

    %put &nobs &n;

    %do i = 1 %to &n;
        /* Peel the first K rows off the scratch copy into chunk i and keep
           the remainder in WORK.TEST for the next iteration. */
        data &outlib..&prefix.&i test;
            set test;
            if _n_ <= &k then output &outlib..&prefix.&i;
            else output test;
        run;
    %end;
%mend split;

%split(data=test.totalforecast7,k=250000);

8.hash合并数据集----网上大神的例子

/* Look up each observation of bg.test in bg.test_i by key variable i using
   a hash object (example found online). Every row of bg.test is written to
   bg.test_hash; when the key is found, the variables of bg.test_i are
   filled in from the hash table. */
data bg.test_hash;
    /* Never executes (_n_ is never 0); it only makes the variables of
       bg.test_i known to the DATA step at compile time. */
    if _n_ = 0 then set bg.test_i;

    /* Load the lookup table once, on the first iteration. */
    if _n_ = 1 then do;
        declare hash lookup(dataset: "bg.test_i");
        lookup.definekey("i");
        lookup.definedata(all: "yes");
        lookup.definedone();
    end;

    /* Clear values from the previous iteration so an unmatched row does not
       carry over the previous match. */
    call missing(of _all_);
    set bg.test;

    /* rc = 0 on a match; the data variables are then overwritten from the
       hash. The return code itself is not kept. */
    rc = lookup.find(key: i);
    drop rc;
run;

9.得到一年前的同一天

/* Shift purchase_date to the same calendar day one year earlier.
   The third argument is the number of intervals to move: -1 = one year
   back (the previous value of 2 moved the date two years forward). */
purchase_date = intnx('year', purchase_date, -1, "sameday");

10.将ytd销售转化为月度销售

/* Convert year-to-date sales into per-period sales: within each name/yr
   group the first row keeps its YTD value and every later row becomes the
   difference from the previous row. TEST1 must be sorted by name yr. */
data test;
    set test1;
    by name yr;

    /* LAG must run on every observation to keep its queue in step, so it
       is called unconditionally before the branch. */
    _prev_sales = lag(sales);

    if first.yr then sls_mon = sales;
    else sls_mon = sales - _prev_sales;

    drop _prev_sales;
run;

11.变量聚类

/* Variable clustering on out.path_cat_lan4.
   Step 1: dump the variable list of the data set into CONT. */
proc contents data=out.path_cat_lan4
              out=cont
              noprint;
run;

/* Step 2: build a blank-separated list of SAS name literals ('name'n) in
   macro variable NAME1, leaving out the two listed variables. */
proc sql noprint;
    select kcompress("'"||name||"'")||"n "
        into : name1 separated by ' '
        from cont
        where name not in ('customer_id','diaoyanclassnew');
quit;

%put &name1.;

/* Step 3: cluster the listed variables; statistics go to CLUS and the tree
   to CLUST_TREE. */
proc varclus data=out.path_cat_lan4(drop=customer_id)
             outstat=clus
             centroid
             short
             PLOTS(MAXPOINTS=400)
             outtree=clust_tree;
    var &name1.;
run;