1、导入数据并新增计算列
/* Read the tab-delimited TOMHS text file into WORK.WEIGHT and add a
   computed BMI column. */
DATA weight;
    /* FIRSTOBS=2 starts reading at the second line of the file
       (presumably to skip a header row - confirm against the file).
       DLM='09'x makes the tab character the field delimiter. */
    INFILE 'E:\11\6\tomhs.txt' FIRSTOBS=2 DLM='09'x;
    /* ptid, clinic and sex are read as character; height and weight as numeric. */
    INPUT ptid $ clinic $ sex $ height weight;
    /* New computed column.
       NOTE(review): 703.1 looks like the pounds/inches BMI factor - confirm units. */
    bmi = (weight * 703.1) / (height * height);
RUN;
bmi 就是计算后新生成的列。
2、NOOBS 不显示自动编号
/* Print the first 3 observations of WEIGHT.
   NOOBS suppresses the automatic observation-number column. */
PROC PRINT DATA=weight (OBS=3) NOOBS;
    TITLE 'Proc Print: TOMHS 数据的3条观测';
RUN;
3、读入数据后,给列打上标签
/* Build TDATA from tomhs3.dat using list input, then attach descriptive
   labels to the clinic and group columns. */
data tdata;
    infile 'e:\11\6\tomhs3.dat';
    /* ptid and clinic are character; group and sex are numeric. */
    input ptid $ clinic $ group sex;
    /* One LABEL statement can carry several variable = 'text' pairs. */
    label
        clinic = 'ClinicalCenter'
        group  = 'Drug Treatment Group'
    ;
run;
4、从不同位置取数据,赋值于变量,dlm定义字段间的分隔符
/* Read fixed-position fields from tomhs4.dat and derive a two-level
   education grade (grad1) from educ. */
DATA tdata2;
    /* DLM='09'x declares tab as the field delimiter. */
    INFILE 'e:/11/6/tomhs4.dat' DLM='09'x;
    /* @n moves the column pointer to column n before each field is read
       with the informat that follows the variable name. */
    INPUT
        @1  ptid  $6.
        @8  educ  1.
        @10 sbp12 3.
    ;
    /* Caution: a missing educ compares lower than any number, so it
       satisfies educ < 7 and receives grad1 = 2. Add "and educ ne ."
       to the first test if missing values must stay ungraded. */
    if educ < 7 then grad1 = 2;
    else if educ >= 7 then grad1 = 1;
RUN;
5、利用 format 对数据分段;下例中学习 IN 运算符,以及缺失值的注意要点:缺失值小于任何数值
* NOTE(review): this fragment has no DATA statement of its own in view.
  Presumably it belongs inside the DATA step that reads educ - confirm;
* For educ codes 1 through 9 the two approaches below give the same,
  correct result. Neither assigns a grade when educ is missing. A missing
  value compares lower than any number, so a bare educ < 7 test would
  also match missing - hence the explicit educ ne . guard;
if educ < 7 and educ ne . then grad2 = 2; else
if educ >=7 then grad2 = 1;
* IN is a SAS operator (not a function) that tests membership in a list
  of values. A missing educ matches neither list, so grad3 stays missing;
if educ IN(1,2,3,4,5,6) then grad3 = 2; else
if educ IN(7,8,9) then grad3 = 1;
run;
/* Define display formats that bin numeric codes into labelled groups:
   GRAD   - education code (1-9) into two graduate categories
   SBPCAT - systolic blood pressure into three bands. */
PROC FORMAT;
    VALUE grad
        1 - 6 = '< College Graduate'
        7 - 9 = 'College Graduate'
    ;
    /* The -< operator excludes the upper bound, so adjacent ranges meet
       with no gap. Non-integer readings such as 119.5 or 139.5 then fall
       into a band instead of being printed unformatted. LOW does not
       include missing values in a numeric format. */
    VALUE sbpcat
        LOW -< 120  = '<120'
        120 -< 140  = '120-139'
        140 -  HIGH = '140+'
    ;
RUN;
6、根据条件,输出多个数据集
/* Split the weight data into one data set per clinic (A, B, C, D).
   NOTE(review): the MYLIB libref is not assigned in the visible code -
   confirm a LIBNAME statement exists before this step runs. */
DATA clinica clinicb clinicc clinicd;
    SET mylib.weight;
    /* KEEP applies to every output data set named on the DATA statement. */
    KEEP ptid clinic weight sex;
    /* Each row is written to at most one data set. Rows whose clinic is
       not A, B, C or D are written to none of them. */
    if clinic = 'A' then OUTPUT clinica;
    else if clinic = 'B' then OUTPUT clinicb;
    else if clinic = 'C' then OUTPUT clinicc;
    else if clinic = 'D' then OUTPUT clinicd;
RUN;
7、指定变量的输入格式 informat ,显示格式format
/* Build FOLLOWUP from in-stream data lines. datedth is read with the
   MMDDYY8. informat (input format) and displayed with the YYMMDD10.
   format (output format). */
data followup;
    /* ATTRIB sets both the informat and the format in one statement.
       Declaring datedth here, before INPUT, keeps it as the first
       variable in the data set. */
    attrib datedth informat=mmddyy8. format=yymmdd10.;
    /* In-stream data is the default input source, so no INFILE is needed. */
    input id $ datedth cause;
    cards;
B00714 01/13/92 3
D02027 03/19/91 1
C00601 12/21/90 1
;