# 1、向量是有名字的,可以使用names()查看或者unname()去除
# 2、构建空的向量或者空的数据框接收循环结果的时候,必须把构建步骤放在循环之外,否则每次循环都会生成新的空的数据框,最终得到的数据框会有很多零值。
# Switch to the analysis folder so the relative file names used below resolve.
setwd(dir = "E:/天睿TERADATA/data_analysis")
# Load the raw input table.
rawdata <- read.csv(file = "test_data.csv")
#' Keep the numeric columns of a data frame and winsorize them.
#'
#' Non-numeric columns are dropped. In every remaining column, values below
#' the 1st percentile (p_1) are replaced by p_1 and values above the 99th
#' percentile (p_99) are replaced by p_99.
#'
#' @param dta A data frame.
#' @return A data frame holding only the numeric columns of `dta`, each one
#'   clamped to its own [p_1, p_99] range. Missing values stay `NA`.
data_prepare <- function(dta) {
  is_numeric_col <- vapply(dta, is.numeric, logical(1))
  # drop = FALSE keeps a data frame even when only one column survives.
  newdata <- dta[, is_numeric_col, drop = FALSE]
  for (i in seq_along(newdata)) {
    column <- newdata[[i]]
    # na.rm = TRUE: quantile() stops with an error on NA input otherwise.
    bounds <- quantile(
      column,
      probs = c(0.01, 0.99), na.rm = TRUE, names = FALSE
    )
    # Clamp into [p_1, p_99]; pmax()/pmin() leave NA entries as NA.
    newdata[[i]] <- pmin(pmax(column, bounds[1]), bounds[2])
  }
  newdata
}
# Run the preparation step on the raw data and export the resulting table.
newdata <- data_prepare(dta = rawdata)
write.csv(x = newdata, file = "newdata.csv")
#' Build a table of descriptive statistics, one row per column of `dta`.
#'
#' @param dta A data frame whose columns are summarised.
#'   NOTE(review): every column is handed to min/max/mean/sd/quantile as is,
#'   so the columns are expected to be numeric -- confirm against callers.
#' @return A data frame with one row per column of `dta` (row names are the
#'   column names) and the columns n, nmiss, nobs, min, max, mean, sd,
#'   p_1 ... p_100. `n` is the total number of values, `nmiss` the number of
#'   `NA`s and `nobs = n - nmiss`; all other statistics are computed on the
#'   non-missing values only.
statistic_framework <- function(dta) {
  # Percentiles to report: 1-5, 10-90 in steps of 5, and 91-100.
  percentiles <- c(1:5, seq(10, 90, by = 5), 91:100)
  stat_names <- c(
    "n", "nmiss", "nobs", "min", "max", "mean", "sd",
    paste0("p_", percentiles)
  )

  # Summarise one column as a plain numeric vector ordered like stat_names.
  data_statistic <- function(variable) {
    # Count before dropping NAs; counting afterwards always yields nmiss = 0.
    n <- length(variable)
    nmiss <- sum(is.na(variable))
    observed <- variable[!is.na(variable)]
    c(
      n, nmiss, n - nmiss,
      min(observed), max(observed), mean(observed), sd(observed),
      quantile(observed, probs = percentiles / 100, names = FALSE)
    )
  }

  # Preallocate once, outside the loop, and fill one row per input column.
  result <- matrix(
    0,
    nrow = ncol(dta), ncol = length(stat_names),
    dimnames = list(colnames(dta), stat_names)
  )
  for (i in seq_len(ncol(dta))) {
    result[i, ] <- data_statistic(dta[, i])
  }
  as.data.frame(result)
}
# Compute the statistics table for the test data and export it.
statistic_result <- statistic_framework(dta = rawdata)
write.csv(x = statistic_result, file = "statistic_result.csv")