# Exercise: using the caret package.
library(caret)
library(tidyverse)

# Read the raw data set and inspect its structure.
data <- read.csv("npc1.csv")
str(data)

# Drop the first column (the observation id).
data_raw <- data[, -1]

# Derive the logical outcome `series` (TRUE when lose_rate > 0.4, i.e. the
# "serious" class) and then remove the original lose_rate column.
data_use <- data_raw %>%
  mutate(series = lose_rate > 0.4) %>%
  select(-lose_rate)
head(data_use)
# Visualize and select features by estimating their importance.
library(RANN) # package needed by the KNN imputation method

# One-hot encode the categorical variables of the prepared data set.
# NOTE(review): the original formula was `target ~ .` on the raw `data`, but
# no `target` column is created anywhere in this script; `series` is the
# outcome derived in `data_use` -- confirm this is the intended response.
dummies_model <- dummyVars(series ~ ., data = data_use)
data_mat <- predict(dummies_model, newdata = data_use)
data_frame <- data.frame(data_mat) # build a new data frame from the matrix

# Fill in missing values with KNN imputation. The model is applied to the
# same data set it was fitted on, and the NA check inspects the imputed
# result rather than the untouched raw input.
missingdata_model <- preProcess(data_use, method = "knnImpute")
impute_missingdata <- predict(missingdata_model, newdata = data_use)
anyNA(impute_missingdata)
# Scale the data and remove near-zero-variance and collinear variables.
# All methods must be given as one character vector: extra positional
# strings would be matched to other preProcess() arguments, not `method`.
# NOTE(review): this step is fitted on and applied to the raw `data`, not the
# imputed output of the previous step -- confirm that is intended.
preProcess_sc_nzv_corr_model <- preProcess(
  data,
  method = c("scale", "nzv", "corr")
)
transfrom_data <- predict(preProcess_sc_nzv_corr_model, newdata = data)