### Step 1: set the working directory to the folder that holds the data files.
setwd("C:/users/TaoJie/Desktop/2016-2017新学期经验似然/drugsatfda")
### File paths in R must use "/" in place of the Windows default "\".
### Install the helper packages only when they are missing, so that re-running
### the script does not download and reinstall them every time.
required_pkgs <- c("readr", "recipes", "doParallel", "backports")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
### Attach the packages used below. library() stops with an error when a
### package is missing, whereas require() only returns FALSE and lets the
### script fail later with a less obvious message. A package that is not yet
### installed can be added with install.packages("yourPackage").
### The attach order is kept as it was because it decides which package wins
### when two of them export the same function name.
library(readr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(caret)
library(corrplot)
library(Hmisc)
library(parallel)
library(doParallel)
library(ggthemes)
library(foreach)
library(VIM)
library(mice)
# Parallel processing set-up.
# If a cluster from an earlier run is still around, stop it first so its
# worker processes are not left running. Errors are ignored because a stale
# cluster may already have lost its workers.
if (exists("cluster_Set") && inherits(cluster_Set, "cluster")) {
  tryCatch(stopCluster(cluster_Set), error = function(e) NULL)
}
n_Cores <- detectCores() ## number of CPU cores detected on this machine
if (is.na(n_Cores)) {
  # detectCores() returns NA when the core count cannot be determined.
  n_Cores <- 1L
}
cluster_Set <- makeCluster(n_Cores) ## start one worker per detected core
registerDoParallel(cluster_Set)
# NOTE(review): call stopCluster(cluster_Set) once the parallel work is done.
### List the files in the working directory, then load and inspect the data.
dir()
TE <- read.table("TE.txt", sep = "")
head(TE)
# fix() opens an editor and writes any edits back into TE, so it is only run
# when someone is present to use it; batch runs skip it.
if (interactive()) {
  fix(TE)
}
str(TE)
describe(TE)
### Missing values
## Set 100 randomly chosen entries of the third column to NA.
set.seed(1001)
n_Missing <- 100
# Draw the row indices from the rows that actually exist rather than from a
# hard-coded row count; an index past the last row would not blank out an
# existing value.
stopifnot(nrow(TE) >= n_Missing)
random_Number <- sample(seq_len(nrow(TE)), n_Missing)
TE_Original <- TE
TE_Original[random_Number, 3] <- NA
describe(TE_Original)
# fix() opens an editor and writes edits back, so only run it interactively.
if (interactive()) {
  fix(TE_Original)
}
# Plot the missing-data pattern as counts (prop = FALSE) with numbers shown.
aggr(TE_Original, prop = FALSE, numbers = TRUE)
### Impute missing data
# Keep the copy that still contains the NAs: both imputation methods must
# start from it. Running mice() on the kNN-imputed data would leave it with
# nothing to impute.
TE_With_NA <- TE_Original
original_Impute <- preProcess(TE_With_NA, method = "knnImpute")
# As before, TE_Original ends up holding the kNN-imputed data. caret's
# "knnImpute" also centres and scales the numeric columns, so these values
# are standardised rather than on the original scale.
TE_Original <- predict(original_Impute, TE_With_NA)
imp <- mice(data = TE_With_NA, m = 5)
imp$imp
### Compare results of imputation
# Undo the centring and scaling for the imputed column so that the kNN
# estimates are comparable with the true values held back from TE.
# Assumes column 3 is numeric (preProcess only stores mean/std for numeric
# columns) -- TODO confirm against the data.
imputed_Name <- names(TE)[3]
knn_Imputed <- TE_Original[random_Number, 3] *
  original_Impute$std[[imputed_Name]] +
  original_Impute$mean[[imputed_Name]]
compare_Imputation <- data.frame(
  original = TE[random_Number, 3],
  knn_imputed = knn_Imputed
)
compare_Imputation
### NOTE(review): this section and the steps after it repeat the start of the
### file line for line -- probably an accidental paste; consider removing
### the repeat.
setwd("C:/users/TaoJie/Desktop/2016-2017新学期经验似然/drugsatfda")
### File paths in R must use "/" in place of the Windows default "\".
### Install the helper packages only when they are missing, so that re-running
### the script does not download and reinstall them every time.
required_pkgs <- c("readr", "recipes", "doParallel", "backports")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
### Attach the packages used below. library() stops with an error when a
### package is missing, whereas require() only returns FALSE and lets the
### script fail later with a less obvious message. A package that is not yet
### installed can be added with install.packages("yourPackage").
### The attach order is kept as it was because it decides which package wins
### when two of them export the same function name.
library(readr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(caret)
library(corrplot)
library(Hmisc)
library(parallel)
library(doParallel)
library(ggthemes)
library(foreach)
library(VIM)
library(mice)
# Parallel processing set-up.
# If a cluster from an earlier run is still around, stop it first so its
# worker processes are not left running. Errors are ignored because a stale
# cluster may already have lost its workers.
if (exists("cluster_Set") && inherits(cluster_Set, "cluster")) {
  tryCatch(stopCluster(cluster_Set), error = function(e) NULL)
}
n_Cores <- detectCores() ## number of CPU cores detected on this machine
if (is.na(n_Cores)) {
  # detectCores() returns NA when the core count cannot be determined.
  n_Cores <- 1L
}
cluster_Set <- makeCluster(n_Cores) ## start one worker per detected core
registerDoParallel(cluster_Set)
# NOTE(review): call stopCluster(cluster_Set) once the parallel work is done.
### List the files in the working directory, then load and inspect the data.
dir()
TE <- read.table("TE.txt", sep = "")
head(TE)
# fix() opens an editor and writes any edits back into TE, so it is only run
# when someone is present to use it; batch runs skip it.
if (interactive()) {
  fix(TE)
}
str(TE)
describe(TE)
### Missing values
## Set 100 randomly chosen entries of the third column to NA.
set.seed(1001)
n_Missing <- 100
# Draw the row indices from the rows that actually exist rather than from a
# hard-coded row count; an index past the last row would not blank out an
# existing value.
stopifnot(nrow(TE) >= n_Missing)
random_Number <- sample(seq_len(nrow(TE)), n_Missing)
TE_Original <- TE
TE_Original[random_Number, 3] <- NA
describe(TE_Original)
# fix() opens an editor and writes edits back, so only run it interactively.
if (interactive()) {
  fix(TE_Original)
}
# Plot the missing-data pattern as counts (prop = FALSE) with numbers shown.
aggr(TE_Original, prop = FALSE, numbers = TRUE)
### Impute missing data
# Keep the copy that still contains the NAs: both imputation methods must
# start from it. Running mice() on the kNN-imputed data would leave it with
# nothing to impute.
TE_With_NA <- TE_Original
original_Impute <- preProcess(TE_With_NA, method = "knnImpute")
# As before, TE_Original ends up holding the kNN-imputed data. caret's
# "knnImpute" also centres and scales the numeric columns, so these values
# are standardised rather than on the original scale.
TE_Original <- predict(original_Impute, TE_With_NA)
imp <- mice(data = TE_With_NA, m = 5)
imp$imp
### Compare results of imputation
# Undo the centring and scaling for the imputed column so that the kNN
# estimates are comparable with the true values held back from TE.
# Assumes column 3 is numeric (preProcess only stores mean/std for numeric
# columns) -- TODO confirm against the data.
imputed_Name <- names(TE)[3]
knn_Imputed <- TE_Original[random_Number, 3] *
  original_Impute$std[[imputed_Name]] +
  original_Impute$mean[[imputed_Name]]
compare_Imputation <- data.frame(
  original = TE[random_Number, 3],
  knn_imputed = knn_Imputed
)
compare_Imputation