library(reshape)
# Correlation matrix of columns 4 to 18 of algae, computed on complete rows only
cor <- cor(algae[, 4:18], use = "complete.obs")
# Reshape the correlation matrix to long format: one row per variable pair
meltCor <- melt(cor)
# Use short lower-case column names
names(meltCor) <- c("x1", "x2", "value")
# Drop the pairing of every variable with itself (the cor == 1 diagonal).
# Comparing the names avoids an exact floating point test against 1 and the
# `-which(...)` pitfall, which removes every row when nothing matches.
meltCor <- meltCor[as.character(meltCor$x1) != as.character(meltCor$x2), ]
# Rows of algae that contain at least one missing value
NAalgae <- algae[!complete.cases(algae), ]
head(NAalgae)
# Rows of algae without any missing value
comAlgae <- algae[complete.cases(algae), ]
# Compute the linear regression coefficients for every variable pair listed in
# lmData (one simple regression `x1 ~ x2` per row).
#
# Arguments:
#   lmData  data frame whose first column holds the response variable name,
#           whose second column holds the predictor name and whose third
#           column holds the correlation of the pair.
#   data    data frame the regressions are fitted on; defaults to the
#           script-level `comAlgae` (the complete cases).
#
# Returns a data frame with one row per row of lmData and the columns
#   x1, x2  the variable names (character),
#   cor     the correlation copied from lmData,
#   a       the slope of x1 ~ x2,
#   b       the intercept of x1 ~ x2.
lmCoef <- function(lmData, data = comAlgae) {
  n <- nrow(lmData)
  x1 <- as.character(lmData[[1]])
  x2 <- as.character(lmData[[2]])
  a <- numeric(n)
  b <- numeric(n)
  # seq_len() keeps the loop from running when lmData has no rows
  for (i in seq_len(n)) {
    # Fit each model once and read both coefficients from the same fit
    fit <- lm(as.formula(paste0(x1[i], "~", x2[i])), data = data)
    est <- coef(fit)
    b[i] <- est[[1]]
    a[i] <- est[[2]]
  }
  # Build the result column-wise so the numeric columns stay numeric instead
  # of being coerced to character by a mixed cbind()
  data.frame(
    x1 = x1,
    x2 = x2,
    cor = lmData[[3]],
    a = a,
    b = b,
    stringsAsFactors = FALSE
  )
}
# Build one table per variable in meltCor: its partner variables ordered from
# the strongest to the weakest absolute correlation, with the regression
# coefficients added by lmCoef. The list is named after the variables.
corVars <- unique(meltCor[, "x1"])
corList <- lapply(corVars, function(element) {
  df <- subset(meltCor, x1 == element)
  df <- df[order(abs(df$value), decreasing = TRUE), ]
  lmCoef(df)
})
names(corList) <- as.character(corVars)
# Show the table of the first variable
corList[[1]]
# Fill every missing value of NAalgae with the prediction of the linear model
# stored in corList: for the column that is missing, use the most strongly
# correlated partner variable that is observed in the same row.
for (i in seq_len(nrow(NAalgae))) {
  rowIsNA <- is.na(NAalgae[i, ])
  NAnames <- names(NAalgae)[rowIsNA]
  completeNames <- names(NAalgae)[!rowIsNA]
  for (name in NAnames) {
    df <- corList[[name]]
    # Columns without an entry in corList cannot be predicted here
    if (is.null(df)) {
      next
    }
    # First candidate (corList tables are ordered by |cor|) that is observed in
    # this row; match() returns NA when there is none, which replaces the
    # unbounded search that would otherwise never terminate
    hit <- match(TRUE, as.character(df$x2) %in% completeNames)
    if (is.na(hit)) {
      next
    }
    CoefName <- as.character(df$x2[hit])
    a <- as.numeric(df$a[hit])
    b <- as.numeric(df$b[hit])
    # prediction = slope * observed value + intercept
    NAalgae[i, name] <- NAalgae[i, CoefName] * a + b
  }
}
# NAalgae after the regression-based imputation
head(NAalgae)
# Second approach: k-nearest-neighbour imputation, starting again from the
# original data.
# NOTE(review): `algae` and `knnImputation` are not defined in the visible part
# of this script; presumably they come from a package attached elsewhere
# (e.g. DMwR) - confirm.
data(algae)
# Rows of algae that contain at least one missing value
NAalgae <- algae[!complete.cases(algae), ]
NAalgae
# Rows of algae without any missing value
comAlgae <- algae[complete.cases(algae), ]
# Put one incomplete row at a time on top of the complete cases, impute it
# from its 10 nearest neighbours and keep the imputed first row
for (i in seq_len(nrow(NAalgae))) {
  oneNAalgae <- rbind(NAalgae[i, ], comAlgae)
  NAalgae[i, ] <- knnImputation(oneNAalgae, k = 10)[1, ]
}
head(NAalgae)