一、
最近在做机器学习分类问题的评价,整理了一些代码和评价方法。
总的来说,用随机森林或其他分类器完成分类后,对混淆矩阵进行处理即可得到 rr 和 Kappa 系数;此外,对于二分类变量,还可以计算 ROC 曲线和 AUC(曲线下面积)。针对随机森林,我写了以下代码:
library(randomForest)
library(moments)
library(car)
library("soiltexture")
library(caret)
# Switch to the directory that holds the input CSV.
setwd("d:/Z/test")

# One 30 x 1 zero-filled matrix per statistic; the evaluation loop below
# writes one entry per repetition into each of them.
A <- matrix(0, nrow = 30, ncol = 1)
ALOW <- matrix(0, nrow = 30, ncol = 1)
AUP <- matrix(0, nrow = 30, ncol = 1)
KAPPA <- matrix(0, nrow = 30, ncol = 1)

# Load the full data set and convert these five columns to factors.
trainall <- read.csv("classT.csv")
trainall[c("veget", "soilt", "lc", "lcc", "geo")] <- lapply(
  trainall[c("veget", "soilt", "lc", "lcc", "geo")],
  as.factor
)
# Repeated random hold-out evaluation of a random forest classifier.
#
# Every repetition draws a fresh train/test split from `trainall`, fits a
# forest on the training rows, predicts the held-out rows and stores four
# entries of caret's confusionMatrix()$overall in the result matrices:
#   A     <- "Accuracy"
#   KAPPA <- "Kappa"
#   ALOW  <- "AccuracyLower"
#   AUP   <- "AccuracyUpper"
n_train <- 449    # rows sampled into the training set on every draw
n_classes <- 10   # number of class levels each split is required to show

for (ttt in seq_along(A)) {
  # Redraw until the training set, the test set and the predictions all
  # show `n_classes` levels.
  # NOTE(review): there is no retry limit, so this never terminates if such
  # a split cannot be drawn from `trainall`.
  repeat {
    repeat {
      index <- sample(seq_len(nrow(trainall)), n_train)
      train <- trainall[index, ]
      testdata <- trainall[-index, ]
      # factor() drops levels that do not occur in the subset, so nlevels()
      # counts the classes actually present in each split.
      train$class <- factor(train$class)
      testdata$class <- factor(testdata$class)
      if (nlevels(train$class) == n_classes &&
          nlevels(testdata$class) == n_classes) {
        break
      }
    }
    TF <- randomForest(class ~ ., data = train, mtry = 5, ntree = 1000,
                       importance = TRUE)
    # Class labels are required here: the level check and confusionMatrix()
    # below both operate on a factor. The previous call passed `tpye=prob`,
    # a misspelled argument that predict.randomForest does not define, so
    # the request is now spelled out explicitly.
    pre <- predict(TF, testdata, type = "response")
    if (nlevels(pre) == n_classes) {
      break
    }
  }
  cc <- confusionMatrix(pre, testdata$class)
  # Kept as a column matrix because code outside this section may read `a`.
  a <- matrix(cc$overall)
  # Read the statistics by name rather than by position in cc$overall.
  A[ttt] <- cc$overall[["Accuracy"]]
  KAPPA[ttt] <- cc$overall[["Kappa"]]
  ALOW[ttt] <- cc$overall[["AccuracyLower"]]
  AUP[ttt] <- cc$overall[["AccuracyUpper"]]
}
# Totals of the per-repetition statistics over every stored run.
# sum() replaces the manual accumulation loop and no longer depends on a
# hard-coded repetition count.
a1 <- sum(A)      # total of A
a2 <- sum(KAPPA)  # total of KAPPA
a3 <- sum(ALOW)   # total of ALOW
a4 <- sum(AUP)    # total of AUP

# 4 x 2 zero matrix; presumably filled by code below this section
# (not visible here) -- TODO confirm.
RESULT <- matrix(0, 4, 2)
</