# Data: http://www.kaggle.com/c/titanic-gettingStarted/data
# Fit a C5.0 model on `train`, using every column except column 2 as
# predictors and `train$Survived` as the outcome.
# NOTE(review): column 2 is presumably the Survived column itself -- confirm.
new_model <- C5.0(train[, -2], train$Survived)

# Rename the first factor level of Cabin and Embarked to "missing".
# NOTE(review): this assumes the first level is the empty string "" -- confirm.
# These renames run after the fit above, so they only affect later uses of
# `train`, not the `new_model` object already built.
levels(train$Cabin)[1] <- "missing"
levels(train$Embarked)[1] <- "missing"
# [Top of form]
# [Bottom of form]
library(C50)
# Train a C5.0 model on the training rows and write predictions for the test
# rows to "c50dtree.csv". Expects data frames `train` and `test` to exist.

# Give `test` a Survived column so both frames can be row-bound.
# NOTE(review): the right-hand side of this assignment was missing from the
# file; NA is assumed as the placeholder -- confirm.
test$Survived <- NA

# Stack both sets so the level renames below apply to train and test rows alike.
combinedData <- rbind(train, test)
combinedData$Survived <- factor(combinedData$Survived)

# fixing empty character level names
# NOTE(review): assumes the first level of each column is "" -- confirm.
levels(combinedData$Cabin)[1] <- "missing"
levels(combinedData$Embarked)[1] <- "missing"

# Split the combined frame back into its train and test rows by position.
# Row counts come from the inputs rather than being hard-coded.
n_train <- nrow(train)
new_train <- combinedData[seq_len(n_train), ]
new_test <- combinedData[seq(n_train + 1, length.out = nrow(test)), ]

# Column 2 is left out of the predictors.
# NOTE(review): presumably because it is the Survived outcome -- confirm.
new_model <- C5.0(new_train[, -2], new_train$Survived)
new_model_predict <- predict(new_model, new_test)

# Two-column submission: passenger id and predicted class.
submitC50 <- data.frame(
  PassengerId = new_test$PassengerId,
  Survived = new_model_predict
)
write.csv(submitC50, file = "c50dtree.csv", row.names = FALSE)

summary(new_model)
#' Rename a leading blank factor level to "?" in every column of a data frame
#'
#' For each column, if the column has levels and the first level is the empty
#' string "", that level is renamed to "?". Columns without levels (for
#' example numeric columns) and factors whose first level is not "" are left
#' unchanged.
#'
#' @param dataframe A data frame; may have zero columns.
#' @return The data frame with the renamed levels.
removeBlankLevelsInDataFrame <- function(dataframe) {
  # seq_len() yields an empty sequence for a zero-column frame, unlike 1:0.
  for (i in seq_len(ncol(dataframe))) {
    column_levels <- levels(dataframe[[i]])
    # levels() is NULL for non-factors, and a zero-level factor would make
    # `column_levels[1] == ""` evaluate to NA; guard against both.
    if (length(column_levels) > 0 && isTRUE(column_levels[1] == "")) {
      levels(dataframe[[i]])[1] <- "?"
    }
  }
  dataframe
}
#' Rename a leading blank factor level to "?" in a single vector
#'
#' If the vector has levels and the first level is the empty string "", that
#' level is renamed to "?". Anything else (non-factors, factors whose first
#' level is not "", zero-level factors) is returned unchanged.
#'
#' @param vector A factor or any other vector.
#' @return The vector with the renamed level.
removeBlankLevelsInVector <- function(vector) {
  vector_levels <- levels(vector)
  # levels() is NULL for non-factors, and a zero-level factor would make
  # `vector_levels[1] == ""` evaluate to NA; guard against both.
  if (length(vector_levels) > 0 && isTRUE(vector_levels[1] == "")) {
    levels(vector)[1] <- "?"
  }
  vector
}
# Replace a leading blank level with "?" in the predictors and the outcome,
# then fit the model on the cleaned inputs.
trainX <- removeBlankLevelsInDataFrame(trainX)
trainY <- removeBlankLevelsInVector(trainY)
model <- C50::C5.0.default(trainX, trainY)