# Data source:
# http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/
# (Dataset description: http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29 )
# Load the Wisconsin breast-cancer data from the working directory.
# NOTE(review): the column names used below (X1000025, X1.3) look like values
# from a first data row that read.csv() took as the header -- confirm whether
# the file has a header line; if not, header = FALSE would keep that row, but
# the column names referenced below would have to change with it.
breast <- read.csv("breast-cancer-wisconsin.data.txt")

# Work on every column except X1000025 (presumably the sample id -- confirm).
# Column-exclusion idiom:
# https://stackoverflow.com/questions/12868581/list-all-column-except-for-one-in-r
features <- breast[, names(breast) != "X1000025"]
head(features)

# First, find out which column contains missing data by listing the distinct
# values of each remaining column.
lapply(features, unique)

# Column X1.3 turns out to contain "?".
# Fraction of rows whose X1.3 value is "?":
unknown_rows <- breast[breast$X1.3 == "?", ]
nrow(unknown_rows) / nrow(breast)
breas