# Background: after an exam covering three subjects, limited resources are
# allocated according to the results.
# Goal: split the students into tiers so that later management can be focused.

# Environment setup ----
# Print numbers with 2 significant digits (affects display only).
options(digits = 2)

# Data input ----
Student <- c(
  "John Davis", "Angela Williams", "Bullwinkle Moose", "David Jones",
  "Janice Markhammer", "Cheryl Cushing", "Reuven Ytzrhak", "Greg Knox",
  "Joel England", "Mary Rayburn"
)
Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
Science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
English <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
roster <- data.frame(Student, Math, Science, English, stringsAsFactors = FALSE)

# Data inspection ----
head(roster)
dim(roster)
summary(roster)

# Standardise each subject and average the z-scores per student ----
# The three subjects are on different scales, so each column is centred and
# scaled before the per-student mean is taken.
z <- scale(roster[, 2:4])
score <- rowMeans(z)
roster$score <- score

# Quantile cut points (80%, 60%, 40%, 20%, in that order) ----
y <- quantile(score, c(.8, .6, .4, .2))

# Assign tiers ----
# findInterval(left.open = TRUE) counts how many cut points each score
# strictly exceeds (0 to 4). Subtracting from 5 gives:
#   1: score >  y[1]
#   2: y[2] < score <= y[1]
#   3: y[3] < score <= y[2]
#   4: y[4] < score <= y[3]
#   5: score <= y[4]
roster$grade <- 5 - findInterval(roster$score, sort(y), left.open = TRUE)

# Sort by score, best first ----
roster <- roster[order(-roster$score), ]

# Split the full name into first and last name and build the final table ----
name <- strsplit(roster$Student, " ")
# vapply guarantees a character vector with one entry per student.
firstname <- vapply(name, "[", character(1), 1)
lastname <- vapply(name, "[", character(1), 2)
# Drop the original Student column (column 1) and keep the names as character.
roster <- data.frame(
  firstname = firstname,
  lastname = lastname,
  roster[, -1],
  stringsAsFactors = FALSE
)
roster
#总结:
#1、分类技术的目的,是将庞大的集合,进行分解,有针对性地进行处理;
#2、分类的依据:A、标准衡量标尺;B、相似度