数据挖掘:关联规则挖掘实操

课本习题:探究学生成绩和学生特征的关联规则。

一.加载程序包

#0.加载程序包

library(arules)
library(arulesViz)
library(dplyr)

二.读入数据 处理数据

# 1. Read the CSV into a data frame, declaring the column types up front:
#    the first five columns (student attributes) are read as character and
#    the last three (the scores) as numeric.
StudentsPerformance <- read.csv(
  "machine experiment/data/StudentsPerformance.csv",
  colClasses = c(rep("character", 5), rep("numeric", 3))
)

# Convert every student-attribute column to a factor with as.factor().
StudentsPerformance <- StudentsPerformance %>%
  mutate(across(
    c(gender, race.ethnicity, parental.level.of.education,
      lunch, test.preparation.course),
    as.factor
  ))
# 2. Bin each of the math, reading and writing scores into three groups and
#    store the result as a factor. cut() builds right-closed intervals, so the
#    groups are (0,59], (59,84] and (84,100].
#    The lowest break is -Inf instead of 0: with a lower break of 0 and the
#    default include.lowest = FALSE, a score of exactly 0 would become NA and
#    drop out of the analysis. The labels are spelled out so the level names
#    stay "(0,59]", "(59,84]" and "(84,100]", which the apriori() rhs filters
#    in this script match against.
#    cut() already returns a factor, so no extra as.factor() is needed.
score_breaks <- c(-Inf, 59, 84, 100)
score_labels <- c("(0,59]", "(59,84]", "(84,100]")

StudentsPerformance <- StudentsPerformance %>%
  mutate(
    math.score = cut(math.score, breaks = score_breaks,
                     labels = score_labels),
    reading.score = cut(reading.score, breaks = score_breaks,
                        labels = score_labels),
    writing.score = cut(writing.score, breaks = score_breaks,
                        labels = score_labels)
  )

三.关联分析 查看结果

数学成绩(0,59]

# 3. The rules relating student attributes to each score are mined with a
#    minimum support of 0.1 and a minimum confidence of 0.5.
#    When mining the rules for one score the other two scores are not wanted,
#    so first build one data frame per subject that holds the student
#    attributes plus that single score.
StudentsPerformance_math <- select(
  StudentsPerformance, -reading.score, -writing.score
)
StudentsPerformance_reading <- select(
  StudentsPerformance, -math.score, -writing.score
)
StudentsPerformance_writing <- select(
  StudentsPerformance, -reading.score, -math.score
)
# 4. Find what a math score below 60 is associated with: mine the rules whose
#    right-hand side is math.score=(0,59] (support >= 0.1, confidence >= 0.5)
#    and show the six rules with the highest lift. The non-redundant subset is
#    inspected and drawn as a directed graph further down.
rules_math_below60 <- apriori(
  StudentsPerformance_math,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("math.score=(0,59]")),
  # Spell out FALSE: the shorthand F is an ordinary variable that can be
  # reassigned.
  control = list(verbose = FALSE)
)

inspect(head(rules_math_below60, by = "lift"))
    lhs                               rhs                 support confidence coverage  lift count
[1] {gender=female,                                                                              
     lunch=free/reduced}           => {math.score=(0,59]}   0.106     0.5608    0.189 1.742   106
[2] {lunch=free/reduced,                                                                         
     test.preparation.course=none} => {math.score=(0,59]}   0.120     0.5357    0.224 1.664   120
     
inspect(head(rules_math_below60[!is.redundant(rules_math_below60)],by="lift"))
    lhs                               rhs                 support confidence coverage  lift count
[1] {gender=female,                                                                              
     lunch=free/reduced}           => {math.score=(0,59]}   0.106     0.5608    0.189 1.742   106
[2] {lunch=free/reduced,                                                                         
     test.preparation.course=none} => {math.score=(0,59]}   0.120     0.5357    0.224 1.664   120

# Drop the redundant rules, then draw the six highest-lift survivors as a
# graph.
keep_math_below60 <- !is.redundant(rules_math_below60)
rules_math_below60_pruned <- rules_math_below60[keep_math_below60]
top_math_below60 <- head(rules_math_below60_pruned, by = "lift")
plot(top_math_below60, method = "graph")

在这里插入图片描述

数学成绩(59,84]

#5.挖掘数学成绩大于等于60分小于85跟什么有关 查看分析结果 查看提升值排行前六位的非冗余规则 绘制提升值大于1的无冗余规则的关联规则有向图

> rules_math_60to85 <- apriori(StudentsPerformance_math,
+                               parameter = list(supp=0.1,conf=0.5),
+                               appearance = list(rhs=c('math.score=(59,84]')),
+                               control =list(verbose=F))
+ 
> inspect(head(rules_math_60to85,by='lift'))
    lhs                                    rhs                  support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                           
     lunch=standard}                    => {math.score=(59,84]}   0.110     0.6587    0.167 1.176   110
[2] {gender=male,                                                                                      
     lunch=standard}                    => {math.score=(59,84]}   0.196     0.6203    0.316 1.108   196
[3] {race.ethnicity=group D}            => {math.score=(59,84]}   0.162     0.6183    0.262 1.104   162
[4] {lunch=standard,                                                                                   
     test.preparation.course=completed} => {math.score=(59,84]}   0.140     0.6167    0.227 1.101   140
[5] {gender=male,                                                                                      
     lunch=standard,                                                                                   
     test.preparation.course=none}      => {math.score=(59,84]}   0.125     0.6158    0.203 1.100   125
[6] {lunch=standard}                    => {math.score=(59,84]}   0.393     0.6093    0.645 1.088   393

> inspect(head(rules_math_60to85[!is.redundant(rules_math_60to85)],by="lift"))
    lhs                                    rhs                  support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                           
     lunch=standard}                    => {math.score=(59,84]}   0.110     0.6587    0.167 1.176   110
[2] {gender=male,                                                                                      
     lunch=standard}                    => {math.score=(59,84]}   0.196     0.6203    0.316 1.108   196
[3] {race.ethnicity=group D}            => {math.score=(59,84]}   0.162     0.6183    0.262 1.104   162
[4] {lunch=standard,                                                                                   
     test.preparation.course=completed} => {math.score=(59,84]}   0.140     0.6167    0.227 1.101   140
[5] {lunch=standard}                    => {math.score=(59,84]}   0.393     0.6093    0.645 1.088   393
[6] {gender=male,                                                                                      
     test.preparation.course=completed} => {math.score=(59,84]}   0.105     0.6034    0.174 1.078   105
     
> rules_math_60to85_pruned <- rules_math_60to85[!is.redundant(rules_math_60to85)]
> plot(head(rules_math_60to85_pruned,by="lift"),method = "graph")

在这里插入图片描述

数学成绩(84,100]

# 6. Find what a math score in (84,100] is associated with: mine the rules
#    with that right-hand side (support >= 0.1, confidence >= 0.5), show the
#    six highest-lift rules before and after removing redundant ones, and draw
#    the non-redundant ones as a graph.
rules_math_above85 <- apriori(
  StudentsPerformance_math,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("math.score=(84,100]")),
  control = list(verbose = FALSE)
)
inspect(head(rules_math_above85, by = "lift"))
inspect(head(rules_math_above85[!is.redundant(rules_math_above85)],
             by = "lift"))
rules_math_above85_pruned <-
  rules_math_above85[!is.redundant(rules_math_above85)]

# plot() stops with "x contains 0 rules!" when handed an empty rule set, which
# would abort the rest of the script, so only draw when rules were found.
if (length(rules_math_above85_pruned) > 0) {
  plot(head(rules_math_above85_pruned, by = "lift"), method = "graph")
} else {
  message("No rules found for math.score=(84,100]; nothing to plot.")
}

阅读成绩(0,59]

# Find what a reading score in (0,59] is associated with: mine the rules with
# that right-hand side (support >= 0.1, confidence >= 0.5), show the six
# highest-lift rules before and after removing redundant ones, and draw the
# non-redundant ones as a graph.
rules_reading_below60 <- apriori(
  StudentsPerformance_reading,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("reading.score=(0,59]")),
  control = list(verbose = FALSE)
)
inspect(head(rules_reading_below60, by = "lift"))
inspect(head(rules_reading_below60[!is.redundant(rules_reading_below60)],
             by = "lift"))
rules_reading_below60_pruned <-
  rules_reading_below60[!is.redundant(rules_reading_below60)]

# plot() stops with "x contains 0 rules!" when handed an empty rule set, which
# would abort the rest of the script, so only draw when rules were found.
if (length(rules_reading_below60_pruned) > 0) {
  plot(head(rules_reading_below60_pruned, by = "lift"), method = "graph")
} else {
  message("No rules found for reading.score=(0,59]; nothing to plot.")
}

阅读成绩(59,84]

> rules_reading_60to85 <- apriori(StudentsPerformance_reading,
+                              parameter = list(supp=0.1,conf=0.5),
+                              appearance = list(rhs=c('reading.score=(59,84]')),
+                              control =list(verbose=F))
> inspect(head(rules_reading_60to85,by='lift'))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.113     0.6766    0.167 1.135   113
[2] {gender=female,                                                                                       
     race.ethnicity=group C}            => {reading.score=(59,84]}   0.118     0.6556    0.180 1.100   118
[3] {gender=male,                                                                                         
     test.preparation.course=completed} => {reading.score=(59,84]}   0.113     0.6494    0.174 1.090   113
[4] {race.ethnicity=group C,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.133     0.6488    0.205 1.089   133
[5] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {reading.score=(59,84]}   0.139     0.6465    0.215 1.085   139
[6] {race.ethnicity=group C,                                                                              
     test.preparation.course=none}      => {reading.score=(59,84]}   0.130     0.6436    0.202 1.080   130
> inspect(head(rules_reading_60to85[!is.redundant(rules_reading_60to85)],by="lift"))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.113     0.6766    0.167 1.135   113
[2] {gender=female,                                                                                       
     race.ethnicity=group C}            => {reading.score=(59,84]}   0.118     0.6556    0.180 1.100   118
[3] {gender=male,                                                                                         
     test.preparation.course=completed} => {reading.score=(59,84]}   0.113     0.6494    0.174 1.090   113
[4] {race.ethnicity=group C,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.133     0.6488    0.205 1.089   133
[5] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {reading.score=(59,84]}   0.139     0.6465    0.215 1.085   139
[6] {race.ethnicity=group C,                                                                              
     test.preparation.course=none}      => {reading.score=(59,84]}   0.130     0.6436    0.202 1.080   130
> rules_reading_60to85_pruned <- rules_reading_60to85[!is.redundant(rules_reading_60to85)]
> plot(head(rules_reading_60to85_pruned,by="lift"),method = "graph")

在这里插入图片描述

阅读成绩(84,100]

# Find what a reading score in (84,100] is associated with: mine the rules
# with that right-hand side (support >= 0.1, confidence >= 0.5), show the six
# highest-lift rules before and after removing redundant ones, and draw the
# non-redundant ones as a graph.
rules_reading_above85 <- apriori(
  StudentsPerformance_reading,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("reading.score=(84,100]")),
  control = list(verbose = FALSE)
)
inspect(head(rules_reading_above85, by = "lift"))
inspect(head(rules_reading_above85[!is.redundant(rules_reading_above85)],
             by = "lift"))
rules_reading_above85_pruned <-
  rules_reading_above85[!is.redundant(rules_reading_above85)]

# plot() stops with "x contains 0 rules!" when handed an empty rule set, which
# would abort the rest of the script, so only draw when rules were found.
if (length(rules_reading_above85_pruned) > 0) {
  plot(head(rules_reading_above85_pruned, by = "lift"), method = "graph")
} else {
  message("No rules found for reading.score=(84,100]; nothing to plot.")
}

写作成绩(0,59]

> rules_writing_below60 <- apriori(StudentsPerformance_writing,
+                                  parameter = list(supp=0.1,conf=0.5),
+                                  appearance = list(rhs=c('writing.score=(0,59]')),
+                                  control =list(verbose=F))
+ 
> inspect(head(rules_writing_below60,by='lift'))
    lhs                               rhs                    support confidence coverage  lift count
[1] {lunch=free/reduced,                                                                            
     test.preparation.course=none} => {writing.score=(0,59]}   0.115     0.5134    0.224 1.827   115
     
> inspect(head(rules_writing_below60[!is.redundant(rules_writing_below60)],by="lift"))
    lhs                               rhs                    support confidence coverage  lift count
[1] {lunch=free/reduced,                                                                            
     test.preparation.course=none} => {writing.score=(0,59]}   0.115     0.5134    0.224 1.827   115
     
> rules_writing_below60_pruned <- rules_writing_below60[!is.redundant(rules_writing_below60)]
> plot(head(rules_writing_below60_pruned,by="lift"),method = "graph")

在这里插入图片描述

写作成绩(59,84]

> rules_writing_60to85 <- apriori(StudentsPerformance_writing,
+                                 parameter = list(supp=0.1,conf=0.5),
+                                 appearance = list(rhs=c('writing.score=(59,84]')),
+                                 control =list(verbose=F))

> inspect(head(rules_writing_60to85,by='lift'))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {writing.score=(59,84]}   0.113     0.6766    0.167 1.159   113
[2] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {writing.score=(59,84]}   0.144     0.6698    0.215 1.147   144
[3] {gender=female,                                                                                       
     lunch=standard}                    => {writing.score=(59,84]}   0.213     0.6474    0.329 1.109   213
[4] {race.ethnicity=group D}            => {writing.score=(59,84]}   0.168     0.6412    0.262 1.098   168
[5] {gender=male,                                                                                         
     test.preparation.course=completed} => {writing.score=(59,84]}   0.110     0.6322    0.174 1.083   110
[6] {test.preparation.course=completed} => {writing.score=(59,84]}   0.224     0.6257    0.358 1.071   224

> inspect(head(rules_writing_60to85[!is.redundant(rules_writing_60to85)],by="lift"))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {writing.score=(59,84]}   0.113     0.6766    0.167 1.159   113
[2] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {writing.score=(59,84]}   0.144     0.6698    0.215 1.147   144
[3] {gender=female,                                                                                       
     lunch=standard}                    => {writing.score=(59,84]}   0.213     0.6474    0.329 1.109   213
[4] {race.ethnicity=group D}            => {writing.score=(59,84]}   0.168     0.6412    0.262 1.098   168
[5] {gender=male,                                                                                         
     test.preparation.course=completed} => {writing.score=(59,84]}   0.110     0.6322    0.174 1.083   110
[6] {test.preparation.course=completed} => {writing.score=(59,84]}   0.224     0.6257    0.358 1.071   224

> rules_writing_60to85_pruned <- rules_writing_60to85[!is.redundant(rules_writing_60to85)]
> plot(head(rules_writing_60to85_pruned,by="lift"),method = "graph")

在这里插入图片描述

写作成绩(84,100]

# Find what a writing score in (84,100] is associated with: mine the rules
# with that right-hand side (support >= 0.1, confidence >= 0.5), show the six
# highest-lift rules before and after removing redundant ones, and draw the
# non-redundant ones as a graph.
rules_writing_above85 <- apriori(
  StudentsPerformance_writing,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("writing.score=(84,100]")),
  control = list(verbose = FALSE)
)
inspect(head(rules_writing_above85, by = "lift"))
inspect(head(rules_writing_above85[!is.redundant(rules_writing_above85)],
             by = "lift"))
rules_writing_above85_pruned <-
  rules_writing_above85[!is.redundant(rules_writing_above85)]

# plot() stops with "x contains 0 rules!" when handed an empty rule set, which
# would abort the rest of the script, so only draw when rules were found.
if (length(rules_writing_above85_pruned) > 0) {
  plot(head(rules_writing_above85_pruned, by = "lift"), method = "graph")
} else {
  message("No rules found for writing.score=(84,100]; nothing to plot.")
}
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件项目:分析影响学生成绩的因素的资源文件
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值