数据挖掘：关联规则挖掘实操

多学点生信怎么了

已于 2023-03-23 10:38:15 修改

阅读量395

点赞数

分类专栏：数据挖掘文章标签：数据挖掘

于 2023-03-04 15:22:46 首次发布

本文链接：https://blog.csdn.net/weixin_73362123/article/details/129333812

版权

数据挖掘专栏收录该内容

3 篇文章 0 订阅

订阅专栏

课本习题：探究学生成绩和学生特征的关联规则。

一.加载程序包

#0.加载程序包

library(arules)
library(arulesViz)
library(dplyr)

二.读入数据处理数据

# 1. Read the CSV into a data frame, declaring the column types up front:
#    five character columns followed by three numeric columns. The five
#    student-attribute columns are then converted to factors with as.factor().

StudentsPerformance <- read.csv(
  "machine experiment/data/StudentsPerformance.csv",
  colClasses = c(rep("character", 5), rep("numeric", 3))
) %>%
  mutate(across(
    c(gender, race.ethnicity, parental.level.of.education, lunch,
      test.preparation.course),
    as.factor
  ))

# 2. Bin each of the math, reading and writing scores into three bands --
#    below 60, from 60 up to (but not including) 85, and 85 or above -- and
#    store the result as a factor.
#    cut() with breaks 0, 59, 84, 100 produces the right-closed intervals
#    (0,59], (59,84] and (84,100]; for whole-number scores these match the
#    three bands above. cut() already returns a factor, so no as.factor()
#    is needed.
#    include.lowest = TRUE places a score of exactly 0 in the lowest band
#    instead of turning it into NA. The labels are spelled out so the level
#    names stay "(0,59]", "(59,84]" and "(84,100]", which the rhs strings in
#    the apriori() calls refer to.
#    NOTE(review): if the data contains a score of 0, rule counts printed from
#    earlier runs may differ slightly -- confirm by re-running.

StudentsPerformance <- StudentsPerformance %>%
  mutate(across(
    c(math.score, reading.score, writing.score),
    function(score) {
      cut(
        score,
        breaks = c(0, 59, 84, 100),
        labels = c("(0,59]", "(59,84]", "(84,100]"),
        include.lowest = TRUE
      )
    }
  ))

三.关联分析查看结果

数学成绩(0,59]

# 3. The rule mining uses a minimum support of 0.1 and a minimum confidence of
#    0.5 to relate student attributes to each of the math, reading and writing
#    scores.
#    When mining rules for one score the other two scores are not needed, so
#    first build one data frame per subject holding the student attributes
#    plus that single score.

StudentsPerformance_math <- select(
  StudentsPerformance, -reading.score, -writing.score
)
StudentsPerformance_reading <- select(
  StudentsPerformance, -math.score, -writing.score
)
StudentsPerformance_writing <- select(
  StudentsPerformance, -reading.score, -math.score
)

# 4. Find what is associated with a math score in the lowest band (0,59]:
#    mine the rules, inspect the result, inspect the six non-redundant rules
#    with the highest lift, and plot the top non-redundant rules by lift as a
#    directed graph.

# Mine rules whose right-hand side is fixed to the lowest math band.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
rules_math_below60 <- apriori(
  StudentsPerformance_math,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("math.score=(0,59]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift.
inspect(head(rules_math_below60, by = "lift"))
    lhs                               rhs                 support confidence coverage  lift count
[1] {gender=female,                                                                              
     lunch=free/reduced}           => {math.score=(0,59]}   0.106     0.5608    0.189 1.742   106
[2] {lunch=free/reduced,                                                                         
     test.preparation.course=none} => {math.score=(0,59]}   0.120     0.5357    0.224 1.664   120
     
# Drop redundant rules, then show the six remaining rules with the highest lift.
rules_math_below60[!is.redundant(rules_math_below60)] %>%
  head(by = "lift") %>%
  inspect()
    lhs                               rhs                 support confidence coverage  lift count
[1] {gender=female,                                                                              
     lunch=free/reduced}           => {math.score=(0,59]}   0.106     0.5608    0.189 1.742   106
[2] {lunch=free/reduced,                                                                         
     test.preparation.course=none} => {math.score=(0,59]}   0.120     0.5357    0.224 1.664   120

# Keep only the non-redundant rules, then draw the six with the highest lift
# as a directed graph. plot() stops with "x contains 0 rules!" when given an
# empty rule set, so only plot when at least one rule survives.
rules_math_below60_pruned <-
  rules_math_below60[!is.redundant(rules_math_below60)]
if (length(rules_math_below60_pruned) > 0) {
  plot(head(rules_math_below60_pruned, by = "lift"), method = "graph")
}

在这里插入图片描述

数学成绩(59,84]

# 5. Find what is associated with a math score in the middle band (59,84]
#    (at least 60 and below 85): mine the rules, inspect the result, inspect
#    the six non-redundant rules with the highest lift, and plot the top
#    non-redundant rules by lift as a directed graph.

# Mine rules whose right-hand side is fixed to the middle math band.
# Written without console prompts so the code can be run as-is.
rules_math_60to85 <- apriori(
  StudentsPerformance_math,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("math.score=(59,84]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift.
inspect(head(rules_math_60to85, by = "lift"))
    lhs                                    rhs                  support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                           
     lunch=standard}                    => {math.score=(59,84]}   0.110     0.6587    0.167 1.176   110
[2] {gender=male,                                                                                      
     lunch=standard}                    => {math.score=(59,84]}   0.196     0.6203    0.316 1.108   196
[3] {race.ethnicity=group D}            => {math.score=(59,84]}   0.162     0.6183    0.262 1.104   162
[4] {lunch=standard,                                                                                   
     test.preparation.course=completed} => {math.score=(59,84]}   0.140     0.6167    0.227 1.101   140
[5] {gender=male,                                                                                      
     lunch=standard,                                                                                   
     test.preparation.course=none}      => {math.score=(59,84]}   0.125     0.6158    0.203 1.100   125
[6] {lunch=standard}                    => {math.score=(59,84]}   0.393     0.6093    0.645 1.088   393

# Drop redundant rules, then show the six remaining rules with the highest lift.
inspect(head(rules_math_60to85[!is.redundant(rules_math_60to85)], by = "lift"))
    lhs                                    rhs                  support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                           
     lunch=standard}                    => {math.score=(59,84]}   0.110     0.6587    0.167 1.176   110
[2] {gender=male,                                                                                      
     lunch=standard}                    => {math.score=(59,84]}   0.196     0.6203    0.316 1.108   196
[3] {race.ethnicity=group D}            => {math.score=(59,84]}   0.162     0.6183    0.262 1.104   162
[4] {lunch=standard,                                                                                   
     test.preparation.course=completed} => {math.score=(59,84]}   0.140     0.6167    0.227 1.101   140
[5] {lunch=standard}                    => {math.score=(59,84]}   0.393     0.6093    0.645 1.088   393
[6] {gender=male,                                                                                      
     test.preparation.course=completed} => {math.score=(59,84]}   0.105     0.6034    0.174 1.078   105
     
# Keep only the non-redundant rules, then draw the six with the highest lift
# as a directed graph. plot() stops with "x contains 0 rules!" when given an
# empty rule set, so only plot when at least one rule survives.
rules_math_60to85_pruned <-
  rules_math_60to85[!is.redundant(rules_math_60to85)]
if (length(rules_math_60to85_pruned) > 0) {
  plot(head(rules_math_60to85_pruned, by = "lift"), method = "graph")
}

在这里插入图片描述
数学成绩(84,100]

# 6. Find what is associated with a math score in the top band (84,100]
#    (85 or above): mine the rules, inspect the result, inspect the six
#    non-redundant rules with the highest lift, and plot the top non-redundant
#    rules by lift as a directed graph.
rules_math_above85 <- apriori(
  StudentsPerformance_math,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("math.score=(84,100]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift, before and after dropping
# redundant rules.
inspect(head(rules_math_above85, by = "lift"))
inspect(
  head(rules_math_above85[!is.redundant(rules_math_above85)], by = "lift")
)

rules_math_above85_pruned <-
  rules_math_above85[!is.redundant(rules_math_above85)]

# In the recorded run no rule was found for this band, and an unguarded plot()
# stopped with:
#   Error in plot.rules(...) : x contains 0 rules!
# so only plot when at least one rule is available.
if (length(rules_math_above85_pruned) > 0) {
  plot(head(rules_math_above85_pruned, by = "lift"), method = "graph")
}

阅读成绩(0,59]

# Find what is associated with a reading score in the lowest band (0,59]:
# mine the rules with the right-hand side fixed to that band.
rules_reading_below60 <- apriori(
  StudentsPerformance_reading,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("reading.score=(0,59]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift, before and after dropping
# redundant rules.
inspect(head(rules_reading_below60, by = "lift"))
inspect(
  head(rules_reading_below60[!is.redundant(rules_reading_below60)],
       by = "lift")
)

rules_reading_below60_pruned <-
  rules_reading_below60[!is.redundant(rules_reading_below60)]

# In the recorded run no rule was found for this band, and an unguarded plot()
# stopped with:
#   Error in plot.rules(...) : x contains 0 rules!
# so only plot when at least one rule is available.
if (length(rules_reading_below60_pruned) > 0) {
  plot(head(rules_reading_below60_pruned, by = "lift"), method = "graph")
}

阅读成绩(59,84]

# Find what is associated with a reading score in the middle band (59,84]:
# mine the rules with the right-hand side fixed to that band.
# Written without console prompts so the code can be run as-is.
rules_reading_60to85 <- apriori(
  StudentsPerformance_reading,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("reading.score=(59,84]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift.
inspect(head(rules_reading_60to85, by = "lift"))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.113     0.6766    0.167 1.135   113
[2] {gender=female,                                                                                       
     race.ethnicity=group C}            => {reading.score=(59,84]}   0.118     0.6556    0.180 1.100   118
[3] {gender=male,                                                                                         
     test.preparation.course=completed} => {reading.score=(59,84]}   0.113     0.6494    0.174 1.090   113
[4] {race.ethnicity=group C,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.133     0.6488    0.205 1.089   133
[5] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {reading.score=(59,84]}   0.139     0.6465    0.215 1.085   139
[6] {race.ethnicity=group C,                                                                              
     test.preparation.course=none}      => {reading.score=(59,84]}   0.130     0.6436    0.202 1.080   130
# Drop redundant rules, then show the six remaining rules with the highest lift.
inspect(
  head(rules_reading_60to85[!is.redundant(rules_reading_60to85)], by = "lift")
)
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.113     0.6766    0.167 1.135   113
[2] {gender=female,                                                                                       
     race.ethnicity=group C}            => {reading.score=(59,84]}   0.118     0.6556    0.180 1.100   118
[3] {gender=male,                                                                                         
     test.preparation.course=completed} => {reading.score=(59,84]}   0.113     0.6494    0.174 1.090   113
[4] {race.ethnicity=group C,                                                                              
     lunch=standard}                    => {reading.score=(59,84]}   0.133     0.6488    0.205 1.089   133
[5] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {reading.score=(59,84]}   0.139     0.6465    0.215 1.085   139
[6] {race.ethnicity=group C,                                                                              
     test.preparation.course=none}      => {reading.score=(59,84]}   0.130     0.6436    0.202 1.080   130
# Keep only the non-redundant rules, then draw the six with the highest lift
# as a directed graph. plot() stops with "x contains 0 rules!" when given an
# empty rule set, so only plot when at least one rule survives.
rules_reading_60to85_pruned <-
  rules_reading_60to85[!is.redundant(rules_reading_60to85)]
if (length(rules_reading_60to85_pruned) > 0) {
  plot(head(rules_reading_60to85_pruned, by = "lift"), method = "graph")
}

在这里插入图片描述

阅读成绩(84,100]

# Find what is associated with a reading score in the top band (84,100]:
# mine the rules with the right-hand side fixed to that band.
rules_reading_above85 <- apriori(
  StudentsPerformance_reading,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("reading.score=(84,100]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift, before and after dropping
# redundant rules.
inspect(head(rules_reading_above85, by = "lift"))
inspect(
  head(rules_reading_above85[!is.redundant(rules_reading_above85)],
       by = "lift")
)

rules_reading_above85_pruned <-
  rules_reading_above85[!is.redundant(rules_reading_above85)]

# In the recorded run no rule was found for this band, and an unguarded plot()
# stopped with:
#   Error in plot.rules(...) : x contains 0 rules!
# so only plot when at least one rule is available.
if (length(rules_reading_above85_pruned) > 0) {
  plot(head(rules_reading_above85_pruned, by = "lift"), method = "graph")
}

写作成绩(0,59]

# Find what is associated with a writing score in the lowest band (0,59]:
# mine the rules with the right-hand side fixed to that band.
# Written without console prompts so the code can be run as-is.
rules_writing_below60 <- apriori(
  StudentsPerformance_writing,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("writing.score=(0,59]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift.
inspect(head(rules_writing_below60, by = "lift"))
    lhs                               rhs                    support confidence coverage  lift count
[1] {lunch=free/reduced,                                                                            
     test.preparation.course=none} => {writing.score=(0,59]}   0.115     0.5134    0.224 1.827   115
     
# Drop redundant rules, then show the six remaining rules with the highest lift.
inspect(
  head(rules_writing_below60[!is.redundant(rules_writing_below60)],
       by = "lift")
)
    lhs                               rhs                    support confidence coverage  lift count
[1] {lunch=free/reduced,                                                                            
     test.preparation.course=none} => {writing.score=(0,59]}   0.115     0.5134    0.224 1.827   115
     
# Keep only the non-redundant rules, then draw the six with the highest lift
# as a directed graph. plot() stops with "x contains 0 rules!" when given an
# empty rule set, so only plot when at least one rule survives.
rules_writing_below60_pruned <-
  rules_writing_below60[!is.redundant(rules_writing_below60)]
if (length(rules_writing_below60_pruned) > 0) {
  plot(head(rules_writing_below60_pruned, by = "lift"), method = "graph")
}

在这里插入图片描述

写作成绩(59,84]

# Find what is associated with a writing score in the middle band (59,84]:
# mine the rules with the right-hand side fixed to that band.
# Written without console prompts so the code can be run as-is.
rules_writing_60to85 <- apriori(
  StudentsPerformance_writing,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("writing.score=(59,84]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift.
inspect(head(rules_writing_60to85, by = "lift"))
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {writing.score=(59,84]}   0.113     0.6766    0.167 1.159   113
[2] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {writing.score=(59,84]}   0.144     0.6698    0.215 1.147   144
[3] {gender=female,                                                                                       
     lunch=standard}                    => {writing.score=(59,84]}   0.213     0.6474    0.329 1.109   213
[4] {race.ethnicity=group D}            => {writing.score=(59,84]}   0.168     0.6412    0.262 1.098   168
[5] {gender=male,                                                                                         
     test.preparation.course=completed} => {writing.score=(59,84]}   0.110     0.6322    0.174 1.083   110
[6] {test.preparation.course=completed} => {writing.score=(59,84]}   0.224     0.6257    0.358 1.071   224

# Drop redundant rules, then show the six remaining rules with the highest lift.
inspect(
  head(rules_writing_60to85[!is.redundant(rules_writing_60to85)], by = "lift")
)
    lhs                                    rhs                     support confidence coverage  lift count
[1] {race.ethnicity=group D,                                                                              
     lunch=standard}                    => {writing.score=(59,84]}   0.113     0.6766    0.167 1.159   113
[2] {gender=female,                                                                                       
     lunch=standard,                                                                                      
     test.preparation.course=none}      => {writing.score=(59,84]}   0.144     0.6698    0.215 1.147   144
[3] {gender=female,                                                                                       
     lunch=standard}                    => {writing.score=(59,84]}   0.213     0.6474    0.329 1.109   213
[4] {race.ethnicity=group D}            => {writing.score=(59,84]}   0.168     0.6412    0.262 1.098   168
[5] {gender=male,                                                                                         
     test.preparation.course=completed} => {writing.score=(59,84]}   0.110     0.6322    0.174 1.083   110
[6] {test.preparation.course=completed} => {writing.score=(59,84]}   0.224     0.6257    0.358 1.071   224

# Keep only the non-redundant rules, then draw the six with the highest lift
# as a directed graph. plot() stops with "x contains 0 rules!" when given an
# empty rule set, so only plot when at least one rule survives.
rules_writing_60to85_pruned <-
  rules_writing_60to85[!is.redundant(rules_writing_60to85)]
if (length(rules_writing_60to85_pruned) > 0) {
  plot(head(rules_writing_60to85_pruned, by = "lift"), method = "graph")
}

在这里插入图片描述

写作成绩(84,100]

# Find what is associated with a writing score in the top band (84,100]:
# mine the rules with the right-hand side fixed to that band.
rules_writing_above85 <- apriori(
  StudentsPerformance_writing,
  parameter = list(supp = 0.1, conf = 0.5),
  appearance = list(rhs = c("writing.score=(84,100]")),
  control = list(verbose = FALSE)
)

# Show the six rules with the highest lift, before and after dropping
# redundant rules.
inspect(head(rules_writing_above85, by = "lift"))
inspect(
  head(rules_writing_above85[!is.redundant(rules_writing_above85)],
       by = "lift")
)

rules_writing_above85_pruned <-
  rules_writing_above85[!is.redundant(rules_writing_above85)]

# In the recorded run no rule was found for this band, and an unguarded plot()
# stopped with:
#   Error in plot.rules(...) : x contains 0 rules!
# so only plot when at least one rule is available.
if (length(rules_writing_above85_pruned) > 0) {
  plot(head(rules_writing_above85_pruned, by = "lift"), method = "graph")
}