关联规则-R语言实现

本文旨在演示R语言arules包的关联规则用法,以及利用arulesViz对结果进行可视化。

关联规则是形如X→Y的蕴涵式,其中, X和Y分别称为关联规则的先导(antecedent或left-hand-side, LHS)和后继(consequent或right-hand-side, RHS)。每条关联规则X→Y都有支持度(support)和置信度(confidence)两个度量。 For more details see 关联规则.

R语言arules包提供了有效处理稀疏二元数据的数据结构,而且提供函数执行Apriori和Eclat算法,用于挖掘频繁项集、最大频繁项集、闭频繁项集和关联规则,详见

蘑菇数据data下载

r语言代码

library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## 
## The following objects are masked from 'package:base': ## ## %in%, abbreviate, write
# Load the mushroom data chosen interactively. The CSV has no header row, so
# the columns are named V1, V2, ... Strings are read as factors explicitly
# because R >= 4.0 no longer does this by default.
data <- read.csv(file.choose(), header = FALSE, stringsAsFactors = TRUE)
# Convert the data frame into arules' "transactions" format.
trans <- as(data, "transactions")
# inspect(trans)  # uncomment to print the transactions
# Plot the item matrix of the first 50 transactions.
image(trans[1:50])

# Bar chart of the items whose support is at least 0.5.
itemFrequencyPlot(trans, support = 0.5)

# Horizontal bar chart of the 10 most frequent items.
itemFrequencyPlot(trans, topN = 10, horiz = TRUE)

# Number of items in each transaction.
basketSize <- size(trans)
summary(basketSize)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      23      23      23      23      23      23
# Frequency of every item as returned by itemFrequency().
itemFreq <- itemFrequency(trans)
# Rescale the frequencies so that they sum to the total number of items over
# all transactions, i.e. turn them into per-item occurrence counts.
itemCount <- (itemFreq / sum(itemFreq)) * sum(basketSize)
summary(itemCount)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       4     156     600    1570    2346    8124
# Ten most frequent items, highest count first.
orderedItem <- sort(itemCount, decreasing = TRUE)
orderedItem[1:10]
## V17=p V18=w  V7=f V19=o  V8=c  V9=b V13=s V14=s  V5=f V11=t 
##  8124  7924  7914  7488  6812  5612  5176  4936  4748  4608
# Mine association rules with Apriori (minimum support 0.3, confidence 1).
rules <- apriori(
  trans,
  parameter = list(support = 0.3, confidence = 1)
)
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##           1    0.1    1 none FALSE            TRUE     0.3      1     10
## target ext ## rules FALSE ## ## Algorithmic control: ## filter tree heap memopt load sort verbose ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE ## ## apriori - find association rules with the apriori algorithm ## version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt ## set item appearances ...[0 item(s)] done [0.00s]. ## set transactions ...[119 item(s), 8124 transaction(s)] done [0.00s]. ## sorting and recoding items ... [28 item(s)] done [0.00s]. ## creating transaction tree ... done [0.02s]. ## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.00s]. ## writing ... [4316 rule(s)] done [0.00s]. ## creating S4 object ... done [0.00s].
# Summarise the rule set: rule-length distribution and quality measures.
summary(rules)
## set of 4316 rules
## 
## rule length distribution (lhs + rhs):sizes ## 1 2 3 4 5 6 7 8 9 ## 1 42 293 832 1244 1107 594 179 24 ## ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 1.00 4.00 5.00 5.32 6.00 9.00 ## ## summary of quality measures: ## support confidence lift ## Min. :0.3003 Min. :1 Min. :1.000 ## 1st Qu.:0.3112 1st Qu.:1 1st Qu.:1.000 ## Median :0.3299 Median :1 Median :1.025 ## Mean :0.3540 Mean :1 Mean :1.141 ## 3rd Qu.:0.3712 3rd Qu.:1 3rd Qu.:1.027 ## Max. :1.0000 Max. :1 Max. :2.927 ## ## mining info: ## data ntransactions support confidence ## trans 8124 0.3 1
# Print the first 10 rules.
inspect(rules[1:10])
##    lhs        rhs     support   confidence lift    
## 1  {}      => {V17=p} 1.0000000 1          1.000000
## 2  {V12=?} => {V17=p} 0.3052683 1          1.000000
## 3  {V9=n}  => {V19=o} 0.3092073 1          1.084936
## 4  {V9=n}  => {V7=f}  0.3092073 1          1.026535
## 5  {V9=n}  => {V17=p} 0.3092073 1          1.000000
## 6  {V3=s}  => {V17=p} 0.3146233 1          1.000000
## 7  {V20=e} => {V7=f}  0.3417036 1          1.026535
## 8  {V20=e} => {V17=p} 0.3417036 1          1.000000
## 9  {V23=d} => {V18=w} 0.3874938 1          1.025240
## 10 {V23=d} => {V17=p} 0.3874938 1          1.000000
# Keep only the rules whose right-hand side contains the item "V1=e",
# then print the first 10 of them.
edible <- subset(rules, rhs %in% c("V1=e"))
inspect(edible[1:10])
##      lhs                        rhs    support   confidence lift    
## 126  {V6=n,V11=t}            => {V1=e} 0.3072378 1          1.930608
## 578  {V6=n,V9=b,V11=t}       => {V1=e} 0.3072378 1          1.930608
## 581  {V6=n,V11=t,V19=o}      => {V1=e} 0.3072378 1          1.930608
## 583  {V6=n,V7=f,V11=t}       => {V1=e} 0.3072378 1          1.930608
## 585  {V6=n,V11=t,V18=w}      => {V1=e} 0.3072378 1          1.930608
## 587  {V6=n,V11=t,V17=p}      => {V1=e} 0.3072378 1          1.930608
## 590  {V6=n,V9=b,V19=o}       => {V1=e} 0.3308715 1          1.930608
## 1595 {V6=n,V9=b,V11=t,V19=o} => {V1=e} 0.3072378 1          1.930608
## 1599 {V6=n,V7=f,V9=b,V11=t}  => {V1=e} 0.3072378 1          1.930608
## 1603 {V6=n,V9=b,V11=t,V18=w} => {V1=e} 0.3072378 1          1.930608
# Save the rules to a CSV file, then keep a data-frame copy of them.
write(rules, file = "rules.csv", sep = ",", quote = TRUE, row.names = FALSE)
rules_df <- as(rules, "data.frame")

利用arulesViz对结果进行可视化

# Visualisation packages
library(grid)
library(RColorBrewer)
library(arulesViz)
## 
## Attaching package: 'arulesViz'
## 
## The following object is masked from 'package:arules':
## ## abbreviate ## ## The following object is masked from 'package:base': ## ## abbreviate
# Mine a second rule set with a higher minimum support (0.8, confidence 1)
# for the plots that follow.
mushroom.rules <- apriori(
  trans,
  parameter = list(support = 0.8, confidence = 1)
)
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##           1    0.1    1 none FALSE            TRUE     0.8      1     10
## target ext ## rules FALSE ## ## Algorithmic control: ## filter tree heap memopt load sort verbose ## 0.1 TRUE TRUE FALSE TRUE 2 TRUE ## ## apriori - find association rules with the apriori algorithm ## version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt ## set item appearances ...[0 item(s)] done [0.00s]. ## set transactions ...[119 item(s), 8124 transaction(s)] done [0.02s]. ## sorting and recoding items ... [5 item(s)] done [0.00s]. ## creating transaction tree ... done [0.00s]. ## checking subsets of size 1 2 3 4 done [0.00s]. ## writing ... [16 rule(s)] done [0.00s]. ## creating S4 object ... done [0.00s].
# Default plot method for the high-support rules, shaded by lift, using a
# reversed green palette and jittered points.
plot(
  mushroom.rules,
  control = list(
    jitter = 2,
    col = rev(brewer.pal(9, "Greens")[4:9])
  ),
  shading = "lift"
)

# Grouped plot of the same rules with the same palette.
plot(
  mushroom.rules,
  method = "grouped",
  control = list(
    k = 100,
    col = rev(brewer.pal(9, "Greens")[4:9])
  )
)

# Graph plot of the first 20 "V1=e" rules; measure is confidence, shading is lift.
plot(
  edible[1:20],
  measure = "confidence",
  method = "graph",
  control = list(type = "items"),
  shading = "lift"
)

# Parallel-coordinates plot of all "V1=e" rules, with reordering enabled.
plot(
  edible,
  method = "paracoord",
  control = list(reorder = TRUE)
)

蘑菇数据的决策树分类介绍详见

转载于:https://www.cnblogs.com/zhp2016/p/6005568.html

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值