# Dataset ----
# Install arules only when it is missing, so that re-running the script does
# not re-download the package (or require network access) every time.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)

# Load the Groceries transactions and print an overall summary.
data(Groceries)
summary(Groceries)

# Show the first five transactions.
inspect(Groceries[1:5])

# Relative frequency (support) of the first three items.
itemFrequency(Groceries[, 1:3])
# frankfurter     sausage  liver loaf
# 0.058973055 0.093950178 0.005083884

# Plot the items whose support is at least 0.1, then the 20 most frequent items.
itemFrequencyPlot(Groceries, support = 0.1)
itemFrequencyPlot(Groceries, topN = 20)
# Model training ----
# Build the model: mine frequent itemsets with Eclat, using a minimum support
# of 0.075 and at most 15 items per itemset.
freq.itemsets <- eclat(
  Groceries,
  parameter = list(support = 0.075, maxlen = 15)
)
inspect(freq.itemsets)
#      items              support    count
# [1]  {whole milk}       0.25551601 2513
# [2]  {other vegetables} 0.19349263 1903
# [3]  {rolls/buns}       0.18393493 1809
# [4]  {yogurt}           0.13950178 1372
# [5]  {soda}             0.17437722 1715
# [6]  {root vegetables}  0.10899847 1072
# [7]  {tropical fruit}   0.10493137 1032
# [8]  {bottled water}    0.11052364 1087
# [9]  {sausage}          0.09395018  924
# [10] {shopping bags}    0.09852567  969
# [11] {citrus fruit}     0.08276563  814
# [12] {pastry}           0.08896797  875
# [13] {pip fruit}        0.07564820  744
# [14] {newspapers}       0.07981698  785
# [15] {bottled beer}     0.08052872  792
# [16] {canned beer}      0.07768175  764

# Create the association rules with Apriori: minimum support 0.006, minimum
# confidence 0.25, and at least two items per rule (minlen = 2).
groceryrules <- apriori(
  Groceries,
  parameter = list(support = 0.006, confidence = 0.25, minlen = 2)
)
groceryrules
# set of 463 rules
# Model evaluation ----
# Summarize the mined rule set, then look at the first three rules in detail.
summary(groceryrules)
inspect(groceryrules[1:3])
# Improving association rule performance ----
# An effective way to examine a rule set is to look at the rules with the
# highest lift, because a large lift indicates a strong, previously hidden
# connection between items.
inspect(sort(groceryrules, by = "lift")[1:5])

# Another effective way to examine the rules is to look at the subset that
# involves a particular item. To find the rules that involve berries, use
# subset() to keep the rules whose items include "berries". This is useful,
# for example, when asked to produce a seasonal promotional advert for berries.
berryrules <- subset(groceryrules, items %in% "berries")
inspect(berryrules)
# Visualizing association rules ----
# Install the plotting dependencies only when they are missing, so that
# re-running the script does not reinstall them every time.
for (pkg in c("arulesViz", "tidyverse", "treemap")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(arulesViz)
library(grid)
library(tidyverse)
library(treemap)

# Graph of the berries rules. Per the original author's note: the darker a
# circle, the higher the lift; the larger a circle, the more often the rule
# occurs.
plot(berryrules, method = "graph")

# Treemaps ----
# Count the items at each level of the item hierarchy stored in the item
# information (level1, then level1/level2, then level1/level2/labels).
# .groups = "drop" returns ungrouped results and silences the
# "summarise() has grouped output by ..." message.
item_info <- itemInfo(Groceries)

occur1 <- item_info %>%
  group_by(level1) %>%
  summarize(n = n(), .groups = "drop")
occur2 <- item_info %>%
  group_by(level1, level2) %>%
  summarize(n = n(), .groups = "drop")
occur3 <- item_info %>%
  group_by(level1, level2, labels) %>%
  summarize(n = n(), .groups = "drop")

# One treemap per hierarchy depth; rectangle area is the item count n.
treemap(occur1, index = c("level1"), vSize = "n")
treemap(occur2, index = c("level1", "level2"), vSize = "n")
treemap(occur3, index = c("level1", "level2", "labels"), vSize = "n")