library(arules) # association rules
library(arulesViz) # data visualization of association rules
library(RColorBrewer)
# Read the transactions from a comma-separated file in basket format.
groceries <- read.transactions(
  "groceries.csv",
  format = "basket",
  sep = ","
)
summary(groceries)

# Take a closer look at the data set ----
class(groceries)
groceries
dim(groceries)
colnames(groceries)[1:5]
rownames(groceries)[1:5]

# Size of every transaction, then its distribution and grand total.
basketSize <- size(groceries)
summary(basketSize)
sum(basketSize) # count of all 1s in the sparse matrix
# Relative frequency (support) of every item.
itemFreq <- itemFrequency(groceries)
sum(itemFreq) # in effect, the average number of items bought per transaction

# Absolute number of occurrences of every item.
# Support(X) = count(X) / N, where N is the total number of transactions, so
# count(X) = N * Support(X). `type = "absolute"` returns these counts directly
# instead of rebuilding them from the relative frequencies.
itemCount <- itemFrequency(groceries, type = "absolute")
summary(itemCount)

# Most frequent items first. `decreasing` has to be spelled out: left empty it
# falls back to the ascending default and the first ten entries would be the
# rarest items rather than the most common ones.
orderedItem <- sort(itemCount, decreasing = TRUE)
orderedItem[1:10]
orderedItemFreq <- sort(itemFreq, decreasing = TRUE)
orderedItemFreq[1:10]
# Item frequency plot restricted to items with support of at least 0.1.
itemFrequencyPlot(groceries, support = 0.1)
# Item frequency plot of the top 10 items, drawn horizontally.
# `TRUE` is used instead of `T`, which is an ordinary variable that can be
# reassigned.
itemFrequencyPlot(groceries, topN = 10, horiz = TRUE)

# Print the first five transactions.
inspect(groceries[1:5])

# Picture the item matrix: first ten transactions, then a random sample of 100.
image(groceries[1:10])
image(sample(groceries, 100))
# Mine association rules ----
groceryrules <- apriori(
  groceries,
  parameter = list(
    support = 0.006,
    confidence = 0.25,
    minlen = 2
  )
)

# Inspect the first ten rules.
inspect(groceryrules[1:10])

# Evaluation: sort the rules by lift and show the first five.
ordered_groceryrules <- sort(groceryrules, by = "lift")
inspect(ordered_groceryrules[1:5])

# Search: keep only the rules whose items include yogurt.
yogurtrules <- subset(groceryrules, items %in% "yogurt")
inspect(yogurtrules)
# Additional interest measures ----
# Compute extra quality measures for every rule against the transactions.
# The measure names are plain double-quoted strings; typographic quotes are
# not string delimiters in R and make the call unparseable.
qualityMeasures <- interestMeasure(
  groceryrules,
  c("coverage", "fishersExactTest", "conviction", "chiSquared"),
  transactions = groceries
)
summary(qualityMeasures)
# Export ----
# Write the rules to a CSV file.
write(
  groceryrules,
  file = "groceryrules.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Convert the rules to a data.frame for further analysis.
groceryrules_df <- as(groceryrules, "data.frame")
str(groceryrules_df)
# Visualization ----
# Both plots shade the rules by lift using a reversed 9-step "Greens" palette.
# NOTE(review): the accepted `control` entries depend on the installed
# arulesViz version and plotting engine - confirm `jitter` / `col` are honored.

# Scatter plot of the rules.
plot(
  groceryrules,
  method = "scatterplot",
  control = list(jitter = 2, col = rev(brewer.pal(9, "Greens"))),
  shading = "lift"
)

# Grouped plot of the rules.
plot(
  groceryrules,
  method = "grouped",
  control = list(jitter = 2, col = rev(brewer.pal(9, "Greens"))),
  shading = "lift"
)
# Association rules demo
# (Footer of the source web page: latest recommended article published 2022-05-01 19:45:39)