##################################################################
# 使用数据: Titanic
# Inspect the structure of the built-in Titanic contingency table.
str(Titanic)
# Flatten the table into a data frame: one row per combination of the
# classifying variables, with the cell count stored in the Freq column.
df <- as.data.frame(Titanic)
# Preview the first rows of the flattened data.
head(df, n = 6L)
# > head(df)
#   Class  Sex   Age Survived Freq
# 1   1st Male Child       No    0
# 2   2nd Male Child       No    0
# 3   3rd Male Child       No   35
# 4  Crew Male Child       No    0
# Expand the aggregated counts into one row per person: every row of df is
# repeated Freq times, so combinations with Freq == 0 contribute no rows.
# Selecting all repeated rows at once replaces the column-by-column loop
# (whose header had been garbled into the invalid `for(iin1:4)`) and avoids
# growing a matrix with cbind() on each iteration.
# as.matrix() turns the four factor columns into a character matrix, and
# unname() drops the row/column names so the result is the same unnamed
# 4-column character matrix the loop produced.
titanic.raw <- unname(as.matrix(
  df[rep(seq_len(nrow(df)), times = df$Freq), 1:4]
))
# 前35行都是一样的
# > titanic.raw[1:36, ]
#       [,1]  [,2]     [,3]    [,4]
#  [1,] "3rd" "Male"   "Child" "No"
#  [2,] "3rd" "Male"   "Child" "No"
#  [3,] "3rd" "Male"   "Child" "No"
#  [4,] "3rd" "Male"   "Child" "No"
#  ...
# [35,] "3rd" "Male"   "Child" "No"
# [36,] "3rd" "Female" "Child" "No"
# Convert the character matrix into a data frame.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 the default is
# FALSE, which would leave every column as a plain character vector. Factor
# columns are needed for summary() to report per-level counts and for the
# data frame to be coerced to transactions for association-rule mining.
titanic.raw <- as.data.frame(titanic.raw, stringsAsFactors = TRUE)
# > head(titanic.raw)
#    V1   V2    V3 V4
# 1 3rd Male Child No
# 2 3rd Male Child No
# 3 3rd Male Child No
# 4 3rd Male Child No
# 5 3rd Male Child No
# 6 3rd Male Child No
# Once the data frame exists, give its columns the names of the first four
# columns of df.
names(titanic.raw) <- names(df)[seq_len(4)]
# Show the dimensions (rows x columns) of the expanded data.
dim(titanic.raw)
# Per-column summary of the expanded data.
summary(titanic.raw)
# 转换后:每一行代表了一个人,可以用于关联规则。转换前是什么类型的数据? (按照class、sex、年龄汇总的生存人数的数据)
# With the apriori() function, the default settings are: 1) supp = 0.1, which
# is the minimum support of rules; 2) conf = 0.8, which is the minimum
# confidence of rules; and 3) maxlen = 10, which is the maximum length of
# rules.
library(arules)
# Mine association rules with the default settings. The data frame is passed
# directly instead of a transactions object; per the original note, apriori()
# converts it internally.
rules <- apriori(titanic.raw)
# Show the resulting rule set (mined with the defaults supp = 0.1 and
# conf = 0.8). Note that maxlen = 10 limits the length of a single rule,
# not the number of rules returned.
rules
summary(rules)
inspect(rules)
# NOTE(review): this writes quality(rules) back to itself unchanged, so it has
# no visible effect as written; a transformation of the quality measures may
# have been intended here -- confirm against the original material.
quality(rules) <- quality(rules)