学习笔记
![在这里插入图片描述](https://img-blog.csdnimg.cn/093e4938192e400199af957b63604d77.bmp)
关联规则算法 Apriori 整体流程
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/c5b522f805a3b6dd89213070a9c9e920.png)
关联规则算法 Apriori 实现
""" @File : AL
@Author : BabyMuu
@Time : 2022/5/1 9:46
"""
class Apriori:
    """Mine frequent itemsets and association rules with the Apriori algorithm.

    ``dataset`` is an iterable of transactions; every transaction is an
    iterable of hashable items that can be sorted against each other.  The
    dataset is traversed several times, so it must be re-iterable (a list,
    not a one-shot generator).
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def create_c1(self):
        """Build the candidate 1-itemsets (C1).

        :returns: a list with one ``frozenset`` per distinct item found in
            the dataset, ordered by ascending item value
        """
        distinct_items = {
            item for transaction in self.dataset for item in transaction
        }
        return [frozenset([item]) for item in sorted(distinct_items)]

    def scan_dataset(self, C_k, min_support):
        """Count every candidate itemset and keep the frequent ones.

        :param C_k: candidate k-itemsets (iterable of ``frozenset``)
        :param min_support: minimum support, as a fraction of transactions
        :returns: ``(frequent_itemsets, support_by_itemset)``; the list is
            ordered by the first transaction in which each itemset occurs,
            and the dict only holds itemsets that reached ``min_support``
        """
        # Convert each transaction to a set once instead of on every
        # ``issubset`` call.
        transactions = [set(transaction) for transaction in self.dataset]
        # Drop repeated candidates (order preserved); otherwise a candidate
        # listed twice would be counted twice per transaction.
        candidates = list(dict.fromkeys(C_k))

        ss_cnt = {}
        for transaction in transactions:
            for candidate in candidates:
                if candidate.issubset(transaction):
                    ss_cnt[candidate] = ss_cnt.get(candidate, 0) + 1

        # ``ss_cnt`` is only non-empty when there is at least one
        # transaction, so the division below never sees a zero denominator.
        num_items = len(transactions)
        sat_list = []
        support_set = {}
        for candidate, count in ss_cnt.items():
            support = count / num_items
            if support >= min_support:
                sat_list.append(candidate)
                support_set[candidate] = support
        return sat_list, support_set

    @staticmethod
    def apriori_gen(L_k, k):
        """Join (k-1)-itemsets into candidate k-itemsets.

        Two itemsets are joined when their first ``k - 2`` items, taken in
        sorted order, are equal.  For 1-itemsets that prefix is empty, so
        every pair is joined.  The items are sorted explicitly because the
        iteration order of a ``frozenset`` is arbitrary and cannot be used
        for a prefix comparison.

        :param L_k: list of (k-1)-itemsets (``frozenset``)
        :param k: size of the itemsets to generate
        :returns: list of candidate k-itemsets; empty when nothing can be
            joined, which is what terminates the algorithm
        """
        prefixes = [sorted(item_set)[:k - 2] for item_set in L_k]
        stitching_item_sets = []
        for i, left in enumerate(L_k):
            for j in range(i + 1, len(L_k)):
                if prefixes[i] == prefixes[j]:
                    stitching_item_sets.append(left | L_k[j])
        return stitching_item_sets

    def apriori(self, min_support=0.5):
        """Run the frequent-itemset search.

        :param min_support: minimum support, as a fraction of transactions
        :returns: ``(L, support_set)`` where ``L[i]`` lists the frequent
            (i+1)-itemsets (the last entry is the empty list that stopped
            the search) and ``support_set`` maps every frequent itemset to
            its support
        """
        c1 = self.create_c1()
        L1, support_set = self.scan_dataset(c1, min_support)
        L = [L1]
        k = 2
        # Keep growing while the previous level produced frequent itemsets.
        while L[-1]:
            C_k = self.apriori_gen(L[-1], k)
            L_k, support_k = self.scan_dataset(C_k, min_support)
            support_set.update(support_k)
            L.append(L_k)
            k += 1
        return L, support_set

    def gen_rate_rules(self, L, support_set, min_conf=0.5):
        """Derive association rules from the frequent itemsets.

        :param L: frequent itemsets per level, as returned by ``apriori``
        :param support_set: support of every frequent itemset
        :param min_conf: minimum confidence a rule must reach
        :returns: list of ``(antecedent, consequent, confidence)`` tuples
        """
        rule_list = []
        # 1-itemsets cannot be split into antecedent and consequent.
        for level in L[1:]:
            for fre_set in level:
                H_1 = [frozenset([item]) for item in fre_set]
                self.rules_from_conseq(
                    fre_set, H_1, support_set, rule_list, min_conf)
        return rule_list

    def rules_from_conseq(self, fre_set, H_k, support_set, rule_list, min_conf):
        """Grow the rule consequents of ``fre_set`` level by level.

        Starting from the consequents in ``H_k``, the rules that reach
        ``min_conf`` are appended to ``rule_list``; the surviving consequents
        are then joined into consequents one item larger and evaluated in
        the next pass.

        :param fre_set: the frequent itemset the rules are derived from
        :param H_k: candidate consequents, all of the same size
        :param support_set: support of every frequent itemset
        :param rule_list: output list, extended in place
        :param min_conf: minimum confidence a rule must reach
        """
        if not H_k:
            return
        k = len(H_k[0])
        # The antecedent must stay non-empty, hence len(fre_set) > k.
        while H_k and len(fre_set) > k:
            H_k = self.cal_conf(fre_set, H_k, support_set, rule_list, min_conf)
            if len(H_k) <= 1:
                break
            # Use the joined consequents for the next pass; re-evaluating the
            # same ones would only append duplicate rules.
            H_k = self.apriori_gen(H_k, k + 1)
            k += 1

    @staticmethod
    def cal_conf(fre_set, H, support_set, rule_list, min_conf):
        """Evaluate the rules ``fre_set - item --> item`` for every item in H.

        Prints a header for ``fre_set`` and each accepted rule, and appends
        the accepted rules to ``rule_list``.

        :param fre_set: the frequent itemset the rules are derived from
        :param H: candidate consequents
        :param support_set: support of every frequent itemset
        :param rule_list: output list, extended in place
        :param min_conf: minimum confidence a rule must reach
        :returns: the consequents whose rule reached ``min_conf``
        :raises KeyError: if ``fre_set`` or an antecedent has no entry in
            ``support_set``
        """
        print(f'{fre_set}'.center(50, "-"))
        sta_list = []
        for item in H:
            antecedent = fre_set - item
            conf = support_set[fre_set] / support_set[antecedent]
            if conf >= min_conf:
                print(antecedent, '-->', item, 'conf:', conf)
                rule_list.append((antecedent, item, conf))
                sta_list.append(item)
        print()
        return sta_list
算法测试
# Demo: mine the sample transactions, list the frequent itemsets of every
# size, then derive (and print) the association rules.
dataset = [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
a = Apriori(dataset)
L, support_ = a.apriori()
for e, fre in enumerate(L):
    print(f'{e + 1}-项集: {fre}')
rules = a.gen_rate_rules(L, support_)
结果展示
1-项集: [frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})]
2-项集: [frozenset({1, 3}), frozenset({2, 3}), frozenset({3, 5}), frozenset({2, 5})]
3-项集: [frozenset({2, 3, 5})]
4-项集: []
----------------frozenset({1, 3})-----------------
frozenset({3}) --> frozenset({1}) conf: 0.6666666666666666
frozenset({1}) --> frozenset({3}) conf: 1.0
----------------frozenset({2, 3})-----------------
frozenset({3}) --> frozenset({2}) conf: 0.6666666666666666
frozenset({2}) --> frozenset({3}) conf: 0.6666666666666666
----------------frozenset({3, 5})-----------------
frozenset({5}) --> frozenset({3}) conf: 0.6666666666666666
frozenset({3}) --> frozenset({5}) conf: 0.6666666666666666
----------------frozenset({2, 5})-----------------
frozenset({5}) --> frozenset({2}) conf: 1.0
frozenset({2}) --> frozenset({5}) conf: 1.0
---------------frozenset({2, 3, 5})---------------
frozenset({3, 5}) --> frozenset({2}) conf: 1.0
frozenset({2, 5}) --> frozenset({3}) conf: 0.6666666666666666
frozenset({2, 3}) --> frozenset({5}) conf: 1.0
---------------frozenset({2, 3, 5})---------------
frozenset({3, 5}) --> frozenset({2}) conf: 1.0
frozenset({2, 5}) --> frozenset({3}) conf: 0.6666666666666666
frozenset({2, 3}) --> frozenset({5}) conf: 1.0