# Python Apriori: frequent-itemset mining with the Apriori algorithm.

class Multi_Item:
    """An itemset together with its running support count."""

    def __init__(self):
        # A fresh record holds no items and has not been counted yet.
        self.support = 0
        self.itemset = []

    def set_support(self):
        """Increase the support count by one.

        Despite the ``set_`` prefix this takes no value; it is an increment.
        """
        self.support = self.support + 1

    def __str__(self):
        """Render the record as ``<itemset>:<support>``."""
        return "%s:%s" % (self.itemset, self.support)

# Alternative sample transactions (integer items), kept for reference:
# D = [[1, 2, 5], [2, 4], [2, 3], [1, 2, 4], [1, 3], [2, 3], [1, 3], [1, 2, 3, 5], [1, 2, 3]]

# Sample transaction database: every word is one transaction and every
# letter of the word is one item of that transaction.
D = [list(word) for word in ("MONKEY", "DONKEY", "MAKE", "MUCKY", "COOKIE")]
def create_C(D):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        D: iterable of transactions; each transaction is an iterable of
            hashable items.

    Returns:
        A list of ``Multi_Item`` objects, one per distinct item, in order of
        first appearance. Each ``itemset`` is a one-element list holding the
        item and ``support`` is the number of transactions containing it.
    """
    # item -> Multi_Item. A dict gives O(1) lookup and keeps first-seen order.
    candidates = {}

    for transaction in D:
        # dict.fromkeys() drops repeats while keeping order, so an item that
        # occurs several times in one transaction counts once for it.
        for item in dict.fromkeys(transaction):
            entry = candidates.get(item)
            if entry is None:
                entry = Multi_Item()
                # Stored as a list so that set(entry.itemset) yields the item
                # itself rather than, e.g., the characters of a string.
                entry.itemset = [item]
                candidates[item] = entry
            # Every containing transaction counts, including the first one.
            entry.set_support()

    return list(candidates.values())

def find_frequent_l_itemsets(C, minsup):
    """Keep only the candidates whose support reaches the minimum support.

    Args:
        C: list of candidate objects exposing a numeric ``support`` attribute.
        minsup: minimum support; the threshold is inclusive.

    Returns:
        A new list holding the candidates with ``support >= minsup`` in their
        original order. ``C`` itself is left unmodified.
    """
    # One pass that builds a fresh list; no need to copy C and then call
    # list.remove() for every rejected element (which is quadratic).
    return [item for item in C if item.support >= minsup]


def L2C(L, item_set_number=2):
    """Generate candidate itemsets of a given size from frequent itemsets.

    Every pair of itemsets in ``L`` is merged; the merged set is kept when it
    has exactly ``item_set_number`` distinct items and was not produced
    before.

    Args:
        L: list of objects with an ``itemset`` attribute. The attribute may
            be a list/tuple/set of hashable items or a single bare item.
        item_set_number: required number of items in each candidate.

    Returns:
        A list of candidates without duplicates, each one a list of items.
    """
    def as_items(itemset):
        # A bare item (e.g. 'milk' or 5) is a one-item set. Passing it to
        # set() directly would split a string into characters or raise on
        # a non-iterable such as an int.
        if isinstance(itemset, (list, tuple, set, frozenset)):
            return list(itemset)
        return [itemset]

    pools = [as_items(entry.itemset) for entry in L]
    seen = set()
    candidates = []

    for position, first in enumerate(pools):
        # Union is symmetric, so each unordered pair is visited only once.
        # The self-pair stays in so that an itemset which already has the
        # requested size is still passed through.
        for second in pools[position:]:
            # dict.fromkeys() merges while dropping repeats, in stable order.
            merged = list(dict.fromkeys(first + second))
            if len(merged) != item_set_number:
                continue
            # Compare as frozensets: two lists holding the same items in a
            # different order are the same candidate.
            key = frozenset(merged)
            if key not in seen:
                seen.add(key)
                candidates.append(merged)

    return candidates


# Compute the support of each candidate itemset.
def calc_support(item_set, D):
    """Count, for every candidate, the transactions that contain it.

    Args:
        item_set: iterable of candidates, each an iterable of hashable items.
        D: iterable of transactions, each an iterable of hashable items.

    Returns:
        A list of ``Multi_Item`` objects in the order of ``item_set``. Each
        has ``itemset`` set to the candidate and ``support`` set to the
        number of transactions of which the candidate is a subset.
    """
    # Convert each transaction to a set once, not once per candidate.
    transactions = [set(data) for data in D]

    data_set = []
    for item in item_set:
        index = Multi_Item()
        # Assigned up front so the itemset is recorded even when D is empty.
        index.itemset = item
        wanted = set(item)
        for transaction in transactions:
            if wanted <= transaction:
                index.set_support()
        data_set.append(index)

    return data_set

def apriori(min_sup, transactions=None):
    """Return the largest frequent itemsets found by a level-wise search.

    Starting from the frequent single items, candidates one item larger are
    generated, counted and filtered until a level has no frequent itemset.

    Args:
        min_sup: minimum support count an itemset needs to be frequent.
        transactions: optional transaction database (iterable of iterables
            of items). Defaults to the module-level ``D``.

    Returns:
        The list of ``Multi_Item`` objects of the largest itemset size that
        still has at least one frequent itemset, or an empty list when not
        even a single item is frequent.
    """
    if transactions is None:
        transactions = D
    # Materialise once: the database is scanned again on every level.
    transactions = list(transactions)

    L = find_frequent_l_itemsets(create_C(transactions), min_sup)
    item_set_number = 2

    # Stop at the first level without frequent itemsets; the level before it
    # is the answer. Candidate size grows each round and cannot exceed the
    # number of distinct items, so the loop terminates.
    while L:
        candidates = calc_support(L2C(L, item_set_number), transactions)
        next_level = find_frequent_l_itemsets(candidates, min_sup)
        if not next_level:
            break
        L = next_level
        item_set_number += 1

    return L

# C1 = create_C(D)
# for i in C1:
#     print(i)
# print("*"*20)
# L = find_frequent_l_itemsets(C1, 2)
# for i in L:
#     print(i)
# C2 = L2C(L)
# print("*"*20)
# for i in C2:
#     print(i)
# print("*"*20)
# C2 = calc_support(C2, D)
# for i in C2:
#     print(i)
# print("*"*20)
# L = find_frequent_l_itemsets(C2, 2)
# for i in L:
#     print(i)
# print("*"*20)
# L2 = L2C(L, 3)
# for i in L2:
#     print(i)
# print("*"*20)
# C3 = calc_support(L2, D)
# for i in C3:
#     print(i)
# print("*"*20)
# for i in find_frequent_l_itemsets(C3, 2):
#     print(i)

# Run the algorithm on the sample data with a minimum support count of 3
# and print every returned itemset record.
result = apriori(3)

for i in result:
    print(i)

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值