import numpy as np
import pandas as pd
## Method 1:
def generate_C1(data_set):
    """Build the set of candidate 1-itemsets from ``data_set``.

    Args:
        data_set: An iterable of transactions, each itself an iterable of
            hashable items.

    Returns:
        A ``set`` holding one single-element ``frozenset`` per distinct item
        found in any transaction. Empty when ``data_set`` yields no items.
    """
    # frozenset (not set) so each candidate is hashable and can live in a set.
    return {frozenset([item]) for items in data_set for item in items}
# Compute the support of each candidate in Ck over data set D and return the candidates whose support is at least min_support.
def get_supports(data_set, ck, min_support, supports):
    """Return the candidates in ``ck`` whose support reaches ``min_support``.

    The support of a candidate is the number of transactions in ``data_set``
    that contain it, divided by ``len(data_set)``.

    Args:
        data_set: A sized collection of transactions (``len()`` is called on
            it); each transaction is an iterable of items.
        ck: A re-iterable collection of candidate itemsets (it is iterated
            once per transaction); each candidate must provide ``issubset``
            and be hashable, e.g. a ``frozenset``.
        min_support: Threshold; a candidate is kept when its support is
            greater than or equal to this value.
        supports: A dict that is updated in place with ``candidate -> support``
            for every candidate that is kept.

    Returns:
        A ``set`` of the candidates that met the threshold.
    """
    # Count, for every candidate, how many transactions contain it.
    item_count = {}
    for data in data_set:
        for item in ck:
            if item.issubset(data):
                item_count[item] = item_count.get(item, 0) + 1

    # With an empty data_set no candidate is counted, so the division below
    # is never reached with n == 0.
    n = float(len(data_set))
    freq_set = set()
    for item, count in item_count.items():
        support = count / n
        if support >= min_support:
            freq_set.add(item)
            supports[item] = support
    return freq_set
# Pruning
def get_new_s