from sklearn.preprocessing import LabelEncoder
import dm2022exp
from typing import List, Tuple, Union
import copy
import numpy as np
class Apriori():
    """Level-wise frequent-itemset miner (Apriori-style candidate generation).

    Item labels are mapped to integer ids with ``LabelEncoder``; every
    transaction becomes a set of ids. Frequent k-itemsets are joined pairwise
    into (k+1)-item candidates, whose support is then counted against all
    transactions. Mining stops when no further candidates can be built.

    Attributes:
        min_sup: Minimum support (fraction of transactions) for an itemset
            to be considered frequent.
        ans: Initialised to an empty list; not written by any method here.
        len_of_data: Number of transactions passed to the last ``fit`` call.
        encode: The ``LabelEncoder`` fitted by ``encoder`` (``None`` before).
        freq_set: Per-instance cache mapping an encoded itemset (frozenset of
            ids) to its support.
    """

    # Fitted label encoder; stays None until ``encoder`` has been called.
    encode = None

    def __init__(self, min_sup: float = 0.2):
        """
        Parameters:
        - min_sup : minimum support
        """
        self.min_sup = min_sup
        self.ans = []
        self.len_of_data = 0
        # The cache must live on the instance: its keys are encoded ids, which
        # only make sense for the data this particular object was fitted on.
        # A class-level dict would leak stale supports between instances.
        self.freq_set = {}

    def encoder(self, data):
        """Fit a fresh label encoder on all items and encode each transaction.

        Parameters:
        - data : iterable of transactions, each a list of item labels

        Returns:
            A list with one ``set`` of encoded item ids per transaction.
        """
        self.encode = LabelEncoder()
        label_list = []
        for transaction in data:
            label_list += transaction
        self.encode.fit(label_list)
        return [set(self.encode.transform(transaction)) for transaction in data]

    def fit(self, X: List[List[str]]) -> List[Union[Tuple[frozenset, float], List[Union[frozenset, float]]]]:
        """Mine every itemset whose support is at least ``min_sup``.

        Parameters:
        - X : list of transactions, each a list of item labels

        Returns:
            A list of ``(itemset, support)`` tuples, where ``itemset`` is a
            frozenset of the original labels. 1-itemsets come first, followed
            by larger itemsets in the order their level was generated.
        """
        # Encoded ids are reassigned by every fit, so supports cached from a
        # previous call would refer to different items.
        self.freq_set = {}

        data = self.encoder(X)
        num_of_goods = len(self.encode.classes_)
        self.len_of_data = len(X)

        freq_ = self.find_oneitem(num_of_goods, data)
        res = list(freq_)
        while True:
            candidate = self.cat_sub(freq_)
            if not candidate:
                break
            freq_ = []
            for itemset in candidate:
                sup = self.cal_sup(itemset, data)
                if sup >= self.min_sup:
                    freq_.append((itemset, sup))
            res.extend(freq_)

        # Translate the encoded ids back into the caller's labels.
        return [
            (frozenset(self.encode.inverse_transform(list(itemset))), sup)
            for itemset, sup in res
        ]

    def find_oneitem(self, num_of_goods, X):
        """Return the frequent 1-itemsets.

        Parameters:
        - num_of_goods : number of distinct encoded items (ids ``0..n-1``)
        - X : encoded transactions (sets of item ids)

        Returns:
            A list of ``(frozenset({item_id}), support)`` tuples, in ascending
            id order, for the items whose support is at least ``min_sup``.
        """
        one_item = []
        for item_id in range(num_of_goods):
            itemset = frozenset([item_id])
            sup = self.cal_sup(itemset, X)
            if sup >= self.min_sup:
                one_item.append((itemset, sup))
        return one_item

    def cal_sup(self, i, x):
        """Return the support of itemset ``i`` over transactions ``x``.

        The support is the number of transactions containing ``i`` divided by
        ``self.len_of_data``. Results are memoised in ``self.freq_set``, so a
        repeated query for the same itemset does not rescan ``x``.

        Parameters:
        - i : frozenset of encoded item ids (must be hashable)
        - x : iterable of encoded transactions (sets of item ids)

        Raises:
            ZeroDivisionError: if the itemset is not cached and
                ``self.len_of_data`` is 0.
        """
        if i in self.freq_set:
            return self.freq_set[i]
        hits = 0
        for transaction in x:
            if i.issubset(transaction):
                hits += 1
        support = hits / self.len_of_data
        self.freq_set[i] = support
        return support

    def cat_sub(self, data):
        """Join frequent k-itemsets into distinct (k+1)-item candidates.

        Parameters:
        - data : list of entries whose first element is a frozenset itemset
            (e.g. ``(itemset, support)`` tuples); all itemsets are expected
            to have the same size

        Returns:
            A list of candidate frozensets without duplicates, in the order
            in which they were first produced.
        """
        itemsets = [entry[0] for entry in data]
        candidate = []
        seen = set()  # O(1) duplicate test; the list only preserves order
        for idx, left in enumerate(itemsets):
            for right in itemsets[idx + 1:]:
                if not self.issame(left, right):
                    continue
                merged = left | right
                if merged not in seen:
                    seen.add(merged)
                    candidate.append(merged)
        return candidate

    def issame(self, x, y):
        """Return True when ``x`` and ``y`` share all but one of ``x``'s items."""
        return len(x & y) == len(x) - 1
# Driver: mine the data returned by dm2022exp.load_ex5_data() with a minimum
# support of 0.005 and print every frequent itemset with its support.
m = Apriori(min_sup=0.005)
data = dm2022exp.load_ex5_data()
ans = m.fit(data)
print(ans)