from collections import defaultdict
import pandas as pd
def load_data(file):
    """Read a transaction file of whitespace-separated integers.

    Every non-blank line of the file is one transaction; its
    whitespace-separated tokens are converted to ``int``.

    Args:
        file: Path of the text file to read.

    Returns:
        A list of transactions in file order, each a list of ints.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token is not a valid integer literal.
    """
    with open(file) as f:
        token_rows = (line.split() for line in f)
        # A blank (or whitespace-only) line splits into no tokens; skipping it
        # avoids emitting an empty transaction for what is only a separator.
        return [[int(token) for token in tokens] for tokens in token_rows if tokens]
def tel_k_(a, b, k):
    """Tell whether two itemsets share their first ``k`` items in sorted order.

    Args:
        a: First itemset (a sequence of mutually comparable items).
        b: Second itemset.
        k: Number of leading items, taken after sorting, that must match.

    Returns:
        True when the ``k`` smallest items of ``a`` equal the ``k`` smallest
        items of ``b`` position by position; always True for ``k == 0``.
        When ``k`` exceeds the length of an itemset, the comparison covers
        whatever items are available instead of raising ``IndexError``.
    """
    # Compare sorted copies: a predicate must not reorder the caller's lists.
    return sorted(a)[:k] == sorted(b)[:k]
def generate_c(l):
    """Build candidate (k+1)-itemsets by joining frequent k-itemsets.

    Two k-itemsets are joined when their first ``k - 1`` items (in sorted
    order) are equal; the candidate is the first itemset followed by the last
    item of the second one.

    Args:
        l: List of itemsets (lists of mutually comparable items). All itemsets
            are expected to have the same length; the join width is taken
            from the first one after sorting. The argument is not modified.

    Returns:
        A ``defaultdict(int)`` whose keys are the candidate itemsets as sorted
        tuples, in generation order, each mapped to a support count of 0.
        Empty when ``l`` is empty.
    """
    C = defaultdict(int)
    if not l:
        return C

    # Normalise on copies: each itemset sorted, then the list sorted
    # lexicographically, so the caller's data is left untouched and every
    # candidate comes out as a sorted tuple.
    itemsets = sorted(sorted(itemset) for itemset in l)
    k = len(itemsets[0]) - 1

    for i, left in enumerate(itemsets):
        for right in itemsets[i + 1:]:
            if left[:k] == right[:k]:
                # Register the candidate with a zero count; the caller fills
                # in the real support afterwards.
                C[tuple(left + right[k:])] = 0
    return C
def apriori(data, sup, out_path="result.txt"):
    """Mine frequent itemsets level by level and append them to a report file.

    An itemset is frequent when the number of transactions containing it is
    strictly greater than ``len(data) * sup``. The one-hot transaction matrix
    is printed to stdout, and the report is appended to ``out_path`` as::

        \\nminsupport=<sup>
        \\nL1:<frequent 1-itemsets>
        \\nL2:<frequent 2-itemsets>
        ...
        \\n

    Args:
        data: List of transactions, each an iterable of hashable, mutually
            comparable items.
        sup: Minimum support as a fraction of the number of transactions.
        out_path: File the report is appended to.

    Returns:
        None.
    """
    threshold = len(data) * sup
    report = [f"\nminsupport={sup}"]

    # Support of single items. Each transaction is de-duplicated (keeping
    # first-seen order) so an item repeated inside one transaction counts
    # once, consistent with how larger itemsets are counted below.
    item_counts = defaultdict(int)
    for transaction in data:
        for item in dict.fromkeys(transaction):
            item_counts[item] += 1

    # One-hot matrix: one row per transaction, one column per distinct item.
    # It is built in one go instead of through chained ``dt.loc[i][x] = 1``
    # writes, which do not update the frame under pandas copy-on-write.
    columns = sorted(item_counts)
    dt = pd.DataFrame(
        [[1 if item in present else 0 for item in columns]
         for present in map(set, data)],
        columns=columns,
    )
    print(dt)

    # Frequent 1-itemsets, in first-seen item order.
    frequent = [[item] for item, count in item_counts.items() if count > threshold]
    report.append(f"\nL1:{frequent}")

    candidates = generate_c(frequent)
    k = 2
    while candidates:
        # A transaction supports a candidate when all of its columns are set.
        for itemset in candidates:
            candidates[itemset] = int(dt.loc[:, list(itemset)].all(axis=1).sum())

        frequent = [
            list(itemset)
            for itemset, count in candidates.items()
            if count > threshold
        ]
        if not frequent:
            break
        report.append(f"\nL{k}:{frequent}")
        k += 1
        candidates = generate_c(frequent)

    report.append("\n")
    with open(out_path, "a", encoding="utf-8") as f:
        f.write("".join(report))
if __name__ == '__main__':
    # Script entry point: mine 'data.txt' with a minimum support of 22%.
    data = load_data('data.txt')
    apriori(data, 0.22)
# 运行效果 (run output):