Python 实现Apriori算法

最新推荐文章于 2023-08-10 15:59:59 发布

Lincolnfather

最新推荐文章于 2023-08-10 15:59:59 发布

阅读量1.5k

点赞数

分类专栏：机器学习算法　文章标签：python、算法、机器学习

本文链接：https://blog.csdn.net/Lincolnfather/article/details/15811003

版权

机器学习算法专栏收录该内容

3 篇文章 0 订阅

订阅专栏

import os
import types
import sys
sys.setrecursionlimit(1000000)
'''
author liuzhenhua
date 20131113
the apriori algorithm for python
'''
# Transactions read so far: transaction id -> list of items.
original = {}
# Occurrence count per item; loaddata() prunes infrequent items from it.
mp = {}


def loaddata(dgree, path="D:/apriori.txt"):
    """Read the transaction file and return the frequent single items.

    Every usable line has the form ``<id>\\t<item>#<item>#...``.  The parsed
    transactions are stored in the module-level ``original`` dict and the item
    counts in the module-level ``mp`` dict.  Both dicts are updated in place,
    so calling this function more than once accumulates into them.

    Args:
        dgree: support count that is still regarded as infrequent; items
            occurring ``dgree`` times or fewer are dropped from ``mp``.
        path: location of the transaction file.

    Returns:
        The module-level ``mp`` dict (item -> count) after pruning.
    """
    with open(path, "r") as handle:
        for line in handle:
            fields = line.rstrip("\r\n").split("\t")
            # Blank or malformed lines carry no item list; skip them instead
            # of failing on the missing second field.
            if len(fields) < 2:
                continue
            items = fields[1].split("#")
            original[fields[0]] = items
            for item in items:
                mp[item] = mp.get(item, 0) + 1

    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over it directly raises RuntimeError on Python 3.
    for item in list(mp):
        if mp[item] <= dgree:
            del mp[item]
    return mp
def isContain(list1, list2):
    """Return True when every element of ``list2`` also occurs in ``list1``.

    Elements are compared with ``==``.  An empty ``list2`` is contained in
    anything, while a non-empty ``list2`` is never contained in an empty
    ``list1``.

    Args:
        list1: the candidate superset.
        list2: the elements that must all be present in ``list1``.

    Returns:
        bool: True if ``list1`` contains all elements of ``list2``.
    """
    return all(
        any(wanted == present for present in list1)
        for wanted in list2
    )
def isOriginal(ori, list1):
    """Tell whether at least one transaction in ``ori`` contains ``list1``.

    Args:
        ori: mapping of transaction id -> list of items.
        list1: the items to look for.

    Returns:
        True as soon as isContain() reports a transaction that holds every
        item of ``list1``; False when no transaction does.
    """
    return any(isContain(ori[tid], list1) for tid in ori)
def apriori(dic, dicty, ori, sz, dgree):
    """Grow frequent itemsets level by level until they reach size ``sz``.

    Each round joins every key of ``dicty`` onto every key of the current
    level, counts in how many transactions of ``ori`` each new candidate
    occurs, and keeps the candidates whose count exceeds ``dgree``.  Every
    level is printed as it is produced.

    An itemset is keyed by the sorted characters of its members joined into
    one string, so each item has to be a single character.

    Args:
        dic: mapping whose keys are the itemsets of the starting level.
        dicty: mapping whose keys are the items used to extend each level.
        ori: mapping of transaction id -> list of items.
        sz: itemset size at which the search stops.
        dgree: support count that is still regarded as infrequent; candidates
            occurring ``dgree`` times or fewer are discarded.

    Returns:
        dict of itemset key -> support count for the level of size ``sz``, or
        an empty dict when the candidates run out before that size is reached.
    """
    # Build the transaction sets once; every candidate is tested against them.
    transactions = [set(items) for items in ori.values()]
    level = dic
    while True:
        counts = {}
        seen = set()
        for seed in dicty:
            for itemset in level:
                # The seed adds nothing when it is already part of the itemset.
                if set(seed) <= set(itemset):
                    continue
                members = sorted(seed + itemset)
                key = ''.join(members)
                # The same candidate is reachable through several joins.
                if key in seen:
                    continue
                seen.add(key)
                support = sum(1 for t in transactions if t.issuperset(members))
                if support > max(dgree, 0):
                    counts[key] = support

        nu = len(next(iter(counts))) if counts else 0
        print("frequency item: %s items: %s" % (nu, counts))
        # An empty level can never grow again, so stop there as well instead
        # of searching forever for a size that does not exist.
        if nu == sz or not counts:
            return counts
        level = counts

def stop(res):
    """Return the length of the first key of ``res``, or 0 if it is empty.

    apriori() uses this value as the size of the itemsets in one level.
    """
    for key in res:
        return len(key)
    return 0

# Demo run: load the transactions with dgree=1, show the parsed data and the
# single-item level, then extend the itemsets until they reach size 3.
# Each print passes one pre-formatted string, so the output is the same
# whether print is a statement (Python 2) or a function (Python 3).
bp = loaddata(1)
print("the original data: %s" % (original,))
print("frequency item: %s items: %s" % (1, bp))

apriori(bp, bp, original, 3, 1)

apriori.txt数据（每行格式：事务ID + 制表符（Tab）+ 以“#”分隔的项；下面显示的空格在实际文件中应为制表符）：

10 A#C#D
20 B#C#E
30 A#B#C#E
40 B#E

Lincolnfather

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python 实现Apriori算法

import osimport typesimport syssys.setrecursionlimit(1000000)'''author liuzhenhuadate 20131113the apriori algorithm for python'''original = {}mp = {}def loaddata(dgree):
复制链接

扫一扫