Apriori算法的Python实现！

最新推荐文章于 2024-07-30 17:00:31 发布

lrjnlp

最新推荐文章于 2024-07-30 17:00:31 发布

阅读量3.4k

点赞数

文章标签： python 算法 each file c import

#!/usr/bin/env python

#coding=utf-8

from Tkinter import *

import string

# get data from a file; return a list whose items are themselves lists

# like this [ [],[],[] ]

def getsrcdata(filename):
    """Read a transaction file into a list of item lists.

    Each line of the file is treated as one transaction whose items are
    separated by any run of whitespace (spaces or tabs).

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one entry per line of the file; each entry is the
        list of whitespace-separated tokens on that line, for example
        [['a', 'b'], ['c']].  A blank line yields an empty list.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    # `with` guarantees the file is closed even if reading fails part-way.
    # str.split() with no arguments splits on any whitespace and is
    # available on both Python 2 and Python 3 (string.split is not).
    with open(filename, 'r') as f:
        return [line.split() for line in f]

# get the first candidates C1 from source data

# return a dict c1 whose key is frozenset which

# contains each item and its value is its count

def getC1(srcdata):
    """Build the first candidate set C1 from the source transactions.

    Args:
        srcdata: iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A dict mapping a one-element frozenset (the item) to the number
        of times that item occurs in srcdata.  An item repeated inside a
        single transaction is counted once per occurrence.
    """
    c1 = {}
    for transaction in srcdata:
        for item in transaction:
            # A frozenset is hashable, so it can serve as a dict key and
            # later be unioned with other itemsets.
            key = frozenset([item])
            c1[key] = c1.get(key, 0) + 1
    return c1

# compare candidate support count with the given

# minimum support count

def getL(c, supct):
    """Drop every candidate whose support count is below the minimum.

    The candidate dict is filtered in place and the same object is
    returned, so the caller's dict and the return value are aliases.

    Args:
        c: dict mapping an itemset to its support count.
        supct: minimum support count; entries with a count strictly
            less than this value are removed.

    Returns:
        The dict c itself, now holding only entries whose count is
        greater than or equal to supct.
    """
    # Collect the keys first: a dict must not change size while it is
    # being iterated.
    rejected = [key for key in c if c[key] < supct]
    for key in rejected:
        del c[key]
    return c

# get the next candidate from previous L

# and scan source data for count of each candidate

def getnextcandi(preL, srcdata):
    """Generate the next candidate itemsets from preL and count them.

    Two itemsets of the same size k are joined only when their union
    has exactly k + 1 items, i.e. when they differ in a single item.
    Larger unions (for example {a, b} joined with {c, d}) are not
    (k+1)-item candidates and are skipped.

    Args:
        preL: dict (or other iterable) of frozenset itemsets that were
            frequent at the previous level.
        srcdata: iterable of transactions, each an iterable of items.

    Returns:
        A dict mapping each candidate frozenset to the number of
        transactions in srcdata that contain every item of the
        candidate.  Empty when no two itemsets can be joined.
    """
    from itertools import combinations

    c = {}
    # combinations() visits each unordered pair of distinct keys once.
    for key1, key2 in combinations(preL, 2):
        if len(key1) != len(key2):
            continue
        key = key1.union(key2)
        if len(key) == len(key1) + 1:
            c[key] = 0

    # Count, for every candidate, the transactions that contain it.
    for transaction in srcdata:
        items = set(transaction)
        for candidate in c:
            if candidate.issubset(items):
                c[candidate] = c[candidate] + 1
    return c

# Apriori algorithm

def Apriori(filename,supct):
    """Run the level-wise Apriori search over the transactions in a file.

    Args:
        filename: path of the transaction file, passed to getsrcdata().
        supct: minimum support count, passed to getL().

    Returns:
        The last non-empty dict returned by getL() (itemset -> support
        count), or an empty dict when even the first level has no entry
        that survives the support filter.
    """
    transactions = getsrcdata(filename)

    # Level 1: every single item is a candidate.
    candidates = getC1(transactions)

    frequent = {}
    survivors = getL(candidates, supct)
    # Keep growing the itemsets until a level leaves nothing frequent;
    # the previous level is then the answer.
    while survivors:
        frequent = survivors
        candidates = getnextcandi(frequent, transactions)
        survivors = getL(candidates, supct)
    return frequent

# root window

def initwindow():
    """Create and configure the root Tk window.

    Sets the title, the initial geometry and the background/border
    style, then gives rows 0-2 and columns 0-3 of the grid a weight of 1.

    Returns:
        The configured Tk root window.
    """
    window = Tk()
    window.title('Apriori')
    window.geometry('640x480+200+100')
    window.config(bg='#BAE7FE', bd=3, relief=GROOVE)

    # Weighted rows and columns so the grid resizes with the window.
    for row in range(3):
        window.rowconfigure(row, weight=1)
    for column in range(4):
        window.columnconfigure(column, weight=1)
    return window

# the main function

def main():
    """Build the GUI, wire up the callbacks and enter the Tk main loop.

    The window holds an entry for the data file name, an entry for the
    minimum support count, a label that shows the resulting itemsets and
    a label for error messages.  Pressing <Return> in either entry runs
    Apriori() and displays its result.
    """

    def cleanup():
        """Blank the result label."""
        result.configure(text='')

    def display(event):
        """Run Apriori with the values in the entries and show the result.

        Args:
            event: the Tk event that triggered the callback (unused).
        """
        # Clear any message left over from an earlier failed attempt so
        # it is not shown next to a fresh, successful result.
        er.set('')

        f = filename.get()
        sup = supct.get()

        try:
            # The entry holds text; it must parse as an integer.
            sup = int(sup)
        except ValueError as e:
            cleanup()
            er.set(str(e))
            return

        try:
            L = Apriori(f, sup)
        except IOError as e:
            # The data file could not be opened or read.
            cleanup()
            er.set(str(e))
            return

        # One header line, then one "{ items }<tab><tab>count" line per
        # itemset.
        parts = ["Items\t\tSup.count\n\n"]
        for item in L:
            members = ''.join(i + ' ' for i in item)
            parts.append('{ ' + members + '}' + "\t\t" + str(L[item]) + "\n")
        result.configure(text=''.join(parts))

    # draw the root window
    root = initwindow()

    # label for the data file entry
    Label(root, text='Data.File :', relief=GROOVE,
          bg='#BE7A32', fg='white').grid(
              row=0, column=0, padx=10, pady=20, sticky=E)

    # entry for the data file name
    filename = Entry(root, highlightbackground='black', bd=3,
                     highlightcolor='green', fg='blue', bg='#FFFFDD',
                     width=35)
    filename.grid(row=0, column=1, padx=10, sticky=W)
    filename.focus_set()

    # bind the callback for all Entry widgets
    root.bind_class('Entry', '<Return>', display)

    # label for the minimum support count entry
    Label(root, text='Min.Sup.Count:', relief=GROOVE,
          bg='#BE7A32', fg='white').grid(
              row=0, column=2, padx=10, sticky=E)

    # entry for the minimum support count
    supct = Entry(root, width=4, highlightbackground='black',
                  highlightcolor='green', bd=3, fg='blue', bg='#FFFFDD')
    supct.grid(row=0, column=3, padx=10, sticky=W)

    # label that displays the result
    result = Label(root, relief=SUNKEN, height=10, width=400, bd=4,
                   bg='#FFFFDD', fg='#113BE8', font=('Times', '16'))
    result.grid(row=1, column=0, columnspan=4, padx=70, sticky=S+N)

    # variable holding the current error message
    er = StringVar()

    # label that prints the error message
    errmess = Label(root, bg=root['bg'], fg='red', textvariable=er,
                    height=4, width=375, font=('Times', '16', 'bold'))
    errmess.grid(row=2, column=0, columnspan=4)

    root.mainloop()

# call main function

if __name__=='__main__':
    # Start the GUI only when executed as a script, not when imported.
    main()

lrjnlp

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫