#!/usr/bin/env python
#coding=utf-8
from Tkinter import *
import string
def getsrcdata(filename):
    """Read a transaction file into a list of item lists.

    Every line of the file is one transaction whose items are separated
    by spaces and/or tabs.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one entry per line, each entry being the list of
        item strings found on that line, e.g. [['a', 'b'], ['b', 'c']].
        A blank line yields an empty list.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    # 'with' guarantees the file is closed even if reading fails midway.
    with open(filename, 'r') as f:
        # split() without arguments splits on any run of whitespace and
        # discards the trailing newline.
        return [line.split() for line in f]
def getC1(srcdata):
    """Build the first candidate set C1: every single item with its support.

    Args:
        srcdata: list of transactions, each an iterable of hashable items.

    Returns:
        A dict mapping frozenset([item]) to the number of transactions
        that contain that item.  A frozenset is used because it is
        hashable and can therefore serve as a dict key.
    """
    c1 = {}
    for transaction in srcdata:
        # Deduplicate first: support is counted per transaction, so an
        # item repeated inside one transaction must only count once.
        for item in set(transaction):
            key = frozenset([item])
            c1[key] = c1.get(key, 0) + 1
    return c1
def getL(c, supct):
    """Drop every candidate whose support count is below the minimum.

    Args:
        c: dict mapping candidate itemsets to their support counts.
            It is modified in place.
        supct: minimum support count; candidates with a count strictly
            less than this value are removed.

    Returns:
        The same dict object ``c``, now holding only the candidates
        whose count is at least ``supct``.
    """
    # Collect the keys first: a dict must not change size while it is
    # being iterated.
    infrequent = [key for key in c if c[key] < supct]
    for key in infrequent:
        del c[key]
    return c
def getnextcandi(preL, srcdata):
    """Generate the next candidate itemsets from the previous L and count them.

    Two itemsets of size k from ``preL`` are joined only when their
    union has exactly k + 1 items (i.e. they share k - 1 items), so all
    returned candidates are one item larger than the itemsets in ``preL``.

    Args:
        preL: dict whose keys are the frequent frozenset itemsets of the
            previous level (the values are not used).
        srcdata: list of transactions, each an iterable of items.

    Returns:
        A dict mapping each candidate frozenset to the number of
        transactions in ``srcdata`` that contain all of its items.
    """
    itemsets = list(preL)
    c = {}
    # Visit each unordered pair once; the union is symmetric, so the
    # reversed pair would only recompute the same candidate.
    for index, first in enumerate(itemsets):
        size = len(first)
        for second in itemsets[index + 1:]:
            merged = first | second
            # Reject unions that grow by more than one item, e.g.
            # {a, b} | {c, d}: they belong to a later level.
            if len(second) == size and len(merged) == size + 1:
                c[merged] = 0
    # Scan the source data once, counting the candidates each
    # transaction contains.
    for transaction in srcdata:
        # Build the set once per transaction so every subset test is cheap.
        items = set(transaction)
        for candidate in c:
            if candidate <= items:
                c[candidate] += 1
    return c
def Apriori(filename, supct):
    """Run the Apriori algorithm on the transactions stored in a file.

    Args:
        filename: path of the transaction file, read with getsrcdata().
        supct: minimum support count passed to getL() at every level.

    Returns:
        The last non-empty dict of frequent itemsets (itemset -> support
        count) produced before pruning left nothing, or an empty dict
        if not even the first level survives pruning.

    Errors raised by getsrcdata() (e.g. the file cannot be opened) are
    not caught here.
    """
    transactions = getsrcdata(filename)
    frequent = {}
    # First level: prune the single-item candidates.
    survivors = getL(getC1(transactions), supct)
    # Keep the newest non-empty level; stop once pruning leaves nothing.
    while survivors:
        frequent = survivors
        survivors = getL(getnextcandi(frequent, transactions), supct)
    return frequent
def initwindow():
    """Create and configure the root window of the application.

    Returns:
        The Tk root window, titled 'Apriori', with three grid rows and
        four grid columns that all carry weight 1.
    """
    window = Tk()
    window.title('Apriori')
    # size and position of the root window
    window.geometry('640x480+200+100')
    window.config(bg='#BAE7FE', bd=3, relief=GROOVE)
    # give every row and column the same weight so the grid follows
    # the window when it is resized
    for row in range(3):
        window.rowconfigure(row, weight=1)
    for column in range(4):
        window.columnconfigure(column, weight=1)
    return window
def main():
    """Build the Apriori GUI and run the Tk event loop.

    The window holds an entry for the data file, an entry for the
    minimum support count, a label that shows the frequent itemsets and
    a label that shows error messages.  Pressing <Return> in either
    entry runs Apriori() and displays its result.
    """
    def cleanup():
        """Clear the result label."""
        result.configure(text='')

    def display(event):
        """<Return> handler: run Apriori and show its result or an error."""
        # Forget the error of a previous attempt so that a successful
        # run is not shown next to a stale message.
        er.set('')
        try:
            # the minimum support count must be an integer
            sup = int(supct.get())
        except ValueError as e:
            cleanup()
            er.set(str(e))
            return
        try:
            L = Apriori(filename.get(), sup)
        except IOError as e:
            # the data file could not be opened or read
            cleanup()
            er.set(str(e))
            return
        # One header line, a blank line, then one line per itemset:
        # "{ item item } <tab><tab> count".
        parts = ["Items\t\tSup.count\n\n"]
        for itemset in L:
            members = ''.join(member + ' ' for member in itemset)
            parts.append('{ ' + members + '}' + "\t\t" + str(L[itemset]) + "\n")
        result.configure(text=''.join(parts))

    # draw the root window
    root = initwindow()

    # label for the data file entry
    Label(root, text='Data.File :', relief=GROOVE,
          bg='#BE7A32', fg='white').grid(
              row=0, column=0, padx=10, pady=20, sticky=E)
    # entry for the data file name
    filename = Entry(root, highlightbackground='black', bd=3,
                     highlightcolor='green', fg='blue', bg='#FFFFDD',
                     width=35)
    filename.grid(row=0, column=1, padx=10, sticky=W)
    filename.focus_set()

    # bind the callback to <Return> for all Entry widgets
    root.bind_class('Entry', '<Return>', display)

    # label for the minimum support count entry
    Label(root, text='Min.Sup.Count:', relief=GROOVE,
          bg='#BE7A32', fg='white').grid(
              row=0, column=2, padx=10, sticky=E)
    # entry for the minimum support count
    supct = Entry(root, width=4, highlightbackground='black',
                  highlightcolor='green', bd=3, fg='blue', bg='#FFFFDD')
    supct.grid(row=0, column=3, padx=10, sticky=W)

    # label that displays the result
    result = Label(root, relief=SUNKEN, height=10, width=400, bd=4,
                   bg='#FFFFDD', fg='#113BE8', font=('Times', '16'))
    result.grid(row=1, column=0, columnspan=4, padx=70, sticky=S+N)

    # error text, shown by the label below through its textvariable
    er = StringVar()
    errmess = Label(root, bg=root['bg'], fg='red', textvariable=er,
                    height=4, width=375, font=('Times', '16', 'bold'))
    errmess.grid(row=2, column=0, columnspan=4)

    root.mainloop()
# start the GUI only when this file is run as a script
if __name__ == '__main__':
    main()