# Python implementation of association-rule analysis with the Apriori algorithm

#-*- coding:utf-8 -*-

import sys

# Python 2 only: make UTF-8 the implicit str/unicode codec, as the original
# script did. Python 3 has neither the ``reload`` builtin nor
# ``sys.setdefaultencoding``, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf8")


def load_data_set():
    """Return the built-in sample transaction database.

    Returns:
        A list of transactions; each transaction is a list of item names.
    """
    data_set = [
        ['beer', 'baby diapers', 'shorts'],
        ['baby diapers', 'shorts'],
        ['baby diapers', 'milk'],
        ['beer', 'baby diapers', 'shorts'],
        ['beer', 'milk'],
        ['baby diapers', 'milk'],
        ['beer', 'milk'],
        ['beer', 'baby diapers', 'milk', 'shorts'],
        ['beer', 'baby diapers', 'milk'],
    ]
    return data_set


def create_C1(data_set):
    """Build the candidate 1-itemsets.

    Args:
        data_set: Iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A set containing one single-element frozenset per distinct item.
    """
    return set(frozenset([item]) for t in data_set for item in t)


def is_apriori(Ck_item, Lksub1):
    """Tell whether a candidate satisfies the Apriori (downward closure) test.

    Args:
        Ck_item: Candidate itemset (frozenset).
        Lksub1: Collection of frequent itemsets one item smaller than the
            candidate.

    Returns:
        True when every subset obtained by removing exactly one item from
        ``Ck_item`` is a member of ``Lksub1`` (vacuously True for an empty
        candidate); False otherwise.
    """
    return all(Ck_item - frozenset([item]) in Lksub1 for item in Ck_item)


def create_Ck(Lksub1, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are joined when their sorted item lists agree on the first
    ``k - 2`` items; the union is kept only if it passes ``is_apriori``.

    Args:
        Lksub1: Set of frequent (k-1)-itemsets (frozensets).
        k: Size of the candidates to produce.

    Returns:
        A set of candidate itemsets (frozensets).
    """
    Ck = set()
    itemsets = list(Lksub1)
    # Sort every itemset once instead of re-sorting inside the pair loop.
    prefixes = [sorted(itemset)[0:k - 2] for itemset in itemsets]
    count = len(itemsets)
    for i in range(count):
        # Start at i + 1: the union is symmetric, so visiting each unordered
        # pair once is enough, and joining an itemset with itself only
        # reproduces that (k-1)-itemset rather than a k-item candidate.
        for j in range(i + 1, count):
            if prefixes[i] == prefixes[j]:
                Ck_item = itemsets[i] | itemsets[j]
                if is_apriori(Ck_item, Lksub1):
                    Ck.add(Ck_item)
    return Ck


def generate_Lk_by_Ck(data_set, Ck, min_support, support_data):
    """Keep the candidates whose support reaches ``min_support``.

    Args:
        data_set: Sequence of transactions (must support ``len``).
        Ck: Iterable of candidate itemsets (frozensets).
        min_support: Minimum support, as a fraction of the transaction count.
        support_data: Dict updated in place with ``itemset -> support`` for
            every itemset that is kept.

    Returns:
        The set of candidates whose support is at least ``min_support``.
    """
    Lk = set()
    item_count = {}
    # Convert each transaction once so the subset tests below do not have to
    # rebuild a set from the raw transaction for every candidate.
    transactions = [set(t) for t in data_set]
    for transaction in transactions:
        for item in Ck:
            if item.issubset(transaction):
                item_count[item] = item_count.get(item, 0) + 1
    # float() keeps the division below a true division on Python 2 as well.
    t_num = float(len(data_set))
    for item, count in item_count.items():
        support = count / t_num
        if support >= min_support:
            Lk.add(item)
            support_data[item] = support
    return Lk


def generate_L(data_set, k, min_support):
    """Compute the frequent itemsets of sizes 1 through ``k``.

    Args:
        data_set: Sequence of transactions.
        k: Largest itemset size to mine.
        min_support: Minimum support, as a fraction of the transaction count.

    Returns:
        A tuple ``(L, support_data)``: ``L[i]`` is the set of frequent
        (i+1)-itemsets (possibly empty) and ``support_data`` maps each
        frequent itemset to its support.
    """
    support_data = {}
    C1 = create_C1(data_set)
    Lksub1 = generate_Lk_by_Ck(data_set, C1, min_support, support_data)
    L = [Lksub1]
    for i in range(2, k + 1):
        Ci = create_Ck(Lksub1, i)
        Lksub1 = generate_Lk_by_Ck(data_set, Ci, min_support, support_data)
        L.append(Lksub1)
    return L, support_data


def generate_big_rules(L, support_data, min_conf):
    """Derive the association rules that reach ``min_conf`` confidence.

    For each frequent itemset and each previously visited frequent itemset
    that is a subset of it, the rule ``(freq_set - sub_set) => sub_set`` is
    scored as ``support(freq_set) / support(freq_set - sub_set)``.

    Args:
        L: List of sets of frequent itemsets, as returned by ``generate_L``.
        support_data: Dict mapping frequent itemsets to their support.
        min_conf: Minimum confidence a rule needs to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    big_rule_list = []
    sub_set_list = []
    for level in L:
        for freq_set in level:
            for sub_set in sub_set_list:
                if sub_set.issubset(freq_set):
                    antecedent = freq_set - sub_set
                    conf = support_data[freq_set] / support_data[antecedent]
                    big_rule = (antecedent, sub_set, conf)
                    if conf >= min_conf and big_rule not in big_rule_list:
                        big_rule_list.append(big_rule)
            # Appended after the scan so an itemset is never paired with
            # itself as its own subset.
            sub_set_list.append(freq_set)
    return big_rule_list


if __name__ == "__main__":
    # Demo run on the sample data set.
    data_set = load_data_set()
    L, support_data = generate_L(data_set, k=3, min_support=0.2)
    big_rules_list = generate_big_rules(L, support_data, min_conf=0.7)
    for Lk in L:
        if not Lk:
            # No frequent itemsets of this size; there is no member to read
            # the itemset size from, so skip the level instead of failing.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("=" * 50)
        print("frequent" + str(len(next(iter(Lk)))) + "-itemsets\t\tsupport")
        print("=" * 50)
        for freq_set in Lk:
            print("{0} {1}".format(freq_set, support_data[freq_set]))
    print("")
    print("Big Rules")
    for item in big_rules_list:
        print("{0} => {1} conf: {2}".format(item[0], item[1], item[2]))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值