Apriori算法_关联挖掘和Apriori算法 - mrbean

1 #coding=utf-8

def loadDataSet():
    """Return the small hard-coded transaction database used by the demo.

    Returns:
        A list of transactions; each transaction is a list of integer
        item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]

def creteC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-element frozensets, one per distinct item, in
        ascending item order. frozenset is used so that the candidates can
        serve as dictionary keys when their support is counted.
    """
    unique_items = {item for transaction in dataSet for item in transaction}
    # Return a real list, not a lazy ``map`` object: the candidates are
    # scanned once per transaction, and a one-shot iterator would be empty
    # after the first transaction on Python 3.
    return [frozenset([item]) for item in sorted(unique_items)]

def scanD(D, Ck, minSupport):
    """Count candidate itemsets and keep those meeting the minimum support.

    Args:
        D: iterable of transactions (sets of items).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` holds the candidates
        whose support is >= ``minSupport``. ``supportData`` maps every
        candidate that occurs in at least one transaction to its support.
    """
    # Materialise both inputs: they are traversed more than once (and
    # ``len`` is needed), which a one-shot iterator cannot provide.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    retList = []
    supportData = {}
    if not transactions:
        # Nothing to count; also avoids dividing by zero below.
        return retList, supportData

    # float() keeps the division fractional on Python 2 as well.
    numItems = float(len(transactions))
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # Recorded for every counted candidate, not only the frequent ones.
        supportData[key] = support
    return retList, supportData

def appriGen(Lk, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are merged only when their first ``k - 2`` items (in
    sorted order) are equal, so each candidate of size ``k`` is produced
    once.

    Args:
        Lk: sequence of frozensets, each of size ``k - 1``.
        k: size of the itemsets to generate.

    Returns:
        A list of frozensets, each the union of two joinable members of
        ``Lk``.
    """
    itemsets = list(Lk)
    # Sort before slicing so the prefix does not depend on set iteration
    # order; computed once per itemset rather than once per pair.
    prefixes = [sorted(itemset)[:k - 2] for itemset in itemsets]

    retList = []
    count = len(itemsets)
    for i in range(count):
        for j in range(i + 1, count):
            # Compare the prefix of itemset i against that of itemset j.
            if prefixes[i] == prefixes[j]:
                retList.append(itemsets[i] | itemsets[j])
    return retList

def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets of a transaction database.

    Args:
        dataSet: list of transactions, each an iterable of items.
        minSupport: minimum support (fraction of transactions) an itemset
            needs to count as frequent.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the final entry is the empty list that
        ended the search. ``supportData`` maps itemsets to their support
        as reported by ``scanD``.
    """
    # Materialise both collections: each is scanned once per level, which
    # a lazy ``map`` object (Python 3) would not survive.
    C1 = list(creteC1(dataSet))
    D = [set(transaction) for transaction in dataSet]

    # Use the caller's threshold for the first level as well.
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # L[k - 2] holds the frequent (k-1)-itemsets; stop once a level is empty.
    while len(L[k - 2]) > 0:
        Ck = appriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData

def generateRules(L, supportData, minConf=0.7):
    """Derive association rules from frequent itemsets.

    Args:
        L: list of frequent-itemset lists as returned by ``apriori``;
            ``L[i]`` holds the itemsets of size ``i + 1``.
        supportData: dict mapping itemsets (frozensets) to their support.
        minConf: minimum confidence a rule needs to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRules = []
    # Start at index 1: a rule needs an itemset with at least two items.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                # More than two items: build larger consequents by merging.
                rulesFromConseq(freqSet, H1, supportData, bigRules, minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRules, minConf)
    return bigRules

def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    """Evaluate candidate consequents of one frequent itemset.

    For each consequent ``c`` in ``H`` the confidence of the rule
    ``(freqSet - c) --> c`` is ``support(freqSet) / support(freqSet - c)``.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are built from.
        H: iterable of candidate consequents (frozensets).
        supportData: dict mapping itemsets to their support; must contain
            ``freqSet`` and every ``freqSet - c``.
        brl: list that accepted ``(antecedent, consequent, confidence)``
            tuples are appended to (mutated in place).
        minConf: minimum confidence a rule needs to be accepted.

    Returns:
        The list of consequents whose rule met ``minConf``.

    Side effects:
        Prints the two support values for every candidate and one line for
        every accepted rule.
    """
    prunedH = []
    rule_support = supportData[freqSet]
    for conseq in H:
        antecedent = freqSet - conseq
        antecedent_support = supportData[antecedent]
        conf = rule_support / antecedent_support
        # Single-argument print() behaves the same on Python 2 and 3.
        print("%s %s" % (rule_support, antecedent_support))
        if conf >= minConf:
            print("%s --> %s conf %s" % (antecedent, conseq, conf))
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH

def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively build rules with progressively larger consequents.

    The consequents in ``H`` (all of size ``m``) are merged into candidates
    of size ``m + 1``, filtered by confidence through ``calcConf``, and the
    survivors are used for the next round.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are built from.
        H: list of consequents (frozensets) of equal size.
        supportData: dict mapping itemsets to their support.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence a rule needs to be accepted.
    """
    if not H:
        # No consequents to merge; avoids indexing an empty list below.
        return
    m = len(H[0])
    # Only continue while the itemset can hold a larger consequent and
    # still leave a non-empty antecedent.
    if len(freqSet) > m + 1:
        Hmp1 = appriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        # At least two surviving consequents are needed for another merge.
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)

def main():
    """Run the demo: mine the sample data set and print the results.

    Prints the frequent itemsets found at a minimum support of 0.7, then
    the association rules found at a minimum confidence of 0.7.
    """
    dataSet = loadDataSet()
    L, supportData = apriori(dataSet, minSupport=0.7)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(L)
    rules = generateRules(L, supportData, minConf=0.7)
    print(rules)

# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值