python实现fp_growth调包_fp-growth代码问题(Python)

网上的 python3 fp-growth代码每次在执行时可能会出现找出的频繁项集不一致的情况,这是因为每次执行代码时建的FP树可能不一致。

加一行代码即可解决这个问题(原代码第59行,位于 createFPTree 中):先对 frequentItemsInRecord 按 key 的 ASCII 码排序,然后再按 key 的支持度(即 value 值)降序排列。由于 Python 的排序是稳定的,支持度相同的项会保持第一次按 key 排好的相对顺序。

之所以这么做,是因为 frequentItemsInRecord 中可能会出现支持度相同的项:如果不先按 ASCII 码排一次,每次执行代码时 orderedFrequentItems(原代码第60行)中相同支持度的项出现的顺序就可能不一致,从而造成每次建的 FP 树不一致,导致找出的频繁项集不一致。

import pprint

def loadDataSet():
    """Return the built-in sample transactions.

    Returns:
        A list of six transactions, each a list of item-name strings.
    """
    return [
        ['bread', 'milk', 'vegetable', 'fruit', 'eggs'],
        ['noodle', 'beef', 'pork', 'water', 'socks', 'gloves', 'shoes', 'rice'],
        ['socks', 'gloves'],
        ['bread', 'milk', 'shoes', 'socks', 'eggs'],
        ['socks', 'shoes', 'sweater', 'cap', 'milk', 'vegetable', 'gloves'],
        ['eggs', 'bread', 'milk', 'fish', 'crab', 'shrimp', 'rice'],
    ]

def transfer2FrozenDataSet(dataSet):
    """Convert a list of transactions into a ``{frozenset(items): count}`` dict.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A dict mapping each distinct transaction (as a frozenset) to the
        number of times it occurs in ``dataSet``.
    """
    frozenDataSet = {}
    for elem in dataSet:
        key = frozenset(elem)
        # Accumulate instead of assigning 1, so repeated transactions keep
        # their full weight when supports are computed later.
        frozenDataSet[key] = frozenDataSet.get(key, 0) + 1
    return frozenDataSet

class TreeNode:
    """A single node of an FP-tree."""

    def __init__(self, nodeName, count, nodeParent):
        """Create a node.

        Args:
            nodeName: The item this node represents.
            count: Initial occurrence count for the node.
            nodeParent: Parent ``TreeNode``, or ``None`` for the root.
        """
        self.nodeName = nodeName
        self.count = count
        self.nodeParent = nodeParent
        # Next node carrying the same item; forms the header-table chain.
        self.nextSimilarItem = None
        # Child nodes keyed by item name.
        self.children = {}

    def increaseC(self, count):
        """Add ``count`` to this node's occurrence count."""
        self.count += count

def createFPTree(frozenDataSet, minSupport):
    """Build an FP-tree and its header table from weighted transactions.

    Args:
        frozenDataSet: Dict mapping ``frozenset(items)`` to its count.
        minSupport: Minimum total count an item needs to be kept.

    Returns:
        ``(fptree, headPointTable)`` where ``headPointTable`` maps each
        frequent item to ``[support, first_node_or_None]``; or
        ``(None, None)`` when no item reaches ``minSupport``.
    """
    # First scan: total support of every item.
    itemSupport = {}
    for items, count in frozenDataSet.items():
        for item in items:
            itemSupport[item] = itemSupport.get(item, 0) + count

    # Keep only frequent items; each entry is [support, head-of-chain].
    headPointTable = {
        item: [support, None]
        for item, support in itemSupport.items()
        if support >= minSupport
    }
    if not headPointTable:
        return None, None

    fptree = TreeNode("null", 1, None)

    # Second scan: insert the frequent items of each record into the tree.
    for items, count in frozenDataSet.items():
        frequentItemsInRecord = {
            item: headPointTable[item][0]
            for item in items
            if item in headPointTable
        }
        if frequentItemsInRecord:
            # Sort by item name first, then (stably) by support descending,
            # so ties on support always come out in the same order and the
            # resulting tree is identical from run to run.
            frequentItemsInRecord = sorted(frequentItemsInRecord.items(), key=lambda v: v[0])
            orderedFrequentItems = [
                v[0]
                for v in sorted(frequentItemsInRecord, key=lambda v: v[1], reverse=True)
            ]
            updateFPTree(fptree, orderedFrequentItems, headPointTable, count)

    return fptree, headPointTable

def updateFPTree(fptree, orderedFrequentItems, headPointTable, count):
    """Insert one ordered transaction into the tree rooted at ``fptree``.

    Args:
        fptree: Node under which the path is inserted.
        orderedFrequentItems: Items of the transaction, already ordered.
        headPointTable: Dict ``item -> [support, first_node_or_None]``;
            updated in place when a new node is created.
        count: Weight of the transaction.
    """
    node = fptree
    # Walk down the tree one item at a time (iterative form of the
    # "handle first item, recurse on the rest" scheme).
    for item in orderedFrequentItems:
        child = node.children.get(item)
        if child is not None:
            child.increaseC(count)
        else:
            child = TreeNode(item, count, node)
            node.children[item] = child
            # Only a newly created node is linked into the header chain;
            # linking an existing node again would create a cycle.
            headEntry = headPointTable[item]
            if headEntry[1] is None:
                headEntry[1] = child
            else:
                updateHeadPointTable(headEntry[1], child)
        node = child

def updateHeadPointTable(headPointBeginNode, targetNode):
    """Append ``targetNode`` to the end of a same-item node chain.

    Args:
        headPointBeginNode: First node of the chain (must not be ``None``).
        targetNode: Node to link after the current last node.
    """
    tail = headPointBeginNode
    while tail.nextSimilarItem is not None:
        tail = tail.nextSimilarItem
    tail.nextSimilarItem = targetNode

def mineFPTree(headPointTable, prefix, frequentPatterns, minSupport):
    """Recursively mine frequent patterns from a header table.

    For each item in the header table (lowest support first), record the
    pattern ``prefix + item``, build the conditional FP-tree from the item's
    prefix paths, and recurse into it.

    Args:
        headPointTable: Dict ``item -> [support, first_node]``. ``None`` or
            empty means there is nothing to mine.
        prefix: Set of items already fixed for the current recursion branch.
        frequentPatterns: Output dict ``frozenset(pattern) -> support``,
            filled in place.
        minSupport: Minimum support passed on to ``createFPTree``.
    """
    # createFPTree returns (None, None) when nothing is frequent; treat that
    # like an empty table instead of failing on ``None.items()``.
    if not headPointTable:
        return

    headPointItems = [
        entry[0]
        for entry in sorted(headPointTable.items(), key=lambda entry: entry[1][0])
    ]
    for headPointItem in headPointItems:
        newPrefix = prefix | {headPointItem}
        frequentPatterns[frozenset(newPrefix)] = headPointTable[headPointItem][0]

        prefixPath = getPrefixPath(headPointTable, headPointItem)
        if not prefixPath:
            continue
        _, conditionalHeadPointTable = createFPTree(prefixPath, minSupport)
        if conditionalHeadPointTable is not None:
            mineFPTree(conditionalHeadPointTable, newPrefix, frequentPatterns, minSupport)

def getPrefixPath(headPointTable, headPointItem):
    """Collect the conditional pattern base of ``headPointItem``.

    Follows the item's same-item node chain and, for every node, records the
    set of its ancestors together with the node's count.

    Args:
        headPointTable: Dict ``item -> [support, first_node_or_None]``.
        headPointItem: Item whose prefix paths are wanted.

    Returns:
        Dict ``frozenset(ancestor_items) -> count``; nodes with no ancestors
        contribute nothing.
    """
    prefixPath = {}
    node = headPointTable[headPointItem][1]
    while node is not None:
        prefixs = ascendTree(node)
        if prefixs:
            key = frozenset(prefixs)
            # Sum rather than overwrite: should two chain nodes ever share
            # the same ancestor set, neither count is lost.
            prefixPath[key] = prefixPath.get(key, 0) + node.count
        node = node.nextSimilarItem
    return prefixPath

def ascendTree(treeNode):
    """Return the names of ``treeNode``'s ancestors, nearest first.

    The walk stops before the root, which is recognised by having the name
    ``'null'`` (or by a node having no parent). ``treeNode`` itself is not
    included.

    Args:
        treeNode: Node to start from.

    Returns:
        List of ancestor ``nodeName`` values, excluding the root.
    """
    prefixs = []
    parent = treeNode.nodeParent
    while parent is not None and parent.nodeName != 'null':
        prefixs.append(parent.nodeName)
        parent = parent.nodeParent
    return prefixs

def rulesGenerator(frequentPatterns, minConf, rules):
    """Generate association rules from every multi-item frequent pattern.

    Args:
        frequentPatterns: Dict ``frozenset(pattern) -> support``.
        minConf: Minimum confidence a rule must reach.
        rules: Output list, extended in place with
            ``(antecedent, consequent, confidence)`` tuples.
    """
    for frequentset in frequentPatterns:
        # Single-item patterns cannot be split into antecedent/consequent.
        if len(frequentset) > 1:
            getRules(frequentset, frequentset, rules, frequentPatterns, minConf)

def removeStr(set, str):
    """Return a frozenset of the elements of ``set`` that differ from ``str``.

    The parameter names shadow the built-ins ``set`` and ``str``; they are
    kept unchanged so existing keyword callers continue to work.

    Args:
        set: Iterable of elements.
        str: Element to leave out.

    Returns:
        A new ``frozenset`` without ``str``.
    """
    return frozenset(elem for elem in set if elem != str)

def getRules(frequentset, currentset, rules, frequentPatterns, minConf):
    """Derive rules ``antecedent -> frequentset - antecedent`` recursively.

    Each element of ``currentset`` is dropped in turn to form a candidate
    antecedent. A candidate reaching ``minConf`` is recorded (once) and,
    when it still has at least two items, is shrunk further.

    Args:
        frequentset: The full frequent pattern the rules are derived from.
        currentset: Current candidate antecedent superset (non-empty).
        rules: Output list of ``(antecedent, consequent, confidence)``
            tuples, extended in place.
        frequentPatterns: Dict ``frozenset(pattern) -> support``; must hold
            ``frequentset`` and every antecedent that gets examined.
        minConf: Minimum confidence a rule must reach.
    """
    for frequentElem in currentset:
        subSet = frozenset(currentset) - {frequentElem}
        confidence = frequentPatterns[frequentset] / frequentPatterns[subSet]
        if confidence < minConf:
            # Candidates below the threshold are not shrunk any further.
            continue

        consequent = frequentset - subSet
        # The same antecedent can be reached by removing elements in a
        # different order, so skip rules that were already recorded.
        alreadyKnown = any(
            rule[0] == subSet and rule[1] == consequent for rule in rules
        )
        if not alreadyKnown:
            rules.append((subSet, consequent, confidence))

        if len(subSet) >= 2:
            getRules(frequentset, subSet, rules, frequentPatterns, minConf)

if __name__ == '__main__':
    # Demo: mine the sample data set, then derive association rules.
    dataSet = loadDataSet()
    frozenDataSet = transfer2FrozenDataSet(dataSet)

    minSupport = 3
    fptree, headPointTable = createFPTree(frozenDataSet, minSupport)

    frequentPatterns = {}
    prefix = set()
    mineFPTree(headPointTable, prefix, frequentPatterns, minSupport)
    print("frequent patterns:")
    pprint.pprint(frequentPatterns)

    minConf = 0.6
    rules = []
    rulesGenerator(frequentPatterns, minConf, rules)
    print("association rules:")
    pprint.pprint(rules)
    print('rules num:', len(rules))

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值