Python编写FP-growth进行关联性分析实例讲解

该博客详细讲解了如何使用Python实现FP-growth算法进行关联性分析。首先,通过遍历数据集计算元素项的频度并构建头指针表。接着,基于最小支持度筛选元素项并创建FP树。然后,通过更新树结构和头指针来构建FP树。最后,通过递归方式挖掘频繁项集。提供的示例代码展示了整个过程。
摘要由CSDN通过智能技术生成

Python编写FP-growth进行关联性分析实例讲解

第一次遍历数据集:

- 计算单个元素项出现的频度,保存到头指针表
- 根据最小频数(最小支持度)筛选单个元素项

第二次遍历数据集:

- 将每条数据的元素按照头指针表中的频度从高到低排序
- 用排序后的元素创建(更新)FP树

更新FP树(依次处理排序后的每一项):

- 当前项是否在树的子节点中
  - 如果不在就创建新节点
  - 如果在就增加该节点的计数
- 新建节点时,检查该项的头指针是否为None
  - 如果为None则指向当前节点
  - 如果不为None则沿指针找到最后一个节点,并将最后一个节点指向当前节点

import numpy as np

def loadSimpDat():
    """Return the small hard-coded transaction list used by the demo.

    Returns:
        A list of six transactions; each transaction is a list of
        single-character item names.
    """
    simpDat = [
        ['r', 'z', 'h', 'j', 'p'],
        ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
        ['z'],
        ['r', 'x', 'n', 'o', 's'],
        ['y', 'r', 'x', 'z', 'q', 't', 'p'],
        ['y', 'z', 'x', 'e', 'q', 's', 't', 'm'],
    ]
    return simpDat

def createInitSet(dataSet):
    """Collapse a list of transactions into a ``{frozenset: count}`` dict.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A dict mapping ``frozenset(transaction)`` to the number of times
        that exact item set occurs in ``dataSet``.
    """
    retDict = {}
    for trans in dataSet:
        # Build the key once; identical transactions share one entry.
        key = frozenset(trans)
        retDict[key] = retDict.get(key, 0) + 1
    return retDict

class treeNode:
    """A single node of an FP-tree."""

    def __init__(self, nameValue, numOccur, parentNode):
        """Create a node.

        Args:
            nameValue: Item name stored in this node.
            numOccur: Initial count for this node.
            parentNode: Parent node, or ``None`` for the root.
        """
        self.name = nameValue
        self.count = numOccur
        # Link to the next node carrying the same item name (set by
        # updateHeader); None while this node is the end of the chain.
        self.nodeLink = None
        self.parent = parentNode
        # Child nodes keyed by item name.
        self.children = {}

    def inc(self, numOccur):
        """Add ``numOccur`` to this node's count."""
        self.count += numOccur

    def disp(self, ind=1):
        """Print this subtree, one node per line, indented by depth.

        Args:
            ind: Number of leading spaces for this node; children are
                printed with ``ind + 1``.
        """
        print(' '*ind, self.name, ' ', self.count)
        for child in self.children.values():
            child.disp(ind + 1)

def createTree(dataSet, minSup=1):
    """Build an FP-tree and its header table from counted transactions.

    Args:
        dataSet: Dict mapping ``frozenset`` of items to the number of
            times that transaction occurs (the shape produced by
            createInitSet and findPrefixPath).
        minSup: Minimum total count an item needs to be kept.

    Returns:
        ``(retTree, headerTable)`` where ``retTree`` is the root
        ``treeNode`` and ``headerTable`` maps each kept item to
        ``[count, firstNode]``. Returns ``(None, None)`` when no item
        reaches ``minSup``.
    """
    # First pass: total support of every single item.
    itemCounts = {}
    for trans, count in dataSet.items():
        for item in trans:
            itemCounts[item] = itemCounts.get(item, 0) + count

    # Keep only items that meet the minimum support. Each entry holds the
    # support and the head of that item's node-link chain.
    headerTable = {
        item: [support, None]
        for item, support in itemCounts.items()
        if support >= minSup
    }
    if not headerTable:
        return None, None

    # One global rank (highest support first). sorted() is stable, so items
    # with equal support get a single fixed relative order that is shared by
    # every transaction; ordering ties per transaction could otherwise place
    # the same items on different paths.
    rank = {
        item: pos
        for pos, item in enumerate(
            sorted(headerTable, key=lambda it: headerTable[it][0], reverse=True)
        )
    }

    # Second pass: insert each transaction's frequent items in rank order.
    retTree = treeNode('Null Set', 0, None)
    for transSet, count in dataSet.items():
        orderedItems = sorted(
            (item for item in transSet if item in rank),
            key=rank.__getitem__,
        )
        if orderedItems:
            updateTree(orderedItems, retTree, headerTable, count)
    return retTree, headerTable

def updateTree(items, inTree, headerTable, count):
    """Insert one ordered transaction into the FP-tree.

    Walks down from ``inTree`` following ``items`` in order, incrementing
    nodes that already exist and creating the ones that do not.

    Args:
        items: Item names of one transaction, already sorted in tree order.
            An empty sequence leaves the tree unchanged.
        inTree: Node under which the path is inserted.
        headerTable: Dict mapping item name to ``[count, firstNode]``; the
            node-link chain of an item is extended when a node is created.
        count: Number of occurrences of this transaction.
    """
    node = inTree
    for item in items:
        child = node.children.get(item)
        if child is not None:
            child.inc(count)
        else:
            child = treeNode(item, count, node)
            node.children[item] = child
            # Only a newly created node is added to the item's link chain;
            # re-linking an existing node would make the chain loop on itself.
            if headerTable[item][1] is None:
                headerTable[item][1] = child
            else:
                updateHeader(headerTable[item][1], child)
        node = child

def updateHeader(nodeToTest, targetNode):
    """Append ``targetNode`` to the end of a ``nodeLink`` chain.

    Args:
        nodeToTest: Any node of the chain; the chain is followed from here
            until a node whose ``nodeLink`` is ``None``.
        targetNode: Node to attach as the new last element.
    """
    while nodeToTest.nodeLink is not None:
        nodeToTest = nodeToTest.nodeLink
    nodeToTest.nodeLink = targetNode

def ascendTree(leafNode, prefixPath):
    """Collect item names from ``leafNode`` up to, but excluding, the root.

    Args:
        leafNode: Node to start from; it needs ``name`` and ``parent``
            attributes.
        prefixPath: List that receives the names, starting with
            ``leafNode`` itself. It is modified in place.
    """
    # Iterative walk so a very deep path cannot hit the recursion limit.
    node = leafNode
    while node.parent is not None:
        prefixPath.append(node.name)
        node = node.parent

def findPrefixPath(basePat, treeNode):
    """Collect the conditional pattern base for one item.

    Follows the ``nodeLink`` chain starting at ``treeNode`` and, for every
    node on it, records the set of items on the path above that node.

    Args:
        basePat: Item being processed (not used by the computation).
        treeNode: First node of the item's ``nodeLink`` chain, or ``None``.

    Returns:
        Dict mapping ``frozenset`` of prefix items to the summed count of
        the nodes that have that prefix. Nodes directly under the root
        contribute nothing.
    """
    condPats = {}
    while treeNode is not None:
        prefixPath = []
        ascendTree(treeNode, prefixPath)
        # prefixPath[0] is the node's own item; the rest is its prefix.
        if len(prefixPath) > 1:
            key = frozenset(prefixPath[1:])
            # Accumulate so that two nodes with the same prefix set do not
            # overwrite each other's count.
            condPats[key] = condPats.get(key, 0) + treeNode.count
        treeNode = treeNode.nodeLink
    return condPats

def mineTree(inTree, headerTable, minSup, preFix, freqItemList):
    """Recursively mine frequent item sets from an FP-tree.

    Args:
        inTree: Root of the tree being mined (not read directly; the
            header table's node links are used instead).
        headerTable: Dict mapping item to ``[count, firstNode]`` for the
            tree being mined.
        minSup: Minimum support passed on to createTree for every
            conditional tree.
        preFix: Set of items already fixed on the way down the recursion.
        freqItemList: List that receives every frequent item set found;
            it is modified in place.
    """
    # Process items from the lowest count to the highest.
    bigL = [v[0] for v in sorted(headerTable.items(), key=lambda p: p[1][0])]
    for basePat in bigL:
        newFreqSet = preFix.copy()
        newFreqSet.add(basePat)
        freqItemList.append(newFreqSet)
        condPattBases = findPrefixPath(basePat, headerTable[basePat][1])
        myCondTree, myHead = createTree(condPattBases, minSup)
        # createTree returns (None, None) when nothing is left to mine.
        if myHead is not None:
            mineTree(myCondTree, myHead, minSup, newFreqSet, freqItemList)

# Demo: count the sample transactions, build the FP-tree with a minimum
# support of 3, then mine and print the frequent item sets.
data = createInitSet(loadSimpDat())
tree, header = createTree(data, 3)
freqItems = []
mineTree(tree, header, 3, set(), freqItems)
print(freqItems)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值