在运行《机器学习实战》第12章-构建FP树时遇到的小状况
def createtree(dataset, minsup=1):
    """Build an FP-tree and header table from a transaction -> count mapping.

    This is the listing as first run; it is kept unchanged to show the
    failure. Under Python 3 the pruning loop below raises
    ``RuntimeError: dictionary changed size during iteration``.

    Args:
        dataset: Mapping whose keys are transactions (iterables of hashable
            items) and whose values are the counts added for each item.
        minsup: Minimum accumulated count an item needs to be kept.

    Returns:
        ``(rettree, headertable)``, or ``(None, None)`` when no item
        reaches ``minsup``.
    """
    # First pass: accumulate the total count of every item.
    headertable = {}
    for trans in dataset:
        for item in trans:
            headertable[item] = headertable.get(item, 0) + dataset[trans]
    # NOTE: this iterates the live keys() view while deleting entries from the
    # same dict, which is what triggers the RuntimeError in Python 3.
    for k in headertable.keys():
        if headertable[k] < minsup:
            del(headertable[k]) # remove items that do not meet the minimum support
    freqitemset = set(headertable.keys())
    if len(freqitemset) == 0:
        return None, None # exit if no item meets the requirement
    # Turn each entry into [count, None]; the None slot is presumably filled in
    # by updatetree (defined elsewhere) -- TODO confirm.
    for k in headertable:
        headertable[k] = [headertable[k], None]
    # Root node of the tree; treenode is defined elsewhere.
    rettree = treenode('Null Set', 1, None)
    # Second pass: insert each transaction's frequent items into the tree.
    for transet, count in dataset.items():
        locald = {}
        for item in transet:
            if item in freqitemset:
                locald[item] = headertable[item][0] # sort each transaction's items by global frequency
        if len(locald) > 0:
            # Item names only, ordered by descending global count.
            ordereditems = [v[0] for v in sorted(locald.items(), key=lambda p: p[1], reverse=True)]
            updatetree(ordereditems, rettree, headertable, count) # populate the tree with the sorted frequent items
    return rettree, headertable
运行结果如下:RuntimeError: dictionary changed size during iteration
原因:Python 3 中 headertable.keys() 返回的是动态视图而非列表,遍历字典的过程中不能增加或删除键(即不能改变字典的大小);仅修改已有键对应的值是允许的。
所以将 `for k in headertable.keys():` 改成 `for k in list(headertable.keys()):` 即可——list() 会先生成键的一份快照,循环遍历的是这份副本,此时再删除字典中的键就不会报错。修改后的完整代码如下:
def createtree(dataset, minsup=1):
    """Build an FP-tree and header table from a transaction -> count mapping.

    Args:
        dataset: Mapping whose keys are transactions (iterables of hashable
            items, e.g. frozensets) and whose values are occurrence counts.
        minsup: Minimum accumulated count an item needs to be kept.

    Returns:
        ``(rettree, headertable)`` where ``headertable`` maps every frequent
        item to ``[count, None]``, or ``(None, None)`` when no item reaches
        ``minsup``. The ``None`` slot is a placeholder passed on to
        ``updatetree`` (defined elsewhere).
    """
    # First pass: accumulate the total count of every item.
    headertable = {}
    for trans, count in dataset.items():
        for item in trans:
            headertable[item] = headertable.get(item, 0) + count

    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live keys() view raises RuntimeError in Python 3.
    for k in list(headertable.keys()):
        if headertable[k] < minsup:
            del headertable[k]  # remove items below the minimum support
    if not headertable:
        return None, None  # exit if no item meets the requirement

    for k in headertable:
        headertable[k] = [headertable[k], None]

    # Rank the frequent items once, by descending global count. Using a single
    # shared ranking (instead of sorting each transaction by count alone)
    # guarantees that items with equal counts appear in the same relative
    # order in every transaction, so shared prefixes always share a branch.
    by_support = sorted(headertable, key=lambda k: headertable[k][0], reverse=True)
    rank = {item: position for position, item in enumerate(by_support)}

    rettree = treenode('Null Set', 1, None)
    # Second pass: insert each transaction's frequent items into the tree.
    for transet, count in dataset.items():
        ordereditems = sorted(
            (item for item in transet if item in rank),
            key=rank.__getitem__,
        )
        if ordereditems:
            # Populate the tree with the ordered frequent items.
            updatetree(ordereditems, rettree, headertable, count)
    return rettree, headertable