Python中亲和度分析时defaultdict的一种典型算法_python dict和defaultdict效率对比-CSDN博客

在做亲和度分析时，需要统计用户在选择产品A的同时也选择产品B的支持度，此时可以使用 collections 模块中的 defaultdict 类来计数。

通过 defaultdict(int) 可以生成一个默认值为 0 的字典：访问尚不存在的 key 时，会自动以 0 作为初始值。此处的 key 是由前提 A 与结论 B 组成的元组，即 key = (A, B)。

eg.

import numpy as np

from collections import defaultdict

# Purchase matrix: one row per order, one column per product.
# A 1 means the product was bought in that order, a 0 means it was not.
X = [
    [0, 1, 0, 0, 0],
    [1, 1, 0, 0, 0],
    [0, 0, 1, 0, 1],
    [1, 1, 0, 0, 0],
    [0, 0, 1, 1, 1],
    [0, 1, 0, 0, 0],
    [0, 0, 1, 1, 1],
    [0, 0, 1, 1, 0],
    [0, 1, 0, 1, 0],
    [0, 1, 0, 0, 1],
    [0, 0, 0, 1, 0],
    [1, 0, 1, 0, 0],
    [1, 0, 0, 0, 1],
    [0, 1, 1, 0, 0],
    [0, 0, 1, 0, 1],
    [0, 1, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 1, 1],
    [0, 1, 0, 0, 1],
    [1, 1, 0, 1, 0],
]

# Display name of each column of X, in column order.
goods = ["A", "B", "C", "D", "E"]

"""计算支持度与可信度"""
# Counters keyed by a (premise, conclusion) pair of column indices.
valid_rules = defaultdict(int)    # premise bought AND conclusion bought
invalid_rules = defaultdict(int)  # premise bought but conclusion NOT bought
# Counter keyed by a single column index: orders that contain that product.
num_occurances = defaultdict(int)

# The column indices are taken from the first row and reused for every row.
columns = range(len(X[0]))

for good in X:
    for premise in columns:
        if good[premise] == 0:
            # Product not in this order, so it cannot act as a premise.
            continue
        num_occurances[premise] += 1
        for conclusion in columns:
            if premise == conclusion:
                # A product trivially implies itself; skip that rule.
                continue
            # Choose which counter this (premise, conclusion) pair updates.
            tally = valid_rules if good[conclusion] == 1 else invalid_rules
            tally[(premise, conclusion)] += 1

# "Support" here is the raw co-occurrence count; it is the very same dict
# object as valid_rules, not a copy.
support = valid_rules

# Confidence of a rule = times the rule held / times its premise occurred.
confidence = defaultdict(float)
for rule, hits in valid_rules.items():
    premise, conclusion = rule
    confidence[rule] = hits / num_occurances[premise]

def print_rule(premise, conclusion, support, confidence, features):
    """Print one association rule together with its support and confidence.

    Args:
        premise: Index into ``features`` of the product assumed to be bought.
        conclusion: Index into ``features`` of the product predicted to be
            bought as well.
        support: Mapping from a ``(premise, conclusion)`` tuple to the rule's
            support value.
        confidence: Mapping from a ``(premise, conclusion)`` tuple to the
            rule's confidence; printed with three decimal places.
        features: Sequence of display names, indexed by ``premise`` and
            ``conclusion``.
    """
    rule = (premise, conclusion)
    names = (features[premise], features[conclusion])
    print("rule:if a persion buys {0} they will also buy {1}".format(*names))
    print("-Support:{0}".format(support[rule]))
    print("-Confidence:{0:.3f}".format(confidence[rule]))
    

"""选购了A的用户同时选择B的支持度与置信度"""
# Report the rule "buys goods[1] -> also buys goods[2]", i.e. B -> C.
premise, conclusion = 1, 2
print_rule(premise, conclusion, support, confidence, goods)

最终得到输出结果如下：
rule:if a persion buys B they will also buy C
-Support:1
-Confidence:0.091