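# Support: the number of times Xi and Xj appear together.
# Confidence: of the people who bought Xi, the fraction who also bought Xj, i.e. count(Xi, Xj) / count(Xi).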
import numpy as np
from collections import defaultdict
'''num_apple_purchases=0
for sample in X:
if sample[3]==1: #一个人买了苹果
num_apple_purchases+=1
print num_apple_purchases'''
def calS(X,n_features):
    """Compute support and confidence for every ordered pair of items.

    Each sample in X is treated as one purchase record and each index in
    ``range(n_features)`` as one item.  For an ordered pair
    ``(premise, conclusion)``:

    * support    -- number of samples where the premise entry is non-zero
                    and the conclusion entry equals 1.
    * confidence -- support divided by the number of samples where the
                    premise entry is non-zero.

    Args:
        X: iterable of samples; each sample must be indexable by every
            feature index in ``range(n_features)``.
        n_features: number of items (columns) to consider, both as premise
            and as conclusion.

    Returns:
        A ``(support, confidence)`` tuple.  ``support`` is a
        ``defaultdict(int)`` and ``confidence`` a ``defaultdict(float)``,
        both keyed by ``(premise, conclusion)``; only pairs that were seen
        together at least once are stored, so any other pair reads as 0.
    """
    pair_counts = defaultdict(int)   # times premise and conclusion were bought together
    item_counts = defaultdict(int)   # times each item was bought at all

    for sample in X:
        # Iterate over every feature as a premise (previously hard-coded to 5,
        # which broke on datasets with a different number of columns).
        for premise in range(n_features):
            if sample[premise] == 0:
                continue
            item_counts[premise] += 1
            # Check whether this record also contains each other item.
            for conclusion in range(n_features):
                if premise != conclusion and sample[conclusion] == 1:
                    pair_counts[(premise, conclusion)] += 1

    support = pair_counts
    confidence = defaultdict(float)
    for rule, together in pair_counts.items():
        # float() keeps true division on Python 2, where int / int truncates.
        # item_counts[rule[0]] is always >= 1 here because the pair was counted.
        confidence[rule] = float(together) / item_counts[rule[0]]
    return support, confidence
def print_rule(premise,conclusion,support,confidence,features):
    """Print one rule as three lines: description, support, confidence.

    Args:
        premise: index of the item on the "if" side of the rule.
        conclusion: index of the item on the "then" side of the rule.
        support: mapping from ``(premise, conclusion)`` to its support value.
        confidence: mapping from ``(premise, conclusion)`` to its confidence;
            printed with three decimal places.
        features: sequence of item names, indexed by item index.
    """
    print("Rule:If a person buys {0} they will also buy {1}".format(
        features[premise], features[conclusion]))
    rule_key = (premise, conclusion)
    # Each metric is looked up and printed in turn, support before confidence.
    for template, table in (("-Support:{0}", support),
                            ("-Confidence:{0:.3f}", confidence)):
        print(template.format(table[rule_key]))
if __name__ == '__main__':
    # Script entry point: load the purchase matrix, compute support and
    # confidence for all item pairs, dump them, then pretty-print one rule.
    X = np.loadtxt("D:\\python27\\study\\code\\Chapter1\\affinity_dataset.txt")
    # Original note says the dataset is 100 rows by 5 columns; n_features is
    # taken from the loaded array rather than assumed.
    n_samples, n_features = X.shape
    premise = 1     # arbitrarily chosen item index ("milk" in `features`)
    conclusion = 3  # arbitrarily chosen item index ("apples" in `features`)
    support, confidence = calS(X, n_features)
    features = ["bread", "milk", "cheese", "apples", "bananas"]
    # Single-argument call form: same "<support> <confidence>" output as the
    # old `print a, b` statement on Python 2, and valid syntax on Python 3.
    print("{0} {1}".format(support, confidence))
    print_rule(premise, conclusion, support, confidence, features)