#库导入
import numpy as np
from collections import defaultdict
from operator import itemgetter
def Affinity_Analysis(dataset, features, nums_feature):
    """Print pairwise association rules ranked by support and by confidence.

    Each row of ``dataset`` is one transaction and each column one item. A
    rule ``premise -> conclusion`` is considered for every ordered pair of
    distinct columns among the first ``nums_feature`` columns.

    Args:
        dataset: Iterable of rows indexable by column position. A premise
            column is treated as absent when its value equals 0; a
            conclusion counts as present only when its value equals 1.
        features: Sequence of display names, indexed by column position.
        nums_feature: Number of leading columns to analyse.

    Returns:
        None. Two rankings are written to standard output: support (number
        of rows in which premise and conclusion both occur) and confidence
        (support divided by the number of rows containing the premise).
    """
    rule_hits = defaultdict(int)
    premise_counts = defaultdict(int)
    for sample in dataset:
        for premise in range(nums_feature):
            if sample[premise] == 0:
                continue
            premise_counts[premise] += 1
            for conclusion in range(nums_feature):
                if conclusion != premise and sample[conclusion] == 1:
                    rule_hits[(premise, conclusion)] += 1

    # Support is the raw co-occurrence count of each rule.
    support = rule_hits

    # Every rule in rule_hits has a premise seen at least once, so the
    # denominator is never zero.
    confidence = {
        rule: hits / premise_counts[rule[0]]
        for rule, hits in rule_hits.items()
    }

    # sorted() is stable, so ties keep the order in which rules were first
    # encountered in the data.
    sorted_support = sorted(support.items(), key=itemgetter(1), reverse=True)
    sorted_confidence = sorted(
        confidence.items(), key=itemgetter(1), reverse=True
    )

    print('\n支持度由高到低:')
    for (premise, conclusion), hits in sorted_support:
        print("[{0} {1}]\t- Support: {2}".format(
            features[premise], features[conclusion], hits))

    print('\n置信度由高到低:')
    for (premise, conclusion), value in sorted_confidence:
        # This ranking holds confidence values, so label them as such.
        print("[{0} {1}]\t- Confidence: {2:.3f}".format(
            features[premise], features[conclusion], value))


if __name__ == "__main__":
    # Load the 0/1 matrix: one row per customer, one column per product.
    dataset_filename = "数据包路径"
    # ndmin=2 keeps a single-row file two-dimensional so that iterating the
    # array still yields rows rather than scalars.
    X = np.loadtxt(dataset_filename, ndmin=2)
    # Column names, in the same order as the columns of the matrix.
    features = ['bread', 'milk', 'cheese', 'apple', 'banana']
    # Derive the column count from the names so the two cannot drift apart.
    nums_feature = len(features)
    Affinity_Analysis(X, features, nums_feature)
# Data mining: affinity analysis function (general-purpose).
# Source page footer: "latest recommended article published 2024-10-17 20:17:57".