FP算法挖掘疾病的关联规则

实验题目:

某医院为了研究几种内科疾病的关联,随机抽取了七十名病人的档案,得到的情况记录在 数据集.txt 中,其中一部分数据如下图所示:

Cardiacfailure表示心力衰竭,Myocardialinfarction表示心肌梗塞,uremia表示尿毒症,diabetes表示糖尿病,Renalfailure表示肾衰竭,Other表示其他疾病。

要求:请自行根据数据的实际情况指定最小支持度与最小置信度,并采用FP增长算法挖掘这些疾病之间的关联规则。

一、实验代码

import pprint
def loadDataSet(path="数据集.txt", encoding=None):
    """Read the transaction file and return one item list per record.

    Every non-blank line is expected to look like
    ``<record-id> item item ...``; the first whitespace-separated token
    (the record id) is discarded and the remaining tokens are the items.

    Args:
        path: File to read. Defaults to the file name the script has
            always used, so ``loadDataSet()`` keeps working unchanged.
        encoding: Passed straight to ``open``; ``None`` keeps the
            platform default, as before.

    Returns:
        A list with one list of item-name strings per record, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
    """
    retData = []
    # The context manager guarantees the handle is closed, even on error.
    with open(path, "r", encoding=encoding) as dataFile:
        for line in dataFile:
            # split() without an argument collapses any run of whitespace,
            # so tabs or doubled spaces never produce empty-string "items".
            tokens = line.split()
            if not tokens:
                continue  # blank line: there is no record to load
            retData.append(tokens[1:])
    return retData
def transfer2FrozenDataSet(dataSet):
    """Collapse a list of transactions into ``{frozenset(items): count}``.

    Identical transactions (same items, any order) share one key whose
    value is the number of times that transaction occurs. Keeping the
    multiplicity matters: the tree builder adds this value to every item's
    support, so storing a constant 1 would under-count repeated records.

    Args:
        dataSet: Iterable of item iterables, one per transaction.

    Returns:
        A dict mapping each distinct transaction (as a frozenset) to its
        number of occurrences.
    """
    frozenDataSet = {}
    for elem in dataSet:
        key = frozenset(elem)
        # Accumulate instead of overwriting so duplicates are counted.
        frozenDataSet[key] = frozenDataSet.get(key, 0) + 1
    return frozenDataSet
class TreeNode:
    """One node of the FP-tree."""

    def __init__(self, nodeName, count, nodeParent):
        """Store the item name, its count and the parent link.

        A new node starts with no children and no successor in the chain
        of nodes carrying the same item.
        """
        self.nodeName = nodeName
        self.count = count
        self.nodeParent = nodeParent
        # Next node holding the same item name (linked from the header table).
        self.nextSimilarItem = None
        # Child nodes keyed by item name.
        self.children = {}

    def increaseC(self, count):
        """Add ``count`` to this node's counter."""
        self.count = self.count + count

    def disp(self, ind=1):
        """Print this subtree as text, one node per line.

        ``ind`` is the number of leading spaces for this node; each level
        below is printed with one more.
        """
        padding = ' ' * ind
        print(padding, self.nodeName, ' ', self.count)
        deeper = ind + 1
        for node in self.children.values():
            node.disp(deeper)
def createFPTree(frozenDataSet, minSupport):
    """Build an FP-tree and its header table from weighted transactions.

    Args:
        frozenDataSet: Dict mapping each transaction (a frozenset of
            items) to the count it contributes.
        minSupport: Minimum total count an item needs to be kept.

    Returns:
        ``(fptree, headPointTable)`` where ``fptree`` is the root
        ``TreeNode`` (named ``"null"``) and ``headPointTable`` maps each
        kept item to ``[total count, first tree node for that item]``.
        Returns ``(None, None)`` when no item reaches ``minSupport``.
    """
    # Pass 1: total count of every item over all transactions.
    itemTotals = {}
    for transaction, weight in frozenDataSet.items():
        for item in transaction:
            itemTotals[item] = itemTotals.get(item, 0) + weight

    # Keep only items meeting the threshold; the node link starts empty.
    headPointTable = {}
    for item, total in itemTotals.items():
        if total >= minSupport:
            headPointTable[item] = [total, None]
    if not headPointTable:
        return None, None

    fptree = TreeNode("null", 1, None)

    # Pass 2: insert each transaction's kept items into the tree.
    for transaction, weight in frozenDataSet.items():
        kept = [item for item in transaction if item in headPointTable]
        if not kept:
            continue
        # Highest total first; equal totals are ordered by item name so the
        # insertion order does not depend on set iteration order.
        kept.sort(key=lambda item: (-headPointTable[item][0], item))
        updateFPTree(fptree, kept, headPointTable, weight)
    return fptree, headPointTable
def updateFPTree(fptree, orderedFrequentItems, headPointTable, count):
    """Insert an ordered item list below ``fptree``, adding ``count``.

    The first item is matched against (or added to) the children of
    ``fptree``; the remaining items are then inserted recursively below
    that child. A newly created node is also appended to the header
    table's chain of nodes for its item.
    """
    head = orderedFrequentItems[0]
    rest = orderedFrequentItems[1:]

    child = fptree.children.get(head)
    if child is not None:
        # Path already exists: only bump the counter.
        child.increaseC(count)
    else:
        child = TreeNode(head, count, fptree)
        fptree.children[head] = child
        # Register the new node in the header table.
        chainStart = headPointTable[head][1]
        if chainStart is None:
            headPointTable[head][1] = child
        else:
            updateHeadPointTable(chainStart, child)

    # Continue with whatever follows the first item.
    if rest:
        updateFPTree(child, rest, headPointTable, count)
def updateHeadPointTable(headPointBeginNode, targetNode):
    """Append ``targetNode`` to the end of a ``nextSimilarItem`` chain.

    Starting at ``headPointBeginNode``, follows ``nextSimilarItem`` links
    until the last node and attaches ``targetNode`` there.
    """
    tail = headPointBeginNode
    while True:
        successor = tail.nextSimilarItem
        if successor is None:
            break
        tail = successor
    tail.nextSimilarItem = targetNode
def mineFPTree(headPointTable, prefix, frequentPatterns, minSupport):
    """Recursively collect frequent patterns from a header table.

    For every item in ``headPointTable`` (lowest count first) the pattern
    ``prefix + item`` is recorded in ``frequentPatterns`` with the item's
    count. The item's prefix paths are then turned into a conditional
    FP-tree, which is mined in turn with the extended prefix.

    Args:
        headPointTable: Header table as produced by ``createFPTree``.
        prefix: Set of items already fixed for this branch; not modified.
        frequentPatterns: Output dict, ``frozenset(pattern) -> count``,
            filled in place.
        minSupport: Threshold forwarded to ``createFPTree``.
    """
    ascendingBySupport = sorted(
        headPointTable.items(), key=lambda entry: entry[1][0]
    )
    for item, (support, _firstNode) in ascendingBySupport:
        newPrefix = prefix.copy()
        newPrefix.add(item)
        frequentPatterns[frozenset(newPrefix)] = support

        conditionalBase = getPrefixPath(headPointTable, item)
        if not conditionalBase:
            continue  # item only occurs directly below the root
        _tree, conditionalTable = createFPTree(conditionalBase, minSupport)
        if conditionalTable is not None:
            mineFPTree(conditionalTable, newPrefix, frequentPatterns, minSupport)
def getPrefixPath(headPointTable, headPointItem):
    """Collect the prefix paths of every tree node for ``headPointItem``.

    Walks the ``nextSimilarItem`` chain that starts at the header-table
    entry of ``headPointItem``. For each node whose ancestor list (from
    ``ascendTree``) is non-empty, the ancestors are stored as a frozenset
    key with that node's count as value.

    Returns:
        Dict ``frozenset(ancestor items) -> node count``; empty when no
        node of the item has ancestors.
    """
    prefixPath = {}
    node = headPointTable[headPointItem][1]
    while True:
        ancestors = ascendTree(node)
        if ancestors:
            prefixPath[frozenset(ancestors)] = node.count
        node = node.nextSimilarItem
        if node is None:
            break
    return prefixPath
def ascendTree(treeNode):
    """Return the names of ``treeNode``'s ancestors, nearest first.

    Climbing stops when there is no parent or when the parent is named
    ``'null'`` (the name the tree root is created with); neither the
    starting node nor that ``'null'`` node is included.
    """
    ancestors = []
    parent = treeNode.nodeParent
    while parent is not None and parent.nodeName != 'null':
        ancestors.append(parent.nodeName)
        parent = parent.nodeParent
    return ancestors
def rulesGenerator(frequentPatterns, minConf, rules):
    """Derive association rules from every pattern with at least two items.

    Args:
        frequentPatterns: Dict ``frozenset(pattern) -> count``.
        minConf: Minimum confidence forwarded to ``getRules``.
        rules: Output list, extended in place by ``getRules``.
    """
    multiItemPatterns = (
        pattern for pattern in frequentPatterns if len(pattern) > 1
    )
    for pattern in multiItemPatterns:
        getRules(pattern, pattern, rules, frequentPatterns, minConf)
def removeStr(set, str):
    """Return a frozenset of the elements of ``set`` that differ from ``str``.

    Note: the parameter names shadow the builtins of the same name inside
    this function; they are kept as-is because they are part of the
    signature.
    """
    return frozenset(member for member in set if member != str)
def getRules(frequentset, currentset, rules, frequentPatterns, minConf):
    """Append rules ``antecedent -> rest of frequentset`` to ``rules``.

    Each element of ``currentset`` is dropped in turn to form a candidate
    antecedent. Its confidence is the count of ``frequentset`` divided by
    the count of the antecedent. Candidates reaching ``minConf`` are
    appended as ``(antecedent, consequent, confidence)`` unless a rule
    with the same antecedent and consequent is already listed, and
    antecedents with two or more items are shrunk further recursively.
    """
    for dropped in currentset:
        antecedent = removeStr(currentset, dropped)
        confidence = frequentPatterns[frequentset] / frequentPatterns[antecedent]
        if confidence >= minConf:
            consequent = frequentset - antecedent
            alreadyListed = any(
                known[0] == antecedent and known[1] == consequent
                for known in rules
            )
            if not alreadyListed:
                rules.append((antecedent, consequent, confidence))
            if len(antecedent) >= 2:
                getRules(frequentset, antecedent, rules, frequentPatterns, minConf)
if __name__ == '__main__':
    # Script entry: load the data, build the FP-tree, mine the frequent
    # patterns and print the association rules derived from them.
    dataSet = loadDataSet()
    frozenDataSet = transfer2FrozenDataSet(dataSet)
    min_sup = 3
    fptree, headPointTable = createFPTree(frozenDataSet, min_sup)
    frequentPatterns = {}
    prefix = set()
    # createFPTree returns (None, None) when no item reaches min_sup; in
    # that case there is no tree to show or mine, and the report below is
    # simply printed with empty results instead of crashing.
    if fptree is not None:
        fptree.disp()
        mineFPTree(headPointTable, prefix, frequentPatterns, min_sup)
    print("")
    print("频繁模式:")
    pprint.pprint(frequentPatterns)
    min_conf = 0.7
    rules = []
    rulesGenerator(frequentPatterns, min_conf, rules)
    print("关联规则:")
    pprint.pprint(rules)
    print('规则总数:', len(rules))

二、实验结果

构建得到的FP-TREE如下:

设置最小支持度和最小置信度:

挖掘得到的频繁模式和关联规则:

 

三、结果分析

根据以上关联规则可得出以下结论:

最小支持度为3,最小置信度为70%时:

  • 糖尿病、尿毒症、肾衰竭三种疾病之间有较强的关联关系,心力衰竭、肾衰竭、糖尿病三种疾病之间有很强的关联关系。
  • 对于糖尿病和尿毒症的疾病人群而言,有80%的患者会并发肾衰竭。
  • 对于尿毒症和肾衰竭的疾病人群而言,也有80%的患者会并发糖尿病。
  • 对于肾衰竭和心力衰竭的疾病人群而言,有100%的患者会并发糖尿病。

附 数据集.txt

1 Cardiacfailure Myocardialinfarction Other
2 Cardiacfailure
3 Cardiacfailure uremia Myocardialinfarction
4 Renalfailure Cardiacfailure diabetes uremia
5 uremia Cardiacfailure Renalfailure diabetes
6 diabetes
7 diabetes Cardiacfailure Myocardialinfarction Other
8 diabetes uremia
9 diabetes
10 Renalfailure diabetes uremia
11 diabetes
12 Cardiacfailure diabetes uremia Renalfailure
13 uremia diabetes Renalfailure Cardiacfailure
14 Renalfailure
15 Other Renalfailure
16 Renalfailure diabetes
17 Myocardialinfarction Cardiacfailure
18 uremia Renalfailure
19 Renalfailure
20 uremia diabetes Renalfailure
21 uremia Renalfailure
22 uremia
23 Cardiacfailure uremia Renalfailure diabetes Myocardialinfarction
24 Renalfailure diabetes uremia Cardiacfailure
25 Myocardialinfarction Cardiacfailure Other
26 diabetes Renalfailure uremia Cardiacfailure
27 uremia Renalfailure diabetes Cardiacfailure Myocardialinfarction
28 uremia diabetes Renalfailure Myocardialinfarction
29 diabetes uremia
30 Myocardialinfarction
31 diabetes Renalfailure uremia Cardiacfailure
32 Cardiacfailure diabetes Other
33 Renalfailure diabetes
34 uremia diabetes Renalfailure
35 Myocardialinfarction Cardiacfailure
36 uremia Renalfailure
37 Other Renalfailure Myocardialinfarction
38 Renalfailure diabetes uremia 
39 Cardiacfailure Myocardialinfarction Other
40 Myocardialinfarction Other
41 uremia Renalfailure diabetes
42 Cardiacfailure diabetes uremia Renalfailure
43 Myocardialinfarction
44 diabetes uremia Renalfailure
45 Myocardialinfarction Renalfailure
46 Cardiacfailure Myocardialinfarction
47 diabetes
48 Myocardialinfarction Cardiacfailure
49 diabetes Renalfailure uremia
50 Renalfailure
51 diabetes uremia
52 uremia Renalfailure diabetes
53 Other Renalfailure Myocardialinfarction
54 Renalfailure diabetes uremia Cardiacfailure
55 Renalfailure diabetes uremia
56 Myocardialinfarction
57 Renalfailure diabetes
58 Cardiacfailure Myocardialinfarction Other
59 uremia diabetes Renalfailure
60 Cardiacfailure diabetes Other
61 uremia Renalfailure
62 Myocardialinfarction
63 uremia diabetes Renalfailure Myocardialinfarction
64 Renalfailure diabetes Cardiacfailure
65 uremia Renalfailure
66 diabetes
67 Cardiacfailure diabetes Other
68 diabetes Renalfailure uremia Cardiacfailure
69 uremia Renalfailure diabetes
70 Cardiacfailure Myocardialinfarction

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

YiRano_0

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值