9、生存分析

import re
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import copy

def data_parameter_xls(name, idex_dead, idex_alive):
    """Read every sheet of an Excel workbook and group its rows two levels deep.

    Each row is grouped first by the string form of its first column and then
    by the value found in a grouping column. Sheets whose name contains
    '过世' are collected into the first dictionary using column
    ``idex_dead``; every other sheet is collected into the second dictionary
    using column ``idex_alive``.

    Args:
        name: Path of the workbook, passed to ``pandas.ExcelFile``.
        idex_dead: Column index of the grouping value for '过世' sheets.
        idex_alive: Column index of the grouping value for all other sheets.

    Returns:
        ``[D_dead, D_alive]``; each maps ``str(row[0])`` to a dictionary that
        maps the grouping value to the list of rows sharing it. Every stored
        row is a list whose first element has been converted to ``str``.
    """
    D_dead = {}
    D_alive = {}

    print("文件名开始获取")
    # Open the workbook once and close it deterministically; each sheet is
    # parsed from the same handle instead of re-opening the file per sheet.
    with pd.ExcelFile(name) as workbook:
        sheets_name = workbook.sheet_names
        print("文件名已获取完")

        for sheet in sheets_name:
            print(sheet, "开始获取")
            rows = np.array(workbook.parse(sheet)).tolist()

            # Pick the destination and grouping column once per sheet.
            if '过世' in sheet:
                target, column = D_dead, idex_dead
            else:
                target, column = D_alive, idex_alive

            for row in rows:
                row[0] = str(row[0])
                target.setdefault(row[0], {}).setdefault(row[column], []).append(row)

    return [D_dead, D_alive]

def data_parameter_ME_xls(name):
    """Read every sheet of an Excel workbook and group its rows by first column.

    Sheets whose name contains '过世' are collected into the first
    dictionary; every other sheet is collected into the second one. The
    first-column value is used as the key unchanged (it is not converted to
    a string).

    Args:
        name: Path of the workbook, passed to ``pandas.ExcelFile``.

    Returns:
        ``[D_dead, D_alive]``; each maps a row's first-column value to the
        list of rows (as lists) that share that value.
    """
    D_dead = {}
    D_alive = {}

    print("文件名开始获取")
    # Open the workbook once and close it deterministically; each sheet is
    # parsed from the same handle instead of re-opening the file per sheet.
    with pd.ExcelFile(name) as workbook:
        sheets_name = workbook.sheet_names
        print("文件名已获取完")

        for sheet in sheets_name:
            print(sheet, "开始获取")
            rows = np.array(workbook.parse(sheet)).tolist()

            target = D_dead if '过世' in sheet else D_alive
            for row in rows:
                target.setdefault(row[0], []).append(row)

    return [D_dead, D_alive]

def merge(data_A, data_DHD, data_FC, data_FVR, data_HV, data_SC):
    """Combine six ``[dead, alive]`` datasets into a single ``[dead, alive]`` pair.

    Each argument is indexed at 0 and 1 to obtain two nested dictionaries of
    the form ``{outer_key: {inner_key: value}}``. For every outer key the
    inner dictionaries are merged in argument order, so a later dataset
    overwrites an earlier one when both provide the same inner key. Values
    are stored by reference, not copied.

    Returns:
        ``[D_dead, D_alive]`` holding the merged dictionaries.
    """
    sources = (data_A, data_DHD, data_FC, data_FVR, data_HV, data_SC)
    merged_dead = {}
    merged_alive = {}

    for source in sources:
        # Slot 0 feeds the first result dictionary, slot 1 the second.
        for slot, combined in enumerate((merged_dead, merged_alive)):
            for person, features in source[slot].items():
                combined.setdefault(person, {}).update(features)

    return [merged_dead, merged_alive]
    

def data_filter(data, Threshold_coefficient):
    """Keep only the keys that occur for enough entries, zero-filling gaps.

    ``data[0]`` and ``data[1]`` are dictionaries mapping an entry to a
    dictionary of keys. A key is kept when the number of entries (across
    both dictionaries) that contain it is at least
    ``(len(data[0]) + len(data[1])) * Threshold_coefficient``. The kept keys
    and their counts are printed.

    Returns:
        ``[D_dead, D_alive]`` where every entry of the corresponding input
        dictionary carries exactly the kept keys; a key missing for an entry
        is filled with ``0``.
    """
    group_dead = data[0]
    group_alive = data[1]

    # Count, per key, how many entries of either group contain it.
    occurrences = {}
    for group in (group_dead, group_alive):
        for features in group.values():
            for feature in features:
                occurrences[feature] = occurrences.get(feature, 0) + 1

    minimum = (len(group_dead) + len(group_alive)) * Threshold_coefficient
    selected = {
        feature: count
        for feature, count in occurrences.items()
        if count >= minimum
    }

    print("数据量大于",Threshold_coefficient*100,"%的特征如下:")
    for feature, count in selected.items():
        print(feature, count)

    D_dead = {
        person: {feature: features.get(feature, 0) for feature in selected}
        for person, features in group_dead.items()
    }
    D_alive = {
        person: {feature: features.get(feature, 0) for feature in selected}
        for person, features in group_alive.items()
    }

    return [D_dead, D_alive]


def quantification(data):
    """Unfinished placeholder: unpack the two datasets and return ``None``.

    Only reads ``data[0]`` and ``data[1]`` into local names; nothing is
    computed or returned yet.
    """
    D_dead, D_alive = data[0], data[1]
    
    

"""
#暂时不用的两份数据
#体检结果
data_ME = data_parameter_ME_xls('data\\MedicalExam20190108.xls')
#过世会员数据
data_M = np.array(pd.read_excel("data\\Members20190102.xls")).tolist()
"""

# The six usable data sources. Paths are built with os.path.join so the
# script also works on platforms where '\\' is not a path separator; on
# Windows the resulting strings are the same as the previous literals.
# The two numbers are the idex_dead and idex_alive arguments of
# data_parameter_xls for that workbook.
data_A = data_parameter_xls(os.path.join('data', 'Activity20190109.xls'), 4, 3)
data_DHD = data_parameter_xls(os.path.join('data', 'DailyHealthData20190108.xls'), 4, 3)
data_FC = data_parameter_xls(os.path.join('data', 'FamilyCommunication20190108.xls'), 6, 5)
data_FVR = data_parameter_xls(os.path.join('data', 'FamilyVisitRecord20190104.xls'), 4, 3)
data_HV = data_parameter_xls(os.path.join('data', 'HospitalVisit20190108.xls'), 5, 4)
data_SC = data_parameter_xls(os.path.join('data', 'SocialClub20190109.xls'), 4, 3)

# Merge the six sources into one [dead, alive] pair.
data = merge(data_A, data_DHD, data_FC, data_FVR, data_HV, data_SC)
# Keep only the features present for at least 10% of all entries.
data = data_filter(data, 0.1)

 

快乐活动次数          
 见面次数          
 其他次数          
 短信次数          
 健康活动次数          
 生活活动次数          
 亲友次数          
 亲戚次数          
 电话次数          
 朋友次数          
 眼科次数          
 养生次数          
 儿子次数          
 内科次数          
 女儿次数          
 外科次数          
 骨科次数          
 内科-其他次数          
 客人次数          
 唱歌次数          
 胸片钙化灶心室增大高密度影主动脉硬化病灶纹理增多     
 肝功能总胆红素直接胆红素谷丙转氨酶谷草转氨酶碱性磷酸酶     
 血糖(空腹)血糖值          
 血脂胆固醇甘油三酯低密度脂蛋白高密度脂蛋白      
 肾功能血肌酐血尿素氮血尿酸        
 血常规白细胞血红蛋白血小板淋巴细胞比率中性细胞比率     
 尿常规白细胞阴性蛋白质阴性隐血阴性镜检白细胞未见镜检红细胞未见其它
 血压收缩压舒张压         
 矫正视力         
 眼疾屈光不正眼底动脉硬化白内障眼底血管硬化青光眼视网膜变性    
 咽有咽炎无咽炎         
 喉有文字无文字         
 呼吸呼吸次数          
 脉搏脉搏次数          
 身高 身高          
 体重 体重          
 皮肤无,可剔除         
 淋巴结无,可剔除         
 头、颈无,可剔除         
 甲状腺甲状腺结节         
 脊柱无,可剔除         
 四肢小儿麻痹症大隐静脉曲张老烂脚        
 肛门前列腺增生便秘混合痔肛门下垂外痔内痔     
 生殖器前列腺增生         
 心电图窦性心动过速窦性心动过缓III呈QR型轻度ST-T改变快速心房颤动不纯性心房扑动不完全性右束支传导阻滞心电轴右偏窦性心律电轴右偏
 彩色多普勒超声(腹部)           
 癌胚抗原(CEA)           
 心率           
 CT           
 裸眼视力           
 前列腺肿瘤标志物测定           
 甲胎蛋白(AFP)           
 糖类抗原199(CA199)           
 糖类抗原125(CA125)           
 B超           
 听力           
 耳疾           
 鼻及鼻窦           
 发育及营养           
 神经及精神           
 肺及呼吸道           
 心脏及血管           
 肝、脾、双肾           
 腹部包块           
 糖化血红蛋白           
 甲状腺功能测定1           
 甲状腺功能测定2           
 nan           
 X线检查           
 核磁共振           
 血粘度           
 彩色多普勒超声(甲状腺)           
 血糖(餐后2小时)           
 24小时动态心电图           
 支气管镜           

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值