医疗异构信息网络 知识图谱建模

医疗异构信息网络 知识图谱建模

视频讲解: 基于医疗异构信息网络表征学习的病人相似性(聚类分析)_哔哩哔哩_bilibili

[图片:原文此处为内嵌图片,文本提取后仅残留 base64 编码数据,已省略]

# pip install annoy -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
# pip install gensim==4.2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
from torch import tensor
from sklearn.metrics import f1_score
from datetime import datetime
import time
from collections import Counter
import re
import jieba

import pandas as pd
import time
import numpy as np
from tqdm import tqdm
import os
import gensim
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt
import random
import numpy as np
# Matplotlib font setup: SimHei for Chinese labels, and keep the minus sign
# rendering correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

import pandas as pd

# Load the patient / disease / symptom sheet. Empty cells are replaced by the
# value 1, and the frame is then converted to a plain array of rows.
data = pd.read_excel('病人-疾病-症状.xlsx').fillna(1)
print(data.head(5))
data = data.values

# Build (first-column value, other-column value) edge pairs from the first
# 1000 rows. Cells equal to 1 (the fill value used for empty cells) or to a
# single space are skipped.
truple_lists = [
    (row[0], cell)
    for row in data[:1000]
    for cell in row[1:]
    if not (cell == 1 or cell == ' ')
]
print(truple_lists)

# Undirected graph whose edges are the pairs collected above.
g = nx.Graph()
g.add_edges_from(truple_lists)

# print(g.edges())

print('g.nodes()', g.nodes())
#
# print(g.degree())

# 转换节点标签以便画图
# labels = {}
# for node in g.nodes():
#     labels[node] = str(node)

# nx.draw(g, labels=labels)
# plt.show()

# Fix both random seeds so later results can be reproduced.
random.seed(1)
np.random.seed(0)


num_walks = 10     # number of passes over all nodes
walk_length = 20   # maximum number of nodes in one walk
walks = []
for _ in tqdm(range(num_walks)):
    for start in g.nodes():
        # Only nodes whose label is exactly a str start a walk.
        if type(start) is not str:
            continue
        walk = [start]
        current = start
        while len(walk) < walk_length:
            candidates = list(g.neighbors(current))
            if not candidates:
                # No neighbour to move to: end this walk early.
                break
            # Uniformly pick the next node among the current node's neighbours.
            current = np.random.choice(candidates, 1).item()
            walk.append(current)
        walks.append(walk)
print(len(walks))
print(np.array(walks).shape)

# Write the random-walk corpus used to train Word2Vec: one walk per line,
# node labels separated by spaces.
# The file is opened in "w" mode (not "a+") so that re-running the script
# replaces the corpus instead of appending another copy of every walk.
with open("word2vec_txt.txt", "w", encoding='utf-8') as f:
    f.writelines(" ".join(walk) + "\n" for walk in tqdm(walks))

# Train a CBOW model (sg=0) with 64-dimensional vectors.
# - `vector_size` is the gensim 4.x name of the former `size` argument
#   (the install note at the top of the script pins gensim==4.2.0).
# - LineSentence receives the path, so it opens and closes the file itself
#   rather than being handed a file object that is never closed.
model = Word2Vec(LineSentence('word2vec_txt.txt'), sg=0, vector_size=64, window=8, min_count=2, workers=4)
# Save the trained model
model.save('word2vec.model')

# Load the word vectors back from the saved model (recommended)
model_vec = gensim.models.Word2Vec.load('word2vec.model')
# `index_to_key` is the gensim 4.x replacement for the removed `index2word`.
dic = model_vec.wv.index_to_key
# print(dic)
print(len(dic))
print(model_vec.wv['不稳定性心绞痛'])
print(model_vec.wv.most_similar('不稳定性心绞痛', topn=2))
print(model_vec.wv.most_similar('p4', topn=2))


# Dimensionality reduction and cluster visualisation of the patient vectors
from gensim.models import Word2Vec
from random import sample
from sklearn.manifold import TSNE
from pylab import mpl
# SimHei for Chinese text; keep the minus sign from rendering incorrectly.
mpl.rcParams.update({'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False})

# Keep only vocabulary entries whose first character is 'p', paired with
# their embedding vector.
# NOTE(review): 'p' presumably marks patient ids such as 'p4' - confirm.
word_list_vec = [[word, model.wv[word]] for word in dic if str(word[0]) == 'p']


plt.figure(figsize=(15, 15))  # canvas size
# Colour choices; c may be any of ['c', 'b', 'g', 'r', 'm', 'y', 'k', 'w']
color = ['b', "r", "g", "k"]
color_label = ['b', "r", "g", "k"]
marker = [" ", " ", " ", " "]
labels = [name for name, _ in word_list_vec]
tokens = [vector for _, vector in word_list_vec]  # the embedding vectors

# perplexity: default 30; larger datasets need larger values (suggested 5-50).
# n_components=2: dimensionality of the embedded (result) space.
tsne_model = TSNE(perplexity=30, n_components=2, init='pca', n_iter=2500, random_state=23)
print(len(tokens))
# print(tokens)
# Project the vectors into the embedded space and return the result.
new_values = tsne_model.fit_transform(tokens)
x = [point[0] for point in new_values]
y = [point[1] for point in new_values]
# One scatter call and one text label per point, using the second colour.
for px, py, label in zip(x, y, labels):
    plt.scatter(px, py, c=color[1], marker=marker[1])
    plt.text(px, py, label, fontsize=10, color=color_label[1])
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值