Silicon_pathological_region_selection_strategy:一种病理区域注释的方法

  • 本文意在实现“Ligand-receptor interactions combinedwith histopathology for improvedprognostic modeling in HpV-negativehead and neck squamous cell carcihomia”文献中Silicon_pathological_region_selection_strategy部分。
  • 文献解读可参考另一篇博客:https://blog.csdn.net/qq_44505899/article/details/147482032?spm=1001.2014.3001.5501
  • Silicon_pathological_region_selection_strategy作为采用聚类的方式,实现病理区域的注释(文章结论为簇0和簇2代表靠近肿瘤组织的间质区域;簇1为肿瘤组织;簇3和簇4被归类为基质和肌肉组织;簇5则被分类为肿瘤侵袭的边缘区域。),进而比较不同病理区域选择对模型性能的影像。

实现效果如下:
在这里插入图片描述


一、Silicon_pathological_region_selection_strategy流程图


本文流程图如下:

Step1: 将WSI切割成N张512x512的Patch

Step2:对于每张Patch,采用ResNet50提取一个2024维特征

Step3: Patch特征PCA降维到32维

Step4:Patch进行K-means聚类并采用t-SNE进行可视化

Step5:将具有聚类标签的Patch 映射至原始WSI上


在这里插入图片描述


二、具体代码实现:


Step1: 将WSI切割成N张512x512的Patch

Step2:对于每张Patch,采用ResNet50提取一个2024维特征

借用CLAM完成WSI分割和Patch特征提取。结果图如下:每个文件代表一张WSI,文件内部为Patch数x2024的特征矩阵。

在这里插入图片描述


Step3: Patch特征PCA降维到32维

import shutil
import torch
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA  
from sklearn.datasets import load_iris

directory_path = "PCA_feature"
if os.path.exists(directory_path):
    shutil.rmtree(directory_path)
    print(f"目录 {directory_path} 已删除。")
os.makedirs(directory_path)


## PCA批量对resnet50提取的特征进行降维
for file in os.listdir("../Extracted_feature/pt_files/"):
    file_path = '../Extracted_feature/pt_files/' + file
    features = torch.load(file_path)
    features = pd.DataFrame(features.numpy())
    pca = PCA(n_components=32)  # 加载PCA算法,设置降维后主成分数目为2
    reduced_x = pca.fit_transform(features)  # 对样本进行降维
    reduced_x = pd.DataFrame(reduced_x)
    # reduced_x = np.dot(reduced_x, pca.components_) + pca.mean_  # 还原数据
    # print("降维后的数据:\n", reduced_x.shape, reduced_x)
    output_path = os.path.join(directory_path,os.path.basename(file_path).replace(".pt",".csv"))
    reduced_x.to_csv(output_path, index=False)

## 合并降维后的数据带单个文件中
file = os.path.join(directory_path,os.listdir(directory_path)[0])
merge_pca_feature = pd.read_csv(file)
merge_pca_feature["slide_id"] = os.listdir(directory_path)[0].replace(".csv","")

for file in os.listdir(directory_path)[1:]:
    pca_feature = pd.read_csv(os.path.join(directory_path,file))
    pca_feature["slide_id"] = file.replace(".csv","")
    merge_pca_feature= pd.concat([merge_pca_feature,pca_feature])
merge_pca_feature.to_csv("pca_feature_in_patch.csv",index=False)
merge_pca_feature.head(2)

Step4:Patch进行K-means聚类并采用t-SNE进行可视化

  • 聚类及聚类数评估
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import pandas as pd

df_feature = pd.read_csv("./pca_feature_in_patch.csv")
slide_id = df_feature["slide_id"]
df_feature = df_feature.drop(["slide_id"],axis=1)
scaler = StandardScaler().fit(df_feature.values)
df_feature = pd.DataFrame(scaler.transform(df_feature.values))
df_feature.head(2)

# 聚类数评估
SSE = []
for cluster in range(1,20):
    kmeans = KMeans( n_clusters = cluster, init='k-means++')
    kmeans.fit(df_feature)
    SSE.append(kmeans.inertia_)

# 肘部图
# converting the results into a dataframe and plotting them
frame = pd.DataFrame({'Cluster':range(1,20), 'SSE':SSE})
plt.figure(figsize=(6,5))
plt.plot(frame['Cluster'], frame['SSE'], marker='o')
plt.title('Elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')


在这里插入图片描述

  • 获取聚类标签
best_cluster = 5

# 使用最佳聚类数重新训练 K-Means 模型
kmeans_best = KMeans(n_clusters=best_cluster, init='k-means++')
kmeans_best.fit(df_feature)

# 获取每个样本的类标签
labels = kmeans_best.labels_

df_feature['Labels'] = labels
df_feature["slide_id"]= slide_id

df_feature.to_csv("./pca_feature_in_patch_addLabel.csv",index=False)
df_feature.head()

在这里插入图片描述

  • 聚类标签分布
import seaborn as sns
import matplotlib.pyplot as plt

df_stat = pd.DataFrame(df_feature['Labels'].value_counts()).reset_index()

# 绘制柱状图
plt.figure(figsize=(4, 3))
sns.barplot(x='Labels', y='count', data=df_stat, palette='viridis', edgecolor="black")
plt.title('Bar Chart of Label Counts')
plt.show()

在这里插入图片描述

  • T-SNE可视化
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns

# 提取特征和标签
df = df_feature.drop(["slide_id"],axis=1)
features = df.drop('Labels', axis=1).values
labels = df['Labels'].values

# 使用 t-SNE 进行降维
tsne = TSNE(n_components=2, random_state=42)  # 设置随机种子以保证结果可复现
tsne_result = tsne.fit_transform(features)

# 将降维结果和标签合并到一个新的 DataFrame 中
tsne_df = pd.DataFrame(tsne_result, columns=['x', 'y'])
tsne_df['label'] = labels

# 可视化
plt.figure(figsize=(7, 6))
sns.scatterplot(x='x', y='y', hue='label', palette='viridis', data=tsne_df, legend='full')
plt.title('Patchs Clustering')
plt.xlabel('t-SNE_1')
plt.ylabel('t-SNE_2')
plt.legend(title='Clusters',frameon=False)
plt.savefig('tsne_kmeans_label.pdf', bbox_inches='tight', pad_inches=0)

plt.show()
# 902831行,可视化约40min

在这里插入图片描述


Step5:将具有聚类标签的Patch 映射至原始WSI上

import h5py
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import os
import numpy as np
import shutil

directory_path = "Patch_mapping_WSI"
if os.path.exists(directory_path):
    shutil.rmtree(directory_path)
    print(f"目录 {directory_path} 已删除。")
os.makedirs(directory_path)


df_label = pd.read_csv("./pca_feature_in_patch_addLabel.csv")
df_label = df_label[["slide_id", "Labels"]]
wsi_lst = list(set(df_label.slide_id))
df_label.head()


for wsi_file in wsi_lst:
    print(wsi_file)
    # 打开 h5 文件
    file_path = '../Extracted_feature/h5_files/' + wsi_file+ '.h5'  # 替换为你的 h5 文件路径
    with h5py.File(file_path, 'r') as f:
        # 查看文件结构,找到坐标数据所在的路径
    #     print("Keys in the h5 file:", list(f.keys()))
        coords = f['coords'][:]  # 读取坐标数据
    print("Coordinates shape:", coords.shape)

    slide_labels = df_label[df_label.slide_id.isin([wsi_file])]
    print(slide_labels.Labels.value_counts())

    labels  = slide_labels.Labels.values
    label_colors = {
        0: (236,130,114),    # 红色
        1: (112,190,151),    # 绿色
        2: (243,188,23),     # 蓝色
        3: (206,89,22),
        4: (142,73,153),
    #     5: (152,152,152)
    }
    # 将颜色值从 0-255 转换为 0-1 范围
    label_colors_normalized = {label: (r / 255.0, g / 255.0, b / 255.0) for label, (r, g, b) in label_colors.items()}
    
    # 创建一个空白图片
    max_x = np.max(coords[:, 0])
    max_y = np.max(coords[:, 1])

    image = Image.new('RGB', (max_x + 256, max_y + 256), (255, 255, 255))  # 白色背景
    pixels = image.load()

    # 填充每个方块
    block_size = 256
    for (x, y), label in zip(coords, labels):
        color = label_colors[label]
        # 计算方块的范围
        x_start = x - block_size // 2
        x_end = x + block_size // 2
        y_start = y - block_size // 2
        y_end = y + block_size // 2

        # 填充方块
        for i in range(x_start, x_end):
            for j in range(y_start, y_end):
                if 0 <= i < image.width and 0 <= j < image.height:  # 确保不超出图片范围
                    pixels[i, j] = color

    image_np = np.array(image)
    legend_elements = [Patch(facecolor=color, label=f' {label}') for label, color in label_colors_normalized.items()]

    plt.figure(figsize=(10, 8))
    plt.imshow(image_np)
    plt.axis('off')  
    plt.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1.1, 1.1),frameon=False)
    outfile = directory_path +"/"+ wsi_file + ".png"
    plt.savefig(outfile, bbox_inches='tight', pad_inches=0)
    plt.show()

输出如下:

可采用HovertNet对病理切片进行细胞层面上的注释,可以辅助类群的解释。


在这里插入图片描述
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

今天也不想动

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值