在这里插入代码片
t-SNE (t-Distributed Stochastic Neighbor Embedding) 是一种常用于高维数据可视化的降维技术。要生成预测结果的t-SNE图,我们首先需要提取模型最后一层全连接层之前的特征(即全局平均池化后的输出,而不是最终的logits)作为输入,然后使用t-SNE将其降到二维。以下是详细的步骤和代码示例:
1. 提取模型的特征输出
首先,我们需要修改预测代码,使其在给出分类结果的同时,提取全连接层之前的特征。
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.manifold import TSNE
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from tqdm import tqdm
# Directory holding the images to classify, and the directory that receives
# copies of the images predicted as label 1 later in the script.
test_dir = '/media/wagnchogn/data_disk/artifact/revise_cla_normal_artifact/test_a2n'
new_path = '/media/wagnchogn/data_disk/artifact/revise_cla_normal_artifact/test_a2n_sel'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(new_path, exist_ok=True)

# Preprocessing applied to every image: resize to 256x256, convert to a
# tensor, then normalize each channel with the given mean / std values.
data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build a ResNet18 without downloaded weights, replace its classifier head
# with a single-output linear layer (binary task), and load the trained
# weights from disk.
model = models.resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('best_model_weights.pth', map_location=device))
model = model.to(device)
# Inference only: freeze batch-norm statistics.
model.eval()

# Feature extractor: every child module of the model except the final fully
# connected layer. It shares its modules (and weights) with `model`.
feature_extractor = nn.Sequential(*list(model.children())[:-1])
feature_extractor = feature_extractor.to(device)
# Run the classifier over every file in test_dir, collecting the predicted
# label and the pre-classifier feature vector of each image, copying images
# predicted as label 1 into new_path, and finally writing everything to
# test_info.csv.
img_predictions = []  # not used in the visible script; kept in case later code references it
# Sorted so the CSV row order is reproducible (os.listdir order is arbitrary).
imgs = sorted(os.listdir(test_dir))
pred_imgs = []      # full path of every processed image
pred_labels = []    # predicted class (0 or 1) per image
pred_features = []  # flattened feature vector per image

# Switch to inference mode once, before the loop. feature_extractor is built
# from model's own child modules, so both calls affect the same layers; both
# are kept for clarity.
model.eval()
feature_extractor.eval()

for img_name in tqdm(imgs):
    img_path = os.path.join(test_dir, img_name)
    # Close the file promptly and force 3 channels: the Normalize transform
    # is configured for 3 channels, so grayscale / RGBA / palette images
    # would otherwise fail.
    with Image.open(img_path) as img:
        input_tensor = data_transforms(img.convert('RGB')).unsqueeze(0).to(device)

    with torch.no_grad():
        features = feature_extractor(input_tensor)
        features = features.view(features.size(0), -1)  # flatten to (1, num_features)
        # The classifier head applied to the flattened features gives the
        # same logit as model(input_tensor) without a second backbone pass.
        output = model.fc(features)
        pred = torch.sigmoid(output)

    # Threshold the sigmoid probability: <= 0.5 -> label 0, otherwise label 1.
    pred_label = 0 if pred.item() <= 0.5 else 1

    pred_imgs.append(img_path)
    pred_labels.append(pred_label)
    pred_features.append(features.cpu().numpy().flatten())

    # Keep a copy of every image predicted as label 1.
    if pred_label == 1:
        shutil.copy(img_path, os.path.join(new_path, img_name))

# One row per image: path, predicted label, then one column per feature
# dimension (integer column names 0..N-1).
df = pd.DataFrame({'img_path': pred_imgs, 'pred_label': pred_labels})
features_df = pd.DataFrame(pred_features)
df = pd.concat([df, features_df], axis=1)
df.to_csv('test_info.csv', index=False)
# Split the table back into the feature matrix and the predicted labels.
# Only two metadata columns (img_path, pred_label) precede the features, so
# the feature columns start at position 2; slicing from 3 would silently
# drop the first feature dimension.
features = df.iloc[:, 2:].values
labels = df['pred_label'].values

# Project the features to 2-D with t-SNE. Perplexity must be smaller than
# the number of samples, so clamp the default of 30 for small test sets.
n_samples = features.shape[0]
perplexity = min(30.0, max(n_samples - 1, 1))
tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
features_tsne = tsne.fit_transform(features)

# Scatter plot of the embedding, one color / legend entry per predicted label.
plt.figure(figsize=(10, 8))
for label in np.unique(labels):
    mask = labels == label
    plt.scatter(
        features_tsne[mask, 0],
        features_tsne[mask, 1],
        label=f'Label {label}',
        alpha=0.5,
    )
plt.legend()
plt.title('t-SNE of Image Features')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
# Save before show(): some backends clear the figure once the window closes.
plt.savefig('tsne_plot.png')
plt.show()
详细步骤说明:
- 修改模型以提取特征:
  - 使用ResNet18模型的所有层,除了最后一层全连接层,作为特征提取器。

  ```python
  feature_extractor = nn.Sequential(*list(model.children())[:-1])
  ```
- 提取特征:
  - 对每张图像提取特征,并保存到一个列表中。

  ```python
  with torch.no_grad():
      features = feature_extractor(input)
      features = features.view(features.size(0), -1)  # 展平
  ```
- 将特征和标签保存到CSV文件:
  - 将图像路径、预测标签和特征保存到CSV文件中。

  ```python
  features_df = pd.DataFrame(pred_features)
  df = pd.concat([df, features_df], axis=1)
  df.to_csv('test_info.csv', index=False)
  ```
- 计算t-SNE并绘制图像:
  - 使用t-SNE对特征进行降维,并绘制散点图。

  ```python
  tsne = TSNE(n_components=2, random_state=42)
  features_tsne = tsne.fit_transform(features)
  plt.figure(figsize=(10, 8))
  for label in np.unique(labels):
      plt.scatter(features_tsne[labels == label, 0], features_tsne[labels == label, 1], label=f'Label {label}', alpha=0.5)
  plt.legend()
  plt.title('t-SNE of Image Features')
  plt.xlabel('t-SNE Component 1')
  plt.ylabel('t-SNE Component 2')
  plt.savefig('tsne_plot.png')
  plt.show()
  ```
通过这些步骤,您可以生成t-SNE图,展示不同预测标签的图像特征分布。