指标定义
-
TP
正确预测为正类(本身对的,正确预测为对的)
-
TN
正确预测为负类(本身错的,正确预测为错的)
-
FP
错误预测为正类(本身错的,错误预测为正的)
-
FN
错误预测为负类(本身对的,错误预测为负的)
-
实际正类
TP+FN
-
预测正类
TP+FP
-
样本总数
TP+TN+FP+FN
指标公式
准确率 accuracy
预测正确的样本占总样本的比例
精确率 precision
模型预测为正类的样本中实际为正类的比例
召回率 recall
实际为正类的样本中被模型正确预测为正类的比例
F1 score
精确率和召回率的调和平均数
TOP1
模型预测得分最高的类别与真实标签相匹配的比例
TOP5
真实标签出现在模型预测得分最高的前五个类别之中的样本比例
代码Python Code
import json
import os

import pandas as pd
import torch
from sklearn.metrics import confusion_matrix
from timm import utils
from torchvision import datasets, transforms
from tqdm import tqdm

from model import 你自己的模型  # fixed typo: "form" -> "from"; replace with your model class
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
img_size = 224
data_transform = transforms.Compose(
[transforms.Resize(int(img_size * 1.14)),
transforms.CenterCrop(img_size),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
# read class_indict
json_path = '******'
assert os.path.exists(json_path), "file: '{}' not exist.".format(json_path)
with open(json_path, "r", encoding="utf-8") as f:
class_indict = json.load(f)
# create model
model = *****(num_classes=**).to(device)
# load model weights
# 这里放你训练好的权重路径
weights_path = r"************"
assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
state_dict = torch.load(weights_path, map_location=device)
# 将存储动态重新映射到可选设备上,device选择的是cuda0第一章gpu显卡
for key in list(state_dict.keys()):
if 'total_ops' in key or 'total_params' in key:
del state_dict[key]
# 加载模型
model.load_state_dict(state_dict,strict=False)
# 测试集根目录
test_data_dir = r'******'
# 使用datasets.ImageFolder加载测试集,用data_transform进行数据预处理
test_dataset = datasets.ImageFolder(test_data_dir, transform=data_transform)
# 创建数据加载器以批处理图像
batch_size = **
# 适应你的内存
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# 主要对数据集进行batch的划分,可以快速的迭代数据
# shuffle:是否在每个epoch开始时打乱数据顺序,默认为False。
model.eval()
top1_sum = 0
top5_sum = 0
total_samples = 0
y_true = [] # 用于存储真实标签
y_pred = [] # 用于存储模型的预测标签
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
inputs = inputs.to(device)
targets = targets.to(device)
outputs = model(inputs)
_, predicted = torch.max(outputs.data, 1)
acc1, acc5 = utils.accuracy(outputs, targets, topk=(1, 5))
# 累加 top-1 和 top-5 的准确率
batch_size = targets.size(0)
top1_sum += acc1.item() * batch_size
top5_sum += acc5.item() * batch_size
total_samples += batch_size
# 统计预测标签和真实标签
y_true += targets.cpu().numpy().tolist()
y_pred += predicted.cpu().numpy().tolist()
# 计算当前平均 top-1 和 top-5 准确率
avg_top1 = top1_sum / total_samples
avg_top5 = top5_sum / total_samples
print("")
print(
f"Batch {batch_idx + 1}/{len(test_loader)} - Top-1 Accuracy: {avg_top1:.2f}%, Top-5 Accuracy: {avg_top5:.2f}%")
# 假设 y_true 是真实标签,y_pred 是预测标签
conf_matrix = confusion_matrix(y_true, y_pred, labels=range(20))
# 初始化字典来存储每个类别的TP、FP、TN、FN
metrics = {f'class_{i}': {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0} for i in range(20)}
# 初始化准确率
acc_list=[]
# 初始化precision值
precision_list=[]
# 初始化recall值
recall_list=[]
# 初始化F1 score
F1_score_list=[]
total_samples = len(y_true)
for i in range(20):
TP = conf_matrix[i, i]#正确预测为正类
FP = conf_matrix[:, i].sum() - TP#错误预测为正类
FN = conf_matrix[i, :].sum() - TP#错误预测为负类
TN = total_samples - (TP + FP + FN)#正确预测为负类
#计算指标
acc= (TP+TN)/(TP+TN+FP+FN)
precision=(TP)/(TP+FP)
recall=(TP)/(TP+FN)
F1_score=2*((precision*recall)/(precision+recall))
#保存每一类的四个小指标
metrics[f'class_{i}']['TP'] = TP
metrics[f'class_{i}']['FP'] = FP
metrics[f'class_{i}']['FN'] = FN
metrics[f'class_{i}']['TN'] = TN
#添加到各指标列表
acc_list.append(acc)
precision_list.append(precision)
recall_list.append(recall)
F1_score_list.append(F1_score)
#计算平均值(宏平均)
#宏平均是对每个类别的指标进行简单平均,即所有类别的指标值相加后除以类别的总数。
#准确率
acc_average=sum(acc_list)/len(acc_list)
print("acc_average:"+str(acc_average))
#精确率
precision_average=sum(precision_list)/len(precision_list)
print("precision_average:"+str(precision_average))
#recall值
recall_average=sum(recall_list)/len(recall_list)
print("recall_average:"+str(recall_average))
#F1 score
F1_score_average=sum(F1_score_list)/len(F1_score_list)
print("F1_score_average:"+str(F1_score_average))
# 替换类的名称为class_indict中的中文名称
class_names = [class_indict[str(i)] for i in range(20)]
# 准备数据保存到Excel
# 子表1: 每一类的四个指标
df_metrics = pd.DataFrame({
'Class': class_names,
'Accuracy': acc_list,
'Precision': precision_list,
'Recall': recall_list,
'F1 Score': F1_score_list
})
# 子表2: 20类的宏平均值
df_macro_avg = pd.DataFrame({
'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score','TOP1-accuracy','TOP5-accuracy'],
'Macro Average': [acc_average, precision_average, recall_average, F1_score_average,avg_top1,avg_top5]
})
# 子表3: 混淆矩阵
df_conf_matrix = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)
# 保存到Excel文件
with pd.ExcelWriter('********.xlsx') as writer:
df_metrics.to_excel(writer, sheet_name='Class Metrics', index=False)
df_macro_avg.to_excel(writer, sheet_name='Macro Averages', index=False)
df_conf_matrix.to_excel(writer, sheet_name='Confusion Matrix')
print("Results have been saved to '********.xlsx'")
# Run the evaluation only when executed as a script (not on import).
if __name__ == '__main__':
    main()
代码生成表格效果
Code中带星号的和最开始头文件的import model都需要大家自己修改一下,然后博主做的是20类图像分类,大家根据具体情况把20这个数字修改就行,最后的xlsx表格会保存三个子表:分别是每类的四个指标,平均的六个指标,混淆矩阵;上面的内容都是博主自己总结编写的,希望大家多多点赞多多关注多多收藏,谢谢,祝学习愉快!有问题可以在评论区讨论!