分析 abnormal_scores(list 格式)和 normal_scores(list 格式),
画出以下的图并进行分析。
把两类样本的缺陷分数用直方图的形式画出来,以便找到分类阈值。
# Overlay the histograms of the two score populations on the same axes so
# that a classification threshold can be picked from the plot.
# Both histograms share the same binning, opacity and density normalisation.
hist_style = dict(bins=50, alpha=0.8, density=True)
# Histogram of abnormal_scores (drawn first)
plt.hist(abnormal_scores, facecolor='orangered', label="Abnormal", **hist_style)
# Histogram of normal_scores (drawn on top)
plt.hist(normal_scores, facecolor='cornflowerblue', label="Normal", **hist_style)
plt.title('Distribution of Defect Score')
plt.xlabel('Defect Score')
plt.ylabel('Probability Density')
plt.legend()
# Write the figure into save_path, then close it.
defect_score_png = os.path.join(save_path, "Defect_score.png")
plt.savefig(defect_score_png)
plt.close()
# Save the raw defect scores alongside their labels.
# Column order in the CSV follows the key order of this dict.
scores_csv = {'scores': scores, 'labels': labels}
# NOTE(review): the name `csv` would shadow the stdlib `csv` module if this
# file imports it; the name is kept because later code may reference it.
csv = pd.DataFrame.from_dict(scores_csv)
scores_csv_path = os.path.join(save_path, "scores.csv")
csv.to_csv(scores_csv_path)  # CSV file holding the defect scores
# Plot the ROC curve.
# NOTE(review): roc_curve / auc are presumably sklearn.metrics — confirm
# against the file's imports.
fpr, tpr, _ = roc_curve(labels, scores)  # false / true positive rates
roc_auc = auc(fpr, tpr)  # area under the ROC curve
# Equal Error Rate: the root x in [0, 1] of 1 - x - TPR(x), i.e. the false
# positive rate at which FPR == 1 - TPR. The interpolator is built once here
# instead of being rebuilt on every function evaluation made by brentq.
tpr_at_fpr = interp1d(fpr, tpr)
eer = brentq(lambda x: 1. - x - tpr_at_fpr(x), 0., 1.)
# Draw
plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='(AUC = %0.4f, EER = %0.4f)' % (roc_auc, eer))
# Mark the EER operating point; it lies on the dotted anti-diagonal
# (TPR = 1 - FPR) drawn on the next line.
plt.plot([eer], [1-eer], marker='o', markersize=5, color="navy")
plt.plot([0, 1], [1, 0], color='navy', lw=1, linestyle=':')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig(os.path.join(save_path, "ROC.png"))
plt.close()
# Threshold-related information.
# Both score lists are sorted ascending, in place.
abnormal_scores.sort()
normal_scores.sort()
# threshold_K is the sorted normal score at position int(n * (1 - K/100)),
# i.e. approximately the (100 - K)th percentile of the normal scores.
normal_count = len(normal_scores)
threshold_25 = normal_scores[int(normal_count * 0.75)]
threshold_20 = normal_scores[int(normal_count * 0.80)]
threshold_10 = normal_scores[int(normal_count * 0.90)]
threshold_5 = normal_scores[int(normal_count * 0.95)]