模拟不同模型在不同决策策略下的结果
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
# Both classes are drawn from 2-D Gaussians that share this covariance; only
# the means differ. Column 0 is plotted as "model1" and column 1 as "model2".
# Alternative, tighter covariances that were tried:
#   positives: [[0.003, 0.001], [0.001, 0.003]]
#   negatives: [[0.004, 0.003], [0.003, 0.004]]
_shared_cov = [[0.03, 0.01], [0.01, 0.03]]
_n_draws = 1000


def _draw_inside_unit_square(mean):
    """Draw ``_n_draws`` Gaussian points around ``mean`` and drop out-of-range rows.

    A row is discarded when any of its coordinates is below 0 or above 1, so
    the returned array may hold fewer than ``_n_draws`` rows.
    """
    points = np.random.multivariate_normal(mean, _shared_cov, _n_draws)
    out_of_range = np.any((points < 0) | (points > 1), axis=1)
    return points[~out_of_range]


# Positives are sampled before negatives so the random stream is consumed in
# a fixed order.
pos_data = _draw_inside_unit_square([0.7, 0.7])
neg_data = _draw_inside_unit_square([0.3, 0.3])
print(f'#pos: {len(pos_data)}, #neg: {len(neg_data)}')

# Scatter plot of the two classes: x is the first column, y the second.
plt.scatter(*pos_data.T, c='r', label='pos')
plt.scatter(*neg_data.T, c='b', label='neg')
plt.xlabel('model1')
plt.ylabel('model2')
plt.legend()
plt.show()
def get_auc(test_labels, test_scores):
    """Return the area under the ROC curve, mirrored so it is at least 0.5.

    Args:
        test_labels: Ground-truth labels, passed unchanged to ``roc_curve``.
        test_scores: Predicted scores, passed unchanged to ``roc_curve``.

    Returns:
        The area computed by ``auc`` over the ROC points; when that area is
        below 0.5, ``1 - area`` is returned instead.
    """
    # The thresholds returned by roc_curve are not needed for the area.
    false_pos_rates, true_pos_rates, _ = roc_curve(test_labels, test_scores)
    area = auc(false_pos_rates, true_pos_rates)
    if area < 0.5:
        return 1 - area
    return area
# Ground-truth labels: 1 for each positive row, then 0 for each negative row,
# in the same order the score arrays are concatenated below.
all_labels = [1] * len(pos_data) + [0] * len(neg_data)

# Column 0 holds model1's score and column 1 holds model2's score.
model1_score = np.concatenate((pos_data[:, 0], neg_data[:, 0]), axis=0)
model2_score = np.concatenate((pos_data[:, 1], neg_data[:, 1]), axis=0)
model1_auc = get_auc(all_labels, model1_score)
model2_auc = get_auc(all_labels, model2_score)
print(f'model1 auc: {model1_auc}, model2 auc: {model2_auc}')

# Strategy 1 (staged): model1 acts as a gate. Samples it scores below 0.5 get
# a final score of 0; every other sample keeps model2's score.
pred_score1 = np.where(model1_score < 0.5, 0.0, model2_score)

# Strategy 2 (product): the final score is the product of both model scores.
pred_score2 = model1_score * model2_score

strategy1_auc = get_auc(all_labels, pred_score1)
strategy2_auc = get_auc(all_labels, pred_score2)
print(f'strategy1(stages) auc: {strategy1_auc}, strategy2(prod) auc: {strategy2_auc}')

# ROC curves of the two strategies, drawn on one figure.
fprs1, tprs1, thrs1 = roc_curve(all_labels, pred_score1)
fprs2, tprs2, thrs2 = roc_curve(all_labels, pred_score2)

plt.figure(figsize=(10, 10))
plt.title('ROC curve')
for _fprs, _tprs, _line_fmt, _name in (
    (fprs1, tprs1, 'b--', 'strategy1'),
    (fprs2, tprs2, 'r', 'strategy2'),
):
    plt.plot(_fprs, _tprs, _line_fmt, label=_name)
plt.legend(loc='lower right')
plt.ylabel('TPR')
plt.xlabel('FPR')
plt.show()
模型散点分布
不同决策策略的ROC曲线