# Fix the random seeds for reproducibility
def set_seed(seed=42):
    """Seed the random number generators used by this script.

    Seeds Python's ``random`` module and NumPy's global generator, and
    exports the seed as the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``; its
            string form is stored in ``PYTHONHASHSEED``. Defaults to 42.
    """
    random.seed(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so this presumably only influences child processes -- confirm.
    os.environ.update({'PYTHONHASHSEED': str(seed)})
    np.random.seed(seed)
# Seed the random number generators once, using set_seed's default seed.
set_seed()
# Threshold search for the best F-score
def f_score_search(df):
    """Grid-search the decision threshold that maximises the F-score.

    Thresholds 0.010, 0.011, ..., 0.959 (950 steps of 0.001) are each scored
    by ``f_beta`` using ``df['pred']`` as scores and ``df['label']`` as the
    ground truth. The best result is printed and returned.

    Args:
        df: Object indexable by the keys ``'pred'`` and ``'label'``
            (presumably a pandas DataFrame of out-of-fold predictions --
            confirm against the caller).

    Returns:
        Tuple ``(best_threshold, best_f_score)``. If no threshold achieves a
        score above 0, the initial values ``(0.01, 0)`` are returned.
    """
    start = 0.01
    step_size = 0.001

    # Read the columns from the argument. The previous code ignored `df` and
    # reached for a global `df_oof`, which breaks for any other input.
    preds = df['pred']
    labels = df['label']

    best_t = start
    best_f = 0
    best_p, best_r = 0, 0
    for step in range(950):
        curr_t = start + step * step_size
        p, r, curr_f = f_beta(preds, labels, threshold=curr_t)
        # Strict comparison: ties keep the lowest threshold, and a NaN score
        # never compares greater, so it can never be selected.
        if curr_f > best_f:
            best_t = curr_t
            best_f = curr_f
            best_p = p
            best_r = r
    print(f'best threshold: {best_t}, precision={best_p}; recall={best_r}; best f_score: {best_f}')
    return best_t, best_f
# F-beta score implementation (beta = 0.3)
def _safe_divide(numerator, denominator):
    """Divide element-wise, returning 0 wherever the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    result = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
    np.divide(numerator, denominator, out=result, where=denominator != 0)
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves higher-dimensional arrays untouched.
    return result[()]


def f_beta(y_hat, y_true, threshold, beta=0.3):
    """Compute precision, recall and F-beta for thresholded predictions.

    Predictions strictly greater than ``threshold`` count as positive (1),
    everything else as negative (0). Counts are summed along axis 0.

    Args:
        y_hat: Array-like of prediction scores.
        y_true: Array-like of ground-truth labels, expected to be 0/1 and
            the same shape as ``y_hat``.
        threshold: Decision threshold applied to ``y_hat``.
        beta: Weight of recall relative to precision. Defaults to 0.3,
            which favours precision.

    Returns:
        Tuple ``(precision, recall, f_score)``. Any ratio whose denominator
        is zero (e.g. no positive predictions) is reported as 0 instead of
        NaN.
    """
    # Coerce to arrays so plain lists work and pandas index alignment cannot
    # interfere with the element-wise products below.
    y_true = np.asarray(y_true)
    y_pred = (np.asarray(y_hat) > threshold).astype(np.int8)

    true_positive = np.sum(y_pred * y_true, axis=0)
    false_positive = np.sum(y_pred * (1 - y_true), axis=0)
    false_negative = np.sum((1 - y_pred) * y_true, axis=0)

    precision = _safe_divide(true_positive, true_positive + false_positive)
    recall = _safe_divide(true_positive, true_positive + false_negative)

    beta_sq = beta ** 2
    f_score = _safe_divide(
        (1 + beta_sq) * precision * recall,
        beta_sq * precision + recall,
    )
    return precision, recall, f_score