在keras中自带的性能评估有准确性以及loss,当需要以auc作为评价验证集的好坏时,就得自己写个评价函数了。(注意:下面方法一的实现除了 sklearn 和 keras backend 之外,还依赖 numpy 与 tensorflow,使用前需先 `import numpy as np`、`import tensorflow as tf`。)
- from sklearn.metrics import roc_auc_score
- from keras import backend as K
- # AUC for a binary classifier
def auc(y_true, y_pred):
    """Approximate ROC-AUC as a Keras metric tensor.

    Sweeps 1000 decision thresholds over [0, 1], computes the true-positive
    rate (``binary_PTA``) and false-positive rate (``binary_PFA``) at each,
    and integrates TPR over FPR with a Riemann sum.

    NOTE(review): relies on ``tf`` (tensorflow) and ``np`` (numpy) being
    available at module level — they are only imported later in this file;
    confirm the imports actually precede first use.
    """
    ptas = tf.stack([binary_PTA(y_true,y_pred,k) for k in np.linspace(0, 1, 1000)],axis=0)
    pfas = tf.stack([binary_PFA(y_true,y_pred,k) for k in np.linspace(0, 1, 1000)],axis=0)
    # Prepend FPR = 1 (the threshold-0 endpoint) so adjacent differences
    # below yield one bin width per threshold.
    pfas = tf.concat([tf.ones((1,)) ,pfas],axis=0)
    # FPR decreases as the threshold rises, so negate the differences to
    # obtain positive bin widths.
    binSizes = -(pfas[1:]-pfas[:-1])
    # Riemann sum of TPR d(FPR) ~= area under the ROC curve.
    s = ptas*binSizes
    return K.sum(s, axis=0)
- #-----------------------------------------------------------------------------------------------------------------------------------------------------
- # PFA, prob false alert for binary classifier
# PFA: probability of false alarm (false-positive rate) for a binary classifier.
def binary_PFA(y_true, y_pred, threshold=0.5):
    """Return the false-positive rate of ``y_pred`` at ``threshold``.

    Args:
        y_true: tensor of ground-truth binary labels (0 or 1).
        y_pred: tensor of predicted scores; values >= ``threshold`` count
            as positive.
        threshold: decision threshold. A plain float replaces the original
            ``K.variable(value=0.5)`` default, which was created once at
            import time (the mutable-default pitfall) for no benefit —
            the comparison below accepts a Python float directly.

    Returns:
        Scalar tensor FP / N: the fraction of negatives wrongly flagged.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negative labels
    N = K.sum(1 - y_true)
    # FP = alerts raised on negative-class samples
    FP = K.sum(y_pred - y_pred * y_true)
    # K.epsilon() guards against division by zero when a batch contains
    # no negative samples at all.
    return FP / (N + K.epsilon())
- #-----------------------------------------------------------------------------------------------------------------------------------------------------
- # P_TA prob true alerts for binary classifier
# PTA: probability of true alert (true-positive rate) for a binary classifier.
def binary_PTA(y_true, y_pred, threshold=0.5):
    """Return the true-positive rate (recall) of ``y_pred`` at ``threshold``.

    Args:
        y_true: tensor of ground-truth binary labels (0 or 1).
        y_pred: tensor of predicted scores; values >= ``threshold`` count
            as positive.
        threshold: decision threshold. A plain float replaces the original
            ``K.variable(value=0.5)`` default, which was evaluated once at
            import time (the mutable-default pitfall).

    Returns:
        Scalar tensor TP / P: the fraction of positives correctly flagged.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # P = total number of positive labels
    P = K.sum(y_true)
    # TP = correct alerts, i.e. alerts raised on positive-class samples
    TP = K.sum(y_pred * y_true)
    # K.epsilon() guards against division by zero when a batch contains
    # no positive samples at all.
    return TP / (P + K.epsilon())
- #接着在模型的compile中设置metrics
- #如下例子,我用的是RNN做分类
from keras.models import Sequential
from keras.layers import Dense, Dropout
import keras
from keras.layers import GRU

# Build the classifier: masked variable-length input -> GRU -> sigmoid head.
model = Sequential()
# Masking skips timesteps that are entirely zero (variable-length sequences).
model.add(keras.layers.core.Masking(mask_value=0., input_shape=(max_lenth, max_features)))
model.add(GRU(units=n_hidden_units,
              activation='selu',
              kernel_initializer='orthogonal',
              recurrent_initializer='orthogonal',
              bias_initializer='zeros',
              kernel_regularizer=regularizers.l2(0.01),
              recurrent_regularizer=regularizers.l2(0.01),
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              recurrent_constraint=None,
              bias_constraint=None,
              dropout=0.5,
              recurrent_dropout=0.0,
              implementation=1,
              return_sequences=False,
              return_state=False,
              go_backwards=False,
              stateful=False,
              unroll=False))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
# Passing the custom `auc` function in `metrics` makes Keras report it
# for both training and validation data each epoch.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[auc])
接下来就自己作预测了...
方法二:
- from sklearn.metrics import roc_auc_score
- import keras
class RocAucMetricCallback(keras.callbacks.Callback):
    """Keras callback that computes validation ROC-AUC during training.

    The score is written into ``logs['roc_auc_val']`` so that monitors such
    as ``EarlyStopping(monitor='roc_auc_val', mode='max')`` can consume it.
    Register this callback BEFORE EarlyStopping in the callback list.
    """

    def __init__(self, predict_batch_size=1024, include_on_batch=False):
        super(RocAucMetricCallback, self).__init__()
        self.predict_batch_size = predict_batch_size  # batch size for model.predict
        self.include_on_batch = include_on_batch      # also score after every batch (slow)

    def _write_score(self, logs):
        """Compute validation ROC-AUC into logs['roc_auc_val'].

        Defaults to -inf so a 'max' monitor never mistakes a missing score
        for an improvement.
        """
        logs['roc_auc_val'] = float('-inf')
        if self.validation_data:
            logs['roc_auc_val'] = roc_auc_score(
                self.validation_data[1],
                self.model.predict(self.validation_data[0],
                                   batch_size=self.predict_batch_size))

    # Fix: the original hooks used the mutable default ``logs={}``, a dict
    # shared across every call of the method. ``logs=None`` is the safe,
    # conventional Keras signature; Keras always passes a real dict anyway.
    def on_batch_begin(self, batch, logs=None):
        pass

    def on_batch_end(self, batch, logs=None):
        if self.include_on_batch:
            self._write_score(logs if logs is not None else {})

    def on_train_begin(self, logs=None):
        # Register the metric name so Keras' progress display accepts it.
        # NOTE(review): ``self.params['metrics']`` only exists in older Keras
        # versions — confirm against the installed version.
        if 'roc_auc_val' not in self.params['metrics']:
            self.params['metrics'].append('roc_auc_val')

    def on_train_end(self, logs=None):
        pass

    def on_epoch_begin(self, epoch, logs=None):
        pass

    def on_epoch_end(self, epoch, logs=None):
        self._write_score(logs if logs is not None else {})
- import numpy as np
- import tensorflow as tf
- from keras.models import Sequential
- from keras.layers import Dense, Dropout
- from keras.layers import GRU
- import keras
- from keras.callbacks import EarlyStopping
- from sklearn.metrics import roc_auc_score
- from keras import metrics
# Callbacks: the AUC callback must come before EarlyStopping so that
# 'roc_auc_val' is already present in `logs` when EarlyStopping reads it.
# Fix: the original called `my_callbacks.RocAucMetricCallback()`, but no
# `my_callbacks` module is imported anywhere in this file — the class is
# defined above in this same file, so call it directly.
cb = [
    RocAucMetricCallback(),  # include it before EarlyStopping!
    EarlyStopping(monitor='roc_auc_val', patience=300, verbose=2, mode='max'),
]
# Assemble the same GRU classifier as method 1, then train it with the
# ROC-AUC callback + early stopping configured above.
model = Sequential()
# Masking lets the GRU ignore zero-padded timesteps in variable-length input.
model.add(keras.layers.core.Masking(mask_value=0.,
                                    input_shape=(max_lenth, max_features)))
# model.add(Embedding(input_dim=max_features+1, output_dim=64,mask_zero=True))
recurrent_layer = GRU(units=n_hidden_units,
                      activation='selu',
                      kernel_initializer='orthogonal',
                      recurrent_initializer='orthogonal',
                      bias_initializer='zeros',
                      kernel_regularizer=regularizers.l2(0.01),
                      recurrent_regularizer=regularizers.l2(0.01),
                      bias_regularizer=None,
                      activity_regularizer=None,
                      kernel_constraint=None,
                      recurrent_constraint=None,
                      bias_constraint=None,
                      dropout=0.5,
                      recurrent_dropout=0.0,
                      implementation=1,
                      return_sequences=False,
                      return_state=False,
                      go_backwards=False,
                      stateful=False,
                      unroll=False)
model.add(recurrent_layer)
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
# Any other metric could be listed alongside (or instead of) `auc` here.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[auc])
model.fit(x_train, y_train,
          batch_size=train_batch_size,
          epochs=training_iters,
          verbose=2,
          callbacks=cb,
          validation_split=0.2,
          shuffle=True,
          class_weight=None,
          sample_weight=None,
          initial_epoch=0)
亲测有效