1 前言
数据来源:某游戏行为数据
目的:游戏流失预测,二分类模型
2 步骤
基本步骤
1.从原始数据集的CSV里面读取数据,并且加载到TensorFlow当中
2.构建一个基于神经网络的分类器
3.使用训练数据进行模型训练
4.使用测试数据进行模型评估
3 数据加载
# --- Data loading and preprocessing ---
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Raw game-behaviour data; 'label' is the churn target column.
train = pd.read_csv("data/input/data.csv")
x_train = train.drop(['label'], axis=1).values

# Encode the label column to integer class ids (binary: 0 / 1).
le = LabelEncoder().fit(train["label"])
y_train = le.transform(train["label"])

# Standardize features to zero mean / unit variance before feeding the net.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# One-hot encode the binary target: class 0 -> [1, 0], class 1 -> [0, 1].
y_train = [[1, 0] if a == 0 else [0, 1] for a in y_train]

# 60/40 train/test split, fixed seed for reproducibility.
train_x, test_x, train_y, test_y = train_test_split(
    x_train, y_train, test_size=0.4, random_state=0)
4 神经网络准备
# Network topology: input width is inferred from the feature matrix,
# followed by three equally sized hidden layers and a 2-way softmax output.
n_input_layer = len(x_train[0])  # input layer: one unit per feature column
n_layer_1 = 1000  # hidden layer 1 width
n_layer_2 = 1000  # hidden layer 2 width
n_layer_3 = 1000  # hidden layer 3 width
n_output_layer = 2  # output layer: one logit per class (churn / no churn)
def neural_network(data):
    """Build a 3-hidden-layer fully connected net over `data`.

    Args:
        data: a float tensor of shape [batch, n_input_layer].
    Returns:
        Raw (un-softmaxed) logits of shape [batch, n_output_layer];
        the caller applies softmax_cross_entropy_with_logits.
    """
    # Weights/biases per layer, randomly initialized from a normal distribution.
    layer_1_w_b = {'w_': tf.Variable(tf.random_normal([n_input_layer, n_layer_1])),
                   'b_': tf.Variable(tf.random_normal([n_layer_1]))}
    layer_2_w_b = {'w_': tf.Variable(tf.random_normal([n_layer_1, n_layer_2])),
                   'b_': tf.Variable(tf.random_normal([n_layer_2]))}
    layer_3_w_b = {'w_': tf.Variable(tf.random_normal([n_layer_2, n_layer_3])),
                   'b_': tf.Variable(tf.random_normal([n_layer_3]))}
    # BUG FIX: the output weights multiply layer_3, so the fan-in must be
    # n_layer_3 (was n_layer_2 — only worked because the two sizes coincide).
    layer_output_w_b = {'w_': tf.Variable(tf.random_normal([n_layer_3, n_output_layer])),
                        'b_': tf.Variable(tf.random_normal([n_output_layer]))}

    # Forward pass: affine transform + ReLU for each hidden layer.
    layer_1 = tf.add(tf.matmul(data, layer_1_w_b['w_']), layer_1_w_b['b_'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, layer_2_w_b['w_']), layer_2_w_b['b_'])
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, layer_3_w_b['w_']), layer_3_w_b['b_'])
    layer_3 = tf.nn.relu(layer_3)
    # Final layer: no activation — returns raw logits.
    layer_output = tf.add(tf.matmul(layer_3, layer_output_w_b['w_']), layer_output_w_b['b_'])
    return layer_output
batch_size = 50  # mini-batch size used by the training loop
# Feed placeholders: X receives a batch of standardized feature rows,
# Y the matching one-hot labels (shape left unspecified, inferred at feed time).
X = tf.placeholder('float', [None, len(train_x[0])])
Y = tf.placeholder('float')
5 模型训练及评价
def train_neural_network(X, Y):
    """Train the classifier for `epochs` passes, then evaluate on the held-out split.

    Args:
        X: feature placeholder, shape [None, n_features].
        Y: one-hot label placeholder.
    Side effects: prints per-epoch loss, then accuracy, precision, recall,
    F1 and the confusion matrix on the test split.
    """
    predict = neural_network(X)
    cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predict, labels=Y))
    optimizer = tf.train.AdamOptimizer().minimize(cost_func)  # default learning rate 0.001
    epochs = 200
    with tf.Session() as session:
        # initialize_all_variables() is deprecated since TF 0.12.
        session.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            # BUG FIX: the batch cursor and the running loss must be reset at the
            # start of EVERY epoch. Previously both were initialized once before
            # the epoch loop, so only the first epoch actually ran mini-batches
            # and the printed loss accumulated across epochs.
            epoch_loss = 0
            i = 0
            while i < len(train_x):
                batch_x = train_x[i:i + batch_size]
                batch_y = train_y[i:i + batch_size]
                _, c = session.run([optimizer, cost_func],
                                   feed_dict={X: list(batch_x), Y: list(batch_y)})
                epoch_loss += c
                i += batch_size
            print(epoch, ' : ', epoch_loss)

        # Accuracy: fraction of test rows whose argmax logit matches the label.
        correct = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('准确率: ', accuracy.eval({X: list(test_x), Y: list(test_y)}))

        # Predicted class ids for sklearn metrics (Python 2 print statements
        # unified to print() calls, consistent with the rest of the function).
        y_p = tf.argmax(predict, 1)
        val_accuracy, y_pred = session.run([accuracy, y_p],
                                           feed_dict={X: list(test_x), Y: list(test_y)})
        y_true = np.argmax(list(test_y), 1)
        print("validation accuracy:", val_accuracy)
        print("Precision", metrics.precision_score(y_true, y_pred))
        print("Recall", metrics.recall_score(y_true, y_pred))
        print("f1_score", metrics.f1_score(y_true, y_pred))
        print("confusion_matrix")
        print(metrics.confusion_matrix(y_true, y_pred))
train_neural_network(X,Y)