# Logistic regression example in TF using Kaggle's Titanic Dataset.
# Download train.csv from https://www.kaggle.com/c/titanic/data
# For the accompanying data analysis, you can refer to this notebook:
# https://github.com/yeLer/kaggle_yeler/blob/master/titanic/notebook/Titanic.ipynb
# 1. Import the required libraries
import tensorflow as tf
import os
# 2. Initialize the model parameters.
# W is a 5x1 weight matrix starting at zero; b is a scalar bias starting at 0.
W = tf.Variable(initial_value=tf.zeros(shape=[5, 1]), name="weights")
b = tf.Variable(initial_value=0.0, name="bias")
# 3. Define the fitting function (the linear part of the model).
def combine_inputs(X):
    """Return the linear combination of the inputs with the model parameters.

    Args:
        X: Tensor that is matrix-multiplied with the module-level ``W``.

    Returns:
        The tensor ``tf.matmul(X, W) + b``.
    """
    weighted_sum = tf.matmul(X, W)
    return weighted_sum + b
# 4. Define the activation: the sigmoid is applied on top of the fitting
#    function's output to obtain the classification result.
def inference(X):
    """Return the sigmoid of ``combine_inputs(X)``.

    Args:
        X: Input tensor forwarded to ``combine_inputs``.

    Returns:
        Tensor holding the sigmoid of the linear model output.
    """
    logits = combine_inputs(X)
    return tf.sigmoid(logits)
# 5. Define the loss function.
def loss(X, Y):
    """Return the mean sigmoid cross-entropy of the model on ``(X, Y)``.

    Args:
        X: Input tensor forwarded to ``combine_inputs`` to obtain the logits.
        Y: Label tensor compared against those logits.

    Returns:
        Scalar tensor: the mean of the per-element cross-entropy values.
    """
    logits = combine_inputs(X)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits)
    return tf.reduce_mean(cross_entropy)
# 6. CSV-reading helper built on TensorFlow's data-input ops.
# https://blog.csdn.net/zzk1995/article/details/54292859
def read_csv(batch_size, file_name, record_defaults):
    """Build the ops that read a CSV file and emit shuffled batches of its columns.

    Args:
        batch_size: Number of records per batch; also used to size the
            shuffle buffer (capacity is ``batch_size * 50`` and
            ``min_after_dequeue`` is ``batch_size``).
        file_name: CSV file name, joined onto the current working directory.
        record_defaults: Per-column defaults passed to ``tf.decode_csv``.

    Returns:
        The result of ``tf.train.shuffle_batch`` applied to the decoded columns.
    """
    csv_path = os.path.join(os.getcwd(), file_name)
    filename_queue = tf.train.string_input_producer([csv_path])

    # The reader yields one line per read and skips the first (header) line.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, line = line_reader.read(filename_queue)

    # Decode one CSV record into tensors, one per column.
    columns = tf.decode_csv(line, record_defaults=record_defaults)

    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size,
    )
# 7. Read the file contents and assemble the feature matrix and labels.
def inputs(batch_size=100, file_name="train.csv"):
    """Build the input tensors (features and labels) from the Titanic CSV.

    The batch size is a single parameter so that the value handed to
    ``read_csv`` and the one used to reshape the labels cannot drift apart.

    Args:
        batch_size: Number of records per batch. Defaults to 100.
        file_name: CSV file to read, passed to ``read_csv``. Defaults to
            ``"train.csv"``.

    Returns:
        A ``(features, survived)`` tuple. ``features`` has one example per
        row and five columns (first/second/third-class indicators, a female
        indicator, and age); ``survived`` is reshaped to ``[batch_size, 1]``.
    """
    # One default per CSV column; the default's type fixes the decoded dtype.
    record_defaults = [
        [0.0], [0.0], [0], [""], [""], [0.0],
        [0.0], [0.0], [""], [0.0], [""], [""],
    ]
    (passenger_id, survived, pclass, name, sex, age,
     sibsp, parch, ticket, fare, cabin, embarked) = read_csv(
        batch_size, file_name, record_defaults)

    # Convert the categorical columns into 0/1 float indicators.
    is_first_class = tf.cast(tf.equal(pclass, [1]), tf.float32)
    is_second_class = tf.cast(tf.equal(pclass, [2]), tf.float32)
    is_third_class = tf.cast(tf.equal(pclass, [3]), tf.float32)
    gender = tf.cast(tf.equal(sex, ["female"]), tf.float32)

    # Stack the features into a single matrix, then transpose so that each
    # row is one example and each column is one feature.
    features = tf.transpose(
        tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    survived = tf.reshape(survived, [batch_size, 1])
    return features, survived
# 8. Train the model.
def train(total_loss):
    """Return the op that minimizes ``total_loss`` with gradient descent.

    Args:
        total_loss: Loss tensor to minimize.

    Returns:
        The op produced by ``GradientDescentOptimizer.minimize`` using a
        learning rate of 0.01.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    return optimizer.minimize(total_loss)
# 9. Evaluate the model.
def evaluate(sess, X, Y):
    """Print the fraction of thresholded predictions that equal the labels.

    Args:
        sess: Session used to run the evaluation tensor.
        X: Input tensor passed to ``inference``.
        Y: Label tensor the predictions are compared with.
    """
    probabilities = inference(X)
    # Outputs above 0.5 count as a prediction of 1.0, everything else as 0.0.
    predicted = tf.cast(probabilities > 0.5, tf.float32)
    matches = tf.cast(tf.equal(predicted, Y), tf.float32)
    accuracy = tf.reduce_mean(matches)
    print(sess.run(accuracy))
# 10. Run the whole pipeline: build the graph, train, then evaluate.
import time

with tf.Session() as sess:
    # Build the complete graph first so the initializer created afterwards
    # covers every variable the graph defines.
    X, Y = inputs()
    total_loss = loss(X, Y)
    train_op = train(total_loss)
    sess.run(tf.global_variables_initializer())

    # Start the threads that feed the input queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Actual training loop.
        training_steps = 1000
        for step in range(training_steps):
            sess.run([train_op])
            # For debugging and learning purposes, show how the loss
            # evolves over the training steps.
            if step % 100 == 0:
                print("loss: ", sess.run([total_loss]))
        evaluate(sess, X, Y)
        # NOTE(review): the purpose of this pause is not evident from the
        # script; confirm whether it is still needed.
        time.sleep(5)
    finally:
        # Always stop and join the input threads, even if training failed.
        # The session itself is closed by the enclosing ``with`` statement.
        coord.request_stop()
        coord.join(threads)
# [08] Logistic regression (Titanic)
# Blog-page note: latest recommended article published 2022-04-02 17:27:03