对比线性回归，逻辑回归不能再使用误差平方和代价函数，要使用交叉熵代价函数，并使用 sigmoid 激活函数。
部分数据集展示
手写底层
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Fix the random seed so weight initialization is reproducible (TF1.x API).
tf.set_random_seed(1)

# Load the diabetes dataset: 8 feature columns followed by a binary 0/1 label.
data = np.loadtxt('../../datas/data-03-diabetes.csv', delimiter=',', dtype=np.float32)
data_x = data[:, :-1]   # features, shape (m, 8)
data_y = data[:, [-1]]  # labels kept as a column vector, shape (m, 1)

# Placeholders for a batch of features and labels.
x = tf.placeholder(tf.float32, [None, 8])
y = tf.placeholder(tf.float32, [None, 1])

# Trainable parameters of the logistic-regression model.
w = tf.Variable(tf.random_normal([8, 1]), name='W')
b = tf.Variable(tf.random_normal([1]), name='B')

# Model: a = sigmoid(x @ w + b), the predicted probability of class 1.
z = tf.matmul(x, w) + b
a = tf.sigmoid(z)

# Cross-entropy cost. Clip the activation away from exactly 0 and 1 so
# tf.log never produces -inf/NaN when the sigmoid saturates.
eps = 1e-7
a_safe = tf.clip_by_value(a, eps, 1.0 - eps)
cost = -tf.reduce_mean(y * tf.log(a_safe) + (1 - y) * tf.log(1 - a_safe))
cost_history = []

# Hand-written gradient descent. For the cross-entropy + sigmoid combination
# the error term simplifies to (a - y); these are the exact analytic gradients.
e = a - y
dw = tf.matmul(tf.transpose(x), e) / tf.cast(tf.shape(x)[0], tf.float32)
db = tf.reduce_mean(e, 0)
alpha = 0.1  # learning rate (the original `10e-2` spelling equals 0.1)
update = [
    tf.assign(w, w - alpha * dw),
    tf.assign(b, b - alpha * db),
]

# Accuracy: fraction of thresholded predictions (a > 0.5) matching the labels.
pre = tf.cast(a > 0.5, tf.float32)
score = tf.reduce_mean(tf.cast(tf.equal(pre, y), tf.float32))

# Train inside a context manager so the session is always released.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10001):
        cost_cvl, _ = sess.run([cost, update], feed_dict={x: data_x, y: data_y})
        if step % 500 == 0:
            print('step', step, 'cost_cvl', cost_cvl)
            cost_history.append(cost_cvl)
    # Evaluate training-set accuracy with the final parameters.
    print(sess.run(score, feed_dict={x: data_x, y: data_y}))

# Plot the recorded cost curve, skipping the large initial value.
plt.plot(cost_history[1:])
plt.show()
效果展示
step 0 cost_cvl 1.1368128
step 500 cost_cvl 0.5167124
step 1000 cost_cvl 0.4840871
step 1500 cost_cvl 0.47615495
step 2000 cost_cvl 0.47355393
step 2500 cost_cvl 0.4725495
step 3000 cost_cvl 0.4721149
step 3500 cost_cvl 0.47190967
step 4000 cost_cvl 0.47180566
step 4500 cost_cvl 0.47174993
step 5000 cost_cvl 0.47171864
step 5500 cost_cvl 0.4717005
step 6000 cost_cvl 0.47168958
step 6500 cost_cvl 0.4716829
step 7000 cost_cvl 0.4716786
step 7500 cost_cvl 0.47167602
step 8000 cost_cvl 0.47167423
step 8500 cost_cvl 0.47167304
step 9000 cost_cvl 0.47167236
step 9500 cost_cvl 0.47167182
step 10000 cost_cvl 0.47167143
0.76943344