概述
相关理论介绍可参阅【机器学习笔记2.1】线性模型之逻辑回归
代码示例
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def plotDataMat(dataMat, labelMat, weights, bias=1.0):
    """Scatter-plot a two-feature data set and draw a linear decision boundary.

    Samples whose label truncates to the integer 1 are drawn as red squares,
    every other sample as a green dot.  The boundary drawn is the line
    ``weights[0] * x1 + weights[1] * x2 + bias = 0`` for x1 in [-3, 3).

    Args:
        dataMat: 2-D array or matrix; column 0 is plotted on the X1 axis and
            column 1 on the X2 axis.
        labelMat: one label per row of ``dataMat`` (flat or column shaped).
        weights: at least two coefficients (flat or column shaped); only the
            first two are used.
        bias: intercept term of the boundary.  Defaults to 1.0, the constant
            the formula previously hard-coded, so three-argument calls draw
            the same line as before.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    points = np.asarray(dataMat)
    labels = np.asarray(labelMat).reshape(-1)[:points.shape[0]]
    # astype(int) truncates toward zero, matching int(label) == 1.
    is_positive = labels.astype(int) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(points[is_positive, 0], points[is_positive, 1],
               s=30, c='red', marker='s')
    ax.scatter(points[~is_positive, 0], points[~is_positive, 1],
               s=30, c='green')

    coef = np.asarray(weights, dtype=float).reshape(-1)
    intercept = float(np.asarray(bias, dtype=float).reshape(-1)[0])
    x = np.arange(-3.0, 3.0, 0.1)
    if coef[1] != 0:
        ax.plot(x, (-intercept - coef[0] * x) / coef[1])
    elif coef[0] != 0:
        # weights[1] == 0: the boundary is the vertical line x1 = -bias / w0,
        # which cannot be written as x2 = f(x1).
        ax.axvline(-intercept / coef[0])
    # Both coefficients zero: there is no boundary line to draw.

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
def loadDataSet(file_path):
    """Read a whitespace-separated text file of two features plus a label.

    Every non-blank line must contain at least three fields: two values
    parsed with ``float`` followed by a label parsed with ``int``.  Fields
    after the third are ignored.  Blank lines are skipped.

    Args:
        file_path: path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``: a list of ``[x1, x2]`` float pairs
        and a list of int labels, both in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a field cannot be parsed as a number.
        IndexError: if a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even when a line fails to parse.
    with open(file_path) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # e.g. a trailing empty line at the end of the file
                continue
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
# Data set used in the logistic-regression chapter of "Machine Learning in
# Action"; loadDataSet returns two float features and an int label per line.
dataMat, labelMat = loadDataSet('testSet.txt')
dataMat = np.mat(dataMat).astype(np.float32)                # (sample_num, 2)
labelMat = np.mat(labelMat).transpose().astype(np.float32)  # (sample_num, 1)
sample_num = dataMat.shape[0]
threshold = 1.0e-2

# Model: hyp = sigmoid(x . weight + bias)
weight = tf.Variable(tf.zeros([2, 1]))
bias = tf.Variable(tf.zeros([1, 1]))
x_ = tf.placeholder(tf.float32, [None, 2])
y_ = tf.placeholder(tf.float32, [None, 1])
g = tf.matmul(x_, weight) + bias
# hypothesis; softmax is not usable here because a softmax over the single
# logit column is always 1.
hyp = tf.sigmoid(g)

# Cross-entropy of each fed sample, pre-divided by the data-set size, so that
# summing over the whole data set yields the mean loss.  [1]
cost = (y_ * tf.log(hyp) + (1 - y_) * tf.log(1 - hyp)) / -sample_num
loss = tf.reduce_sum(cost)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

step = 0
w = None
flag = 0
loss_buf = []
# tf.initialize_all_variables() is deprecated in favour of this call.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for _ in range(100):
        # One pass over the data, one gradient step per sample.
        for data, label in zip(dataMat, labelMat):
            sess.run(train, feed_dict={x_: data, y_: label})
            step += 1
            if step % 10 == 0:
                print(step, sess.run(weight).flatten(), sess.run(bias).flatten())
        # Record the loss over the whole data set once per epoch; a single
        # sample's loss is noisy and is scaled down by 1 / sample_num.
        loss_val = sess.run(loss, feed_dict={x_: dataMat, y_: labelMat})
        print('loss_val = ', loss_val)
        loss_buf.append(loss_val)
        if loss_val <= threshold:
            # NOTE(review): flag is never read in this file, so this branch
            # has no effect; presumably a disabled early stop -- confirm
            # before removing or turning it into a break.
            flag = 0
    # Fetch the trained parameters once, while the session is still open.
    w, b = sess.run([weight, bias])
    print('weight = ', w)

# Plot the loss curve.
loss_ndarray = np.array(loss_buf)
loss_size = np.arange(len(loss_ndarray))
plt.plot(loss_size, loss_ndarray, 'b+', label='loss')

# Called with three arguments, plotDataMat draws w0*x1 + w1*x2 + 1 = 0, i.e.
# with a fixed intercept of 1.  The learned boundary is
# w0*x1 + w1*x2 + b = 0, so divide the weights by b to express it in that
# form.  A bias of exactly 0 cannot be rescaled; fall back to the raw weights.
b_val = float(b[0, 0])
plotDataMat(dataMat, labelMat, w / b_val if b_val != 0.0 else w)
print('end')
拟合出的最佳分类曲线:
loss曲线: