# 用线性单分逻辑回归分析肿瘤是良性还是恶性的

1. 生成样本集

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle

def generate(sample_size, mean, cov, diff, regression):
    """Generate a synthetic Gaussian classification data set.

    Draws ``sample_size`` points split evenly between class 0 (centered at
    ``mean``) and one additional class per entry of ``diff`` (centered at
    ``mean + d``), then shuffles features and labels in unison.

    Args:
        sample_size: total number of samples to produce.
        mean: 1-D mean vector of the base class (length = feature dim).
        cov: covariance matrix shared by every class.
        diff: iterable of offsets; offset ``d`` creates one extra class
            centered at ``mean + d`` with label 1, 2, ...
        regression: if True, return scalar class-id labels;
            if False, return one-hot encoded labels.

    Returns:
        (X, Y): shuffled feature matrix and label array (numpy arrays).
    """
    # One base class plus one class per offset (the original hard-coded 2,
    # which broke one-hot encoding whenever len(diff) > 1).
    num_classes = len(diff) + 1
    samples_per_class = int(sample_size / 2)

    # Class 0: samples around `mean`, labeled 0.
    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    # Extra classes: shifted by each offset in `diff`, labeled 1, 2, ...
    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)
        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if not regression:
        # One-hot encode the labels, e.g. 0 -> [1, 0].
        # FIX: the original referenced an undefined name `Y` here and then
        # discarded the encoded result by shuffling the raw Y0 instead.
        Y0 = np.eye(num_classes)[Y0.astype(int)].astype(np.float32)

    # Shuffle features and labels with the same permutation (numpy-only
    # equivalent of sklearn.utils.shuffle used by the original).
    perm = np.random.permutation(len(X0))
    return X0[perm], Y0[perm]

• 定义随机数的种子值（这样可以保证每次运行代码时生成的随机值都一样）
• 定义生成类的个数num_classes = 2
• 传入 X, Y = generate(1000, mean, cov, [3.0], True) 中的 3.0 表明两类数据的中心在 x 和 y 方向上均相差 3.0。传入的最后一个参数 regression=True 表明使用非 one-hot 的编码标签（即标量类别标签）。
# --- Build and visualize the training set ---------------------------------
input_dim = 2        # two input features: scaled age and tumor size
np.random.seed(10)   # fixed seed so every run generates the same samples
num_classes = 2

mean = np.random.randn(num_classes)
cov = np.eye(num_classes)
X, Y = generate(1000, mean, cov, [3.0], True)

# Red for class 0 (benign), blue for class 1 (malignant).
colors = ['r' if label == 0 else 'b' for label in Y]
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()

lab_dim = 1          # one scalar label per sample

2. 构建网络结构

• 激活函数使用的是Sigmoid
• 损失函数loss仍然使用交叉熵，里面又加了一个平方差函数，用来评估模型的错误率
# --- Network definition ----------------------------------------------------
# NOTE: TensorFlow 1.x graph-mode code (tf.placeholder / tf.Session).
input_features = tf.placeholder(tf.float32, [None, input_dim])
# (sic) `input_lables` misspelling kept: the training loop feeds this name.
input_lables = tf.placeholder(tf.float32, [None, lab_dim])

# Learnable parameters of the single logistic unit.
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")

# Sigmoid output: predicted probability of class 1.
output = tf.nn.sigmoid(tf.matmul(input_features, W) + b)

# Binary cross-entropy loss (trains the model)...
cross_entropy = -(input_lables * tf.log(output)
                  + (1 - input_lables) * tf.log(1 - output))
# ...plus a squared-error term used only as an error metric.
ser = tf.square(input_lables - output)
loss = tf.reduce_mean(cross_entropy)
err = tf.reduce_mean(ser)

# FIX: `optimizer` was used below but never defined anywhere in the file;
# the book listing uses Adam with learning rate 0.04.
optimizer = tf.train.AdamOptimizer(0.04)
train = optimizer.minimize(loss)

3. 设置参数进行训练

# --- Training --------------------------------------------------------------
maxEpochs = 50       # number of full passes over the data set
minibatchSize = 25   # samples per gradient step

# Start the session and run the training loop.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Feed the data to the model, one minibatch at a time.
    for epoch in range(maxEpochs):
        sumerr = 0  # accumulated squared-error metric over this epoch
        for i in range(len(Y) // minibatchSize):
            x1 = X[i * minibatchSize:(i + 1) * minibatchSize, :]
            # Labels must be a column vector to match the [None, 1] placeholder.
            y1 = np.reshape(Y[i * minibatchSize:(i + 1) * minibatchSize], [-1, 1])
            # FIX: removed a stray `tf.reshape(y1, [-1, 1])` whose result was
            # discarded — it only added a dead node to the graph each step.
            _, lossval, outputval, errval = sess.run(
                [train, loss, output, err],
                feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + errval

        # `lossval` is the loss of the LAST minibatch of the epoch.
        # NOTE(review): dividing sumerr by minibatchSize (25) rather than the
        # number of batches (40) matches the book's printed output, so it is
        # kept; it is a scaled metric, not a true mean.
        print("Epoch:", '%04d' % (epoch + 1),
              "cost=", "{:.9f}".format(lossval),
              "err=", sumerr / minibatchSize)


Epoch: 0001 cost= 0.801177144 err= 0.7616751062870025
Epoch: 0002 cost= 0.477810502 err= 0.40522061169147494
Epoch: 0003 cost= 0.269513637 err= 0.21584961980581283
Epoch: 0004 cost= 0.186233699 err= 0.11567755311727523
Epoch: 0005 cost= 0.146733195 err= 0.07597910903394223
Epoch: 0006 cost= 0.123761795 err= 0.058058027923107144
Epoch: 0007 cost= 0.108445339 err= 0.04827395606786013
Epoch: 0008 cost= 0.097274102 err= 0.04213936511427164
Epoch: 0009 cost= 0.088647947 err= 0.03792460318654776
Epoch: 0010 cost= 0.081724465 err= 0.03484359847381711
Epoch: 0011 cost= 0.076012306 err= 0.03249063930474222
Epoch: 0012 cost= 0.071201079 err= 0.030635128300637006
Epoch: 0013 cost= 0.067082971 err= 0.029135804837569595
Epoch: 0014 cost= 0.063511737 err= 0.027900906056165697
Epoch: 0015 cost= 0.060381308 err= 0.026868006195873022
Epoch: 0016 cost= 0.057611823 err= 0.025993026215583085
Epoch: 0017 cost= 0.055142455 err= 0.025243847076781095
Epoch: 0018 cost= 0.052925304 err= 0.024596498371101914
Epoch: 0019 cost= 0.050922602 err= 0.024032677803188563
Epoch: 0020 cost= 0.049103562 err= 0.023538184347562493
Epoch: 0021 cost= 0.047443327 err= 0.02310181698296219
Epoch: 0022 cost= 0.045921441 err= 0.022714638628531247
Epoch: 0023 cost= 0.044520836 err= 0.022369401305913927
Epoch: 0024 cost= 0.043226957 err= 0.022060203843284398
Epoch: 0025 cost= 0.042027850 err= 0.021782163383904846
Epoch: 0026 cost= 0.040913254 err= 0.02153123596450314
Epoch: 0027 cost= 0.039874084 err= 0.021304014374036343
Epoch: 0028 cost= 0.038902953 err= 0.021097642207751052
Epoch: 0029 cost= 0.037993081 err= 0.020909675495931878
Epoch: 0030 cost= 0.037138738 err= 0.020738033957313745
Epoch: 0031 cost= 0.036334887 err= 0.020580934719182552
Epoch: 0032 cost= 0.035577167 err= 0.020436836200533433
Epoch: 0033 cost= 0.034861591 err= 0.020304409904638304
Epoch: 0034 cost= 0.034184616 err= 0.020182462889933958
Epoch: 0035 cost= 0.033543259 err= 0.020069989305338824
Epoch: 0036 cost= 0.032934550 err= 0.019966101199970582
Epoch: 0037 cost= 0.032356307 err= 0.019869991253945046
Epoch: 0038 cost= 0.031806041 err= 0.019780976595939137
Epoch: 0039 cost= 0.031281944 err= 0.019698412961442955
Epoch: 0040 cost= 0.030782087 err= 0.01962176572124008
Epoch: 0041 cost= 0.030304812 err= 0.019550517819589004
Epoch: 0042 cost= 0.029848620 err= 0.019484240100136958
Epoch: 0043 cost= 0.029412288 err= 0.019422541317762807
Epoch: 0044 cost= 0.028994296 err= 0.019365040251868776
Epoch: 0045 cost= 0.028593704 err= 0.019311425428604707
Epoch: 0046 cost= 0.028209429 err= 0.01926139475312084
Epoch: 0047 cost= 0.027840517 err= 0.01921468239481328
Epoch: 0048 cost= 0.027485942 err= 0.019171043474052567
Epoch: 0049 cost= 0.027145121 err= 0.019130255988566203
Epoch: 0050 cost= 0.026817065 err= 0.019092114479572046

4. 数据可视化

    train_X, train_Y = generate(100, mean, cov, [3.0], True)
colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)
x = np.linspace(-1, 8, 200)
y = -x * (sess.run(W)[0] / sess.run(W)[1]) - sess.run(b) / sess.run(W)[1]
plt.plot(x, y, label='Fitted line')
plt.legend()
plt.show()

5. 线性可分概念