# X(x1,x2) y(0/1/2/3)
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(0)
Num = 100  # samples drawn per class

# (label, mean of x_1, mean of x_2) for the four Gaussian clusters, listed in
# the order their samples are drawn from the seeded generator.
_cluster_specs = [(0, -3, -3), (1, 3, -3), (2, -3, 3), (3, 3, 3)]
_clusters = []
for _label, _mean_1, _mean_2 in _cluster_specs:
    x_1 = np.random.normal(_mean_1, 1, size=Num)
    x_2 = np.random.normal(_mean_2, 1, size=Num)
    y = np.ones(Num) * _label
    # Stack as rows [x_1, x_2, y], then transpose to one sample per row.
    _clusters.append(np.array([x_1, x_2, y]).T)
c_0, c_1, c_2, c_3 = _clusters  # class 0 .. class 3, each of shape (Num, 3)
c_0.shape
(100, 3)
# Scatter-plot the four clusters, giving each class its own marker.
for cluster, marker in zip((c_0, c_1, c_2, c_3), ('o', '.', 'v', 's')):
    plt.scatter(cluster[:, 0], cluster[:, 1], marker=marker)
<matplotlib.collections.PathCollection at 0x1954ce90388>
![png](output_8_1.png)
# Stack the four clusters into one array and shuffle its rows in place so the
# split below mixes all classes.
All_data = np.concatenate((c_0, c_1, c_2, c_3))
All_data.shape
np.random.shuffle(All_data)

# First 300 rows are the training set, the rest the test set. Columns 0-1 are
# the features; the last column is the class label, kept as a column vector.
train_rows, test_rows = All_data[:300], All_data[300:]
train_data_X = train_rows[:, :2]
train_data_y = train_rows[:, -1].reshape(300, 1)
test_data_X = test_rows[:, :2]
test_data_y = test_rows[:, -1].reshape(100, 1)
train_data_X.shape, train_data_y.shape, test_data_X.shape, test_data_y.shape
((300, 2), (300, 1), (100, 2), (100, 1))
# y = w1*x1+w2*x2+b
# 0 = w1*x + w2*y +b
# y= -1*(w1*x+b)/w2
# Initial weights drawn uniformly from [0, 1): one row per class, one column
# per input feature.
W = np.random.random_sample((4, 2))
W
array([[0.99423308, 0.61476989],
[0.0371296 , 0.01425152],
[0.34210388, 0.82347172],
[0.86613471, 0.96081253]])
# Initial biases drawn uniformly from [0, 1): a single row with one entry per
# class, so it broadcasts across samples when added to the scores.
bias = np.random.random_sample((1, 4))
bias.shape
(1, 4)
# Clusters overlaid with, for each weight row k, the line on which that
# row's linear score is zero: W[k,0]*x + W[k,1]*y + bias[0,k] = 0.
for cluster, marker in zip((c_0, c_1, c_2, c_3), ('o', '.', 'v', 's')):
    plt.scatter(cluster[:, 0], cluster[:, 1], marker=marker)

x = np.arange(-5, 5)
lines = [-1 * (W[k, 0] * x + bias[0, k]) / W[k, 1] for k in range(4)]
y1, y2, y3, y4 = lines
y = y4  # y is left holding the values of the last line
for line, colour in zip(lines, 'bygr'):
    plt.plot(x, line, colour)
[<matplotlib.lines.Line2D at 0x1954cf57248>]
![png](output_13_1.png)
# softmax(x) = e^x / sum(e^x)
def softmax(z):
    """Return the softmax of z, normalised over ALL entries of z.

    Computes e^z / sum(e^z) with a single global sum, so the whole result
    sums to 1. That is the intended behaviour for a 1-D score vector; for a
    2-D array it does not normalise row by row.

    Args:
        z: NumPy array of scores.

    Returns:
        Array with the same shape as z.
    """
    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing on large scores.
    exp = np.exp(z - np.max(z))
    return exp / np.sum(exp)
# Quick check of softmax on a 1-D score vector.
a = np.array([1,2,3])
softmax(a)
array([0.09003057, 0.24472847, 0.66524096])
# Same function on a 2-D array: the single global sum normalises over every
# entry at once instead of row by row.
b = np.array([1,2,3,4,5,6]).reshape(2,3)
softmax(b) # softmax cannot be written this way for 2-D arrays
array([[0.00426978, 0.01160646, 0.03154963],
[0.08576079, 0.23312201, 0.63369132]])
def softmax(z):
    """Return the row-wise softmax of a 2-D array.

    Each row of z is treated as one sample's scores and is normalised
    independently, so every row of the result sums to 1.

    Args:
        z: 2-D NumPy array of scores (the reduction runs over axis 1, so a
            1-D input is not supported by this version).

    Returns:
        Array with the same shape as z.
    """
    # Shift each row by its own maximum so np.exp cannot overflow; the
    # shift cancels out in the ratio below.
    exp = np.exp(z - np.max(z, axis=1, keepdims=True))
    # keepdims=True keeps the sums as a column so they broadcast per row.
    return exp / np.sum(exp, axis=1, keepdims=True)
# Handle both the 1-D (single sample) and 2-D (batch of samples) cases.
def softmax(z):
    """Return the softmax of z for a single score vector or a batch.

    A 1-D input is normalised over all of its entries. Any other input is
    normalised along axis 1, i.e. each row of a 2-D array sums to 1.

    Args:
        z: 1-D array of scores, or 2-D array with one sample per row.

    Returns:
        Float array with the same shape as z.
    """
    z = np.asarray(z, dtype=float)
    # Decide by number of dimensions: testing z.shape[0] == 1 would send a
    # 1-D vector of length > 1 down the axis=1 path, which raises.
    if z.ndim == 1:
        exp = np.exp(z - np.max(z))
        return exp / np.sum(exp)
    # Shift each row by its maximum so np.exp cannot overflow; the shift
    # cancels out in the ratio.
    exp = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp / np.sum(exp, axis=1, keepdims=True)
# Row-wise check on a 2-D array: each row of the result should sum to 1.
b = np.array([1,2,4,5,5,6]).reshape(2,3)
softmax(b)
array([[0.04201007, 0.1141952 , 0.84379473],
[0.21194156, 0.21194156, 0.57611688]])
# One-hot encoding, e.g. with 4 classes the label 2 becomes [0, 0, 1, 0].
def one_hot(temp, num_classes=None):
    """Convert integer class labels into a one-hot matrix.

    Args:
        temp: Array of non-negative class labels, either 1-D with shape (N,)
            or a column vector with shape (N, 1). Float labels are
            truncated to integers.
        num_classes: Number of columns in the result. Defaults to
            ``max(label) + 1`` so that a class missing from ``temp`` does
            not shrink the encoding.

    Returns:
        Float array of shape (N, num_classes) with a single 1 in each row.
    """
    # Use the builtin int: the np.int alias no longer exists in current NumPy.
    labels = np.asarray(temp).astype(int).ravel()
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    encoded = np.zeros((labels.size, num_classes))
    # Row i gets its 1 in column labels[i].
    encoded[np.arange(labels.size), labels] = 1
    return encoded
# Compute y_hat (the linear scores fed to softmax)
def compute_y_hat(W, X, b):
    """Return the linear scores X · Wᵀ + b.

    Args:
        W: Weight array; its transpose is right-multiplied onto X, so for
            2-D inputs W has one row per output and one column per feature.
        X: Input array with one sample per row.
        b: Bias added to the product; it must broadcast against X · Wᵀ.

    Returns:
        Array of scores with one row per sample and one column per row of W.
    """
    return np.dot(X, W.T) + b
![image](attachment:30669397-f43f-4df6-9961-11c36366af71.png)
# Compute the cross-entropy loss
def cross_entropy(y, y_hat):
    """Return the mean cross-entropy between targets and predictions.

    Computes -(1/N) * sum(y * log(y_hat)), where N = len(y).

    Args:
        y: One-hot (or probability) targets, one row per sample.
        y_hat: Predicted probabilities with the same shape as y.

    Returns:
        The loss as a scalar.
    """
    # A predicted probability of exactly 0 would give log(0) = -inf, and
    # 0 * -inf = nan would poison the sum. Clamp from below; values at or
    # above eps are left untouched.
    eps = 1e-12
    safe_y_hat = np.clip(y_hat, eps, None)
    loss = -(1 / len(y)) * np.sum(y * np.log(safe_y_hat))
    return loss
# Batch gradient descent on the softmax-regression loss: w = w - lr * grad
lr = 0.001
All_loss = []

# The training inputs and their one-hot targets are the same in every
# iteration, so build them once.
X = train_data_X
y = one_hot(train_data_y)

for i in range(10000):
    # Forward pass and loss
    y_hat = softmax(compute_y_hat(W, X, bias))
    loss = cross_entropy(y, y_hat)
    All_loss.append(loss)

    # Gradients of the mean cross-entropy
    error = y_hat - y
    grad_w = (1 / len(X)) * np.dot(X.T, error)
    # Sum over the samples only (axis=0) to get one gradient per class.
    # Summing every element instead yields a scalar that is ~0, because each
    # row of both y_hat and y sums to 1, and the bias would never be trained.
    # NOTE: loss values logged by runs that used the scalar gradient will
    # not match the values printed now.
    grad_b = (1 / len(X)) * np.sum(error, axis=0, keepdims=True)

    # Parameter update; grad_w is (features, classes), W is its transpose.
    W = W - lr * grad_w.T
    bias = bias - lr * grad_b

    # Progress report every 300 iterations
    if i % 300 == 1:
        print("i : %d, loss : %f" % (i, loss))
i : 1, loss : 1.833425
i : 301, loss : 0.952549
i : 601, loss : 0.518215
i : 901, loss : 0.343816
i : 1201, loss : 0.267130
i : 1501, loss : 0.224292
i : 1801, loss : 0.196245
i : 2101, loss : 0.176068
i : 2401, loss : 0.160658
i : 2701, loss : 0.148401
i : 3001, loss : 0.138360
i : 3301, loss : 0.129946
i : 3601, loss : 0.122771
i : 3901, loss : 0.116564
i : 4201, loss : 0.111130
i : 4501, loss : 0.106324
i : 4801, loss : 0.102038
i : 5101, loss : 0.098186
i : 5401, loss : 0.094703
i : 5701, loss : 0.091534
i : 6001, loss : 0.088637
i : 6301, loss : 0.085976
i : 6601, loss : 0.083521
i : 6901, loss : 0.081248
i : 7201, loss : 0.079136
i : 7501, loss : 0.077168
i : 7801, loss : 0.075329
i : 8101, loss : 0.073606
i : 8401, loss : 0.071986
i : 8701, loss : 0.070462
i : 9001, loss : 0.069023
i : 9301, loss : 0.067663
i : 9601, loss : 0.066375
i : 9901, loss : 0.065153
# Plot the loss recorded at every training iteration.
plt.plot(All_loss)
[<matplotlib.lines.Line2D at 0x1954d0173c8>]
![png](output_25_1.png)
# Redraw the clusters together with the zero-score line of each weight row,
# W[k,0]*x + W[k,1]*y + bias[0,k] = 0, using the current W and bias.
markers = ('o', '.', 'v', 's')
for points, marker in zip((c_0, c_1, c_2, c_3), markers):
    plt.scatter(points[:, 0], points[:, 1], marker=marker)

x = np.arange(-5, 5)
zero_score_lines = [
    -1 * (W[row, 0] * x + bias[0, row]) / W[row, 1] for row in range(4)
]
y1, y2, y3, y4 = zero_score_lines
y = y4  # y is left holding the values of the last line
for values, colour in zip(zero_score_lines, ('b', 'y', 'g', 'r')):
    plt.plot(x, values, colour)
[<matplotlib.lines.Line2D at 0x1954d09d288>]
![png](output_26_1.png)
# Prediction on the test set
test_data_y.shape
(100, 1)
def predict(x):
    """Return the predicted class index for every row of x.

    Uses the module-level W and bias together with compute_y_hat and
    softmax, then picks the most probable class per sample.

    Args:
        x: Array of samples, one per row, with the same feature columns
            that W was trained on.

    Returns:
        1-D integer array with one class index per row of x.
    """
    y_hat = softmax(compute_y_hat(W, x, bias))  # one probability row per sample
    return np.argmax(y_hat, axis=1)
# Predicted class index for every test sample.
predict(test_data_X)
array([3, 0, 1, 0, 1, 3, 2, 2, 1, 2, 0, 2, 2, 3, 2, 3, 1, 2, 3, 1, 3, 3,
2, 0, 2, 1, 0, 1, 3, 1, 1, 3, 1, 3, 1, 2, 1, 2, 1, 0, 3, 2, 1, 1,
0, 2, 2, 3, 3, 2, 0, 1, 0, 2, 1, 2, 3, 2, 0, 0, 2, 3, 0, 3, 3, 2,
3, 2, 3, 0, 0, 0, 2, 3, 3, 2, 3, 0, 1, 1, 0, 0, 1, 2, 1, 3, 0, 1,
1, 2, 0, 1, 3, 0, 2, 1, 1, 3, 0, 2], dtype=int64)
# Shape of the weight matrix.
W.shape
(4, 2)
# Element-wise comparison of predictions with labels; the reshape turns the
# flat prediction vector into a column so it lines up with test_data_y.
predict(test_data_X).reshape(100,1) == test_data_y
# Alternatively, add the extra axis inside predict: return np.argmax(y_hat,axis=1)[:,np.newaxis]
array([[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True],
[ True]])
# Accuracy: fraction of test samples whose predicted class equals the label.
# reshape(-1, 1) turns the predictions into a column matching test_data_y
# without hard-coding the number of test samples.
np.sum(predict(test_data_X).reshape(-1, 1) == test_data_y) / len(test_data_y)
1.0
作业:使用softmax回归进行MNIST数据集的手写数字识别
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
print(tf.__version__)
2.1.0
# Load the MNIST train/test splits through Keras (the recorded output shows
# that this call downloads mnist.npz), then inspect the training shapes.
(x_train,y_train),(x_test,y_test) = tf.keras.datasets.mnist.load_data()
x_train.shape,y_train.shape
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
8814592/11490434 [======================>.......] - ETA: 8:20
# Show the first training image in grayscale. Dividing by 255 presumably
# rescales 0-255 pixel values to [0, 1] — TODO confirm the pixel range.
plt.imshow(x_train[0]/255,cmap='gray')