First neural network algorithm: Logistic Regression
Reference: https://blog.csdn.net/koala_tree/article/details/78057033
Full code, kept for personal reference.
Logistic Regression.py
Keep these shapes straight (a sanity-check sketch follows this list):
x_orig (m, num_px, num_px, 3), where m is the number of examples
x (num_px * num_px * 3, m), after reshaping
w (num_px * num_px * 3, 1), since it is transposed before being multiplied with x
y (1, m), the predictions
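A minimal standalone sketch of this shape pipeline (the zero arrays are dummy stand-ins for the real dataset, with m = 4 examples of 64x64 RGB images):

import numpy as np

x_orig = np.zeros((4, 64, 64, 3))            # (m, num_px, num_px, 3), m = 4
x = x_orig.reshape(x_orig.shape[0], -1).T    # (num_px*num_px*3, m) = (12288, 4)
w = np.zeros((x.shape[0], 1))                # (12288, 1)
y = np.dot(w.T, x)                           # (1, m) = (1, 4)
print(x.shape, w.shape, y.shape)             # (12288, 4) (12288, 1) (1, 4)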
import numpy as np
import matplotlib.pyplot as plt
import h5py                # package for reading datasets stored in H5 files
from PIL import Image      # used to load and resize your own test image
from lr_utils import load_dataset
from numpy_basic import sigmoid
# Part 1: prepare the dataset
train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()
# # Example of a picture
# index = 50
# plt.imshow(train_x_orig[index])
# # plt.show()
# print("y= " +str(train_y[:,index])+",it's a '" + classes[np.squeeze(train_y[:,index])].decode("utf-8")+"' picture.")
#1、Figure out the dimensions and shapes of data
# note: train_x_orig is a numpy-array of shape (m_train, num_px, num_px, 3)
m_train = train_x_orig.shape[0]  # number of training examples
m_test = test_x_orig.shape[0]    # number of test examples
num_px = train_x_orig.shape[1]   # height/width of each (square) image
# print("Number of training examples: m_train = "+ str(m_train)) 209
# print("Number of testing examples: m_test = "+str(m_test)) 50
# print("Height/Width of each image: num_px = "+str(num_px)) 64
# print("Each image is of size: ("+str(num_px)+", "+str(num_px)+", 3)") (64,64,3)
# print("train_x shape: "+str(train_x_orig.shape) + "\n train_y shape: "+str(train_y.shape) ) (209,64,64,3) (1,209)
# print("test_x shape: "+str(test_x_orig.shape) + "\n test_y shape: "+str(test_y.shape) ) (50,64,64,3) (1,50)
# 2、reshape images of shape (m_train, num_px, num_px, 3) into a numpy array of shape (num_px * num_px * 3, m_train)
# X_flatten = X.reshape(X.shape[0], -1).T   # X.T is the transpose of X; -1 lets numpy infer the flattened dimension
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0],-1).T
# print ("train_set_x_flatten shape: " + str(train_x_flatten.shape)) (12288, 209)
# print ("test_set_x_flatten shape: " + str(test_x_flatten.shape)) (12288, 50)
# print ("sanity check after reshaping: " + str(train_x_flatten[0:5,0])) #检查完整性 0:5前5行 0第一列
# 3、standardize
# for image data, standardization simply means dividing every pixel value by 255 (the maximum channel value)
train_x = train_x_flatten/255 #12288, 209
test_x = test_x_flatten/255 #12288, 50
#Logistic Regression is actually a very simple Neural Network!
# Part 2: build the neural network algorithm
# The main steps for building a Neural Network are:
# 1. Define the model structure (such as number of input features)
# 2. Initialize the model’s parameters
# 3. Loop:
# - Calculate current loss (forward propagation)
# - Calculate current gradient (backward propagation)
# - Update parameters (gradient descent)
# 1、sigmoid() (imported from numpy_basic.py)
# 2、Initializing parameters
def initialize_parameters(dim):
    w = np.zeros((dim, 1))   # column vector
    b = 0                    # broadcasting handles the dimensions, so a scalar 0 is enough
    assert(w.shape == (dim, 1))   # note the inner parentheses: the shape is the tuple (dim, 1)
assert(isinstance(b, float) or isinstance(b, int))
return w, b
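A quick sanity check, in the same commented-out style as the tests used elsewhere in this file (dim = 2 is an arbitrary choice):
# w, b = initialize_parameters(2)
# print("w = " + str(w))   # [[0.] [0.]]
# print("b = " + str(b))   # 0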
# 3、Forward and Backward propagation
def propagation(w, b, X, Y):
"""
Implement the cost function and its gradient for the propagation explained above
Arguments:
w -- weights, a numpy array of size (num_px * num_px * 3, 1)
b -- bias, a scalar
X -- data of size (num_px * num_px * 3, number of examples)
Y -- true "label" vector (containing 0 if non-cat, 1 if cat) of size (1, number of examples)
Return:
cost -- negative log-likelihood cost for logistic regression
dw -- gradient of the loss with respect to w, thus same shape as w
db -- gradient of the loss with respect to b, thus same shape as b
Tips:
- Write your code step by step for the propagation. np.log(), np.dot()
"""
m = X.shape[1]
# FORWARD PROPAGATION (FROM X TO COST)
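    # forward pass: Z = w.T·X + b,  A = sigmoid(Z),  cost J = -(1/m) * sum(Y*log(A) + (1-Y)*log(1-A))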
Z = np.dot(w.T, X)+b
A = sigmoid(Z)
cost = -(1.0/m)*np.sum(Y*np.log(A) + (1-Y)*np.log(1-A))
# BACKWARD PROPAGATION (TO FIND GRAD)
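    # gradients of J: dw = (1/m) * X·(A-Y).T,  db = (1/m) * sum(A-Y)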
dw = (1.0/m)*np.dot(X, (A-Y).T)
db = (1.0/m)*np.sum(A-Y)
assert(dw.shape == w.shape )
assert(db.dtype == float)
    cost = np.squeeze(cost)    # drop axes of size 1
    assert(cost.shape == ())   # after squeeze, cost is a 0-d array, so its shape is the empty tuple
grads = {"dw":dw,"db":db}
return grads, cost
w, b, X, Y = np.array([[1.],[2.]]), 2., np.array([[1.,2.,-1.],[3.,4.,-3.2]]), np.array([[1,0,1]])
# grads, cost = propagation(w, b, X, Y)
# print("dw= "+ str(grads["dw"]))
# print("db= "+ str(grads["db"]))
# print("cost= " + str(cost))
# 4、Optimization (iterative gradient descent)
def optimization(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
"""
This function optimizes w and b by running a gradient descent algorithm
Arguments:
w -- weights, a numpy array of size (num_px * num_px * 3, 1)
b -- bias, a scalar
X -- data of shape (num_px * num_px * 3, number of examples)
Y -- true "label" vector (containing 0 if non-cat, 1 if cat), of shape (1, number of examples)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps
Returns:
params -- dictionary containing the weights w and bias b
grads -- dictionary containing the gradients of the weights and bias with respect to the cost function
costs -- list of all the costs computed during the optimization, this will be used to plot the learning curve.
Tips:
You basically need to write down two steps and iterate through them:
    1) Calculate the cost and the gradient for the current parameters. Use propagation().
2) Update the parameters using gradient descent rule for w and b.
"""
costs = []
for i in range(num_iterations):
grads, cost = propagation(w, b, X, Y)
dw = grads["dw"]
db = grads["db"]
w = w - learning_rate*dw
b = b - learning_rate*db
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
params = {"w": w, "b": b}
grads = {"dw": dw, "db": db}
return params, grads, costs
params, grads, costs = optimization(w, b, X, Y, num_iterations=100, learning_rate=0.009, print_cost= True)
# print ("w = " + str(params["w"]))
# print ("b = " + str(params["b"]))
# print ("dw = " + str(grads["dw"]))
# print ("db = " + str(grads["db"]))
# 5、Predictions
def predict(w, b, X):
'''
Predict whether the label is 0 or 1 using learned logistic regression parameters (w, b)
Arguments:
    w -- trained weights, shape (num_px * num_px * 3, 1)
    b -- trained bias, a scalar
    X -- test data, shape (num_px * num_px * 3, number of examples)
Returns:
Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
'''
m = X.shape[1]
Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)      # defensive reshape: guarantee w is a column vector of the right length
    A = sigmoid(np.dot(w.T, X) + b)   # one vectorized computation yields the whole activation matrix A
    for i in range(A.shape[1]):       # threshold each activation; A has shape (1, number of examples)
if A[0,i] >0.5:
Y_prediction[0, i] = 1
else:
Y_prediction[0, i] = 0
assert(Y_prediction.shape == (1, m))
return Y_prediction
w = np.array([[0.1124579],[0.23106775]])   # shape (2, 1); here 2 plays the role of num_px*num_px*3
b = -0.3
X = np.array([[1.,-1.1,-3.2],[1.2,2.,0.1]])   # shape (2, 3); again 2 stands for num_px*num_px*3
# print ("predictions = " + str(predict(w, b, X)))
#6、 Merge all functions into a model
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
"""
Builds the logistic regression model by calling the function you've implemented previously
Arguments:
X_train -- (num_px * num_px * 3, m_train)
Y_train -- (1, m_train)
X_test -- (num_px * num_px * 3, m_test)
Y_test -- (1, m_test)
    num_iterations -- number of iterations used to train the parameters
    learning_rate -- learning rate used in the update rule of optimization()
    print_cost -- Set to true to print the cost every 100 iterations
Returns:
d -- dictionary containing information about the model.
"""
    # uses initialize_parameters, propagation, optimization, predict
w, b = initialize_parameters(X_train.shape[0])
params, grads, costs = optimization(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
Y_prediction_test = predict(params["w"], params["b"], X_test)
    Y_prediction_train = predict(params["w"], params["b"], X_train)   # used to compute the train accuracy
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
d = {"w":params["w"],
"b":params["b"],
"costs":costs,
"Y_predict":Y_prediction_test,
"learning_rate":learning_rate}
return d
d = model(train_x, train_y, test_x, test_y, num_iterations = 2000, learning_rate =0.005, print_cost = False)
# test accuracy: 70.0 %, which is already quite good for such a simple classifier
# the model overfits the training set; a regularization term should be added later (see the sketch below)
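A minimal sketch of how L2 regularization could be bolted onto propagation() (the lambd hyperparameter and the function name are assumptions, not part of the original assignment): the cost gains a (lambd/(2m))*||w||^2 penalty and dw gains a matching (lambd/m)*w term.

def propagation_l2(w, b, X, Y, lambd=0.1):   # lambd is a hypothetical hyperparameter
    m = X.shape[1]
    A = sigmoid(np.dot(w.T, X) + b)
    # penalized cost: cross-entropy plus (lambd / (2m)) * ||w||^2
    cost = -(1.0/m)*np.sum(Y*np.log(A) + (1-Y)*np.log(1-A)) + (lambd/(2.0*m))*np.sum(w**2)
    dw = (1.0/m)*np.dot(X, (A-Y).T) + (lambd/m)*w   # the penalty also enters the gradient of w
    db = (1.0/m)*np.sum(A-Y)                        # b is conventionally not regularized
    return {"dw": dw, "db": db}, np.squeeze(cost)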
# Example of a picture that was wrongly classified
index = 1
# plt.imshow( test_x[:,index].reshape(num_px, num_px, 3) )
# plt.show()
# print ("y = " + str(test_y[0,index]) + ", you predicted that it is a \"" + classes[int(np.squeeze(d["Y_predict"][0,index]))].decode("utf-8") + "\" picture.")
# Plot learning curve (with costs)
costs = np.squeeze(d['costs'])
# plt.plot(costs)
# plt.ylabel('costs')
# plt.xlabel('iterations (per hundreds)')
# plt.title("learning rate= "+str(d["learning_rate"]))
# plt.show()
# analysis of the learning rate α
learning_rate = [0.01, 0.001, 0.0001]
# models = {}
# for i in learning_rate:
# print("learning rate is :" + str(i))
# models[str(i)] = model(train_x, train_y, test_x, test_y, num_iterations=1500, learning_rate=i, print_cost=False)
# print('\n' + "-------------------------------------------------------" + '\n')
#
# for i in learning_rate:
# plt.plot(np.squeeze(models[str(i)]["costs"]), label=str(models[str(i)]["learning_rate"]))
#
# plt.xlabel('iterations')
# plt.ylabel('cost')
#
# legend = plt.legend(loc='upper center', shadow=True)   # loc places the legend box; shadow=True draws a drop shadow behind it
# frame = legend.get_frame()
# frame.set_facecolor('0.90')
# plt.show()
#Test with your own image
my_image = "aaaaa.jpg"
# preprocess the image to fit your algorithm.
fname = "images/" + my_image
# scipy.ndimage.imread and scipy.misc.imresize were removed from recent SciPy releases,
# so load and resize with PIL instead (equivalent behavior):
image = np.array(Image.open(fname))
my_image = np.array(Image.open(fname).resize((num_px, num_px))).reshape((1, num_px*num_px*3)).T
my_image = my_image / 255.   # standardize the same way as the training data
my_predicted_image = predict(d["w"],d["b"],my_image)
plt.imshow(image)
print("y = " + str(np.squeeze(my_predicted_image)) + ", your algorithm predicts a \"" + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") + "\" picture.")
numpy_basic.py
# 1 - Building basic functions with numpy
import numpy as np
# sigmoid function
def sigmoid(x):
    s = 1.0/(1 + np.exp(-x))   # sigmoid(x) = 1 / (1 + e^(-x))
return s
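np.exp(-x) raises overflow warnings for large negative x. A numerically safe drop-in (a sketch; assumes SciPy is installed, and sigmoid_stable is a hypothetical name) is scipy.special.expit:

from scipy.special import expit
def sigmoid_stable(x):
    return expit(x)   # evaluates 1/(1 + exp(-x)) without overflow warnings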
# Sigmoid gradient
def sigmoid_derivative(x):
s= sigmoid(x)
ds = s*(1-s)
return ds
# Reshaping arrays
def image2vector(image):
# (length, height, 3) to (length*height*3, 1)
v = image.reshape(image.shape[0]*image.shape[1]*image.shape[2],1)
return v
# Normalizing rows
def normalizeRows(x):
    # np.linalg.norm computes the vector norm of each row (axis=1);
    # keepdims=True keeps the result 2-D so the division below broadcasts correctly
    x_norm = np.linalg.norm(x, axis=1, keepdims=True)
x = x/x_norm
return x
# Broadcasting and the softmax function
def softmax(x):
    x_exp = np.exp(x)                             # x is an (m, n) matrix
    x_sum = np.sum(x_exp, axis=1, keepdims=True)  # axis=1 sums across each row, so x_sum has shape (m, 1)
    s = x_exp/x_sum                               # broadcasting divides each row by its own sum
return s
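np.exp can overflow when x contains large entries. The standard remedy (a sketch; softmax_stable is a hypothetical name) subtracts each row's maximum before exponentiating, which does not change the result because softmax is invariant to shifting a row by a constant:

def softmax_stable(x):
    x_shift = x - np.max(x, axis=1, keepdims=True)   # each row's max becomes 0, so np.exp cannot overflow
    x_exp = np.exp(x_shift)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)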
if __name__=="__main__":
x = np.array([1,2,3])
# print("sigmoid(x)= "+ str(sigmoid(x)))
# print("sigmoid_derivative(x) = " + str(sigmoid_derivative(x)))
image = np.array([[[0.67826139, 0.29380381],
[0.90714982, 0.52835647],
[0.4215251, 0.45017551]],
[[0.92814219, 0.96677647],
[0.85304703, 0.52351845],
[0.19981397, 0.27417313]],
[[0.60659855, 0.00533165],
[0.10820313, 0.49978937],
                      [0.34144279, 0.94630077]]])   # a 3x3x2 array, just for illustration; real images are typically (height, width, 3)
# print("image2vector(image)=" + str(image2vector(image)))
    x = np.array([[0, 3, 4], [1, 6, 4]])   # shape (2, 3)
# print("normalizeRows(x)= " + str(normalizeRows(x)))
x=np.array([[9,2,5,0,0],[7,5,0,0,0]])
print("softmax(x) = " + str(softmax(x)))
vectorization.py
import time
import numpy as np
x1 = [9, 2, 5, 0, 0, 7, 5, 0, 0, 0, 9, 2, 5, 0, 0]
x2 = [9, 2, 2, 9, 0, 9, 2, 5, 0, 0, 9, 2, 5, 0, 0]
# DOT PRODUCT (inner product)
tic = time.process_time()
dot = 0
for i in range(len(x1)):
dot += x1[i] * x2[i]
toc = time.process_time()
print("dot=" + str(dot) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# OUTER PRODUCT
tic = time.process_time()
outer = np.zeros((len(x1), len(x2)))   # outer[i, j] will hold x1[i] * x2[j], a len(x1) x len(x2) matrix
for i in range(len(x1)):
for j in range((len(x2))):
outer[i,j] = x1[i]*x2[j]
toc = time.process_time()
print("outer=" + str(outer) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# ELEMENTWISE multiplication
tic=time.process_time()
mul = np.zeros(len(x1))
for i in range(len(x1)):
mul[i] = x1[i] * x2[i]
toc=time.process_time()
print("elementwise mul=" + str(mul) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# GENERAL DOT PRODUCT: the matrix-vector product W·x1
tic = time.process_time()
W = np.random.rand(3, len(x1))   # np.random.rand takes separate dimension arguments, not a tuple
gdot = np.zeros(W.shape[0])      # 3 output components
for i in range(W.shape[0]):
for j in range(len(x1)):
gdot[i] += W[i,j]*x1[j]
toc=time.process_time()
print("gdot =" + str(gdot ) +"\n ----------Computation Time=" + str(1000*(toc-tic)) + "ms")
# in practice, just call the vectorized NumPy versions directly:
np.dot(x1,x2)
np.outer(x1,x2)
np.multiply(x1,x2)
np.dot(W,x1)
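For comparison, the vectorized calls can be timed with the same pattern as the loops above (a sketch reusing x1, x2, and W from this file):

tic = time.process_time()
dot = np.dot(x1, x2)   # replaces the explicit accumulation loop
toc = time.process_time()
print("vectorized dot = " + str(dot) + "\n ----------Computation Time = " + str(1000*(toc-tic)) + "ms")

tic = time.process_time()
gdot = np.dot(W, x1)   # replaces the double loop
toc = time.process_time()
print("vectorized gdot = " + str(gdot) + "\n ----------Computation Time = " + str(1000*(toc-tic)) + "ms")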
L1_L2_Loss Function.py
import numpy as np
def L1(yhat, y):
"""
Arguments:
yhat -- vector of size m (predicted labels)
y -- vector of size m (true labels)
Returns:
    loss -- the value of the L1 loss, sum(|y - yhat|)
"""
loss = np.sum(np.abs(y-yhat))
return loss
def L2(yhat, y):
"""
Arguments:
yhat -- vector of size m (predicted labels)
y -- vector of size m (true labels)
Returns:
    loss -- the value of the L2 loss, sum((y - yhat)^2)
"""
    loss = np.dot(y - yhat, y - yhat)   # for 1-D vectors, np.dot(v, v) == sum(v**2)
#loss = np.sum(np.power((y-yhat),2))
return loss
yhat = np.array([.9, 0.2, 0.1, .4, .9])
y = np.array([1, 0, 0, 1, 1])
print("L1 = " + str(L1(yhat,y)))
print("L2 = " + str(L2(yhat,y)))