吴恩达 Deep Learning, class1, week1, assignment1
1.code
#-*- coding:utf-8 -*-
'''
version3
coding_time: 2018/10/15 20:02
@author: Steve Chen
'''
import numpy as np
import matplotlib.pyplot as plt
import time
from lr_utils import load_dataset
def getdata():
    """Load the dataset and turn the image tensors into scaled column vectors.

    The raw arrays come from ``load_dataset()``.  Each image array is reshaped
    to ``(num_samples, -1)``, transposed so that every column is one sample,
    and divided by 255.  The shapes before and after processing are printed.

    Returns:
        tuple: ``(train_x, train_y, test_x, test_y, classes)`` where the
        ``*_x`` arrays are the flattened, scaled inputs and the remaining
        values are passed through unchanged from ``load_dataset()``.
    """
    train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()
    rule = '-' * 10

    print(rule + 'dimensions of original data' + rule)
    for label, array in (
        ('train_x_orig: ', train_x_orig),
        ('train_y: ', train_y),
        ('test_x_orig: ', test_x_orig),
        ('test_y: ', test_y),
    ):
        print(label + str(array.shape))

    # The division by 255 is essential.  Version 2 forgot it and failed with:
    #   RuntimeWarning: overflow encountered in exp        (in sigmoid)
    #   RuntimeWarning: divide by zero encountered in log  (in the cost)
    #   RuntimeWarning: invalid value encountered in multiply
    # after which every printed cost was nan.
    # (presumably the inputs are 8-bit pixel intensities -- confirm in lr_utils)
    def _flatten_and_scale(images):
        # One row per sample -> transpose so each column is one sample.
        return (images.reshape(images.shape[0], -1).T) / 255

    train_x = _flatten_and_scale(train_x_orig)
    test_x = _flatten_and_scale(test_x_orig)

    print(rule + 'dimensions of processed data' + rule)
    for label, array in (
        ('train_x: ', train_x),
        ('train_y: ', train_y),
        ('test_x: ', test_x),
    ):
        print(label + str(array.shape))
    print('test_y: ' + str(test_y.shape) + '\n')

    return train_x, train_y, test_x, test_y, classes
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The naive formula overflows in ``np.exp`` for large negative ``z`` and
    emits ``RuntimeWarning: overflow encountered in exp``.  Here the
    exponential is only ever taken of a non-positive number, so it stays in
    ``[0, 1]`` and cannot overflow.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``, holding values in ``[0, 1]``.
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # exp of a non-positive value: always in [0, 1]
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    s = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    if s.ndim == 0:
        # Keep returning a scalar (not a 0-d array) for scalar input.
        return s[()]
    return s
def initialize(dim):
    """Create the starting parameters for gradient descent.

    Args:
        dim: Number of input features (rows of the weight vector).

    Returns:
        tuple: ``(w_init, b_init)`` -- a ``(dim, 1)`` array of zeros and the
        integer bias ``0``.
    """
    return np.zeros(shape=(dim, 1)), 0
def propagate_for_one_time(w, b, train_x, train_y, learning_rate):
    """Run one forward/backward pass and apply one gradient-descent step.

    Args:
        w: Weight column vector of shape ``(train_x.shape[0], 1)``.
        b: Scalar bias.
        train_x: Inputs with one sample per column.
        train_y: Labels of shape ``(1, m)``; ``m`` is read from
            ``train_y.shape[1]``.
        learning_rate: Step size applied to both gradients.

    Returns:
        tuple: ``(w, b, cost)`` -- the updated weights, the updated bias and
        the mean cross-entropy cost computed *before* the update.
    """
    m = train_y.shape[1]
    z = np.dot(w.T, train_x) + b  # don't forget '+ b'
    A = sigmoid(z)
    assert A.shape == train_y.shape
    diff_matrix = A - train_y

    # Cross-entropy written in terms of the logits:
    #   -log(A)     == logaddexp(0, -z)
    #   -log(1 - A) == logaddexp(0,  z)
    # This is the same quantity as -(y*log(A) + (1-y)*log(1-A)) but does not
    # produce inf/nan when the sigmoid saturates to exactly 0 or 1.
    cost = np.mean(train_y * np.logaddexp(0, -z)
                   + (1 - train_y) * np.logaddexp(0, z))

    # np.dot is used on purpose: the author measured the element-wise form
    # (diff_matrix * train_x).sum(axis=1) / m as almost eight times slower,
    # and it additionally needs a reshape back to a column vector.
    dw = np.dot(train_x, diff_matrix.T) / m
    db = np.sum(diff_matrix) / m

    w = w - learning_rate * dw
    b = b - learning_rate * db

    assert w.shape == (train_x.shape[0], 1)
    # The previous check `b.dtype == float or int` was always truthy because
    # `or int` short-circuits to the class `int`; verify the dtype for real.
    assert np.asarray(b).dtype.kind in 'iuf'
    return w, b, cost
def optimize(w, b, train_x, train_y, num_iterations, learning_rate, print_cost = False):
    """Repeat the single gradient-descent step ``num_iterations`` times.

    Args:
        w: Initial weights.
        b: Initial bias.
        train_x: Training inputs, forwarded to ``propagate_for_one_time``.
        train_y: Training labels, forwarded to ``propagate_for_one_time``.
        num_iterations: Number of update steps to perform.
        learning_rate: Step size, forwarded to ``propagate_for_one_time``.
        print_cost: When equal to ``True``, print each recorded cost.

    Returns:
        tuple: ``(w, b, costs)`` where ``costs`` holds the cost of every
        100th iteration (iterations 0, 100, 200, ...).
    """
    costs = []
    for step in range(num_iterations):
        w, b, cost = propagate_for_one_time(w, b, train_x, train_y, learning_rate)
        if step % 100:
            # Only every 100th step is recorded / reported.
            continue
        costs.append(cost)
        if print_cost == True:
            print(cost)
    return w, b, costs
# An earlier signature was predict(w, b, test_x, test_y, print_cost=False).
# A prediction function has to be general-purpose, so the labels Y and the
# cost must not appear among its parameters.
def predict(w, b, test_x):
    """Classify every column of ``test_x`` as 0 or 1.

    Args:
        w: Trained weight column vector.
        b: Trained scalar bias.
        test_x: Inputs with one sample per column.

    Returns:
        numpy.ndarray: Integer array of shape ``(1, test_x.shape[1])`` that is
        ``0`` where the predicted probability is below 0.5 and ``1`` elsewhere.
    """
    probabilities = sigmoid(np.dot(w.T, test_x) + b)
    # Vectorized threshold.  The previous squeeze() + list comprehension
    # raised TypeError for a single sample, because squeeze() then yields a
    # 0-d array that cannot be iterated.
    y_predict = np.where(probabilities < 0.5, 0, 1).reshape(1, -1)
    assert y_predict.shape == (1, test_x.shape[1])
    return y_predict
def model(w_init, b_init, train_x, train_y, test_x, teset_y, num_iterations, learning_rate, print_cost = False):
    """Train logistic regression and report accuracy on both data sets.

    Args:
        w_init: Initial weights passed to ``optimize``.
        b_init: Initial bias passed to ``optimize``.
        train_x: Training inputs, one sample per column.
        train_y: Training labels of shape ``(1, m_train)``.
        test_x: Test inputs, one sample per column.
        teset_y: Test labels of shape ``(1, m_test)``.  The name is a
            historical typo for ``test_y``; it is kept so existing callers
            keep working.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Gradient-descent step size.
        print_cost: Forwarded to ``optimize``.

    Returns:
        dict: Keys ``'w'``, ``'b'``, ``'learning_rate'``, ``'y_predict_test'``,
        ``'y_predict_train'``, ``'correct_ratio_test'``,
        ``'correct_ratio_train'`` and ``'costs'`` (the recorded costs wrapped
        in a ``(1, k)`` array).
    """
    # The body used to read `test_y`, which is not a parameter: it silently
    # picked up the module-level global when run as a script and raised
    # NameError when imported.  Bind the argument to that name explicitly.
    test_y = teset_y

    tick = time.time()
    w, b, costs_train = optimize(w_init, b_init, train_x, train_y, num_iterations, learning_rate, print_cost)
    y_predict_test = predict(w, b, test_x)
    y_predict_train = predict(w, b, train_x)

    # Counting with `prediction & label` would be wrong: it misses the correct
    # "label 0, predicted 0" cases (XNOR is needed, not AND).  Count the
    # mismatches instead and subtract them from the total.
    num_correct_test = test_y.shape[1] - np.sum(abs(test_y - y_predict_test))
    correct_ratio_test = num_correct_test / test_y.shape[1]
    num_correct_train = train_y.shape[1] - np.sum(abs(train_y - y_predict_train))
    correct_ratio_train = num_correct_train / train_y.shape[1]

    print('correct_ratio_test: ' + str(correct_ratio_test * 100) + '%')
    print('correct_ratio_train: ' + str(correct_ratio_train * 100) + '%')

    d = {
        'w': w,
        'b': b,
        'learning_rate': learning_rate,
        'y_predict_test': y_predict_test,
        'y_predict_train': y_predict_train,
        'correct_ratio_test': correct_ratio_test,
        'correct_ratio_train': correct_ratio_train,
        'costs': np.array([costs_train]),
    }
    tock = time.time()
    # Elapsed wall-clock time, converted from seconds to milliseconds.
    print('time: ' + str((tock - tick) * 1000))
    return d
if __name__ == '__main__':
    # Script entry point: train one model per learning rate and plot the
    # recorded cost curves on a shared figure.
    print('verson3(final version)')
    train_x, train_y, test_x, test_y, classes = getdata()
    w_init, b_init = initialize(train_x.shape[0])

    # Single-run variant kept for reference:
    #d = model(w_init, b_init, train_x, train_y, test_x, test_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)
    #print('correct_ratio_test: ' + str(d['correct_ratio_test'] * 100) + '%')
    #print('correct_ratio_train: ' + str(d['correct_ratio_train'] * 100) + '%')

    learning_rate = [0.01, 0.001, 0.0001]
    models = {}
    for i, rate in enumerate(learning_rate):
        print('-' * 25)
        print('learning rate: ' + str(rate))
        d = model(w_init, b_init, train_x, train_y, test_x, test_y, 2000, rate, print_cost = False)
        key = str(i)
        models[key] = d
        # 'costs' has shape (1, k); squeeze it to a 1-D curve for plotting.
        plt.plot(models[key]['costs'].squeeze(), label = str(rate))

    plt.legend()
    plt.xlabel('iterations(per hundreds)')
    plt.ylabel('cost')
    plt.show()
2. Understanding logistic regression, the LOSS FUNCTION and the COST FUNCTION
①logistic regression
②sigmoid function and loss function
③cost function