机器学习公开课第六讲
这篇笔记对应的是公开课视频的第六讲,Ng原来的视频中讲到了多项式事件模型(Multinomial Event Model)、神经网络模型和支持向量机(SVM)的一些概念。由于视频比较老,Ng在这里对神经网络模型的介绍并不详细,我参考Ng最新的视频重点介绍神经网络模型,同时将支持向量机放在下一节中作为一个完整的算法展开介绍。
python代码实现:
import numpy as np
import xlrd
import matplotlib.pyplot as plt
# Network architecture: 2-5-3-1 (input - hidden1 - hidden2 - output)
input_dim = 2 # input layer dimensionality
output_dim = 1 # output layer dimensionality
lr = 0.5 # learning rate for gradient descent
def load_data(filename):
    """Read the first sheet of an Excel workbook into feature/label arrays.

    Each data row holds two feature columns followed by one label column;
    the first (header) row is skipped.  Returns (features, labels) as
    numpy arrays.
    """
    sheet = xlrd.open_workbook(filename).sheet_by_index(0)
    n_cols = sheet.ncols
    n_rows = sheet.nrows
    features = []
    labels = []
    print(n_cols, n_rows)
    for row_idx in range(1, n_rows):
        values = sheet.row_values(row_idx)[0:]
        features.append([values[0], values[1]])
        labels.append(values[2])
    return np.array(features), np.array(labels)
def normalization(X):
    """Mean-center each column of X and scale it by its range (max - min)."""
    col_min = np.min(X, axis=0)
    col_max = np.max(X, axis=0)
    col_mean = np.mean(X, axis=0)
    return (X - col_mean) / (col_max - col_min)
def randInitializeWeights(L_in, L_out):
    """Return an (L_out, 1 + L_in) weight matrix (bias column included).

    Entries are drawn uniformly from [-epsilon, epsilon) to break symmetry
    between units.  The original code first allocated a zero matrix and
    immediately overwrote it; that dead assignment is removed.
    """
    epsilon = 0.12
    # np.random.rand samples uniformly from [0, 1); rescale to [-eps, eps).
    return np.random.rand(L_out, 1 + L_in) * (2 * epsilon) - epsilon
def sigmoid(z):
    """Logistic activation: squashes input element-wise into (0, 1)."""
    return 1 / (1 + np.exp(-z))
def sigmoidGradient(z):
    """Derivative of the sigmoid, g'(z) = g(z) * (1 - g(z)), as np.mat."""
    s = np.array(1.0 / (1.0 + np.exp(-z)))
    return np.mat(s * (1 - s))
def calculate_loss(model, x_m, y):
    """Mean binary cross-entropy of the network on bias-augmented inputs.

    x_m is expected to already carry the leading bias column; a further
    bias column is prepended before each hidden layer, mirroring the
    forward pass used during training.
    """
    m = y.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    bias = np.ones((m, 1))
    # Forward pass through both hidden layers and the output unit.
    a1 = np.mat(x_m)
    a2 = np.column_stack((bias, sigmoid(a1 * W1.T)))
    a3 = np.column_stack((bias, sigmoid(a2 * W2.T)))
    a4 = sigmoid(a3 * W3.T)
    # Binary cross-entropy summed over samples, averaged by m.
    total = (np.multiply(y, np.log(a4)) + np.multiply(1 - y, np.log(1 - a4))).sum()
    return -total / m
def compare(X):
    """Threshold column 0 of X at 0.5: entries above become 1.0, others 0.0."""
    return [1.0 if X[i, 0] > 0.5 else 0.0 for i in range(X.shape[0])]
def precision(model, x_m):
    """Forward-propagate bias-augmented inputs x_m and return 0/1 predictions.

    NOTE(review): despite the name, this returns per-sample class labels
    (via compare), not an accuracy score.
    """
    m = x_m.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    bias = np.ones((m, 1))
    a1 = np.mat(x_m)
    a2 = np.column_stack((bias, sigmoid(a1 * W1.T)))
    a3 = np.column_stack((bias, sigmoid(a2 * W2.T)))
    a4 = sigmoid(a3 * W3.T)
    return compare(a4)
def build_model(X, y, hidden1_dim, hidden2_dim, iterNum=2000, print_loss=False):
    """Train the input->hidden1->hidden2->output network by full-batch
    gradient descent with backpropagation.

    Parameters
    ----------
    X : (m, input_dim) feature matrix (without bias column).
    y : (m,) vector of 0/1 labels.
    hidden1_dim, hidden2_dim : sizes of the two hidden layers.
    iterNum : number of gradient-descent iterations.
    print_loss : when True, print and log loss/accuracy every 1000 iterations
        to "logText.txt".

    Returns
    -------
    dict with trained weight matrices "W1", "W2", "W3".

    Fixes vs. original: the log file is now opened with `with` so it is
    closed even if training raises; the loss is computed once per report
    instead of twice; "Traning" typo corrected in the accuracy message.
    """
    m = X.shape[0]
    # Random symmetry-breaking initialization for every layer.
    W1 = randInitializeWeights(input_dim, hidden1_dim)
    W2 = randInitializeWeights(hidden1_dim, hidden2_dim)
    W3 = randInitializeWeights(hidden2_dim, output_dim)
    model = {}
    with open("logText.txt", "w") as logFile:
        for t in range(iterNum):
            # Forward propagation; a bias column is prepended at each layer.
            x_m = np.column_stack((np.ones((m, 1)), X))
            a1 = np.mat(x_m)
            z2 = a1 * W1.T
            a2 = np.column_stack((np.ones((m, 1)), sigmoid(z2)))
            z3 = a2 * W2.T
            a3 = np.column_stack((np.ones((m, 1)), sigmoid(z3)))
            z4 = a3 * W3.T
            a4 = sigmoid(z4)
            # Backpropagation: output error pushed back through both hidden
            # layers; [:, 1:] drops the bias column from each delta.
            y_m = np.reshape(y, [-1, 1])
            delta4 = a4 - y_m
            delta3 = np.multiply((delta4 * W3)[:, 1:], sigmoidGradient(z3))
            delta2 = np.multiply((delta3 * W2)[:, 1:], sigmoidGradient(z2))
            DW3 = (delta4.T * a3) / m
            DW2 = (delta3.T * a2) / m
            DW1 = (delta2.T * a1) / m
            # Gradient-descent parameter update.
            W1 -= lr * DW1
            W2 -= lr * DW2
            W3 -= lr * DW3
            model = {"W1": W1, "W2": W2, "W3": W3}
            if print_loss and t % 1000 == 0:
                # Compute the loss once and reuse it for console + log.
                loss_msg = "Loss after iteration %i: %f" % (t, calculate_loss(model, x_m, y_m))
                print(loss_msg)
                logFile.write(loss_msg)
                logFile.write("\n")
                result = precision(model, x_m)
                acc_msg = "Training Set Accuracy: {:f}".format(np.mean(result == y) * 100)
                print(acc_msg)
                logFile.write(acc_msg)
                logFile.write("\n")
    return model
def predict(X, model):
    """Forward-propagate raw features X (the bias column is added here)
    and return the network's sigmoid outputs as an (m, 1) matrix."""
    m = X.shape[0]
    W1, W2, W3 = model["W1"], model["W2"], model["W3"]
    bias = np.ones((m, 1))
    a1 = np.mat(np.column_stack((bias, X)))
    a2 = np.column_stack((bias, sigmoid(a1 * W1.T)))
    a3 = np.column_stack((bias, sigmoid(a2 * W2.T)))
    return sigmoid(a3 * W3.T)
def plotData(X, y):
    """Scatter-plot positive samples (y==1, red squares) and negative
    samples (y==0, green circles); return the two Line2D handles for the
    legend.
    """
    # Indices of positive and negative samples.
    pos = np.where(y==1)
    neg = np.where(y==0)
    p1 = plt.plot(X[pos, 0], X[pos, 1], marker='s', markersize=7, color='red')[0]
    # plt.plot returns a one-element list here, so the correct index is [0];
    # the original indexed [1], which raises IndexError.
    p2 = plt.plot(X[neg, 0], X[neg, 1], marker='o', markersize=7, color='green')[0]
    return p1,p2
# Draw the decision boundary over the normalized data.
def plot_decision_boundary(predict_func,X_norm,y):
    """Plot the samples together with the model's decision boundary.

    predict_func: callable mapping an (n, 2) feature matrix to network
    outputs; X_norm: normalized (m, 2) feature matrix; y: 0/1 labels.
    """
    # Plot limits: data range padded by 0.5 on each side.
    x_min, x_max = X_norm[:, 0].min()-0.5, X_norm[:, 0].max()+0.5
    y_min, y_max = X_norm[:, 1].min()-0.5, X_norm[:, 1].max()+0.5
    # Grid step size.
    stride = 0.01
    # Build a dense evaluation grid with np.arange + np.meshgrid.
    x_med = np.arange(x_min, x_max, stride)
    y_med = np.arange(y_min, y_max, stride)
    xx, yy = np.meshgrid(x_med, y_med)
    # .ravel() flattens xx/yy; np.c_ pairs them column-wise so each grid
    # point becomes one (x, y) sample fed to predict_func.
    mat = np.c_[xx.ravel(), yy.ravel()]
    Z = predict_func(mat)
    Z = (Z.T).reshape(xx.shape)
    p1, p2 = plotData(X_norm, y)
    p3 = plt.contour(xx, yy, Z, levels=0, linewidths=2)
    plt.xlabel("tall") # x-axis label
    plt.ylabel("salary") # y-axis label
    plt.legend((p1, p2, p3), ('y = I like you', "y = I don't like you", 'Decision Boundary'), numpoints=1,
               handlelength=0)
    plt.title("ANN")
def visualize(X_norm, y, model):
    """Render the decision boundary, save it to result.png and display it."""
    plot_decision_boundary(lambda sample: predict(sample, model), X_norm, y)
    plt.savefig("result.png")
    plt.show()
if __name__ == "__main__":
    # Load features/labels from the Excel file.
    features, labels = load_data("data.xls")
    print("输入矩阵维度:", features.shape)
    print("标签维度:", labels.shape)
    # Normalize the raw features before training.
    X_norm = normalization(features)
    # Train the 2-5-3-1 network.
    model = build_model(X_norm, labels, 5, 3, iterNum=20000, print_loss=True)
    # Visualize the learned decision boundary.
    visualize(X_norm, labels, model)
运行代码:
可以看出来,神经网络模型找到了一个非线性分类器对样本实现了正确的分类。