Implementing a Feedforward Neural Network by Hand in Python
1. Algorithm Mind Map
The following mermaid diagram sketches how the feedforward neural network in this article is implemented and trained (a simplified view of the steps in Section 3):
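graph TD
    A[Prepare training data] --> B[Initialize weights and biases]
    B --> C[Forward propagation]
    C --> D[Compute loss]
    D --> E[Backpropagation to compute gradients]
    E --> F[Update parameters with gradient descent]
    F --> G{More epochs left?}
    G -- yes --> C
    G -- no --> H[Predict on test data]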
2. Why Implement the Algorithm by Hand, and Where It Is Used
The value of implementing an algorithm by hand lies in understanding its principles and implementation details in depth, which makes it easier to apply and tune. In terms of real-world use, the feedforward neural network is one of the most common machine learning models, with wide application across domains such as image recognition and natural language processing.
3. Step-by-Step Implementation
3.1 Preparing the Data
First, we need training data and test data. The training data consists of input features and their corresponding output labels and is used to fit the network; the test data is used to evaluate the model's performance.
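As a minimal sketch, the XOR truth table used later in Section 3.8 can serve as the training data (in a real project the test set would be a separately held-out sample):

import numpy as np

# XOR truth table: four 2-dimensional inputs and their binary labels
X_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_train = np.array([[0], [1], [1], [0]])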
3.2 Building the Network Structure
Next we define the structure of the network: the number of neurons in the input, hidden, and output layers. A class is a convenient representation; it will hold the initializer and, in later sections, forward propagation and the training logic.
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Number of neurons in each layer
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
3.3 Initializing the Weights and Biases
The weights and biases are the parameters the network learns. We initialize the weights randomly and the biases to zero, extending __init__:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # Weights drawn from a standard normal distribution; biases start at zero
        self.weights1 = np.random.randn(self.input_size, self.hidden_size)
        self.bias1 = np.zeros((1, self.hidden_size))
        self.weights2 = np.random.randn(self.hidden_size, self.output_size)
        self.bias2 = np.zeros((1, self.output_size))
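One caveat about this choice: unscaled np.random.randn weights can push wide sigmoid layers into their flat, saturated regions. A common refinement, not used in this article, is Xavier-style scaling such as np.random.randn(n_in, n_out) * np.sqrt(1.0 / n_in).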
3.4 Forward Propagation
During forward propagation, the input features are passed through the learned weights and biases to compute the network's output. We add the forward and sigmoid methods to the class:
class NeuralNetwork:
    # ... __init__ as defined in Section 3.3 ...

    def forward(self, X):
        # Hidden layer: affine transform followed by a sigmoid activation
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = self.sigmoid(self.z1)
        # Output layer: affine transform followed by a sigmoid activation
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
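As a quick sanity check (a hypothetical usage sketch, not part of the training pipeline), forward propagation on a batch of four 2-dimensional inputs should return a (4, 1) array of probabilities:

import numpy as np

model = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
sample = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
print(model.forward(sample).shape)  # (4, 1): one sigmoid output per sample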
3.5 Computing the Loss Function
The loss function measures the gap between the model's predictions and the true labels. Common choices include mean squared error and cross-entropy; because our output layer is a sigmoid that produces probabilities, we use binary cross-entropy here:
class NeuralNetwork:
    # ... methods from the previous sections ...

    def loss(self, X, y):
        # Binary cross-entropy averaged over the m training samples
        m = X.shape[0]
        predicted_output = self.forward(X)
        # Clip predictions away from exactly 0 and 1 to avoid log(0)
        predicted_output = np.clip(predicted_output, 1e-8, 1 - 1e-8)
        cost = -1/m * np.sum(y * np.log(predicted_output) + (1-y) * np.log(1-predicted_output))
        return cost
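For comparison, a mean-squared-error variant (mentioned above but not used in the rest of this article; mse_loss is a hypothetical addition) could look like this:

class NeuralNetwork:
    # ... methods from the previous sections ...

    def mse_loss(self, X, y):
        # Mean squared error between predictions and labels
        predicted_output = self.forward(X)
        return np.mean((predicted_output - y) ** 2)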
3.6 Backpropagation
Backpropagation computes the gradients of the loss with respect to the weights and biases so that the parameters can be updated. We first compute the gradients of the output layer, then those of the hidden layer, and later update the parameters with gradient descent.
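For reference, these are the gradient equations the code implements; with a sigmoid output and binary cross-entropy, the output-layer error term simplifies to $A_2 - y$ (here $m$ is the number of samples and $\odot$ denotes element-wise multiplication):

$$dZ_2 = A_2 - y, \qquad dW_2 = \frac{1}{m} A_1^{\top} dZ_2, \qquad db_2 = \frac{1}{m} \sum_i (dZ_2)_i$$

$$dZ_1 = (dZ_2 W_2^{\top}) \odot A_1 \odot (1 - A_1), \qquad dW_1 = \frac{1}{m} X^{\top} dZ_1, \qquad db_1 = \frac{1}{m} \sum_i (dZ_1)_i$$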
class NeuralNetwork:
    # ... methods from the previous sections ...

    def backward(self, X, y):
        m = X.shape[0]
        # Output layer: with sigmoid + cross-entropy the error reduces to (a2 - y)
        dZ2 = self.a2 - y
        dW2 = 1/m * np.dot(self.a1.T, dZ2)
        db2 = 1/m * np.sum(dZ2, axis=0, keepdims=True)
        # Hidden layer: propagate the error back through weights2 and the sigmoid
        dZ1 = np.dot(dZ2, self.weights2.T) * self.sigmoid_derivative(self.a1)
        dW1 = 1/m * np.dot(X.T, dZ1)
        db1 = 1/m * np.sum(dZ1, axis=0, keepdims=True)
        return dW1, db1, dW2, db2

    def sigmoid_derivative(self, a):
        # Sigmoid derivative written in terms of the activation a = sigmoid(z)
        return a * (1 - a)
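A useful way to validate a hand-written backward pass is a finite-difference gradient check. The sketch below (numerical_grad_w1 is a hypothetical helper, not part of the class) compares the analytic gradient of one weight against a central-difference estimate:

import numpy as np

def numerical_grad_w1(model, X, y, i, j, eps=1e-5):
    # Central-difference estimate of d(loss)/d(weights1[i, j])
    model.weights1[i, j] += eps
    loss_plus = model.loss(X, y)
    model.weights1[i, j] -= 2 * eps
    loss_minus = model.loss(X, y)
    model.weights1[i, j] += eps  # restore the original weight
    return (loss_plus - loss_minus) / (2 * eps)

model = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
model.forward(X)
dW1, _, _, _ = model.backward(X, y)
print(dW1[0, 0], numerical_grad_w1(model, X, y, 0, 0))  # should agree closely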
3.7 Updating the Parameters
Using the gradients computed by backpropagation, gradient descent updates each parameter by stepping against its gradient:
class NeuralNetwork:
    # ... methods from the previous sections ...

    def update_parameters(self, dW1, db1, dW2, db2, learning_rate):
        self.weights1 -= learning_rate * dW1
        self.bias1 -= learning_rate * db1
        self.weights2 -= learning_rate * dW2
        self.bias2 -= learning_rate * db2
3.8 Training the Model
Combining forward propagation, backpropagation, and the parameter update gives us the training loop. The complete class looks like this:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.weights1 = np.random.randn(self.input_size, self.hidden_size)
        self.bias1 = np.zeros((1, self.hidden_size))
        self.weights2 = np.random.randn(self.hidden_size, self.output_size)
        self.bias2 = np.zeros((1, self.output_size))

    def forward(self, X):
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, a):
        return a * (1 - a)

    def loss(self, X, y):
        m = X.shape[0]
        predicted_output = np.clip(self.forward(X), 1e-8, 1 - 1e-8)
        return -1/m * np.sum(y * np.log(predicted_output) + (1-y) * np.log(1-predicted_output))

    def backward(self, X, y):
        m = X.shape[0]
        dZ2 = self.a2 - y
        dW2 = 1/m * np.dot(self.a1.T, dZ2)
        db2 = 1/m * np.sum(dZ2, axis=0, keepdims=True)
        dZ1 = np.dot(dZ2, self.weights2.T) * self.sigmoid_derivative(self.a1)
        dW1 = 1/m * np.dot(X.T, dZ1)
        db1 = 1/m * np.sum(dZ1, axis=0, keepdims=True)
        return dW1, db1, dW2, db2

    def update_parameters(self, dW1, db1, dW2, db2, learning_rate):
        self.weights1 -= learning_rate * dW1
        self.bias1 -= learning_rate * db1
        self.weights2 -= learning_rate * dW2
        self.bias2 -= learning_rate * db2

    def train(self, X, y, num_epochs, learning_rate):
        # One full-batch gradient descent step per epoch
        for epoch in range(num_epochs):
            self.forward(X)  # caches a1 and a2 for the backward pass
            dW1, db1, dW2, db2 = self.backward(X, y)
            self.update_parameters(dW1, db1, dW2, db2, learning_rate)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, loss: {self.loss(X, y)}")
We can now train the model on the XOR training set:
import numpy as np
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
model = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
model.train(X, y, num_epochs=1000, learning_rate=0.1)
The output looks like this:
Epoch 0, loss: 0.698546791115
Epoch 100, loss: 0.693150743216
Epoch 200, loss: 0.69314718056
...
Epoch 900, loss: 0.69314718056
3.9 Making Predictions
Once training is complete, we can use the model to make predictions:
import numpy as np
X_test = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
predictions = model.forward(X_test)
print(predictions)
The output looks like this:
[[0.5]
[0.5]
[0.5]
[0.5]]
The loss has plateaued at roughly ln 2 ≈ 0.6931 and every prediction is 0.5, so this run has not learned XOR at all. The cause is not the single hidden layer; one hidden layer with enough units is sufficient to represent XOR. Rather, 1000 epochs at a learning rate of 0.1 leave gradient descent stuck on the initial plateau. Training for more epochs, raising the learning rate, or enlarging the hidden layer typically lets the model fit XOR, and since the outputs are probabilities, final class labels are obtained by thresholding at 0.5.
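As a sketch of that fix (the hyperparameter values below are illustrative guesses, not tuned results), training longer with a larger learning rate and then thresholding usually recovers the XOR truth table:

model = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
model.train(X, y, num_epochs=10000, learning_rate=1.0)

predictions = model.forward(X)
print((predictions > 0.5).astype(int))  # typically [[0], [1], [1], [0]]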