神经网络
我们需要想一下这个神经网络的大概模样,它应该至少有三个函数:
—初始化函数:设定输入层节点、隐藏层节点和输出层节点的数量。
—训练:给定训练集样本后,优化权重。
—查询:给定输入,从输出节点给出答案。
所以,先写一个简单的框架
# neural network class definition
class neuralNetwork:
    """Skeleton of the neural network: three empty methods to be filled in later."""

    def __init__(self):
        """Initialise the neural network (placeholder)."""
        pass

    def train(self):
        """Train the neural network (placeholder)."""
        pass

    def query(self):
        """Query the neural network (placeholder)."""
        pass
初始化网络
初始化网络的几个节点:
—输入的节点
—中间隐藏节点
—输出节点
# neural network class definition
class neuralNetwork:
    """Neural network that, at this stage, only stores its layer sizes and learning rate."""

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Store the number of nodes in each layer and the learning rate.

        Args:
            inputnodes: number of nodes in the input layer.
            hiddennodes: number of nodes in the hidden layer.
            outputnodes: number of nodes in the output layer.
            learningrate: learning rate, stored as ``self.lr``.
        """
        # number of nodes in the input, hidden and output layers
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        # learning rate
        self.lr = learningrate

    def train(self):
        """Train the neural network (placeholder)."""
        pass

    def query(self):
        """Query the neural network (placeholder)."""
        pass
if __name__ == '__main__':
    # number of input, hidden and output nodes
    input_nodes = 3
    hidden_nodes = 3
    output_nodes = 3
    # learning rate
    learning_rate = 0.5
    # create an instance of the neural network
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
尝试建一个每层3个节点,学习率为0.5的小型神经网络对象。
if __name__ == '__main__':
    # a small network: three nodes in each of the input, hidden and output layers
    input_nodes = 3
    hidden_nodes = 3
    output_nodes = 3
    # learning rate
    learning_rate = 0.5
    # create an instance of the neural network
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
权重——网络的核心
在输入层与隐藏层之间的链接权重矩阵W(input_hidden),大小为hidden_nodes乘以input_nodes。
在隐藏层和输出层之间的链接权重矩阵W(hidden_output),大小为output_nodes乘以hidden_nodes。
##链接权重的初始值应该较小,并且是随机的。下面用到的numpy.random.rand(rows,columns)函数生成一个大小为rows乘以columns的数组,数组中元素为0~1的随机值;再减去0.5,就得到-0.5~0.5之间的权重。
# Link weight matrices: wih (input -> hidden) and who (hidden -> output).
# Weights inside the arrays are w_i_j, where the link runs from node i to
# node j in the next layer:
#   w11 w21
#   w12 w22 etc
# Each matrix therefore has one row per node of the NEXT layer and one column
# per node of the PREVIOUS layer, so that numpy.dot(weights, signals) works.
# rand() draws from [0, 1); subtracting 0.5 centres the weights on zero.
self.wih = numpy.random.rand(self.hnodes, self.inodes) - 0.5
self.who = numpy.random.rand(self.onodes, self.hnodes) - 0.5
可选择项:较为复杂的权重
使用正态概率分布采样权重,其中平均值为0,标准差为传入链接数目平方根的倒数,即1/(传入链接数目)^(1/2)。
# More sophisticated initialisation: sample the weights from a normal
# distribution centred on 0.0 whose standard deviation is
# (number of incoming links) ** -0.5.
# Each hidden node receives self.inodes links; each output node receives
# self.hnodes links.
self.wih = numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
self.who = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))
查询网络
这本来是一个十分繁琐的过程:每个节点都要把所有输入信号按权重加权求和,每多写一个节点,就需要多写相应的代码。但是,如果写成这样的矩阵公式:X(hidden)=W(input_hidden)·I
就会发现如此简单一行代码可以实现:
# signals into the hidden layer: input-to-hidden weight matrix times the input vector
hidden_inputs = numpy.dot(self.wih, inputs)
接下来我们需要定义S型激活函数(sigmoid)。这时可以借助scipy库中现成的expit()函数,这也体现了Python的便捷之处:丰富的库资源。
# scipy.special provides expit(), which is used here as the sigmoid
import scipy.special

# the activation function is the sigmoid function
self.activation_finction = lambda x: scipy.special.expit(x)
剩余的就是与前面代码十分相像的计算隐藏层信号和输出层信号
# signals emerging from the hidden layer (activation applied to its inputs)
hidden_outputs = self.activation_finction(hidden_inputs)

# signals into the final output layer: hidden-to-output weights times hidden outputs
final_inputs = numpy.dot(self.who, hidden_outputs)

# signals emerging from the final output layer
final_outputs = self.activation_finction(final_inputs)
def query(self, inputs):
    """Feed ``inputs`` forward through the network and return the output signals.

    Args:
        inputs: input signals, passed directly to ``numpy.dot(self.wih, inputs)``.

    Returns:
        The signals emerging from the output layer after the sigmoid.
    """
    # scipy.special for the sigmoid function expit()
    import scipy.special

    # the activation function is the sigmoid function
    self.activation_finction = lambda x: scipy.special.expit(x)

    # signals into, then out of, the hidden layer
    hidden_inputs = numpy.dot(self.wih, inputs)
    hidden_outputs = self.activation_finction(hidden_inputs)

    # signals into, then out of, the final output layer
    final_inputs = numpy.dot(self.who, hidden_outputs)
    final_outputs = self.activation_finction(final_inputs)

    # hand the answer back to the caller (previously it was computed and dropped)
    return final_outputs
训练网络
第一部分,针对给定的训练样本计算输出。这与我们刚刚在query()函数上所做的部分没什么区别。
第二部分,将计算的输出与所需输出对比,使用差值来指导网络权重的更新。
第一部分代码:
# train the neural network
def train(self, inputs_list, target_list):
    """First half of training: run one example forward through the network.

    Args:
        inputs_list: input values for one training example.
        target_list: desired output values for that example.
    """
    # convert both lists to 2-D column arrays (ndmin=2 gives a row, .T turns it into a column)
    inputs = numpy.array(inputs_list, ndmin=2).T
    targets = numpy.array(target_list, ndmin=2).T

    # signals into, then out of, the hidden layer
    hidden_inputs = numpy.dot(self.wih, inputs)
    hidden_outputs = self.activation_finction(hidden_inputs)

    # signals into, then out of, the final output layer
    final_inputs = numpy.dot(self.who, hidden_outputs)
    final_outputs = self.activation_finction(final_inputs)
训练部分写完后,我们需要计算误差,这个值等于训练样本所提供的预期目标输出值与实际计算得到的输出值之差。
# output-layer error: desired target values minus the values the network actually produced
out_errors = targets - final_outputs
隐藏层节点的误差由输出层误差按链接权重反向传播得到,其在神经网络中的矩阵形式为:
errors(hidden)=weights^T(hidden_output)*errors(output)
所以我们得出以下代码:
# hidden layer error is out_errors, split by the hidden-to-output weights and
# recombined at the hidden nodes
hidden_errors = numpy.dot(self.who.T, out_errors)
之前我们已经得到了更新节点j与下一层节点k之间链接权重的矩阵形式表达式。
# update the weights for the links between the hidden and output layers:
# learning rate * (error * sigmoid slope at the output) . (hidden outputs)^T
self.who += self.lr * numpy.dot(
    out_errors * final_outputs * (1.0 - final_outputs),
    numpy.transpose(hidden_outputs),
)
# update the weights for the links between the input and hidden layers
self.wih += self.lr * numpy.dot(
    hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
    numpy.transpose(inputs),
)
完整代码:
import numpy
# neural network class definition
class neuralNetwork:
    """Three-layer (input, hidden, output) network trained by error backpropagation."""

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Set up layer sizes, learning rate, random weights and the activation function.

        Args:
            inputnodes: number of nodes in the input layer.
            hiddennodes: number of nodes in the hidden layer.
            outputnodes: number of nodes in the output layer.
            learningrate: step size used when updating the weights.
        """
        # number of nodes in the input, hidden and output layers
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        # learning rate
        self.lr = learningrate

        # Link weight matrices: wih (input -> hidden) and who (hidden -> output).
        # Weights inside the arrays are w_i_j, where the link runs from node i
        # to node j in the next layer:
        #   w11 w21
        #   w12 w22 etc
        # One row per node of the next layer, one column per node of the
        # previous layer; values are uniform in [-0.5, 0.5).
        self.wih = numpy.random.rand(self.hnodes, self.inodes) - 0.5
        self.who = numpy.random.rand(self.onodes, self.hnodes) - 0.5
        # more sophisticated alternative: normal distribution with standard
        # deviation (number of incoming links) ** -0.5
        # self.wih = numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        # self.who = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # scipy.special for the sigmoid function expit()
        import scipy.special

        # The activation function is the sigmoid. It is created here rather than
        # in query() so that train() can be called before the first query().
        self.activation_finction = lambda x: scipy.special.expit(x)

    def train(self, inputs_list, target_list):
        """Update the weights from one training example.

        Args:
            inputs_list: input values for the example.
            target_list: desired output values for the example.
        """
        # convert both lists to 2-D column arrays
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(target_list, ndmin=2).T

        # forward pass: hidden layer, then output layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_finction(hidden_inputs)
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_finction(final_inputs)

        # error is (target - actual)
        out_errors = targets - final_outputs
        # hidden layer error is out_errors, split by weights and recombined at
        # the hidden nodes (computed before who is modified below)
        hidden_errors = numpy.dot(self.who.T, out_errors)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot(
            out_errors * final_outputs * (1.0 - final_outputs),
            numpy.transpose(hidden_outputs),
        )
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            numpy.transpose(inputs),
        )

    def query(self, inputs):
        """Feed ``inputs`` forward through the network and return the output signals."""
        # signals into, then out of, the hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_finction(hidden_inputs)
        # signals into, then out of, the final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_finction(final_inputs)
        return final_outputs
if __name__ == '__main__':
    # number of input, hidden and output nodes
    input_nodes = 3
    hidden_nodes = 3
    output_nodes = 3
    # learning rate
    learning_rate = 0.5
    # create an instance of the neural network
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)
csv文件和它的手写训练集
这个网站提供了两个csv文件:
–训练集:http://www.pjreddie.com/media/files/mnist_train.csv
–测试集:http://www.pjreddie.com/media/files/mnist_test.csv
还有一些较小的训练集:
MNIST测试集中的10条记录——
http://raw.githubusercontent.com/makeyourownneuralnetwork/makeyourownneuralnetwork/master/mnist_dataset/mnist_test_10.csv
MNIST训练集中的100条记录——
http://raw.githubusercontent.com/makeyourownneuralnetwork/makeyourownneuralnetwork/master/mnist_dataset/mnist_train_100.csv
打开csv文件
# Open the CSV file, read it line by line and keep the lines in a list.
# The with-block closes the file even if reading fails.
# NOTE(review): the small data sets linked in this article are
# mnist_test_10.csv and mnist_train_100.csv; confirm this file name.
with open("mnist_train_10.csv", "r") as data_file:
    data_list = data_file.readlines()
处理csv文件为图像形式
# Take the first record of the list and split it on commas.
# (Debug aid: print the number of records and the split first record.)
# print(len(data_list), data_list[0].split(","))
all_values = data_list[0].split(",")
# Convert the text fields after the first one to floats and arrange them as a
# 28x28 matrix. numpy.asfarray was removed in NumPy 2.0, so use asarray with
# an explicit float dtype.
image_array = numpy.asarray(all_values[1:], dtype=float).reshape((28, 28))
# Draw the image with a grey colour map so the handwritten character shows clearly.
# NOTE(review): `pl` is not imported in the visible code; presumably
# matplotlib.pyplot (or pylab) imported as pl - confirm.
pl.imshow(image_array, cmap="Greys", interpolation="None")
pl.show()
运行结果:
准备MNIST的训练数据
第一件事情就是将输入颜色值从0~255的范围缩放到0.01~1.0的范围(注意:不能取0,因为0值输入会使对应权重的更新量为0,导致权重无法更新)。
先把像素值除以255,变为0~1之间的值,再乘以0.99并加上0.01,转变为0.01~1.0之间的值。
然后把它送入神经网络。我们希望网络在正确标签对应的输出节点上给出较大的值,在其余输出节点上给出较小的值。
理想状态下对应数字5的输出值为:[0,0,0,0,0,1,0,0,0,0]
但是,实际上S型激活函数的输出永远不可能达到0或1,所以不能把0和1作为目标值。
所以目标值取[0.01,0.01,0.01,0.01,0.01,0.99,0.01,0.01,0.01,0.01]才是比较理想的状态。
创建预估的output
# number of output nodes is 10 (example)
onodes = 10
# every target value starts at 0.01 ...
targets = numpy.zeros(onodes) + 0.01
# ... except the node indexed by the record's first field, which gets 0.99
targets[int(all_values[0])] = 0.99
好了,一切准备就绪,把数据送入神经网络即可:
汇总代码:
import numpy
import time
# neural network class definition
class neuralNetwork:
    """Three-layer (input, hidden, output) network trained by error backpropagation."""

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Set up layer sizes, learning rate, random weights and the activation function.

        Args:
            inputnodes: number of nodes in the input layer.
            hiddennodes: number of nodes in the hidden layer.
            outputnodes: number of nodes in the output layer.
            learningrate: step size used when updating the weights.
        """
        # number of nodes in the input, hidden and output layers
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        # learning rate
        self.lr = learningrate

        # Weights are sampled from a normal distribution centred on 0.0 with
        # standard deviation (number of incoming links) ** -0.5. A hidden node
        # has self.inodes incoming links, an output node has self.hnodes.
        # wih is (hnodes, inodes); who is (onodes, hnodes).
        self.wih = numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # scipy.special for the sigmoid function expit()
        import scipy.special

        # the activation function is the sigmoid function
        self.activation_finction = lambda x: scipy.special.expit(x)

    def train(self, inputs_list, target_list):
        """Update the weights from one training example.

        Args:
            inputs_list: input values for the example.
            target_list: desired output values for the example.
        """
        # convert both lists to 2-D column arrays
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(target_list, ndmin=2).T

        # forward pass: hidden layer, then output layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_finction(hidden_inputs)
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_finction(final_inputs)

        # error is (target - actual)
        out_errors = targets - final_outputs
        # hidden layer error is out_errors, split by weights and recombined at
        # the hidden nodes (computed before who is modified below)
        hidden_errors = numpy.dot(self.who.T, out_errors)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot(
            out_errors * final_outputs * (1.0 - final_outputs),
            numpy.transpose(hidden_outputs),
        )
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            numpy.transpose(inputs),
        )

    def query(self, inputs):
        """Feed ``inputs`` forward through the network and return the output signals."""
        # signals into, then out of, the hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_finction(hidden_inputs)
        # signals into, then out of, the final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_finction(final_inputs)
        return final_outputs
if __name__ == '__main__':
    # start time, used to report the total running time at the end
    begin_time = time.time()

    # number of input, hidden and output nodes
    input_nodes = 784
    hidden_nodes = 100
    output_nodes = 10
    # learning rate
    learning_rate = 0.3

    # create an instance of the neural network
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

    # load the MNIST training data CSV file into a list of lines;
    # the with-block closes the file even if reading fails
    with open("mnist_train_100.csv", "r") as train_data_file:
        train_data_list = train_data_file.readlines()

    # train the neural network: go through all records in the training data set
    for record in train_data_list:
        all_values = record.split(",")
        # scale and shift the inputs from 0..255 to 0.01..1.0
        # (numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is equivalent)
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        # create the target output values (all 0.01, except the desired label which is 0.99)
        targets = numpy.zeros(output_nodes) + 0.01
        # all_values[0] is the target label for this record
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)

    # print running time
    print(time.time() - begin_time)
测试网络
# Load the MNIST test data CSV file into a list of lines.
# The with-block closes the file even if reading fails.
with open("mnist_test.csv", "r") as test_data_file:
    test_data_list = test_data_file.readlines()
# Test the neural network and record, per test record, whether it was right.
# scorecard for how well the network performs, initially empty
scorecard = []
# go through all the records in the test data set
for record in test_data_list:
    all_values = record.split(",")
    # the first field is the correct label
    correct_label = int(all_values[0])
    # scale and shift the inputs from 0..255 to 0.01..1.0
    # (numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is equivalent)
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    outputs = n.query(inputs)
    # print("train result:", outputs)
    # the index of the largest output is the network's answer
    label = numpy.argmax(outputs)
    if label == correct_label:
        scorecard.append(1)
    else:
        scorecard.append(0)
# Combined code:
# load the MNIST test data CSV file into a list of lines;
# the with-block closes the file even if reading fails
with open("mnist_test.csv", "r") as test_data_file:
    test_data_list = test_data_file.readlines()

# test the neural network
# scorecard for how well the network performs, initially empty
scorecard = []
# go through all the records in the test data set
for record in test_data_list:
    all_values = record.split(",")
    # the first field is the correct label
    correct_label = int(all_values[0])
    # scale and shift the inputs from 0..255 to 0.01..1.0
    # (numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is equivalent)
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    outputs = n.query(inputs)
    # print("train result:", outputs)
    # the index of the largest output is the network's answer
    label = numpy.argmax(outputs)
    if label == correct_label:
        scorecard.append(1)
    else:
        scorecard.append(0)

# calculate the performance score, the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print("performance=", scorecard_array.sum() / scorecard_array.size)
一些改进
调整学习率
这个直接有个结论:
学习率在0.1到0.3之间学习效果最好。
一般我们使用0.2的学习率,有最好的性能。
多次运行
# set the number of epochs (passes of the loop below) to 2
epochs = 2
for e in range(epochs):
    # placeholder for the per-epoch work
    pass
关于多次运行(多个世代),同样直接给出结论:
一般运行5—10次性能最佳。
我们一般选择运行7次。
改变网络形状
隐藏节点就像一辆公交车:如果有10个人要上车,但车的容量只有5个,就容纳不下这些人,学习效果很差;但是,若车的容量为100个,就会有90个位置剩余,造成神经网络容量的浪费。
这里又又又是一个结论:隐藏节点数为200个时性能最好,但是训练时间大大增加,所以我们选择性能差不多的但训练时间大大减少的100个节点。
最终代码
import numpy
import time
# neural network class definition
class neuralNetwork:
    """Three-layer (input, hidden, output) network trained by error backpropagation."""

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Set up layer sizes, learning rate, random weights and the activation function.

        Args:
            inputnodes: number of nodes in the input layer.
            hiddennodes: number of nodes in the hidden layer.
            outputnodes: number of nodes in the output layer.
            learningrate: step size used when updating the weights.
        """
        # number of nodes in the input, hidden and output layers
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        # learning rate
        self.lr = learningrate

        # Weights are sampled from a normal distribution centred on 0.0 with
        # standard deviation (number of incoming links) ** -0.5. A hidden node
        # has self.inodes incoming links, an output node has self.hnodes.
        # wih is (hnodes, inodes); who is (onodes, hnodes).
        self.wih = numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # scipy.special for the sigmoid function expit()
        import scipy.special

        # the activation function is the sigmoid function
        self.activation_finction = lambda x: scipy.special.expit(x)

    def train(self, inputs_list, target_list):
        """Update the weights from one training example.

        Args:
            inputs_list: input values for the example.
            target_list: desired output values for the example.
        """
        # convert both lists to 2-D column arrays
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(target_list, ndmin=2).T

        # forward pass: hidden layer, then output layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_finction(hidden_inputs)
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_finction(final_inputs)

        # error is (target - actual)
        out_errors = targets - final_outputs
        # hidden layer error is out_errors, split by weights and recombined at
        # the hidden nodes (computed before who is modified below)
        hidden_errors = numpy.dot(self.who.T, out_errors)

        # update the weights for the links between the hidden and output layers
        self.who += self.lr * numpy.dot(
            out_errors * final_outputs * (1.0 - final_outputs),
            numpy.transpose(hidden_outputs),
        )
        # update the weights for the links between the input and hidden layers
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            numpy.transpose(inputs),
        )

    def query(self, inputs):
        """Feed ``inputs`` forward through the network and return the output signals."""
        # signals into, then out of, the hidden layer
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_finction(hidden_inputs)
        # signals into, then out of, the final output layer
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_finction(final_inputs)
        return final_outputs
if __name__ == '__main__':
    # start time, used to report the training time below
    begin_time = time.time()

    # number of input, hidden and output nodes
    input_nodes = 784
    hidden_nodes = 200
    output_nodes = 10
    # learning rate
    learning_rate = 0.1

    # create an instance of the neural network
    n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

    # load the MNIST training data CSV file into a list of lines;
    # the with-block closes the file even if reading fails
    with open("mnist_train.csv", "r") as train_data_file:
        train_data_list = train_data_file.readlines()

    # train the neural network: replay the whole training set once per epoch
    epochs = 10
    for e in range(epochs):
        # go through all records in the training data set
        for record in train_data_list:
            all_values = record.split(",")
            # scale and shift the inputs from 0..255 to 0.01..1.0
            # (numpy.asfarray was removed in NumPy 2.0; asarray with dtype=float is equivalent)
            inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
            # create the target output values (all 0.01, except the desired label which is 0.99)
            targets = numpy.zeros(output_nodes) + 0.01
            # all_values[0] is the target label for this record
            targets[int(all_values[0])] = 0.99
            n.train(inputs, targets)

    # print running time
    print("train time", time.time() - begin_time)

    # load the MNIST test data CSV file into a list of lines
    with open("mnist_test.csv", "r") as test_data_file:
        test_data_list = test_data_file.readlines()

    # test the neural network
    # scorecard for how well the network performs, initially empty
    scorecard = []
    # go through all the records in the test data set
    for record in test_data_list:
        all_values = record.split(",")
        # the first field is the correct label
        correct_label = int(all_values[0])
        # scale and shift the inputs exactly as for training
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        outputs = n.query(inputs)
        # print("train result:", outputs)
        # the index of the largest output is the network's answer
        label = numpy.argmax(outputs)
        if label == correct_label:
            scorecard.append(1)
        else:
            scorecard.append(0)

    # calculate the performance score, the fraction of correct answers
    scorecard_array = numpy.asarray(scorecard)
    print("performance=", scorecard_array.sum() / scorecard_array.size)