边看程序边解释(程序尚未优化,仅供参考;没有加入 batch normalization,也没有 mini-batch 批量学习)
网络结构:1 层隐藏层,100 个神经元
network.py
#network.py 构建神经网络,以及一些主要的算法
import numpy#这里可以用cupy替代,看你喜欢CPU还是GPU计算
import scipy.special
import openpyxl
import random
import os
import functions
class n_network:
    """Minimal feed-forward network with one hidden layer (input -> hidden -> output).

    Gradients are estimated numerically (central differences) instead of
    backpropagation, which is why training is extremely slow.
    """

    def __init__(self, w_i_h, w_h_o, d1, d2, rate):
        """Store parameters and the learning rate.

        w_i_h: input->hidden weight matrix; w_h_o: hidden->output weight matrix;
        d1/d2: hidden/output bias vectors; rate: SGD learning rate.
        """
        self.rate = rate
        self.sigmoid = lambda x: scipy.special.expit(x)  # numerically stable sigmoid
        self.w_i_h = w_i_h
        self.w_h_o = w_h_o
        self.d1 = d1
        self.d2 = d2
        self.lossadd = 0.0  # running sum of training losses

    @staticmethod
    def _save_rows(rows, fname):
        """Write an iterable of numeric rows into a fresh workbook saved as *fname*."""
        wb = openpyxl.Workbook()
        ws = wb.active
        for row in rows:
            # float() so plain Python numbers are written even if cells are numpy scalars
            ws.append([float(v) for v in row])
        wb.save(fname)

    def save_weight(self):
        """Persist all parameters to XLSX files (w_i_h.xlsx, w_h_o.xlsx, d1.xlsx, d2.xlsx).

        Row/column counts come from the arrays themselves instead of being
        hard-coded, so other layer sizes round-trip correctly too.
        """
        self._save_rows(numpy.asarray(self.w_i_h), "w_i_h.xlsx")
        self._save_rows(numpy.asarray(self.w_h_o), "w_h_o.xlsx")
        self._save_rows([numpy.asarray(self.d1)], "d1.xlsx")  # biases stored as one row
        self._save_rows([numpy.asarray(self.d2)], "d2.xlsx")

    def check(self, input_arr):
        """Forward pass: return the softmax output distribution for *input_arr*."""
        x = numpy.array(input_arr)
        hidden_output = self.sigmoid(numpy.dot(x, self.w_i_h) + self.d1)
        target_input = numpy.dot(hidden_output, self.w_h_o) + self.d2
        return functions.softmax(target_input)

    def loss(self, x, t):
        """Cross-entropy loss of the prediction for *x* against one-hot target *t*."""
        return functions.cross_entropy_error(self.check(x), t)

    def grandient(self, w, x, t):
        """Numerically estimate dLoss/dw via central differences.

        Name kept as 'grandient' (sic) for backward compatibility. *w* must be
        one of this network's own parameter arrays: each element is perturbed
        in place, loss() re-runs the forward pass, then the value is restored.
        """
        h = 0.0001
        grad = numpy.zeros_like(w)
        it = numpy.nditer(w, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            tmp_val = w[idx]
            w[idx] = float(tmp_val) + h
            loss_x1 = self.loss(x, t)
            w[idx] = float(tmp_val) - h
            loss_x2 = self.loss(x, t)
            grad[idx] = (loss_x1 - loss_x2) / (2 * h)
            w[idx] = tmp_val  # restore the original value
            it.iternext()
        return grad

    def train(self, x, t):
        """One SGD step on a single example: estimate all gradients, update in place."""
        loss_w = self.loss(x, t)
        self.lossadd += loss_w
        print(loss_w)  # loss before the update
        # All four gradients are computed before any parameter is changed.
        grad_w_i_h = self.grandient(self.w_i_h, x, t)
        grad_w_h_o = self.grandient(self.w_h_o, x, t)
        grad_d1 = self.grandient(self.d1, x, t)
        grad_d2 = self.grandient(self.d2, x, t)
        self.w_i_h -= self.rate * grad_w_i_h
        self.w_h_o -= self.rate * grad_w_h_o
        self.d1 -= self.rate * grad_d1
        self.d2 -= self.rate * grad_d2
        loss_w = self.loss(x, t)
        print(loss_w)  # loss after the update
functions.py
#functions.py 一些方法,其实也可以写入netwok.py里面
def softmax(a):
    """Softmax: rescale array *a* into a probability distribution summing to 1.

    Fix: the original line had a comment without '#' right after the colon,
    which is a syntax error. The max is subtracted before exponentiation so
    numpy.exp cannot overflow for large inputs.
    """
    c = numpy.max(a)
    exp_a = numpy.exp(a - c)
    sum_exp_a = numpy.sum(exp_a)
    return exp_a / sum_exp_a
def cross_entropy_error(a, b):
    """Cross-entropy loss between prediction *a* and one-hot target *b*.

    A tiny epsilon keeps numpy.log away from log(0) when a predicted
    probability is exactly zero.
    """
    eps = 0.00000001
    return -numpy.sum(b * numpy.log(a + eps))
file_io.py
#file_io.py用于XLSX的权重文件等方法
import numpy
import openpyxl
from PIL import Image
import os
def _save_rows(rows, fname):
    """Helper: write an iterable of numeric rows into a new workbook saved as *fname*."""
    wb = openpyxl.Workbook()
    ws = wb.active
    for row in rows:
        ws.append([float(v) for v in row])
    wb.save(fname)


def init_weight():
    """Create Xavier-initialised weights for a 784-100-10 network and save them to XLSX.

    The original repeated the same save loop four times with hard-coded
    sizes; the shared helper writes each array row by row instead.
    """
    w_i_h = numpy.random.randn(784, 100) / numpy.sqrt(784)
    w_h_o = numpy.random.randn(100, 10) / numpy.sqrt(100)
    # NOTE(review): biases are scaled by sqrt(fan-in) like the weights;
    # unusual (biases are often zero-initialised) but kept as-is.
    d1 = numpy.random.randn(100) / numpy.sqrt(784)
    d2 = numpy.random.randn(10) / numpy.sqrt(10)
    _save_rows(w_i_h, "w_i_h.xlsx")
    _save_rows(w_h_o, "w_h_o.xlsx")
    _save_rows([d1], "d1.xlsx")  # biases stored as a single row
    _save_rows([d2], "d2.xlsx")
def read_weight():
    """Load the weight/bias values previously saved as XLSX files.

    Returns (w_i_h, w_h_o, d1, d2) as flat Python lists in row-major order;
    callers reshape them (see main.py). Uses iter_rows(values_only=True),
    openpyxl's documented fast path, instead of one ws.cell() call per value.
    """
    def _read_flat(fname, n_rows, n_cols):
        # Bound the iteration explicitly so trailing empty rows/columns
        # in the sheet cannot leak extra None values into the result.
        ws = openpyxl.load_workbook(fname).active
        flat = []
        for row in ws.iter_rows(min_row=1, max_row=n_rows,
                                min_col=1, max_col=n_cols, values_only=True):
            flat.extend(row)
        return flat

    w_i_h = _read_flat("w_i_h.xlsx", 784, 100)
    w_h_o = _read_flat("w_h_o.xlsx", 100, 10)
    d1 = _read_flat("d1.xlsx", 1, 100)  # biases were saved as one row
    d2 = _read_flat("d2.xlsx", 1, 10)
    return w_i_h, w_h_o, d1, d2
def img2vec(fname):
    """Load an image, convert to 28x28 grayscale, return a flat vector in (-1, 1)."""
    img = Image.open(fname).convert('L')        # 'L' = 8-bit grayscale
    img = img.resize((28, 28))
    pixels = numpy.array(img).ravel()           # 784 values in 0..255
    return pixels / 255 * 2 - 1                 # rescale to the (-1, 1) range
main.py
#main.py启动文件
import numpy
import random
import os
import file_io
import network
def inti():
    """Build an n_network from the weights stored on disk.

    (Name kept as 'inti' — the menu loop below calls it under this name.)
    """
    raw_w_i_h, raw_w_h_o, raw_d1, raw_d2 = file_io.read_weight()
    w_i_h = numpy.array(raw_w_i_h).reshape((784, 100))
    w_h_o = numpy.array(raw_w_h_o).reshape((100, 10))
    d1 = numpy.array(raw_d1)
    d2 = numpy.array(raw_d2)
    rate = 0.1  # SGD learning rate
    return network.n_network(w_i_h, w_h_o, d1, d2, rate)
#file_io.init_weight() # needed on the very first run; the author also says to change the "while True" below to "while False" for that run
# Interactive menu loop: train / check one image / measure accuracy / reset weights / quit.
while True:
    n=inti()
    y=[0,0,0,0,0,0,0,0,0,0]
    s=int(input("1:训练 2:分析 3:正确率 4:重置权重 0:退出:"))
    while s!=1 and s!=2 and s!=3 and s!=4 and s!=0:
        s=int(input("输入错误,重新输入:1:训练 2:分析 3:正确率 4:重置权重 0:退出:"))
    if s==1:#train
        times=10000#train on 10000 images
        for m in range(times):
            print("------------------"+str(m/times*100)+"%"+"------------------")
            if m%10==0 and m>0:
                n.save_weight()#save the weights every 10 images
            t=[]
            z=[]
            i=random.randint(0,9)#images are stored in folders named 0-9 matching the digit; pick a folder at random
            path="mnist_train/"+str(i)+"/"
            for j in range(10):
                y[j]=0
            y[i]=1#build the one-hot target array
            print(y)
            fn_list=os.listdir(path)
            sample = random.sample(fn_list,1)#pick one file from the chosen folder
            # NOTE(review): this loop rebinds i (the folder index); harmless here
            # because y was already built, but worth renaming.
            for i,name in enumerate(sample):
                z.append(file_io.img2vec(path+name))
            t=numpy.array(y)
            x=numpy.array(z)
            n.train(x,t)
    elif s==2:#check whether a single image is classified correctly
        j=random.randint(0,9)
        path="mnist_test/"+str(j)+"/"
        print(j)
        fn_list=os.listdir(path)
        sample = random.sample(fn_list,1)
        for i,name in enumerate(sample):
            x=[]
            x.append(file_io.img2vec(path+name))
            y=n.check(x)  # y is rebound from the target list to the output array here
            print(y)
            if numpy.argmax(y)==j:
                print("正确")
            else:
                print("错误")
    elif s==3:#measure accuracy over 1000 test images
        corret=0
        # NOTE(review): 'all' shadows the builtin of the same name within this loop.
        all=1000
        for k in range(all):
            j=random.randint(0,9)
            #j=1
            path="mnist_test/"+str(j)+"/"
            #print()
            fn_list=os.listdir(path)
            sample = random.sample(fn_list,1)
            for i,name in enumerate(sample):
                x=[]
                x.append(file_io.img2vec(path+name))
                y=n.check(x)
                if numpy.argmax(y)==j:
                    corret+=1
        print("正确率:"+str(corret/all*100)+"%")
    elif s==4:#reset the weights
        file_io.init_weight()
        print("重置权重完成")
    elif s==0:
        break
训练文件夹mnist_train和测试文件夹mnist_test的内容,0-9共10个文件夹,分别保存0-9的数字图片
由于梯度是用数值微分逐个权重估计的(而非反向传播),这种 SGD 训练方式非常耗费时间