github 地址:https://github.com/tudoupaisimalingshu/Handwritten-numeral-recognition
本文描述并实现了一个可视化的基于Python平台和梯度下降算法的神经网络的手写数字识别程序。采用web服务,用户在前台页面的手写板中输入手写数字,并可以输入神经网络进行学习,同时还能进行预测。较好地解决了MNIST数据库以及传统手写数字识别程序过于抽象的问题。但如效果图所示,准确度还有待大幅提高,这也是今后努力的方向。
一、效果图
二、整体架构
前台手写板使用开源项目,项目地址:https://github.com/szimek/signature_pad
鉴于项目所需图片最终要压缩成28*28黑白图片,因此我做了些许调整,将手写板缩小,将笔迹放大,并将背景由无色透明改为白色不透明(signature_pad.js)
同时对主页html做了修改,添加了提交训练数据按钮、预测数字按钮,并为按钮添加了JavaScript代码。提交训练数据按钮点击后,将手写板和输入框数字使用Ajax传递到后台,后台将其保存到文件中,并加入训练数据集。预测数字按钮点击后,将手写板数据传递到后台,后台使用之前编写的神经网络进行预测,并将结果返回前台。
后台采用Python平台下的web.py,未采用笔者熟悉的JavaWeb平台,以方便调用前面的Python代码。
三、前端代码
1、页面HTML代码
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Signature Pad demo</title>
<meta name="description" content="Signature Pad - HTML5 canvas based smooth signature drawing using variable width spline interpolation.">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="stylesheet" href="css/signature-pad.css">
<script type="text/javascript">
// Google Analytics command queue: reuse an existing queue if one is already defined.
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-39365077-1']);
_gaq.push(['_trackPageview']);
// Inject the ga.js loader asynchronously, ahead of the first script element on the page.
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
// Use the SSL host when the page itself is served over https.
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
<script src="/static/js/jquery-1.8.2.js"></script>
<script type="text/javascript">
/**
 * Send the current drawing to the server for prediction and show the reply.
 *
 * The canvas content is posted to /Judge as a PNG data URL in the
 * `dataURL` form field; the response body is shown in an alert.
 */
function submitCanvas()
{
    var canvas = document.getElementById("canvas");
    var dataURL = canvas.toDataURL("image/png");
    $.ajax({
        type: "POST",
        url: "/Judge",
        // Kept synchronous on purpose: the page blocks until the server
        // answers, so one user cannot queue overlapping requests.
        async: false,
        dataType: "html",
        data: {dataURL: dataURL},
        success: function(msg){
            alert(msg);
        },
        // Without this a failed request would be silently ignored.
        error: function(xhr, status){
            alert("请求失败:" + status);
        }
    });
}
/**
 * Submit the current drawing together with its label as a training sample.
 *
 * Reads the label from the #number input and posts it with the canvas
 * PNG data URL to /Join; the response body is shown in an alert.
 */
function joinExercise()
{
    var number = document.getElementById("number").value;
    // The label must be exactly one digit; reject anything else before sending.
    if (!/^[0-9]$/.test(number)) {
        alert("请输入0-9之间的一个数字");
        return;
    }
    var canvas = document.getElementById("canvas");
    var dataURL = canvas.toDataURL("image/png");
    $.ajax({
        type: "POST",
        url: "/Join",
        // Kept synchronous on purpose: the page blocks until the server
        // answers, so one user cannot queue overlapping requests.
        async: false,
        dataType: "html",
        data: {dataURL: dataURL, number: number},
        success: function(msg){
            alert(msg);
        },
        // Without this a failed request would be silently ignored.
        error: function(xhr, status){
            alert("请求失败:" + status);
        }
    });
}
</script>
</head>
<body onselectstart="return false">
<a id="github" href="https://github.com/szimek/signature_pad">
<img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_gray_6d6d6d.png" alt="Fork me on GitHub">
</a>
<div id="signature-pad" class="m-signature-pad">
<div class="m-signature-pad--body">
<canvas id="canvas"></canvas>
</div>
<div class="m-signature-pad--footer">
<div class="description">
<input id="number" class="number"/>
</div>
<div class="left">
<button type="button" class="button clear" data-action="clear">Clear</button>
</div>
<div class="right">
<button type="button" class="button save" onclick="joinExercise()">joinExercise</button>
<button type="button" class="button save" onclick="submitCanvas()">submitCanvas</button>
</div>
</div>
</div>
<script src="js/signature_pad.js"></script>
<script src="js/app.js"></script>
</body>
</html>
2、采用的signature_pad.js修改部分
/**
 * Excerpt of the SignaturePad constructor showing only the modified defaults.
 *
 * NOTE(review): this is a fragment; the rest of the upstream constructor
 * (canvas setup, event handlers) is not shown here.
 */
function SignaturePad(canvas, options) {
    // Fall back to an empty object so the lookups below work without options.
    var opts = options || {};
    // Stroke width range; the previous defaults were 0.5 and 2.5.
    this.minWidth = opts.minWidth || 8.5;
    this.maxWidth = opts.maxWidth || 15.5;
    // Changed 2017-08-06: the background default was 'rgba(0,0,0,0)'
    // (transparent) and is now opaque white; the pen colour is unchanged.
    this.penColor = opts.penColor || 'black';
    this.backgroundColor = opts.backgroundColor || 'rgba(255,255,255,1.0)';
};
1、web.py后台核心代码
import web
# web.py web framework
import base64
# base64 codec: turns the data-URL string back into image bytes
import numpy as np
render = web.template.render('templates')
import network
# neural network implementation
import mnist_loader
# MNIST loader
net = network.Network([784, 30, 10])
# create a three-layer network with layer sizes 784, 30 and 10
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
# training, validation and test data
training_data = list(training_data)
import mnist_data_loder
# loader for the custom handwriting dataset
loder = mnist_data_loder.DataLoder()
# instance of the custom handwriting dataset loader
#training_data = list()
# web.py URL mapping: alternating (regex, handler class name) entries.
# The dots before the file extensions are escaped so that only real
# ".css" / ".js" paths match (an unescaped "." also matched e.g. "/objs").
urls = (
    '/index', 'index',
    '/Judge', 'judge',
    '/Join', 'join',
    '/static/(.*)', 'StaticFile',
    r'/(.*\.css)', 'StaticFile',  # css files
    r'/(.*\.js)', 'StaticFile',  # js files
    '/(.*)', 'hello',
)
app = web.application(urls, globals())
# create the web application from the URL mapping and this module's globals
#如果是join请求,分发到此类处理
class join:
    """web.py handler for ``POST /Join``: store one labelled handwriting sample.

    The request carries ``dataURL`` (a PNG data URL of the drawing) and
    ``number`` (the digit it shows). The sample is saved to disk, converted
    to a network input, persisted through the custom loader and appended to
    the module-level training set.
    """

    def POST(self):
        """Handle the request and return a status message for the browser."""
        import uuid

        import test4

        print("join POST ")
        form = web.input()
        print("input:", form.get("dataURL"))
        print("input:", form.get("number"))

        # Reject bad labels up front: a non-numeric value would crash int(),
        # and -1 would silently index the last slot of the one-hot vector.
        number = self._parse_digit(form.get("number"))
        if number is None:
            return "请输入0-9之间的一个数字"

        ori_image_data = self._decode_image(form.get("dataURL"))
        if not ori_image_data:
            return "未收到手写图片数据"

        # Save the picture under a unique name.
        # NOTE(review): the payload is PNG but the name ends in ".jpg";
        # left as-is because test4 reads the file by this name.
        name = str(uuid.uuid1()) + ".jpg"
        with open(name, 'wb') as fout:
            fout.write(ori_image_data)

        # Convert the picture to the network input; presumably a (784, 1)
        # array -- confirm against test4.Data2.getTestPicArray.
        data = test4.Data2().getTestPicArray(name)
        training_result = self.vectorized_result(number)

        # Persist the sample in the custom loader's CSV file.
        loder.join(data, training_result)
        # Add the sample to the module-level training set, which is the list
        # the judge handler trains on before predicting.
        training_data.append((data, training_result))
        return "加入训练集并后台开始训练"

    @staticmethod
    def _parse_digit(raw):
        """Return ``raw`` as an int in 0..9, or None if it is not one."""
        try:
            value = int(raw)
        except (TypeError, ValueError):
            return None
        return value if 0 <= value <= 9 else None

    @staticmethod
    def _decode_image(data_url):
        """Return the image bytes of a PNG data URL, or None if undecodable."""
        if not data_url:
            return None
        base64_str = str(data_url).replace("data:image/png;base64,", "")
        try:
            return base64.b64decode(base64_str)
        except ValueError:  # binascii.Error is a ValueError subclass
            return None

    def vectorized_result(self,j):
        """Return a 10-dimensional unit vector with a 1.0 in the jth
        position and zeroes elsewhere.  This is used to convert a digit
        (0...9) into a corresponding desired output from the neural
        network."""
        e = np.zeros((10, 1))
        e[j] = 1.0
        return e
class judge:
    """web.py handler for ``/Judge``: predict the digit drawn on the canvas."""

    def GET(self):
        """Log the request; GET carries no drawing, so nothing is returned."""
        print("judge GET")

    def POST(self):
        """Decode the posted drawing, train the network, return the prediction."""
        import uuid

        import test4

        print("judge POST ")
        raw = web.input().get("dataURL")
        print("input:", raw)
        if not raw:
            # Without this, str(None) would be decoded as if it were base64.
            return "未收到手写图片数据"
        dataURL = str(raw)
        base64_str = dataURL.replace("data:image/png;base64,", "")
        print(base64_str)
        ori_image_data = base64.b64decode(base64_str)

        # Save the picture under a unique name.
        # NOTE(review): the payload is PNG but the name ends in ".jpg";
        # left as-is because test4 reads the file by this name.
        name = str(uuid.uuid1()) + ".jpg"
        with open(name, 'wb') as fout:
            fout.write(ori_image_data)
        data = test4.Data2().getTestPicArray(name)

        # NOTE(review): the network is trained on every prediction request;
        # consider training once at start-up instead.
        print("开始训练,训练集有:", len(training_data))
        net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
        print("训练完成,开始预测")
        return self._format_result(net.cjtest(data))

    @staticmethod
    def _format_result(digit):
        """Build the response text; a single string rather than a tuple."""
        return "预测的结果为:" + str(digit)
class StaticFile:
    """web.py handler that redirects asset requests to the /static/ prefix."""

    def GET(self, file):
        """Redirect to ``/static/<file>``.

        ``file`` is the path captured by the URL pattern.
        """
        print("调用StaticFile GET. file=",file)
        # web.py redirects are exceptions and are meant to be raised.
        # NOTE(review): for the '/static/(.*)' route this redirects to the
        # same URL; verify that route is served by static middleware first.
        raise web.seeother('/static/' + file)
class index:
    """web.py handler for ``GET /index``: serve the handwriting page."""

    def GET(self):
        """Return the contents of ``index.html`` from the working directory."""
        # The context manager closes the handle as soon as the page is read.
        with open("index.html", encoding="utf-8") as html:
            return html.read()
class hello:
    """Catch-all web.py handler: serve the handwriting page for any other path."""

    def GET(self, name):
        """Return the contents of ``index.html``.

        ``name`` is the path captured by the catch-all URL pattern; it is
        not used.
        """
        # The context manager closes the handle as soon as the page is read.
        with open("index.html", encoding="utf-8") as html:
            return html.read()
if __name__ == "__main__":
    # Training at start-up is disabled.
    #net.SGD(training_data, 30, 10, 2.0, test_data=test_data)
    print("神经网络初始化完成,启动web服务中...")
    app.run()
2、自定义仿MNIST数据加载类
import pickle
import gzip
# Third-party libraries
import numpy as np
class DataLoder():
    """Store of user-drawn digit samples, persisted as rows of a CSV file.

    Each CSV row holds 794 numbers: 784 input values followed by the 10
    entries of the expected output vector. In memory the samples live in
    two parallel lists, ``training_inputs`` ((784, 1) arrays) and
    ``training_results`` ((10, 1) arrays). The MNIST pickle is read only
    when ``load_data``, ``load_data_wrapper`` or ``zip`` is called.
    """

    def __init__(self, csv_path='shouxie.csv', mnist_path='mnist.pkl.gz'):
        """Load previously saved samples.

        csv_path: CSV file the custom samples are read from and written to.
        mnist_path: gzip-compressed MNIST pickle used by ``load_data``.
        """
        self.csv_path = csv_path
        self.mnist_path = mnist_path
        self.training_inputs, self.training_results = self._read_csv()
        print("从shouxie.csv中读入", len(self.training_inputs), "条数据")
        print("初始化训练数据成功,现有",len(self.training_inputs),"条数据")

    def _read_csv(self):
        """Parse the CSV file into (inputs, results) lists of column vectors."""
        import csv
        inputs = []
        results = []
        try:
            with open(self.csv_path, encoding='utf-8', newline='') as csvfile:
                for row in csv.reader(csvfile):
                    # Only complete rows (784 inputs + 10 outputs) are samples.
                    if len(row) != 794:
                        continue
                    try:
                        values = np.array([float(v) for v in row])
                    except ValueError:
                        # Skip a corrupt row instead of losing the rest.
                        continue
                    inputs.append(values[:784].reshape(784, 1))
                    results.append(values[784:].reshape(10, 1))
        except (OSError, UnicodeDecodeError, csv.Error):
            # Best effort: a missing or unreadable file leaves the rows
            # read so far (possibly none).
            print("文件打开失败")
        return inputs, results

    def join2(self, training_input, number):
        """Add a sample labelled by its digit ``number`` (0..9).

        Converts the digit to a one-hot vector and delegates to ``join``,
        so both sample lists stay the same length.
        """
        self.join(training_input, self.vectorized_result(number))

    def join(self,training_input,training_result):
        """Append one sample and rewrite the CSV file with all samples.

        training_input: (784, 1) array of input values.
        training_result: (10, 1) expected output vector.
        """
        self.training_inputs.append(training_input)
        self.training_results.append(training_result)
        print("加入训练数据成功,现有",len(self.training_inputs),"条数据")
        import csv
        # newline='' is what the csv module expects for files it writes;
        # without it, extra blank rows appear on Windows.
        with open(self.csv_path, 'w', encoding='utf-8', newline='') as csvfile:
            spamwriter = csv.writer(csvfile, dialect='excel')
            for sample, label in zip(self.training_inputs, self.training_results):
                # Flatten both column vectors into one 794-value row.
                spamwriter.writerow([v[0] for v in sample] + [v[0] for v in label])
        print("训练数据已保存到文件中")

    def zip(self):
        """Return ``(training_data, validation_data, test_data)``.

        Training data pairs the custom samples with their result vectors;
        validation and test data come from the MNIST pickle. All three are
        lazy ``zip`` iterators.
        """
        _, va_d, te_d = self.load_data()
        training_data = zip(self.training_inputs, self.training_results)
        return (training_data,) + self._wrap_eval(va_d, te_d)

    def load_data(self):
        """Return the ``(training, validation, test)`` tuple from the MNIST pickle."""
        # NOTE: unpickling runs arbitrary code; only point mnist_path at a
        # trusted file.
        with gzip.open(self.mnist_path, 'rb') as f:
            training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
        return (training_data, validation_data, test_data)

    def load_data_wrapper(self):
        """Return the MNIST data in the format used for training.

        Training inputs are reshaped to (784, 1) and paired with one-hot
        result vectors; validation and test inputs are reshaped and paired
        with their labels as stored. All three are lazy ``zip`` iterators.
        """
        tr_d, va_d, te_d = self.load_data()
        training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
        training_results = [self.vectorized_result(y) for y in tr_d[1]]
        training_data = zip(training_inputs, training_results)
        return (training_data,) + self._wrap_eval(va_d, te_d)

    @staticmethod
    def _wrap_eval(va_d, te_d):
        """Pair reshaped validation/test inputs with their labels."""
        validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
        validation_data = zip(validation_inputs, va_d[1])
        test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
        test_data = zip(test_inputs, te_d[1])
        return (validation_data, test_data)

    def vectorized_result(self,j):
        """Return a (10, 1) vector with 1.0 at index ``j`` and zeros elsewhere."""
        e = np.zeros((10, 1))
        e[j] = 1.0
        return e
#load_data_wrapper()
3、预测方法
def cjtest(self,data):
    """Return the index of the largest value in the network output for ``data``.

    The output is whatever ``self.feedforward(data)`` returns; the caller
    reports the resulting index as the predicted digit.
    """
    output = self.feedforward(data)
    return np.argmax(output)