import numpy as np
import paddle
paddle.enable_static()
import paddle.dataset.mnist as mnist
import paddle.fluid as fluid
from PIL import Image
# 搭建简单的卷积神经网络:输入层、卷积层、池化层、卷积层、池化层、输出层
# Build a simple CNN: input -> conv -> pool -> conv -> pool -> output layer.
def convolutional_neural_netwrk(input):
    """Return a softmax classifier head stacked on two conv/pool stages.

    Args:
        input: image tensor variable, expected shape [N, 1, 28, 28].

    Returns:
        A size-10 fully-connected layer with softmax activation
        (one probability per MNIST digit class, summing to 1).
    """
    # First stage: convolution (32 filters, kernel 3, stride 1)
    # followed by max pooling (window 2, stride 1).
    stage1 = fluid.layers.conv2d(input=input, num_filters=32,
                                 filter_size=3, stride=1)
    stage1 = fluid.layers.pool2d(input=stage1, pool_size=2,
                                 pool_stride=1, pool_type='max')
    # Second stage: convolution (64 filters, kernel 3, stride 1)
    # followed by the same max pooling.
    stage2 = fluid.layers.conv2d(input=stage1, num_filters=64,
                                 filter_size=3, stride=1)
    stage2 = fluid.layers.pool2d(input=stage2, pool_size=2,
                                 pool_stride=1, pool_type='max')
    # Fully-connected layer of size 10 because MNIST has 10 classes;
    # softmax turns the logits into class probabilities.
    return fluid.layers.fc(input=stage2, size=10, act='softmax')
# 定义标签层,图像是单通道28pxX28px,所以[1, 28, 28]
image = fluid.data(name='image', shape=[None, 1, 28, 28], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
model = convolutional_neural_netwrk(image)
# 在通过深度神经获取输出之后,就可以从主程序中复制一个程序用于训练结束时的预测
infer_program = fluid.default_main_program().clone(for_test=True)
cost = fluid.layers.cross_entropy(input=model, label=label) # 交叉熵
avg_cost = fluid.layers.mean(cost) # 求均值
acc = fluid.layers.accuracy(input=model, label=label) # 准确率
# 从主程序中复制多一个测试程序,为了能够在测试中输出损失值和准确率
test_program = fluid.default_main_program().clone(for_test=True)
# 优化器(Adam是一种自适应调整学习率的方法,使用大数据高维空间场景)
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.0005)
opts = optimizer.minimize(avg_cost)
# 通过mnist.train()、mnist.test()获取训练集、测试集,batch_size把数据集分割一个个批次,一批次数据为128张
train_reader = paddle.batch(mnist.train(), batch_size=128)
test_reader = paddle.batch(mnist.test(), batch_size=128)
# 初始化执行器
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# 通过feed_list指定每组数据输入顺序,通过place指定训练数据向CPU输入
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
# 训练两轮
for pass_id in range(2):
for batch_id, data in enumerate(train_reader()):
# fetch_list:后面的参数是决定上面功能
train_cost, train_acc = exe.run(program=fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, acc]) # 损失值、准确率
if batch_id % 100 == 0:
print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' % (pass_id, batch_id, train_cost[0], train_acc[0]))
# 训练结束,再进行一次测试,使用测试集进行测试
test_accs = []
test_costs = []
for batch_id, data in enumerate(test_reader()):
test_cost, test_acc = exe.run(program=test_program,
feed=feeder.feed(data),
fetch_list=[avg_cost, acc]) # 损失值、准确率
test_accs.append(test_acc[0])
test_costs.append(test_cost[0])
# 求测试结果的平均值
test_cost = (sum(test_costs) / len(test_costs))
test_acc = (sum(test_accs) / len(test_accs))
print("Test:%d, Cost:%0.5f, Accuracy:%0.5f" % (pass_id, test_cost, test_acc))
# 上述训练结束,接下来预测一张实际的图片
# 先将图片经过预处理转换成张量加载到PaddlePaddle训练。(灰度化缩放大小)
def load_image(file):
    """Load an image file and preprocess it into a network-ready tensor.

    The image is converted to grayscale, resized to 28x28, reshaped to
    NCHW layout [1, 1, 28, 28], and rescaled from [0, 255] to [-1.0, 1.0].

    Args:
        file: path to the image file.

    Returns:
        numpy.ndarray of shape (1, 1, 28, 28), dtype float32.
    """
    im = Image.open(file).convert('L')  # 'L' = single-channel grayscale
    # Image.ANTIALIAS was deprecated and removed in Pillow 10; LANCZOS is
    # the same high-quality resampling filter under its current name.
    im = im.resize((28, 28), Image.LANCZOS)
    im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
    # Map pixel values from [0, 255] to [-1.0, 1.0].
    im = im / 255.0 * 2.0 - 1.0
    return im
# Training done — classify one real image with the cloned inference program.
input_tensor = load_image('8.jpg')
results = exe.run(program=infer_program,              # forward-only program cloned above
                  feed={image.name: input_tensor},    # only the image input is needed
                  fetch_list=[model])                 # softmax output of the classifier
# argsort orders the class indices by ascending probability, so the last
# entry is the most probable class, i.e. the predicted digit.
sorted_indices = np.argsort(results)
lab = sorted_indices[0][0][-1]
print('图片中识别出来的数字是: %d' % lab)
print(sorted_indices)
# Digit image recognition
# (original article published 2024-09-12 23:40:25)