概述
基于卷积神经网络(CNN)实现水果识别。用仿真实现机械臂对水果的分拣。
1 实现过程
1.1 应用场景实现过程
1). 水果图像数据采集以及上传
2). 图像识别(经过卷积神经网络训练后得到的模型)
3). 图像识别后的结果通过串口发送数据给芯片
4). 芯片控制舵机使机械臂分拣水果
整体过程如下图:
1.2 仿真场景实现过程
实现功能: 软件模拟传送带上的水果识别,接收上层传入的控制命令并执行相应操作,界面上的机械臂会动态地将识别出的水果推出。
实现过程如下:
1). 仿真软件会把水果图片路径通过串口发送到pc
2). pc依据串口收到图片路径信息,读取图片信息,进行识别
3). 然后pc通过串口发送命令进行机械臂操作
演示图片如下:
2 代码实现
只用了苹果和香蕉两种水果识别
2.1 基于卷积神经网络图像识别模型搭建与训练模块
# Library imports
import numpy as np
from keras.datasets import mnist  # NOTE(review): unused here — leftover from the commented-out MNIST example below
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout  # Dropout randomly disconnects input neurons at a given rate on each parameter update; used to prevent overfitting.
from keras.layers import Flatten  # Flatten collapses multi-dimensional input to 1-D; used for the transition from conv layers to dense layers. Does not affect batch size.
from keras.layers.convolutional import Conv2D  # 2-D (spatial) convolution over images. NOTE(review): in Keras 2+ these classes are imported from keras.layers — confirm this path matches the installed version.
from keras.layers.convolutional import MaxPooling2D  # Spatial max pooling (subsampling): shrinks each feature map while keeping the strongest activations.
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from keras import backend as K
import cv2
#K.set_image_dim_ordering('tf')  # Image dimension ordering ('tf' or 'th'): 'th' expects (channels, rows, cols); 'tf' expects (rows, cols, channels).
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use NVIDIA GPU 0
from glob import glob
# Hyper-parameters and paths
img_h, img_w, n_channels = 100, 100, 3  # image height, width, channel count
classes = 2  # number of classes (apple / banana)
train_batch_size = 32  # training batch size
test_batch_size = 32  # test batch size; used as the validation set in this experiment
train_path = './data/Training/'
test_path = './data/Test/'
train_epoch = 10
MODEL_PATH = 'final_model_apple.h5'
class Dataset:
    """Load the fruit image dataset from disk and prepare disjoint
    train / validation / test splits for the CNN.

    Images are resized to (img_w, img_h), normalised to [0, 1], and labels
    are one-hot encoded with ``nb_classes`` categories.
    """

    def __init__(self, path_name):
        # Training split
        self.train_images = None
        self.train_labels = None
        # Validation split
        self.valid_images = None
        self.valid_labels = None
        # Test split
        self.test_images = None
        self.test_labels = None
        # Root directory the images are loaded from
        self.path_name = path_name
        # (rows, cols, channels) — filled in by load()
        self.input_shape = None

    def load_dataset(self):
        """Read every file under ``path_name``, resize each image, and with
        probability ~0.5 also keep a randomly flipped copy (augmentation).

        Returns ``(images, labels)`` where ``images`` is a numpy array and
        ``labels`` a list. The label is taken from a fixed character position
        in the file name (``img_name[-9]``) — assumes names shaped like
        '..._0_xxx.jpg'; TODO confirm against the actual dataset layout.
        """
        all_paths = glob(self.path_name + "*")
        images = []
        labels = []
        for img_name in all_paths:
            img = cv2.imread(img_name, 1)
            if img is None:
                # Skip files OpenCV cannot decode instead of crashing on resize.
                continue
            img = cv2.resize(img, dsize=(img_w, img_h))  # normalise input size
            label = img_name[-9]
            images.append(img)
            labels.append(label)
            # Augmentation: one coin toss decides whether to add a flipped
            # copy, a second one picks the flip direction.
            if np.random.random() > 0.5:
                flip_axis = 1 if np.random.random() > 0.5 else 0  # 1 = horizontal, 0 = vertical
                images.append(cv2.flip(img, flip_axis))
                labels.append(label)
        return np.array(images), labels

    def load(self, img_rows=img_h, img_cols=img_w,
             img_channels=n_channels, nb_classes=classes):
        """Load the images and populate the train/valid/test attributes.

        Bug fix vs. the original: the test set used to be re-sampled from the
        FULL dataset and therefore overlapped the training set (data leakage).
        The data is now split once into 70% train / 30% holdout, and the
        holdout is split in half into validation and test, so the three sets
        are disjoint.

        NOTE(review): augmentation happens before splitting, so a flipped copy
        of a training image can still land in validation/test; augmenting
        after the split would remove that residual leakage.
        """
        images, labels = self.load_dataset()
        # 70% train / 30% holdout
        train_images, hold_images, train_labels, hold_labels = train_test_split(
            images, labels, test_size=0.3)
        # Holdout -> 50/50 validation / test, disjoint from training data
        valid_images, test_images, valid_labels, test_labels = train_test_split(
            hold_images, hold_labels, test_size=0.5)
        # TensorFlow channel ordering: (rows, cols, channels)
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
        # Report split sizes
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test1 samples')
        # The model uses categorical_crossentropy, so the class labels must be
        # one-hot encoded into nb_classes-dimensional vectors.
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)
        # Convert pixels to float and normalise to the [0, 1] range.
        train_images = train_images.astype('float32') / 255
        valid_images = valid_images.astype('float32') / 255
        test_images = test_images.astype('float32') / 255
        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
dataset = Dataset(train_path)  # read the training images
dataset.load()
# load data
#(X_train, y_train), (X_test, y_test) = mnist.load_data()
# reshape to be [samples][pixels][width][height]
# X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
# X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
#
# X_train = X_train / 255 #像素最大255
# X_test = X_test / 255
# # one hot encode outputs
# y_train = np_utils.to_categorical(y_train)
# y_test = np_utils.to_categorical(y_test)
# num_classes = 10
# Next we build the CNN:
# - The first layer is a Conv2D layer with 32 feature maps (filters) of size 5*5, acting as the
#   model's input layer; it accepts [rows][cols][channels] input and uses a 'relu' activation.
# - Next is a pooling layer using MaxPooling with a 2*2 pool size.
# - Then a Dropout layer, which regularises the parameters to reduce overfitting.
# - Then a fully-connected layer with 128 neurons and a 'relu' activation.
# - The final output layer has `classes` (here 2) neurons, one per class; each output is the
#   probability that the sample belongs to that class.
def baseline_model(dataset):
    """Build and compile the two-block CNN classifier.

    Architecture: Conv(32, 5x5, relu) -> MaxPool(2x2) -> Dropout(0.2) ->
    Conv(64, 5x5, relu) -> MaxPool(2x2, stride 2) -> Dropout(0.2) ->
    Flatten -> Dense(128, relu) -> Dense(classes, softmax).

    Compiled with categorical cross-entropy loss, the Adam optimizer, and
    accuracy as the reported metric.
    """
    layer_stack = [
        Conv2D(32, (5, 5), input_shape=dataset.input_shape, activation='relu'),  # 32 conv filters
        MaxPooling2D(pool_size=(2, 2)),                                          # spatial pooling
        Dropout(0.2),
        Conv2D(64, kernel_size=(5, 5), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.2),
        Flatten(),                                                               # to 1-D
        Dense(128, activation='relu'),
        Dense(classes, activation='softmax'),                                    # class probabilities
    ]
    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Training
# Build the model
model = baseline_model(dataset)
# Fit on the training split, validating on the held-out split after each epoch
model.fit(dataset.train_images,dataset.train_labels, validation_data=(dataset.valid_images, dataset.valid_labels), epochs=train_epoch, batch_size=train_batch_size, verbose=1)
# 1. Print a summary of the model architecture
model.summary()
# Final evaluation of the model on the test split
scores = model.evaluate(dataset.test_images, dataset.test_labels, verbose=1)
# scores[1] is accuracy, so this reports the error rate in percent
print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))
model.save('final_model_apple.h5')  # NOTE(review): duplicates the MODEL_PATH constant defined above
2.2 串口配置和图像识别模块
import cv2
import numpy as np
from keras.models import load_model
import time
import os
import serial  # pyserial
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
model = load_model('./final_model_apple.h5') # load the trained model — replace with your own model path
# os.chdir('./data/pic_test') # switch to the test image directory — replace with your own
portx = "COM3"  # serial port name (Windows COM port)
bps = 115200    # baud rate
timex = 5       # read timeout in seconds
# Initialise the serial port
ser = serial.Serial(portx, bps, timeout=timex)
print("begin....")
# Main loop: receive an image path over the serial port, classify it with the
# CNN, and (for apples) send the sorting command back to the microcontroller.
while True:
    raw = ser.readline()
    if not raw.strip():  # empty read / timeout — skip
        continue
    # Bug fix vs. the original: raw[:-1] stripped only the final byte, so a
    # CRLF-terminated line left a stray '\r' in the path and imread failed.
    # strip() removes any trailing line-ending characters.
    path = raw.decode().strip()
    print("recv:", path)
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        # Unreadable/bad path — skip instead of crashing on resize.
        print("cannot read image:", path)
        continue
    print(img)
    resized = cv2.resize(img, (100, 100), interpolation=cv2.INTER_AREA)
    batch = (resized.reshape(1, 100, 100, 3)).astype("float32") / 255  # match training preprocessing
    predict = model.predict(batch)
    print(predict)
    predict = np.argmax(predict, axis=1)
    # Class indices: 0 -> apple, 1 -> banana
    if 0 == predict[0]:
        cmd = bytes.fromhex('ff 01')  # sorting command for the robot arm MCU
        ser.write(cmd)
        print("图片中的水果是苹果")
    else:
        print("图片中的水果是香蕉")
结束语
首先要感谢华清李老师对这次项目的指导,由于机械臂仿真软件是老师提供故没有上传
链接:https://pan.baidu.com/s/1OIeyeEOKmpMapnOsLtFqgQ
提取码:pnqu
后期在这个网站上下载更多数据来训练 :https://www.kaggle.com/moltean/fruits