# Image sample preprocessing
import cv2
import numpy as np
import os
import random
import matplotlib.pyplot as plt
from global_var import *
from math import *
# Rotate without cropping: expand the canvas so the whole image stays visible
# (the original function was misnamed "remote"; renamed to rotate_no_crop)
def rotate_no_crop(img, angle):
    h, w = img.shape[:2]
    # New canvas size that fully contains the rotated image
    h_new = int(w * fabs(sin(radians(angle))) + h * fabs(cos(radians(angle))))
    w_new = int(h * fabs(sin(radians(angle))) + w * fabs(cos(radians(angle))))
    mat_rotation = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
    # Shift the rotation center to the center of the enlarged canvas
    mat_rotation[0, 2] += (w_new - w) / 2
    mat_rotation[1, 2] += (h_new - h) / 2
    img_rotation = cv2.warpAffine(img, mat_rotation, (w_new, h_new),
                                  borderValue=(255, 255, 255))  # fill the border with white
    return img_rotation
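# Quick self-contained check (a sketch, not part of the pipeline): rotate a
# synthetic 100x200 canvas by 45 degrees and confirm the canvas grows so that
# no corner is lost.
demo = np.full((100, 200, 3), 255, dtype=np.uint8)   # white canvas
cv2.rectangle(demo, (40, 30), (160, 70), (0, 0, 0), -1)  # black box for orientation
demo_rot = rotate_no_crop(demo, 45)
print(demo.shape, "->", demo_rot.shape)  # (100, 200, 3) -> (212, 212, 3)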
def do_rotate(im, angle, center=None, scale=1.0):
    """
    Rotate an image on a fixed canvas.
    :param im: original image data
    :param angle: rotation angle in degrees
    :param center: rotation center; defaults to the image center when None
    :param scale: scaling factor, 1.0 by default
    :return: the rotated image
    """
    h, w = im.shape[:2]  # image height and width
    # Default the rotation center to the image center
    if center is None:
        center = (w / 2, h / 2)
    # Build the rotation matrix
    M = cv2.getRotationMatrix2D(center, angle, scale)
    # Apply the affine transform with OpenCV
    rotated = cv2.warpAffine(im, M, (w, h))
    return rotated  # return the rotated image
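# Contrast sketch: the same synthetic canvas from above rotated on a fixed
# canvas; the size is preserved and the corners are clipped, unlike
# rotate_no_crop().
demo_fixed = do_rotate(demo, 45)
print(demo_fixed.shape)  # (100, 200, 3)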
def rotate_all():
    dirs = os.listdir(data_root_path)  # list all sub-directories
    for d in dirs:
        dir_path = os.path.join(data_root_path, d)  # build the path
        if not os.path.isdir(dir_path):  # skip non-directories
            continue
        sub_dir_path = os.path.join(dir_path, "Imgs")  # the Imgs directory inside it
        imgs = os.listdir(sub_dir_path)  # list all original samples
        for img_file in imgs:
            img_full_path = os.path.join(sub_dir_path, img_file)  # full path
            print(img_full_path)
            im = cv2.imread(img_full_path)
            name, suffix = os.path.splitext(img_file)  # split name and extension
            # Rotate by 45/90/135/180/225/270/315 degrees
            for i in range(1, 8):
                img_new = rotate_no_crop(im, 45 * i)
                # Build a new file name such as AIBJ-KG-00001_rotate_1.jpg
                img_new_name = "%s_rotate_%d%s" % (name, i, suffix)
                cv2.imwrite(os.path.join(sub_dir_path, img_new_name), img_new)  # save the rotated image
                print("save ok:", os.path.join(sub_dir_path, img_new_name))
if __name__ == "__main__":
    # Rotate every sample image
    rotate_all()
    print("Image preprocessing finished")
# Face (fruit) recognition example: data preprocessing
import paddle.fluid as fluid
import os
import json
from global_var import *
name_data_list = {}  # maps each class name to its list of image paths
def get_file_lines(file_path, label):  # count the lines in a list file that carry the given label
    with open(file_path) as f:
        i = 0
        for line in f.readlines():
            line = line.strip()
            tmp = line.split("\t")
            if len(tmp) < 2:
                continue
            if int(tmp[1]) == label:
                i += 1
        return i
def save_train_test_file(path, name):
    if name not in name_data_list:  # class not seen yet
        name_data_list[name] = [path]  # start a new list for it
    else:  # class already in the dict
        name_data_list[name].append(path)
# Walk the per-class folders and collect every image path
dirs = os.listdir(data_root_path)
for d in dirs:
    full_path = os.path.join(data_root_path, d)  # full path
    if os.path.isdir(full_path):  # directory
        full_path = os.path.join(full_path, "Imgs")
        imgs = os.listdir(full_path)
        for img in imgs:
            save_train_test_file(os.path.join(full_path, img), d)
    else:  # plain file: ignore
        pass
# Truncate the data list files
with open(test_file_path, "w") as f:
    pass
with open(train_file_path, "w") as f:
    pass
for name, img_list in name_data_list.items():
    i = 0
    num = len(img_list)
    print("%s: %d images" % (name, num))
    for img in img_list:
        if i % 10 == 0:  # every 10th image goes to the test set
            with open(test_file_path, "a") as f:
                line = "%s\t%d\n" % (img, name_dict[name])
                f.write(line)
        else:  # the rest go to the training set
            with open(train_file_path, "a") as f:
                line = "%s\t%d\n" % (img, name_dict[name])
                f.write(line)
        i += 1
print("Data lists generated!")
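# Sanity check (sketch): re-count each class in the generated lists with
# get_file_lines(); the numeric labels come from name_dict in global_var,
# the same mapping used when writing the lists above.
for name, label in name_dict.items():
    print("%s: %d train / %d test" % (name,
                                      get_file_lines(train_file_path, label),
                                      get_file_lines(test_file_path, label)))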
# Training
# Imports needed by this section (it uses paddle.dataset, cpu_count and
# matplotlib below, which the original listing did not import)
import os
import paddle
import paddle.fluid as fluid
import matplotlib.pyplot as plt
from multiprocessing import cpu_count
from global_var import *
train_img_size = 200  # input image size
BATCH_SIZE = 16  # batch size
BUF_SIZE = 10000  # shuffle/xmap buffer size
learning_rate = 0.0001
EPOCHS = 40
def train_mapper(sample):
    """
    Load and preprocess the image for one sample record.
    :param sample: tuple in the form (image path, label)
    :return: image data and label
    """
    img, label = sample  # img is the image path, label is the class id
    if not os.path.exists(img):
        print("image not found:", img)
    # Read the image content
    img = paddle.dataset.image.load_image(img)
    # Resize/crop the image to a fixed size
    img = paddle.dataset.image.simple_transform(
        im=img,  # original image
        resize_size=train_img_size,  # target resize size
        crop_size=train_img_size,  # crop size
        is_color=True,  # color image
        is_train=True)  # training mode
    # Normalize pixel values into the 0~1 range: speeds up convergence,
    # mitigates vanishing gradients and stabilizes the CNN
    img = img.astype("float32") / 255.0
    return img, label  # image data and label
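# Sketch of what train_mapper yields for one record; the path below is
# hypothetical, any entry from the training list would do.
# img, label = train_mapper(("data/MagneticTile/MT_Free/Imgs/xxx.jpg", 4))
# print(img.shape, img.dtype, label)  # (3, 200, 200) float32 4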
# Reader
def train_r(train_list, buffered_size=BUF_SIZE):
    def reader():
        with open(train_list, "r") as f:  # open the training list
            lines = [line.strip() for line in f]
        for line in lines:  # iterate over the records
            img_path, lab = line.split("\t")  # split path and label
            yield img_path, int(lab)
    return paddle.reader.xmap_readers(
        train_mapper,  # post-processes each record that reader yields
        reader,  # reads the list file and feeds train_mapper
        cpu_count(),  # number of worker threads (matches logical CPUs)
        buffered_size)  # buffer size
train_reader = train_r(train_file_path)  # base reader
random_train_reader = paddle.reader.shuffle(
    reader=train_reader,
    buf_size=BUF_SIZE)  # shuffled reader
batch_train_reader = paddle.batch(
    random_train_reader,
    batch_size=BATCH_SIZE)  # batched reader
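# Sketch: pull a single mini-batch to confirm shapes before defining the network.
# for batch in batch_train_reader():
#     print(len(batch), batch[0][0].shape)  # 16 (3, 200, 200)
#     break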
# Input variables
image = fluid.layers.data(name="image",  # name
                          shape=[3, train_img_size, train_img_size],  # shape
                          dtype="float32")  # type
label = fluid.layers.data(name="label",
                          shape=[1],
                          dtype="int64")
# Build the CNN
# Structure: input --> conv/relu/pool/dropout --> conv/relu/pool/dropout
#   --> conv/relu/pool/dropout --> fc --> dropout --> fc
def convolution_neural_network(image,  # input image
                               type_size):  # number of classes
    # First conv/relu/pool/dropout group
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=image,  # input (raw image data)
        filter_size=3,  # 3x3 kernels
        num_filters=32,  # number of kernels
        pool_size=2,  # 2x2 pooling window
        pool_stride=2,  # pooling stride
        act="relu")  # activation
    drop = fluid.layers.dropout(x=conv_pool_1,  # input
                                dropout_prob=0.5)  # drop probability
    # Second conv/relu/pool/dropout group
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=drop,  # input (previous dropout output)
        filter_size=3,  # 3x3 kernels
        num_filters=64,  # number of kernels
        pool_size=2,  # 2x2 pooling window
        pool_stride=2,  # pooling stride
        act="relu")  # activation
    drop = fluid.layers.dropout(x=conv_pool_2,  # input
                                dropout_prob=0.5)  # drop probability
    # Third conv/relu/pool/dropout group
    conv_pool_3 = fluid.nets.simple_img_conv_pool(
        input=drop,  # input (previous dropout output)
        filter_size=3,  # 3x3 kernels
        num_filters=64,  # number of kernels
        pool_size=2,  # 2x2 pooling window
        pool_stride=2,  # pooling stride
        act="relu")  # activation
    drop = fluid.layers.dropout(x=conv_pool_3,  # input
                                dropout_prob=0.5)  # drop probability
    # Fully connected layer
    fc = fluid.layers.fc(input=drop,
                         size=512,  # number of output units
                         act="relu")
    # Dropout
    drop = fluid.layers.dropout(x=fc, dropout_prob=0.5)
    # Output layer (fc with softmax over the classes)
    predict = fluid.layers.fc(input=drop,
                              size=type_size,  # one output per class
                              act="softmax")  # softmax activation on the output layer
    return predict
# Build the CNN model
predict = convolution_neural_network(image=image,
                                     type_size=6)
# Loss function
cost = fluid.layers.cross_entropy(input=predict,  # prediction
                                  label=label)  # ground truth
avg_cost = fluid.layers.mean(cost)
# Accuracy
accuracy = fluid.layers.accuracy(input=predict,  # prediction
                                 label=label)  # ground truth
# Optimizer
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
optimizer.minimize(avg_cost)  # minimize the loss
# Executor
place = fluid.CUDAPlace(0)  # GPU
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())  # initialize parameters
# Feeder
feeder = fluid.DataFeeder(feed_list=[image, label],  # variables to feed
                          place=place)
model_save_dir = "model/"  # checkpoint directory
costs = []  # recorded loss values
accs = []  # recorded accuracies
times = 0
batches = []  # recorded iteration counts
# if os.path.exists(model_save_dir):  # load a checkpoint for incremental training
#     fluid.io.load_persistables(exe, model_save_dir, fluid.default_main_program())
#     print("checkpoint loaded.")
# Training loop
for pass_id in range(EPOCHS):
    train_cost = 0  # last loss value seen in this pass
    for batch_id, data in enumerate(batch_train_reader()):
        times += 1
        train_cost, train_acc = exe.run(
            feed=feeder.feed(data),
            fetch_list=[avg_cost, accuracy])
        if batch_id % 20 == 0:
            print("pass_id:%d, batch:%d, cost:%f, acc:%f" %
                  (pass_id, batch_id,
                   train_cost[0], train_acc[0]))
            accs.append(train_acc[0])  # record accuracy
            costs.append(train_cost[0])  # record loss
            batches.append(times)  # record iteration count
# # Save a checkpoint for incremental training
# if not os.path.exists(model_save_dir):  # create the directory if needed
#     os.makedirs(model_save_dir)
# fluid.io.save_persistables(exe, model_save_dir, fluid.default_main_program())
# print("checkpoint saved!")
# Save the frozen inference model
model_freeze_dir = "model_freeze/"
if not os.path.exists(model_freeze_dir):
    os.makedirs(model_freeze_dir)
fluid.io.save_inference_model(dirname=model_freeze_dir,
                              feeded_var_names=["image"],
                              target_vars=[predict],
                              executor=exe)
print("inference model saved!")
# Visualize the training curves
plt.title("training", fontsize=24)
plt.xlabel("iter", fontsize=20)
plt.ylabel("cost/acc", fontsize=20)
plt.plot(batches, costs, color='red', label="Training Cost")
plt.plot(batches, accs, color='green', label="Training Acc")
plt.legend()
plt.grid()
plt.savefig("train.png")
plt.show()
# Face (fruit) recognition example: inference
import paddle
import paddle.fluid as fluid
import numpy as np
import sys
import os
import matplotlib.pyplot as plt
from PIL import Image
from global_var import *
place = fluid.CPUPlace()
infer_exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
model_freeze_dir = "model_freeze/"
# Load one image as model input
def load_image(path):
    # Resize/crop to the training input size (200x200, not 240 as in the
    # original listing) so the shape matches the frozen model
    img = paddle.dataset.image.load_and_transform(path, 200, 200, False).astype("float32")
    img = img / 255.0
    return img
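# Sketch (hypothetical path): verify the loader output before batching.
# sample = load_image("data/MagneticTile/MT_Free/Imgs/xxx.jpg")
# print(sample.shape, sample.min(), sample.max())  # (3, 200, 200), values in [0, 1]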
infer_imgs = []
# class 0
test_img = "data/MagneticTile/MT_Blowhole/Imgs/exp6_num_4841.jpg"
# class 1
# test_img = "data/MagneticTile/MT_Break/Imgs/exp2_num_271384.jpg"
# class 2
# test_img = "data/MagneticTile/MT_Crack/Imgs/exp1_num_32128.jpg"
# class 3
# test_img = "data/MagneticTile/MT_Fray/Imgs/exp1_num_20362.jpg"
# class 4
# test_img = "data/MagneticTile/MT_Free/Imgs/exp3_num_344580.jpg"
# test_img = "data/MagneticTile/MT_Free/Imgs/exp6_num_293912.jpg"
# class 5
# test_img = "data/MagneticTile/MT_Uneven/Imgs/exp1_num_45007.jpg"
infer_imgs.append(load_image(test_img))
infer_imgs = np.array(infer_imgs)
print("infer_imgs.shape:", infer_imgs.shape)
with fluid.scope_guard(inference_scope):
    [inference_program, feed_target_names, fetch_targets] = \
        fluid.io.load_inference_model(model_freeze_dir, infer_exe)
    # Run inference
    results = infer_exe.run(inference_program,
                            feed={feed_target_names[0]: infer_imgs},
                            fetch_list=fetch_targets)
    print("results:", results)
    result = results[0]
    print(result.shape)
    max_index = np.argmax(result)
    for k, v in name_dict.items():
        if max_index == v:
            print("prediction: class id [%d], name [%s], probability [%.4f]" %
                  (max_index, k, result[0][max_index]))
    # Show the original image
    img = Image.open(test_img)
    plt.imshow(img)
    plt.show()
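# Optional sketch: print the full softmax distribution instead of only the
# argmax, reusing the same name_dict mapping as above.
# for k, v in name_dict.items():
#     print("%s: %.4f" % (k, result[0][v]))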
1) Dataset
- Images of 1000+ 3D-printed parts covering 6 quality-defect types (blowhole, stain, crack, scratch, missing chunk, other). After data augmentation (rotation, horizontal flip, vertical flip; a flip sketch follows after this list) the sample count grows to over 10,000.
2) Project goal
Train a model on these samples and use it for quality inspection: decide whether a 3D-printed part has a defect and, if so, which defect class it belongs to.
3) Technical approach
- Deep learning (the sample count is sufficient)
- Model: image-classification CNN, conv/relu/pool --> conv/relu/pool --> conv/relu/pool --> fc --> dropout --> fc
- Hyperparameters: image size 200x200, batch size 16, fixed learning rate 0.0001
4) Results
After 120 epochs of training, accuracy is around 98% on the training set and about 97% on the test set.
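The preprocessing script above only implements the rotation part of the augmentation; a minimal sketch of the flip step, assuming the same per-class Imgs directory layout, could look like this (cv2.flip: 1 = horizontal, 0 = vertical; the helper name and file naming are illustrative):
import cv2
import os
def flip_one(sub_dir_path, img_file):
    im = cv2.imread(os.path.join(sub_dir_path, img_file))
    name, suffix = os.path.splitext(img_file)
    for code, tag in ((1, "h"), (0, "v")):
        flipped = cv2.flip(im, code)  # mirror horizontally / vertically
        cv2.imwrite(os.path.join(sub_dir_path, "%s_flip_%s%s" % (name, tag, suffix)), flipped)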