文章目录
深度学习-07(PaddlePaddle图像分类)
图像分类概述
概述
什么是图像分类
图像分类粒度
图像分类发展历程
图像分类问题的挑战
常用数据集介绍
MNIST数据集
CIFAR10数据集
ImageNet数据集
FDDB人脸数据集
WIDER Face数据集
图像分类的应用
利用CNN实现图片分类
思路及实现
数据集介绍
总体步骤
数据预处理
模型结构
案例1: 利用CNN实现图片分类
- 数据预处理部分:
# 02_fruits.py
# 利用深层CNN实现水果分类
# 数据集:爬虫从百度图片搜索结果爬取
# 内容:包含1306张水果图片,共5个类别(苹果288张、香蕉275张、葡萄216张、橙子276张、梨251张)
############################# 预处理部分 ################################
import os
# Maps each fruit class name (also used as its sub-directory name) to an
# integer label.
name_dict = {"apple": 0, "banana": 1, "grape": 2, "orange": 3, "pear": 4}
data_root_path = "data/fruits/"  # directory that holds the dataset samples
test_file_path = data_root_path + "test.txt"  # test-set listing file
train_file_path = data_root_path + "train.txt"  # training-set listing file
# Images collected per class: key = fruit name, value = list of image paths.
name_data_list = {}
def save_train_test_file(path, name):
    """Record an image path under its class name in ``name_data_list``.

    :param path: path of the image file
    :param name: class (fruit) name used as the dictionary key
    """
    # setdefault creates the per-class list the first time a class is seen;
    # the path is then appended to that list either way.
    name_data_list.setdefault(name, []).append(path)
# Walk every sub-directory of the dataset root and register its images,
# using the sub-directory name as the class name.
# os.listdir() returns entries in arbitrary order, so the listings are sorted
# to keep the later "every 10th image goes to the test set" split reproducible.
dirs = sorted(os.listdir(data_root_path))
for d in dirs:
    full_path = data_root_path + d  # data_root_path already ends with "/"
    if not os.path.isdir(full_path):
        # Plain files in the root (e.g. the train/test listing files) are
        # not class folders; ignore them.
        continue
    imgs = sorted(os.listdir(full_path))  # every file inside the class folder
    for img in imgs:
        save_train_test_file(full_path + "/" + img, d)
# Write the contents of name_data_list to the train / test listing files.
# Opening both files once in "w" mode truncates any previous content, so no
# separate clearing step is needed and the files are not reopened per image.
with open(test_file_path, "w") as test_f, open(train_file_path, "w") as train_f:
    for name, img_list in name_data_list.items():
        num = len(img_list)  # number of images in this class
        print("%s: %d张" % (name, num))
        for i, img in enumerate(img_list):
            # One record per line: "<image path>\t<integer label>".
            line = "%s\t%d\n" % (img, name_dict[name])
            if i % 10 == 0:
                # Images 0, 10, 20, ... of each class go to the test set.
                test_f.write(line)
            else:
                # Everything else goes to the training set.
                train_f.write(line)
print("数据预处理完成.")
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 模型训练评估
import paddle
import paddle.fluid as fluid
import numpy
import sys
import os
from multiprocessing import cpu_count
import time
import matplotlib.pyplot as plt
def train_mapper(sample):
    """Load and preprocess the image referenced by one training sample.

    :param sample: tuple of (image path, class label)
    :return: tuple of (float32 image data divided by 255, class label)
    :raises FileNotFoundError: if the image path does not exist
    """
    img, label = sample  # img is the image path, label is the class
    if not os.path.exists(img):
        print(img, "图片不存在")
        # Stop here with a clear error instead of handing a missing file to
        # the image loader and failing later with an unrelated message.
        raise FileNotFoundError(img)
    # Read the image content.
    img = paddle.dataset.image.load_image(img)
    # Apply a simple transform so every image ends up with a fixed size.
    img = paddle.dataset.image.simple_transform(
        im=img,           # raw image data
        resize_size=100,  # size the image is resized to
        crop_size=100,    # size of the crop
        is_color=True,    # colour image
        is_train=True,    # training mode (random crop, per the original note)
    )
    # Normalise: convert to float32 and divide every value by 255.
    img = img.astype("float32") / 255.0
    return img, label
def train_r(train_list, buffered_size=1024):
    """Build a reader that yields preprocessed training samples.

    :param train_list: path of the listing file; each non-empty line is
        "<image path>\t<integer label>"
    :param buffered_size: buffer size handed to ``xmap_readers``
    :return: the reader created by ``paddle.reader.xmap_readers``, which
        passes every (path, label) pair through ``train_mapper``
    """
    def reader():
        with open(train_list, "r") as f:
            for line in f:
                line = line.strip()  # drops the newline and outer whitespace
                if not line:
                    # A blank line (e.g. a trailing one) carries no sample;
                    # skip it instead of failing on tuple unpacking.
                    continue
                # The label is always the last tab-separated field, so split
                # from the right to tolerate tabs inside the path.
                img_path, lab = line.rsplit("\t", 1)
                yield img_path, int(lab)  # image path, class as an integer

    return paddle.reader.xmap_readers(
        train_mapper,   # post-processes what reader() yields
        reader,         # source of (path, label) pairs
        cpu_count(),    # number of worker threads
        buffered_size,  # buffer size
    )
# Reader pipeline: raw reader -> shuffling reader -> batching reader.
BATCH_SIZE = 32  # samples per batch
trainer_reader = train_r(train_list=train_file_path)  # raw reader
random_train_reader = paddle.reader.shuffle(reader=trainer_reader,
                                            buf_size=1300)  # shuffling reader
batch_train_reader = paddle.batch(random_train_reader,
                                  batch_size=BATCH_SIZE)  # batching reader
# Input variables of the network.
image = fluid.layers.data(name="image", shape=[3, 100, 100], dtype="float32")
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
# Build the CNN.
# Structure: input --> conv/act/pool/dropout --> conv/act/pool/dropout -->
#            conv/act/pool/dropout --> fc --> dropout --> fc(softmax)
def convolution_neural_network(image, type_size):
    """Create the CNN used for classification.

    :param image: input image variable
    :param type_size: number of output classes
    :return: output of the final softmax fully-connected layer
    """
    # Three conv/activation/pool/dropout groups that differ only in the
    # number of filters; each group consumes the previous group's dropout
    # output (the first one consumes the raw image).
    drop = image
    for num_filters in (32, 64, 64):
        conv_pool = fluid.nets.simple_img_conv_pool(
            input=drop,
            filter_size=3,            # convolution kernel size
            num_filters=num_filters,  # number of convolution kernels
            pool_size=2,              # 2*2 pooling region
            pool_stride=2,            # pooling stride
            act="relu",               # activation function
        )
        drop = fluid.layers.dropout(x=conv_pool, dropout_prob=0.5)
    # Fully-connected layer followed by dropout.
    fc = fluid.layers.fc(input=drop, size=512, act="relu")
    drop = fluid.layers.dropout(x=fc, dropout_prob=0.5)
    # Output layer: one value per class, softmax activation.
    predict = fluid.layers.fc(input=drop,
                              size=type_size,
                              act="softmax")
    return predict
# Create the CNN for the 5 fruit classes.
predict = convolution_neural_network(image=image, type_size=5)
# Loss function: cross entropy between prediction and ground truth,
# averaged over the batch.
cost = fluid.layers.cross_entropy(input=predict,
                                  label=label)
avg_cost = fluid.layers.mean(cost)
# Accuracy of the prediction against the ground truth.
accuracy = fluid.layers.accuracy(input=predict,
                                 label=label)
# Optimizer: minimise the average loss.
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(avg_cost)
# Executor: train on GPU 0 when this Paddle build has CUDA support,
# otherwise fall back to the CPU instead of failing on CUDAPlace.
if fluid.core.is_compiled_with_cuda():
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# Feeder that converts reader batches into the image/label inputs.
feeder = fluid.DataFeeder(feed_list=[image, label],
                          place=place)
model_save_dir = "model/fruits/"  # where the trained model is saved
costs = []  # recorded loss values
accs = []  # recorded accuracy values
times = 0  # running count of training iterations
batches = []  # iteration numbers at which values were recorded
# Training: 40 passes over the batched, shuffled training reader.
for pass_id in range(40):
    for batch_id, data in enumerate(batch_train_reader()):
        times += 1
        train_cost, train_acc = exe.run(program=fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[avg_cost, accuracy])
        # NOTE(review): the original indentation was lost; logging and
        # recording are assumed to both happen every 20th batch - confirm.
        if batch_id % 20 == 0:
            print("pass_id:%d, step:%d, cost:%f, acc:%f" %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))
            accs.append(train_acc[0])  # record accuracy
            costs.append(train_cost[0])  # record loss
            batches.append(times)  # record iteration number
# After training, save the model for inference.
os.makedirs(model_save_dir, exist_ok=True)  # no error if it already exists
fluid.io.save_inference_model(dirname=model_save_dir,
                              feeded_var_names=["image"],
                              target_vars=[predict],
                              executor=exe)
print("训练保存模型完成!")
# Visualise the training process: loss and accuracy against iteration number.
plt.title("training", fontsize=24)
plt.xlabel("iter", fontsize=20)
plt.ylabel("cost/acc", fontsize=20)
plt.plot(batches, costs, color='red', label="Training Cost")
plt.plot(batches, accs, color='green', label="Training Acc")
plt.legend()
plt.grid()
plt.savefig("train.png")  # save before show()
plt.show()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
- 154
- 155
- 156
- 157
- 158
- 159
- 160
- 161
- 162
- 163
- 164
- 165
- 166
- 预测
from PIL import Image
# Executor used for inference (runs on the CPU).
place = fluid.CPUPlace()
infer_exe = fluid.Executor(place)
model_save_dir = "model/fruits/"  # directory the trained model was saved to
def load_img(path):
    """Load one image for prediction.

    :param path: path of the image file
    :return: float32 image data divided by 255, produced by
        ``load_and_transform`` with resize size 100, crop size 100 and
        is_train=False
    """
    img = paddle.dataset.image.load_and_transform(path, 100, 100, False).astype("float32")
    img = img / 255.0  # same scaling as applied to the training images
    return img
infer_imgs = []  # image data to run prediction on
test_img = "./data/grape_1.png"  # image to classify
infer_imgs.append(load_img(test_img))  # load it and queue it for prediction
infer_imgs = numpy.array(infer_imgs)  # convert the list to an array
# Load the saved inference model.
infer_program, feed_target_names, fetch_targets = fluid.io.load_inference_model(
    model_save_dir, infer_exe)
# Run the prediction.
results = infer_exe.run(infer_program,
                        feed={feed_target_names[0]: infer_imgs},  # image data
                        fetch_list=fetch_targets)
print(results)
result = numpy.argmax(results[0])  # index of the largest value in the output
for k, v in name_dict.items():  # translate the numeric label back to a name
    if result == v:
        print("预测结果:", k)
# Show the image that was classified.
img = Image.open(test_img)
plt.imshow(img)
plt.show()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36