图像分类,使用GoogleNet网络,教学参考意义更多。使用公开鲜花数据集,数据集压缩包里包含五个文件夹,每个文件夹一种花卉,分别是雏菊、蒲公英、玫瑰、向日葵、郁金香,每种各690-890张不等。
1.源代码:
# Split the public flower dataset into training and evaluation sets.
#
# Scans data/data2815 for one sub-directory per flower class, writes
# label_list.txt (label id -> class name), and randomly copies each
# readable image into trainImageSet or evalImageSet (~80/20), recording
# "<path>\t<label>" rows in train.txt / eval.txt.
#
# Fixes vs. the original: output files are managed with a `with` block
# (always closed, even on error) and PIL image handles are closed via a
# context manager instead of being leaked.
import codecs
import os
import random
import shutil

from PIL import Image

train_ratio = 4.0 / 5  # fraction of images sent to the training set

all_file_dir = 'data/data2815'
class_list = [c for c in os.listdir(all_file_dir)
              if os.path.isdir(os.path.join(all_file_dir, c))
              and not c.endswith('Set') and not c.startswith('.')]
class_list.sort()
print(class_list)

train_image_dir = os.path.join(all_file_dir, "trainImageSet")
if not os.path.exists(train_image_dir):
    os.makedirs(train_image_dir)
eval_image_dir = os.path.join(all_file_dir, "evalImageSet")
if not os.path.exists(eval_image_dir):
    os.makedirs(eval_image_dir)

with codecs.open(os.path.join(all_file_dir, "train.txt"), 'w') as train_file, \
        codecs.open(os.path.join(all_file_dir, "eval.txt"), 'w') as eval_file, \
        codecs.open(os.path.join(all_file_dir, "label_list.txt"), "w") as label_list:
    label_id = 0
    for class_dir in class_list:
        label_list.write("{0}\t{1}\n".format(label_id, class_dir))
        image_path_pre = os.path.join(all_file_dir, class_dir)
        for file in os.listdir(image_path_pre):
            src_path = os.path.join(image_path_pre, file)
            try:
                # Opening the image validates it; the `with` closes the
                # handle immediately so we don't leak file descriptors.
                with Image.open(src_path):
                    pass
                if random.uniform(0, 1) <= train_ratio:
                    dst_path = os.path.join(train_image_dir, file)
                    shutil.copyfile(src_path, dst_path)
                    train_file.write("{0}\t{1}\n".format(dst_path, label_id))
                else:
                    dst_path = os.path.join(eval_image_dir, file)
                    shutil.copyfile(src_path, dst_path)
                    eval_file.write("{0}\t{1}\n".format(dst_path, label_id))
            except Exception:
                # Some files in the archive are unreadable; skipping them
                # here doubles as a light data-cleaning pass.
                pass
        label_id += 1
# -*- coding: UTF-8 -*-
"""
训练常用视觉基础网络,用于分类任务
需要将训练图片,类别文件 label_list.txt 放置在同一个文件夹下
程序会先读取 train.txt 文件获取类别数和图片数量
"""
from __future__ import absolute_import
from __future__ import division from __future__ import print_function import os import numpy as np import time import math import paddle import paddle.fluid as fluid import codecs import logging from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import Uniform from paddle.fluid.param_attr import ParamAttr from PIL import Image from PIL import ImageEnhance train_parameters = { "input_size": [3, 224, 224], "class_dim": -1, # 分类数,会在初始化自定义 reader 的时候获得 "image_count": -1, # 训练图片数量,会在初始化自定义 reader 的时候获得 "label_dict": {}, "data_dir": "data/data2815", # 训练数据存储地址 "train_file_list": "train.txt", "label_file": "label_list.txt", "save_freeze_dir": "./freeze-model", "save_persistable_dir": "./persistable-params", "continue_train": False, # 是否接着上一次保存的参数接着训练,优先级高于预训练模型 "pretrained": True, # 是否使用预训练的模型 "pretrained_dir": "data/data6593/GoogleNet_pretrained", "mode": "train", "num_epochs": 120, "train_batch_size": 30, "mean_rgb": [127.5, 127.5, 127.5], # 常用图片的三通道均值,通常来说需要先对训练数据做统计,此处仅取中间值 "use_gpu": True, "dropout_seed": None, "image_enhance_strategy": { # 图像增强相关策略 "need_distort": True, # 是否启用图像颜色增强 "need_rotate": True, # 是否需要增加随机角度 "need_crop": True, # 是否要增加裁剪 "need_flip": True, # 是否要增加水平随机翻转 "hue_prob": 0.5, "hue_delta": 18, "contrast_prob": 0.5, "contrast_delta": 0.5, "saturation_prob": 0.5, "saturation_delta": 0.5, "brightness_prob": 0.5, "brightness_delta": 0.125 }, "early_stop": { "sample_frequency": 50, "successive_limit": 3, "good_acc1": 0.92 }, "rsm_strategy": { "learning_rate": 0.001, "lr_epochs": [20, 40, 60, 80, 100], "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002] }, "momentum_strategy": { "learning_rate": 0.001, "lr_epochs": [20, 40, 60, 80, 100], "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002] }, "sgd_strategy": { "learning_rate": 0.001, "lr_epochs": [20, 40, 60, 80, 100], "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002] }, "adam_strategy": { "learning_rate": 0.002 } } class GoogleNet(): def __init__(self): self.params = train_parameters def 
conv_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): channels = input.shape[1] stdv = (3.0 / (filter_size**2 * channels))**0.5 param_attr = ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), name=name + "_weights") conv = fluid.layers.conv2d( input=input, num_filters=num_filters, filter_size=filter_size, stride=stride, padding=(filter_size - 1) // 2, groups=groups, act=act, param_attr=param_attr, bias_attr=False, name=name) return conv def xavier(self, channels, filter_size, name): stdv = (3.0 / (filter_size**2 * channels))**0.5 param_attr = ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), name=name + "_weights") return param_attr def inception(self, input, channels, filter1, filter3R, filter3, filter5R, filter5, proj, name=None): conv1 = self.conv_layer( input=input, num_filters=filter1, filter_size=1, stride=1, act=None, name="inception_" + name + "_1x1") conv3r = self.conv_layer( input=input, num_filters=filter3R, filter_size=1, stride=1, act=None, name="inception_" + name + "_3x3_reduce") conv3 = self.conv_layer( input=conv3r, num_filters=filter3, filter_size=3, stride=1, act=None, name="inception_" + name + "_3x3") conv5r = self.conv_layer( input=input, num_filters=filter5R, filter_size=1, stride=1, act=None, name="inception_" + name + "_5x5_reduce") conv5 = self.conv_layer( input=conv5r, num_filters=filter5, filter_size=5, stride=1, act=None, name="inception_" + name + "_5x5") pool = fluid.layers.pool2d( input=input, pool_size=3, pool_stride=1, pool_padding=1, pool_type='max') convprj = fluid.layers.conv2d( input=pool, filter_size=1, num_filters=proj, stride=1, padding=0, name="inception_" + name + "_3x3_proj", param_attr=ParamAttr( name="inception_" + name + "_3x3_proj_weights"), bias_attr=False) cat = fluid.layers.concat(input=[conv1, conv3, conv5, convprj], axis=1) cat = fluid.layers.relu(cat) return cat def net(self, input, class_dim=1000): conv = self.conv_layer( input=input, 
num_filters=64, filter_size=7, stride=2, act=