基于GoogleNet的图像分类

本文使用 GoogleNet 网络进行图像分类,主要用于教学参考。使用公开的鲜花数据集,数据集压缩包里包含五个文件夹,每个文件夹对应一种花卉,分别是雏菊、蒲公英、玫瑰、向日葵、郁金香,每种 690-890 张不等。

1.源代码:

import codecs  
import os  
import random  
import shutil  
from PIL import Image

# Split a "one sub-directory per class" image dataset into a training set and
# an evaluation set.
#
# Outputs, all written under ``all_file_dir``:
#   trainImageSet/   copies of the images chosen for training
#   evalImageSet/    copies of the images chosen for evaluation
#   train.txt        "<image path>\t<label id>" per training image
#   eval.txt         "<image path>\t<label id>" per evaluation image
#   label_list.txt   "<label id>\t<class directory name>" per class

# Probability that an image is assigned to the training split.
train_ratio = 4.0 / 5
all_file_dir = 'data/data2815'

# Every sub-directory is one class. Hidden directories and directories whose
# name ends with "Set" (the two output directories created below) are
# excluded so that re-running the script does not treat them as classes.
class_list = [c for c in os.listdir(all_file_dir)
              if os.path.isdir(os.path.join(all_file_dir, c))
              and not c.endswith('Set')
              and not c.startswith('.')]
class_list.sort()  # sorted names give stable label ids across runs
print(class_list)

train_image_dir = os.path.join(all_file_dir, "trainImageSet")
if not os.path.exists(train_image_dir):
    os.makedirs(train_image_dir)

eval_image_dir = os.path.join(all_file_dir, "evalImageSet")
if not os.path.exists(eval_image_dir):
    os.makedirs(eval_image_dir)

# All three list files are managed by one ``with`` so they are flushed and
# closed even if the loop below raises.
with codecs.open(os.path.join(all_file_dir, "train.txt"), 'w') as train_file, \
        codecs.open(os.path.join(all_file_dir, "eval.txt"), 'w') as eval_file, \
        codecs.open(os.path.join(all_file_dir, "label_list.txt"), "w") as label_list:
    for label_id, class_dir in enumerate(class_list):
        label_list.write("{0}\t{1}\n".format(label_id, class_dir))
        image_path_pre = os.path.join(all_file_dir, class_dir)
        for file_name in os.listdir(image_path_pre):
            src_path = os.path.join(image_path_pre, file_name)

            # Some files in the dataset cannot be opened as images; skip
            # them (and say so) instead of listing them for training.
            try:
                with Image.open(src_path):
                    pass
            except (IOError, OSError) as err:
                print("skip unreadable file {0}: {1}".format(src_path, err))
                continue

            # NOTE: images are copied into a shared directory under their
            # original file name, so two classes containing the same file
            # name would overwrite each other.
            if random.uniform(0, 1) <= train_ratio:
                dst_path = os.path.join(train_image_dir, file_name)
                list_file = train_file
            else:
                dst_path = os.path.join(eval_image_dir, file_name)
                list_file = eval_file
            shutil.copyfile(src_path, dst_path)
            list_file.write("{0}\t{1}\n".format(dst_path, label_id))
# -*- coding: UTF-8 -*-
"""
训练常用视觉基础网络,用于分类任务
需要将训练图片,类别文件 label_list.txt 放置在同一个文件夹下
程序会先读取 train.txt 文件获取类别数和图片数量
"""
from __future__ import absolute_import
from __future__ import division from __future__ import print_function import os import numpy as np import time import math import paddle import paddle.fluid as fluid import codecs import logging from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import Uniform from paddle.fluid.param_attr import ParamAttr from PIL import Image from PIL import ImageEnhance train_parameters = { "input_size": [3, 224, 224], "class_dim": -1, # 分类数,会在初始化自定义 reader 的时候获得 "image_count": -1, # 训练图片数量,会在初始化自定义 reader 的时候获得 "label_dict": {}, "data_dir": "data/data2815", # 训练数据存储地址 "train_file_list": "train.txt", "label_file": "label_list.txt", "save_freeze_dir": "./freeze-model", "save_persistable_dir": "./persistable-params", "continue_train": False, # 是否接着上一次保存的参数接着训练,优先级高于预训练模型 "pretrained": True, # 是否使用预训练的模型 "pretrained_dir": "data/data6593/GoogleNet_pretrained", "mode": "train", "num_epochs": 120, "train_batch_size": 30, "mean_rgb": [127.5, 127.5, 127.5], # 常用图片的三通道均值,通常来说需要先对训练数据做统计,此处仅取中间值 "use_gpu": True, "dropout_seed": None, "image_enhance_strategy": { # 图像增强相关策略 "need_distort": True, # 是否启用图像颜色增强 "need_rotate": True, # 是否需要增加随机角度 "need_crop": True, # 是否要增加裁剪 "need_flip": True, # 是否要增加水平随机翻转 "hue_prob": 0.5, "hue_delta": 18, "contrast_prob": 0.5, "contrast_delta": 0.5, "saturation_prob": 0.5, "saturation_delta": 0.5, "brightness_prob": 0.5, "brightness_delta": 0.125 }, "early_stop": { "sample_frequency": 50, "successive_limit": 3, "good_acc1": 0.92 }, "rsm_strategy": { "learning_rate": 0.001, "lr_epochs": [20, 40, 60, 80, 100], "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002] }, "momentum_strategy": { "learning_rate": 0.001, "lr_epochs": [20, 40, 60, 80, 100], "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002] }, "sgd_strategy": { "learning_rate": 0.001, "lr_epochs": [20, 40, 60, 80, 100], "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002] }, "adam_strategy": { "learning_rate": 0.002 } } class GoogleNet(): def __init__(self): self.params = train_parameters def 
conv_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): channels = input.shape[1] stdv = (3.0 / (filter_size**2 * channels))**0.5 param_attr = ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), name=name + "_weights") conv = fluid.layers.conv2d( input=input, num_filters=num_filters, filter_size=filter_size, stride=stride, padding=(filter_size - 1) // 2, groups=groups, act=act, param_attr=param_attr, bias_attr=False, name=name) return conv def xavier(self, channels, filter_size, name): stdv = (3.0 / (filter_size**2 * channels))**0.5 param_attr = ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv), name=name + "_weights") return param_attr def inception(self, input, channels, filter1, filter3R, filter3, filter5R, filter5, proj, name=None): conv1 = self.conv_layer( input=input, num_filters=filter1, filter_size=1, stride=1, act=None, name="inception_" + name + "_1x1") conv3r = self.conv_layer( input=input, num_filters=filter3R, filter_size=1, stride=1, act=None, name="inception_" + name + "_3x3_reduce") conv3 = self.conv_layer( input=conv3r, num_filters=filter3, filter_size=3, stride=1, act=None, name="inception_" + name + "_3x3") conv5r = self.conv_layer( input=input, num_filters=filter5R, filter_size=1, stride=1, act=None, name="inception_" + name + "_5x5_reduce") conv5 = self.conv_layer( input=conv5r, num_filters=filter5, filter_size=5, stride=1, act=None, name="inception_" + name + "_5x5") pool = fluid.layers.pool2d( input=input, pool_size=3, pool_stride=1, pool_padding=1, pool_type='max') convprj = fluid.layers.conv2d( input=pool, filter_size=1, num_filters=proj, stride=1, padding=0, name="inception_" + name + "_3x3_proj", param_attr=ParamAttr( name="inception_" + name + "_3x3_proj_weights"), bias_attr=False) cat = fluid.layers.concat(input=[conv1, conv3, conv5, convprj], axis=1) cat = fluid.layers.relu(cat) return cat def net(self, input, class_dim=1000): conv = self.conv_layer( input=input, 
num_filters=64, filter_size=7, stride=2, act=

转载于:https://www.cnblogs.com/bigdata-sanya/p/11348572.html

GoogleNet是一种深度卷积神经网络架构,常用于图像识别任务。而PaddlePaddle是一种深度学习开源平台,提供了丰富的神经网络模型和训练工具。GoogleNet和PaddlePaddle可以结合应用于手写数字识别。 手写数字识别是一种常见的图像分类任务,其目标是将手写数字的图像分类为0到9的数字。利用GoogleNet模型的卷积和池化层可以有效地提取图像的特征,而全连接层可以进一步将这些特征映射到0到9的类别上。这样,通过GoogleNet模型可以对手写数字进行准确的识别。 在PaddlePaddle中,我们可以使用提供的图像分类工具箱,利用GoogleNet模型进行手写数字识别的训练和推断。首先,我们可以利用PaddlePaddle的数据处理模块对手写数字的图像进行预处理,例如,将图像调整为统一的大小、进行灰度化处理等。然后,我们可以使用PaddlePaddle的模型定义模块构建GoogleNet模型,并设置合适的超参数和损失函数。接下来,我们可以使用PaddlePaddle的训练模块对GoogleNet模型进行训练,通过反向传播算法不断优化模型的参数。训练完成后,我们可以使用PaddlePaddle的预测模块对新的手写数字图像进行推断,即预测其所属的数字类别。 总结来说,借助PaddlePaddle这一深度学习开源平台提供的工具和模块,我们可以方便地构建和训练GoogleNet模型,并将其应用于手写数字识别,从而实现准确和高效的识别。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值