This post walks through taking a caffemodel pretrained with CaffeNet on the large ImageNet dataset and then fine-tuning that caffemodel on an image style dataset (Flickr "style"). The main steps are as follows:
#take a model pretrained by someone else and fine-tune it on your own dataset (fine-tuning)
#fine-tuning starts from a caffemodel that has already been trained to a good point on a large dataset and continues training from there; this beats random initialization, because the pretrained weights are likely already close to a good optimum!
#it saves time and compute, and it also works around not having a large dataset of your own
#here we take the caffemodel pretrained on ImageNet and fine-tune it for style recognition
#Step 1: load the Caffe modules and prepare the data, mainly the style dataset
caffe_root = '../../'  #set this to the Caffe root directory relative to your own working directory; my notebook lives in caffe/examples/test, hence '../../'
import sys
#add Caffe's Python interface to the module search path
sys.path.insert(0,caffe_root + 'python')
import caffe
caffe.set_device(0)
caffe.set_mode_gpu()
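#if no GPU is available, the rest of the code also runs (more slowly) in CPU mode:
#caffe.set_mode_cpu()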
#load the remaining modules
import numpy as np
from pylab import *
%matplotlib inline
import tempfile
#define the image deprocessing function used for visualization
def deprocess_net_image(image):
    image = image.copy()              # don't modify the input destructively
    image = image[::-1]               # BGR -> RGB
    image = image.transpose(1, 2, 0)  # CHW -> HWC
    image += [123, 117, 104]          # (approximately) undo the mean subtraction

    # clamp values to [0, 255]
    image[image < 0], image[image > 255] = 0, 255

    # round and cast from float to uint8
    image = np.round(image)
    image = np.require(image, dtype=np.uint8)
    return image
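#a minimal usage sketch (illustrative only, not part of the original tutorial): the
#function expects a 3xHxW float array in BGR channel order with the ImageNet mean
#already subtracted, e.g. a blob taken from the net's 'data' layer; fake_blob below
#is a hypothetical stand-in for such a blob
fake_blob = 50 * np.random.randn(3, 227, 227)
vis = deprocess_net_image(fake_blob)
print vis.shape, vis.dtype  # (227, 227, 3) uint8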
#Step 2: download the data. Out of the ~80K Flickr style images we download 2000, covering 5 of the 20 style labels; to download the full dataset instead, set full_dataset=True
#also download the ImageNet mean file, the pretrained caffemodel, and so on
full_dataset = False
if full_dataset:
    NUM_STYLE_IMAGES = NUM_STYLE_LABELS = -1
else:
    NUM_STYLE_IMAGES = 2000
    NUM_STYLE_LABELS = 5
import os
#change directory = chdir; uncomment the lines below on the first run to fetch the data and the pretrained model
#os.chdir(caffe_root)
#!data/ilsvrc12/get_ilsvrc_aux.sh
#!scripts/download_model_binary.py models/bvlc_reference_caffenet
#!python examples/finetune_flickr_style/assemble_data.py \
# --workers=-1 --seed=1701 \
# --images=$NUM_STYLE_IMAGES --label=$NUM_STYLE_LABELS
#os.chdir('examples')
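#optional sanity check (a sketch, assuming assemble_data.py wrote its train/test
#image lists to data/flickr_style/ as in the stock Caffe tree):
for split in ['train', 'test']:
    list_file = caffe_root + 'data/flickr_style/%s.txt' % split
    if os.path.exists(list_file):
        print split, 'list:', sum(1 for _ in open(list_file)), 'entries'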
#define the parameters, i.e. the path to the pretrained model weights
import os
weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
assert os.path.exists(weights)
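#optional: sanity-check the weights by loading them into the stock CaffeNet deploy
#definition that ships alongside the caffemodel (a minimal sketch; check_net is an
#illustrative name, not a variable used later in the tutorial):
check_net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',
                      weights, caffe.TEST)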
#load the ImageNet labels
imagenet_label_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
imagenet_labels = list(np.loadtxt(imagenet_label_file,str,delimiter='\t'))
assert len(imagenet_labels) == 1000
print 'loaded imagenet labels:\n','\n'.join(imagenet_labels[:10]+['...'])
#load the style labels
style_label_file = caffe_root + 'examples/finetune_flickr_style/style_names.txt'
style_labels = list(np.loadtxt(style_label_file,str,delimiter='\n'))
if NUM_STYLE_LABELS > 0:
    style_labels = style_labels[:NUM_STYLE_LABELS]
print '\nLoaded style labels:\n',','.join(style_labels)
loaded imagenet labels:
n01440764 tench, Tinca tinca
n01443537 goldfish, Carassius auratus
n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
n01491361 tiger shark, Galeocerdo cuvieri
n01494475 hammerhead, hammerhead shark
n01496331 electric ray, crampfish, numbfish, torpedo
n01498041 stingray
n01514668 cock
n01514859 hen
n01518878 ostrich, Struthio camelus
…
Loaded style labels:
Detailed,Pastel,Melancholy,Noir,HDR
#define the network and train it
from caffe import layers as L
from caffe import params as P
weight_param = dict(lr_mult=1, decay_mult=1)
bias_param = dict(lr_mult=2,decay_mult=0)
learned_param = [weight_param, bias_param]
frozen_param = [dict(lr_mult=0)]*2
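#lr_mult scales the solver's base learning rate for a parameter blob and decay_mult
#scales its weight decay, so lr_mult=0 freezes a blob at its pretrained value:
#layers built with learned_param get fine-tuned, while layers built with
#frozen_param keep their ImageNet weights unchanged.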
#note: every occurrence of the parameter name 'filter' must be changed to 'filler' (weight_filler / bias_filler); the version on the official site has this error!
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1, param=learned_param,
              weight_filler=dict(type='gaussian', std=0.01),
              bias_filler=dict(type='constant', value=0.1)):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride, num_output=nout,
                         pad=pad, group=group, param=param,
                         weight_filler=weight_filler, bias_filler=bias_filler)
    return conv, L.ReLU(conv, in_place=True)

def fc_relu(bottom, nout, param=learned_param,
            weight_filler=dict(type='gaussian', std=0.005),
            bias_filler=dict(type='constant', value=0.1)):
    fc = L.InnerProduct(bottom, num_output=nout, param=param,
                        weight_filler=weight_filler, bias_filler=bias_filler)
    return fc, L.ReLU(fc, in_place=True)
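#the two helpers above get composed into the full CaffeNet-style architecture later
#on; a minimal sketch of that composition (illustrative only: the max_pool helper
#and the dummy input below are assumptions, not the tutorial's full network):
def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

n = caffe.NetSpec()
n.data = L.DummyData(shape=dict(dim=[1, 3, 227, 227]))
n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4)
n.pool1 = max_pool(n.relu1, 3, stride=2)
n.fc6, n.relu6 = fc_relu(n.pool1, 4096)
print n.to_proto()  #dump the generated prototxt to inspect the layer definitions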

To summarize: this post shows how to take the CaffeNet model pretrained on ImageNet with Caffe and fine-tune it on an image style dataset. In the full experiment, the model initialized from ImageNet weights reaches 55.6% style-recognition accuracy, versus 45.8% for a randomly initialized model.