Extract CNN features using Caffe

最新推荐文章于 2020-06-24 18:39:45 发布

guten_nacht

最新推荐文章于 2020-06-24 18:39:45 发布

阅读量653

点赞数

分类专栏： Caffe 文章标签： Caffe cnn

本文链接：https://blog.csdn.net/guten_nacht/article/details/49053091

版权

Caffe 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

Here we summarize the main steps for extracting CNN features with Caffe: extracting features in LMDB format with the pretrained AlexNet (CaffeNet) model, and converting those LMDB files into .mat files for later manipulation.

1 Extracting LMDB files

This can be done by simply following the instructions in [1]. However, we copy them here for convenience.

1.1 download models:

scripts/download_model_binary.py models/bvlc_reference_caffenet

1.2 select data

mkdir examples/_temp
find `pwd`/examples/images -type f -exec echo {} \; > examples/_temp/temp.txt
sed "s/$/ 0/" examples/_temp/temp.txt > examples/_temp/file_list.txt

1.3 Define the Feature Extraction Network Architecture

./data/ilsvrc12/get_ilsvrc_aux.sh  
cp examples/feature_extraction/imagenet_val.prototxt examples/_temp

1.4 Extract features

./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 100 lmdb

    
    
     
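The second-to-last argument (100 here) is the number of mini-batches to process; adjust it in practice so that the number of mini-batches times the batch size set in the prototxt covers all of your images.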
     
      
     
2 Converting LMDB into .mat files

Once the LMDB features are obtained, we can readily convert them into .mat files using the following two Python scripts, which are modified versions of the code in [2]:

The first one is a helper function named feat_helper_pb2.py:

# Generated by the protocol buffer compiler.  DO NOT EDIT!


from google.protobuf import descriptor
from google.protobuf import message
from google.protobuf import reflection
from google.protobuf import descriptor_pb2

# @@protoc_insertion_point(imports)



# File-level descriptor for 'datum.proto' (package 'feat_extract').
# `serialized_pb` is the serialized descriptor emitted by protoc; it must stay
# byte-identical, so regenerate this module instead of editing it by hand.
DESCRIPTOR = descriptor.FileDescriptor(
  name='datum.proto',
  package='feat_extract',
  serialized_pb='\n\x0b\x64\x61tum.proto\x12\x0c\x66\x65\x61t_extract\"i\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02')


# Message descriptor for feat_extract.Datum: six fields, numbered 1-6.
# The numeric `type` / `cpp_type` / `label` arguments are protobuf
# FieldDescriptor constants (type 5 = int32, 12 = bytes, 2 = float;
# label 1 = optional, 3 = repeated).
# NOTE(review): names and field numbers look like the leading fields of Caffe's
# own Datum message -- confirm against caffe.proto before relying on that.
_DATUM = descriptor.Descriptor(
  name='Datum',
  full_name='feat_extract.Datum',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # channels: optional int32, field number 1
    descriptor.FieldDescriptor(
      name='channels', full_name='feat_extract.Datum.channels', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    # height: optional int32, field number 2
    descriptor.FieldDescriptor(
      name='height', full_name='feat_extract.Datum.height', index=1,
      number=2, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    # width: optional int32, field number 3
    descriptor.FieldDescriptor(
      name='width', full_name='feat_extract.Datum.width', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    # data: optional bytes, field number 4
    descriptor.FieldDescriptor(
      name='data', full_name='feat_extract.Datum.data', index=3,
      number=4, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value="",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    # label: optional int32, field number 5
    descriptor.FieldDescriptor(
      name='label', full_name='feat_extract.Datum.label', index=4,
      number=5, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    # float_data: repeated float, field number 6 -- the field lmdb2mat.py
    # reads to obtain each feature vector.
    descriptor.FieldDescriptor(
      name='float_data', full_name='feat_extract.Datum.float_data', index=5,
      number=6, type=2, cpp_type=6, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  extension_ranges=[],
  serialized_start=29,
  serialized_end=134,
)

# Register the message descriptor on the file descriptor under its short name.
DESCRIPTOR.message_types_by_name['Datum'] = _DATUM

# Concrete message class for feat_extract.Datum. The class body is empty on
# purpose: the GeneratedProtocolMessageType metaclass is what populates the
# class from DESCRIPTOR.
# NOTE(review): the `__metaclass__` attribute is only honoured by Python 2;
# under Python 3 it is an ordinary class attribute and the metaclass is not
# applied, so this module would need regenerating with a newer protoc.
class Datum(message.Message):
  __metaclass__ = reflection.GeneratedProtocolMessageType
  DESCRIPTOR = _DATUM

  # @@protoc_insertion_point(class_scope:feat_extract.Datum)


# @@protoc_insertion_point(module_scope)

Next comes the actual conversion script, lmdb2mat.py, which must be placed in the same directory as the helper module:

import lmdb
import sys
sys.path.append('/usr/lib/python2.7/dist-packages')
import feat_helper_pb2
import numpy as np
import scipy.io as sio
import time

def main(argv):
    """Convert features stored in a Caffe LMDB into a MATLAB .mat file.

    The first ``batch_num * batch_size`` records of the database are parsed as
    ``Datum`` messages and their ``float_data`` vectors are stacked row by row
    into a matrix that is saved under the variable name ``feats``.

    Args:
        argv: Command-line style argument list:
            ``[prog, lmdb_path, batch_num, batch_size, feature_dim, out_mat]``.

    Raises:
        SystemExit: If the wrong number of arguments is given.
        IndexError: If the database holds fewer than
            ``batch_num * batch_size`` records.
        ValueError: If a numeric argument is not an integer, or a record's
            ``float_data`` length differs from ``feature_dim``.
    """
    if len(argv) != 6:
        sys.exit('usage: %s LMDB_PATH BATCH_NUM BATCH_SIZE FEATURE_DIM OUT_MAT'
                 % (argv[0] if argv else 'lmdb2mat.py'))

    lmdb_name = argv[1]
    print("%s" % lmdb_name)
    batch_num = int(argv[2])
    batch_size = int(argv[3])
    feat_dim = int(argv[4])
    out_path = argv[5]
    window_num = batch_num * batch_size

    start = time.time()
    ft = np.zeros((window_num, feat_dim))
    datum = feat_helper_pb2.Datum()

    # Open read-only so a mistyped path fails instead of silently creating an
    # empty database, and always release the environment when finished.
    env = lmdb.open(lmdb_name, readonly=True)
    try:
        with env.begin() as txn:
            filled = 0
            # Stream records straight into the matrix rather than buffering
            # every serialized value in memory first.
            for _key, value in txn.cursor().iternext_nodup():
                if filled == window_num:
                    break
                datum.ParseFromString(value)
                ft[filled, :] = datum.float_data
                filled += 1
    finally:
        env.close()

    if filled < window_num:
        raise IndexError(
            'LMDB %r holds only %d records but batch_num*batch_size = %d'
            % (lmdb_name, filled, window_num))

    print('time 1: %f' % (time.time() - start))
    sio.savemat(out_path, {'feats': ft})
    print('time 2: %f' % (time.time() - start))
    print('done!')


if __name__ == '__main__':
    main(sys.argv)

Finally, we need a small shell script that runs lmdb2mat.py (which in turn imports the helper module), as follows:

#!/usr/bin/env sh
# Driver for lmdb2mat.py: converts the extracted LMDB features to a .mat file.

# Input LMDB directory and output .mat path.
LMDB=_temp/features
OUT=_temp/features.mat

# lmdb2mat.py reads the first BATCHNUM * BATCHSIZE records from the LMDB.
BATCHNUM=50
BATCHSIZE=100

# Length of one feature vector; it must match the extracted blob.
# Alternatives kept from the original script:
# DIM=290400
# DIM=43264 # conv5
DIM=4096

# Quote every expansion so paths containing spaces or glob characters are
# passed through as single arguments.
python ./lmdb2mat.py "$LMDB" "$BATCHNUM" "$BATCHSIZE" "$DIM" "$OUT"

Citations

[1] http://caffe.berkeleyvision.org/gathered/examples/feature_extraction.html

[2] http://blog.csdn.net/lijiancheng0614/article/details/48180331

guten_nacht

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Extract CNN features using Caffe

Here we summarize the main steps for extracting CNN features with Caffe: extracting features in LMDB format with the pretrained AlexNet (CaffeNet) model, and converting those LMDB files into .
复制链接

扫一扫