Extract CNN features using Caffe

Here we summarze several main steps in extracting CNN features using Caffe, including the extraction of LMDB format features by the pretrained AlexNet, and the method to convert that LMDB files into .Mat files for later manipulation.


1 Extracting LMDB files

This could be done by simply following the instructions in [1]. However we copy it here for convenience.

1.1  download models:
scripts/download_model_binary.py models/bvlc_reference_caffenet

1.2 select data

mkdir examples/_temp
find `pwd`/examples/images -type f -exec echo {} \; > examples/_temp/temp.txt
sed "s/$/ 0/" examples/_temp/temp.txt > examples/_temp/file_list.txt


1.3 Define the Feature Extraction Network Architecture
./data/ilsvrc12/get_ilsvrc_aux.sh  
cp examples/feature_extraction/imagenet_val.prototxt examples/_temp  

1.4 Extract features
./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 100 lmdb  

    
    
The last but one parameter denotes the mini-batches, and requires adjustments in practice.
 
convert lmdb into .mat files

Once the LMDB feature is obtained, we can readily convert it into .mat files, using the following two sub-routines written in python, which is a modified version of [2] :

The first one is a helper function named feat_helper_pb2.py:

<span style="font-size:10px;"># Generated by the protocol buffer compiler.  DO NOT EDIT!


from google.protobuf import descriptor
from google.protobuf import message
from google.protobuf import reflection
from google.protobuf import descriptor_pb2

# @@protoc_insertion_point(imports)



DESCRIPTOR = descriptor.FileDescriptor(
  name='datum.proto',
  package='feat_extract',
  serialized_pb='\n\x0b\x64\x61tum.proto\x12\x0c\x66\x65\x61t_extract\"i\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02')


_DATUM = descriptor.Descriptor(
  name='Datum',
  full_name='feat_extract.Datum',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    descriptor.FieldDescriptor(
      name='channels', full_name='feat_extract.Datum.channels', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    descriptor.FieldDescriptor(
      name='height', full_name='feat_extract.Datum.height', index=1,
      number=2, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    descriptor.FieldDescriptor(
      name='width', full_name='feat_extract.Datum.width', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    descriptor.FieldDescriptor(
      name='data', full_name='feat_extract.Datum.data', index=3,
      number=4, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value="",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    descriptor.FieldDescriptor(
      name='label', full_name='feat_extract.Datum.label', index=4,
      number=5, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
    descriptor.FieldDescriptor(
      name='float_data', full_name='feat_extract.Datum.float_data', index=5,
      number=6, type=2, cpp_type=6, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  options=None,
  is_extendable=False,
  extension_ranges=[],
  serialized_start=29,
  serialized_end=134,
)

DESCRIPTOR.message_types_by_name['Datum'] = _DATUM

class Datum(message.Message):
  __metaclass__ = reflection.GeneratedProtocolMessageType
  DESCRIPTOR = _DATUM

  # @@protoc_insertion_point(class_scope:feat_extract.Datum)


# @@protoc_insertion_point(module_scope)</span>

Then follows the real function for conversion, lmdb2mat.py,in the same directory as the helper function:

<span style="font-size:10px;">import lmdb
import sys
sys.path.append('/usr/lib/python2.7/dist-packages')
import feat_helper_pb2
import numpy as np
import scipy.io as sio
import time

def main(argv):
    lmdb_name = sys.argv[1]
    print "%s" % sys.argv[1]
    batch_num = int(sys.argv[2]);
    batch_size = int(sys.argv[3]);
    window_num = batch_num*batch_size;

    start = time.time()
    if 'db' not in locals().keys():
        db = lmdb.open(lmdb_name)
        txn= db.begin()
        cursor = txn.cursor()
        cursor.iternext()
        datum = feat_helper_pb2.Datum()

        keys = []
        values = []
        for key, value in enumerate( cursor.iternext_nodup()):
            keys.append(key)
            values.append(cursor.value())

    ft = np.zeros((window_num, int(sys.argv[4])))
    for im_idx in range(window_num):
        datum.ParseFromString(values[im_idx])
        ft[im_idx, :] = datum.float_data

    print 'time 1: %f' %(time.time() - start)
    sio.savemat(sys.argv[5], {'feats':ft})
    print 'time 2: %f' %(time.time() - start)
    print 'done!'

if __name__ == '__main__':
    import sys
    main(sys.argv)
</span>

Finally, we still need a bash file to call the above two sub-routines, as follows:

#!/usr/bin/env sh


LMDB=_temp/features
BATCHNUM=50
BATCHSIZE=100


# DIM=290400 




# DIM=43264 # conv5


DIM=4096
OUT=_temp/features.mat
python ./lmdb2mat.py $LMDB $BATCHNUM $BATCHSIZE $DIM $OUT


Citations

[1] http://caffe.berkeleyvision.org/gathered/examples/feature_extraction.html

[2] http://blog.csdn.net/lijiancheng0614/article/details/48180331



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值