hdf5格式的matlab读写操作

最近要用caffe处理一个multi-label的回归问题,也就是输出是一个向量,而不是一个具体的数值。这个时候之前的leveldb格式就不奏效了,因为caffe源代码里面默认label是一个数值。网上搜了一下,都说hdf5格式可以解决这个问题。


在caffe里面,有一个hdf5的data layer作为数据输入,从源代码来看,它对label的维数没有做限制,剩下的问题就是如何生成hdf5格式的数据。目前只找到了github上有人分享的、用matlab写的hdf5数据读写代码,在这里把代码粘贴出来。

testHDF5.m


 
 
  %% WRITING TO HDF5
  % Demo script: writes randomly generated data/labels to an HDF5 file in
  % batches through store2hdf5, reads a slice back to verify it, and finally
  % writes a list file naming the HDF5 file (for use as an HDF5 data layer source).
  filename = 'trial.h5';
  list_filename = 'list.txt';
  num_total_samples = 10000;

  % to simulate data being read from disk / generated etc.
  data_disk = rand(5, 5, 1, num_total_samples);
  label_disk = rand(10, num_total_samples);

  chunksz = 100;          % samples written per batch (also the HDF5 chunk size)
  created_flag = false;   % becomes true once the file has been created
  totalct = 0;            % number of samples stored in the file so far

  for batchno = 1:num_total_samples/chunksz
    fprintf('batch no. %d\n', batchno);
    last_read = (batchno - 1) * chunksz;
    batch_idx = last_read + 1 : last_read + chunksz;

    % to simulate maximum data to be held in memory before dumping to hdf5 file
    batchdata = data_disk(:, :, 1, batch_idx);
    batchlabs = label_disk(:, batch_idx);

    % store to hdf5: append right after the samples already written
    startloc = struct('dat', [1, 1, 1, totalct + 1], 'lab', [1, totalct + 1]);
    curr_dat_sz = store2hdf5(filename, batchdata, batchlabs, ~created_flag, startloc, chunksz);
    created_flag = true;        % flag set so that file is created only once
    totalct = curr_dat_sz(end); % updated dataset size (#samples)
  end

  % display structure of the stored HDF5 file
  h5disp(filename);

  %% READING FROM HDF5
  % Read data and labels for samples #1000 to 1999
  data_rd = h5read(filename, '/data', [1 1 1 1000], [5, 5, 1, 1000]);
  label_rd = h5read(filename, '/label', [1 1000], [10, 1000]);
  fprintf('Testing ...\n');
  try
    % values were stored as single, so compare against single-precision originals
    assert(isequal(data_rd, single(data_disk(:, :, :, 1000:1999))), 'Data do not match');
    assert(isequal(label_rd, single(label_disk(:, 1000:1999))), 'Labels do not match');
    fprintf('Success!\n');
  catch err
    fprintf('Test failed ...\n');
    getReport(err)
  end
  %delete(filename);

  % CREATE list.txt containing filename, to be used as source for HDF5_DATA_LAYER
  [list_fid, fopen_msg] = fopen(list_filename, 'w');
  if list_fid < 0
    % fail here with a clear message instead of a later fprintf error on an invalid file id
    error('Could not open %s for writing: %s', list_filename, fopen_msg);
  end
  fprintf(list_fid, '%s', filename);
  fclose(list_fid);
  fprintf('HDF5 filename listed in %s \n', list_filename);

  % NOTE: In net definition prototxt, use list.txt as input to HDF5_DATA as:
  % layers {
  % name: "data"
  % type: HDF5_DATA
  % top: "data"
  % top: "labelvec"
  % hdf5_data_param {
  % source: "/path/to/list.txt"
  % batch_size: 64
  % }
  % }
  % NOTE(review): the datasets written above are named '/data' and '/label';
  % whether the top name "labelvec" must match the dataset name depends on the
  % Caffe version in use -- confirm before relying on this snippet.


store2hdf5.m


 
 
  function [curr_dat_sz, curr_lab_sz] = store2hdf5(filename, data, labels, create, startloc, chunksz)
  % STORE2HDF5  Write (or append) a batch of data and labels to an HDF5 file.
  %
  % The file holds two single-precision datasets, '/data' and '/label', whose
  % last dimension is unlimited so that further batches can be appended.
  %
  % Inputs:
  %   filename  path of the HDF5 file to create or append to
  %   *data*    W*H*C*N matrix of images; should be normalized (e.g. to lie
  %             between 0 and 1) beforehand
  %   *labels*  D*N matrix of labels (D labels per sample)
  %   *create*  [0/1] whether to create the file newly or to append to a
  %             previously created file; useful to store information in
  %             batches when a dataset is too big to be held in memory
  %             (default: 1). An existing file is deleted in create mode.
  %   *startloc* struct with fields 'dat' and 'lab': the point at which to
  %             start writing data and labels. By default,
  %             if create=1 (create mode), startloc.dat=[1 1 1 1] and startloc.lab=[1 1];
  %             if create=0 (append mode), startloc.dat=[1 1 1 K+1] and
  %             startloc.lab=[1 K+1], where K is the current number of
  %             samples stored in the HDF5 file
  %   chunksz   (used only in create mode) number of samples stored per HDF5
  %             chunk (see HDF5 documentation on chunking); needed for
  %             datasets with unbounded maximum size (default: 1000)
  %
  % Outputs (only computed when requested):
  %   curr_dat_sz  size of '/data' after the write
  %   curr_lab_sz  size of '/label' after the write
  %
  % Caveat: the sample count is taken from the last entry of size(data).
  % MATLAB's size drops trailing singleton dimensions, so a batch containing a
  % single sample (N = 1) of W*H*C data is not recognised as having N = 1.

  % verify that format is right
  dat_dims = size(data);
  lab_dims = size(labels);
  num_samples = dat_dims(end);
  assert(lab_dims(end) == num_samples, 'Number of samples should be matched between data and labels');

  if nargin < 4 || isempty(create)
    create = true;
  end
  have_startloc = nargin >= 5 && ~isempty(startloc);

  if create
    %fprintf('Creating dataset with %d samples\n', num_samples);
    if nargin < 6 || isempty(chunksz)
      chunksz = 1000;
    end
    if exist(filename, 'file')
      fprintf('Warning: replacing existing file %s \n', filename);
      delete(filename);
    end
    % last dimension is Inf (unlimited) so later batches can be appended
    h5create(filename, '/data', [dat_dims(1:end-1) Inf], 'Datatype', 'single', 'ChunkSize', [dat_dims(1:end-1) chunksz]); % width, height, channels, number
    h5create(filename, '/label', [lab_dims(1:end-1) Inf], 'Datatype', 'single', 'ChunkSize', [lab_dims(1:end-1) chunksz]); % labels per sample, number
    if ~have_startloc
      startloc = struct('dat', ones(1, length(dat_dims)), 'lab', ones(1, length(lab_dims)));
    end
  else % append mode
    if ~have_startloc
      % look the datasets up by name rather than by position in the listing
      data_info = h5info(filename, '/data');
      label_info = h5info(filename, '/label');
      prev_dat_sz = data_info.Dataspace.Size;
      prev_lab_sz = label_info.Dataspace.Size;
      % isequal yields the scalar logical that assert requires and also
      % copes with a differing number of dimensions
      assert(isequal(prev_dat_sz(1:end-1), dat_dims(1:end-1)), 'Data dimensions must match existing dimensions in dataset');
      assert(isequal(prev_lab_sz(1:end-1), lab_dims(1:end-1)), 'Label dimensions must match existing dimensions in dataset');
      % continue right after the last stored sample
      startloc = struct( ...
        'dat', [ones(1, length(dat_dims) - 1), prev_dat_sz(end) + 1], ...
        'lab', [ones(1, length(lab_dims) - 1), prev_lab_sz(end) + 1]);
    end
  end

  if ~isempty(data)
    h5write(filename, '/data', single(data), startloc.dat, dat_dims);
    h5write(filename, '/label', single(labels), startloc.lab, lab_dims);
  end

  if nargout
    data_info = h5info(filename, '/data');
    label_info = h5info(filename, '/label');
    curr_dat_sz = data_info.Dataspace.Size;
    curr_lab_sz = label_info.Dataspace.Size;
  end
  end


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值