hdf5格式的matlab读写操作

最新推荐文章于 2024-06-26 09:46:00 发布

good_learn

最新推荐文章于 2024-06-26 09:46:00 发布

阅读量1.7k

点赞数 1

分类专栏： matlab 文章标签： hdf5 matlab

原文链接：https://blog.csdn.net/kuaitoukid/article/details/43448517

版权

matlab 专栏收录该内容

35 篇文章 13 订阅

订阅专栏

最近要用caffe处理一个multi-label的回归问题，就是输出是一个向量，不是一个具体的数值，这个时候之前的leveldb格式就不奏效了，因为caffe源代码里面默认label是一个数值。网上搜了下，都说hdf5格式可以解决这个问题。

在caffe里面，有一个hdf5的datalayer作为数据输入，从源代码来看，对于label的维数没做限制，剩下的问题就是如何生成hdf5的数据，目前只是找到了github上的一个人共享的用matlab写的hdf5数据的读写操作，在这我把代码粘贴出来

testHDF5.m


 
 
   
   
    
    
   
   
   
   
    
    
     
     %% WRITING TO HDF5
     % Demo script: streams randomly generated data and multi-dimensional
     % labels into an HDF5 file in fixed-size batches through store2hdf5,
     % reads a slice back with h5read, checks that it round-trips, and finally
     % writes list.txt naming the HDF5 file.
     filename = 'trial.h5';

     num_total_samples = 10000;
     % to simulate data being read from disk / generated etc.
     data_disk = rand(5, 5, 1, num_total_samples);
     label_disk = rand(10, num_total_samples);

     chunksz = 100;
     created_flag = false;
     totalct = 0;
     for batchno = 1:num_total_samples/chunksz
       fprintf('batch no. %d\n', batchno);
       last_read = (batchno-1)*chunksz;

       % to simulate maximum data to be held in memory before dumping to hdf5 file
       batchdata = data_disk(:,:,1,last_read+1:last_read+chunksz);
       batchlabs = label_disk(:,last_read+1:last_read+chunksz);

       % store to hdf5: the first call creates the file, later calls append
       % starting right after the samples already written (totalct)
       startloc = struct('dat', [1,1,1,totalct+1], 'lab', [1,totalct+1]);
       curr_dat_sz = store2hdf5(filename, batchdata, batchlabs, ~created_flag, startloc, chunksz);
       created_flag = true;        % flag set so that file is created only once
       totalct = curr_dat_sz(end); % updated dataset size (#samples)
     end

     % display structure of the stored HDF5 file
     h5disp(filename);

     %% READING FROM HDF5

     % Read data and labels for samples #1000 to 1999
     % (h5read takes a 1-based start index and a count per dimension)
     data_rd = h5read(filename, '/data', [1 1 1 1000], [5, 5, 1, 1000]);
     label_rd = h5read(filename, '/label', [1 1000], [10, 1000]);
     fprintf('Testing ...\n');
     try
       % stored values are single precision, so cast the reference data first
       assert(isequal(data_rd, single(data_disk(:,:,:,1000:1999))), 'Data do not match');
       assert(isequal(label_rd, single(label_disk(:,1000:1999))), 'Labels do not match');

       fprintf('Success!\n');
     catch err
       fprintf('Test failed ...\n');
       getReport(err)
     end

     %delete(filename);

     % CREATE list.txt containing filename, to be used as source for HDF5_DATA_LAYER
     [FILE, fopen_msg] = fopen('list.txt', 'w');
     if FILE == -1
       % fail with the OS message instead of an invalid-identifier error in fprintf
       error('Could not open list.txt for writing: %s', fopen_msg);
     end
     fprintf(FILE, '%s', filename);
     fclose(FILE);
     fprintf('HDF5 filename listed in %s \n', 'list.txt');

     % NOTE: In net definition prototxt, use list.txt as input to HDF5_DATA as:
     % layers {
     %   name: "data"
     %   type: HDF5_DATA
     %   top: "data"
     %   top: "labelvec"
     %   hdf5_data_param {
     %     source: "/path/to/list.txt"
     %     batch_size: 64
     %   }
     % }

store2hdf5.m


 
 
   
   
    
    
   
   
   
   
    
    
     
     function [curr_dat_sz, curr_lab_sz] = store2hdf5(filename, data, labels, create, startloc, chunksz)
       % STORE2HDF5 Create an HDF5 file with '/data' and '/label' datasets, or
       % append a batch of samples to an existing one. Values are stored as single.
       %
       % *data* is W*H*C*N matrix of images should be normalized (e.g. to lie between 0 and 1) beforehand
       % *label* is D*N matrix of labels (D labels per sample)
       % *create* [0/1] specifies whether to create file newly or to append to
       %   previously created file, useful to store information in batches when a
       %   dataset is too big to be held in memory (default: 1)
       % *startloc* (point at which to start writing data), a struct with fields
       %   'dat' and 'lab'. By default,
       %   if create=1 (create mode), startloc.dat=[1 1 1 1], and startloc.lab=[1 1];
       %   if create=0 (append mode), startloc.dat=[1 1 1 K+1], and startloc.lab=[1 K+1];
       %   where K is the current number of samples stored in the HDF
       % *chunksz* (used only in create mode, default: 1000), specifies number of
       %   samples to be stored per chunk (see HDF5 documentation on chunking) for
       %   creating HDF5 files with unbounded maximum size - TLDR; higher chunk
       %   sizes allow faster read-write operations
       %
       % Returns (only computed when an output is requested):
       %   curr_dat_sz - current size of the '/data' dataset in the file
       %   curr_lab_sz - current size of the '/label' dataset in the file
       %
       % NOTE(review): size() drops trailing singleton dimensions, so a batch
       % holding a single sample (e.g. 5x5x1x1) is seen as 5x5 and its sample
       % count is misread - confirm callers never pass such a batch.

       % verify that format is right
       dat_dims = size(data);
       lab_dims = size(labels);
       num_samples = dat_dims(end);

       assert(lab_dims(end)==num_samples, 'Number of samples should be matched between data and labels');

       if nargin < 4
         create = true;
       end
       have_startloc = nargin >= 5;

       if create
         %fprintf('Creating dataset with %d samples\n', num_samples);
         if nargin < 6
           chunksz = 1000;
         end
         if exist(filename, 'file')
           fprintf('Warning: replacing existing file %s \n', filename);
           delete(filename);
         end
         % the last dimension is unbounded (Inf) so later calls can append samples
         h5create(filename, '/data', [dat_dims(1:end-1) Inf], 'Datatype', 'single', 'ChunkSize', [dat_dims(1:end-1) chunksz]); % width, height, channels, number
         h5create(filename, '/label', [lab_dims(1:end-1) Inf], 'Datatype', 'single', 'ChunkSize', [lab_dims(1:end-1) chunksz]); % labels per sample, number
         if ~have_startloc
           startloc.dat = [ones(1,length(dat_dims)-1), 1];
           startloc.lab = [ones(1,length(lab_dims)-1), 1];
         end
       else  % append mode
         if ~have_startloc
           % look the datasets up by name rather than by their position in
           % info.Datasets, which depends on how the file lists its contents
           dat_info = h5info(filename, '/data');
           lab_info = h5info(filename, '/label');
           prev_dat_sz = dat_info.Dataspace.Size;
           prev_lab_sz = lab_info.Dataspace.Size;
           % isequal always yields a scalar logical, also when the ranks differ
           assert(isequal(prev_dat_sz(1:end-1), dat_dims(1:end-1)), 'Data dimensions must match existing dimensions in dataset');
           assert(isequal(prev_lab_sz(1:end-1), lab_dims(1:end-1)), 'Label dimensions must match existing dimensions in dataset');
           % continue right after the last sample already stored
           startloc.dat = [ones(1,length(dat_dims)-1), prev_dat_sz(end)+1];
           startloc.lab = [ones(1,length(lab_dims)-1), prev_lab_sz(end)+1];
         end
       end

       if ~isempty(data)
         h5write(filename, '/data', single(data), startloc.dat, size(data));
         h5write(filename, '/label', single(labels), startloc.lab, size(labels));
       end

       if nargout
         % report the dataset sizes as they now stand in the file
         dat_info = h5info(filename, '/data');
         lab_info = h5info(filename, '/label');
         curr_dat_sz = dat_info.Dataspace.Size;
         curr_lab_sz = lab_info.Dataspace.Size;
       end
     end

good_learn

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hdf5格式的matlab读写操作

最近要用caffe处理一个multi-label的回归问题，就是输出是一个向量，不是一个具体的数值，这个时候之前的leveldb格式就不奏效了，因为caffe源代码里面默认label是一个数值，网上搜了下，都说hdf5格式可以解决这个问题。在caffe里面，有一个hdf5的datalayer作为数据输入，从源代码来看，对于label的维数没做限制，剩下的问题就是如何生成hdf5的数据，目前只是找...
复制链接

扫一扫

专栏目录