总说: 这篇博客主要记录VCNN框架的一些内容。目前还没有完全看懂,不过已经有了一些眉目,所以先记录下来。
https://github.com/Georgezhouzhou/vcnn_double-bladed
Notes on gen_training_data.m
% gen_training_data.m
% Generates 101 batch files of training data. Each batch holds num_patches
% patch pairs (samples / labels) cut from randomly chosen images under
% data/deepeaf/BSDS500/, and is written to
% data/deepeaf/certainFilter/train/patches_<m>.
% Depends on GT_filter and EdgeExtract, which are defined outside this script
% (presumably under the GT_filters paths added below -- TODO confirm).
addpath applications/deep_edge_aware_filters/utility/GT_filters/
addpath applications/deep_edge_aware_filters/utility/GT_filters/L0smoothing/
addpath data/
clear;
% Side length (pixels) of each square patch.
patch_dim = 64;
% Number of patches stored in each batch file; must be a multiple of 8 because
% every randomly cropped patch is expanded into 8 variants below.
num_patches = 1000;
% Source images and (separately stored) filtered images.
listing = dir('data/deepeaf/BSDS500/*.jpg');
fListing = dir('data/deepeaf/fImgs/*.jpg');
% One iteration per output batch file.
for m = 1 : 101
fprintf('Extracting patch batch: %d / %d\n', m, 101);
% extract random patches
% samples / labels are patch_dim x patch_dim x 3 x num_patches.
samples = zeros(patch_dim, patch_dim, 3, num_patches);
labels = zeros(size(samples));
for i = 1 : num_patches / 8 % 1000 patches need 125 random image draws (with repetition); each draw picks a random top-left corner for the patch
% Progress message every 100 crops.
if (mod(i,100) == 0)
fprintf('Extracting patch: %d / %d\n', i*8, num_patches);
end
% Pick a random image index in 1..number of listed source images.
r_idx = random('unid', size(listing, 1));
% Original author's note: the files are named in the same order, so image x and
% its filtered counterpart share the same index.
% NOTE(review): this assumes both directories list the same number of files in
% the same order -- confirm.
I = imread(strcat('data/deepeaf/BSDS500/', listing(r_idx).name));
% NOTE(review): fI (and fpatch below) are loaded but never read afterwards in
% this script; the labels are computed from GT_filter instead. Confirm which
% source of filtered data is intended.
fI = imread(strcat('data/deepeaf/fImgs/',fListing(r_idx).name));
orig_img_size = size(I);
% Random top-left corner such that the patch fits inside the image.
r = random('unid', orig_img_size(1) - patch_dim + 1);
c = random('unid', orig_img_size(2) - patch_dim + 1);
% Original author's note: EdgeExtract only extracts along the vertical
% direction; a left-right flip combined with the four 90-degree rotations
% below covers all 8 orientations of the patch matrix.
patch = I(r:r+patch_dim-1, c:c+patch_dim-1, :);
fpatch = fI(r:r+patch_dim-1, c:c+patch_dim-1, :);
patchHoriFlipped = fliplr(patch);
fpatch = fliplr(fpatch); % flipped the same way; NOTE(review): this overwrites the unflipped fpatch and the result is unused
% The 8 consecutive slots in samples / labels filled by this crop.
idx_list = (i-1)*8+1:(i-1)*8+8;
for idx = 1:4
% Original author's notes (semantics of EdgeExtract not visible here -- confirm):
% samples stores vin, i.e. the gradient of the original image
% labels stores vout, i.e. the gradient of the filtered image
% In each group of 8, slots 1~4 hold Iy, Ix, -Iy, -Ix of the original / filtered patch
% and slots 5~8 hold Iy, Ix, -Iy, -Ix of the horizontally flipped original / filtered patch
% Two slots that are 4 apart are left-right mirror images of each other
% Rotate the unflipped patch by (idx-1)*90 degrees, filter it, and store the pair in slot idx.
patch_rotated = im2double(imrotate(patch, (idx-1)*90));
patch_filtered = GT_filter(patch_rotated);
[vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
samples(:,:,:,idx_list(idx)) = vin;
labels(:,:,:,idx_list(idx)) = vout;
% Same for the horizontally flipped patch, stored in slot idx+4.
patch_rotated = im2double(imrotate(patchHoriFlipped, (idx-1)*90));
patch_filtered = GT_filter(patch_rotated);
[vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
samples(:,:,:,idx_list(idx+4)) = vin;
labels(:,:,:,idx_list(idx+4)) = vout;
end
end
% Convert to single precision before saving.
samples = single(samples);
labels = single(labels);
% save it
% Written as a version 7.3 MAT-file containing the variables samples and labels.
filename = strcat('data/deepeaf/certainFilter/train/patches_', num2str(m));
save(filename, '-v7.3', 'samples', 'labels');
end
Notes on init.m
首先是根据config进行相应的配置
config.GEN_OUTPUT = @gen_output_copy;
.NEW_MEM = @to_gpu (作用是把数据转换成单精度的GPU数组,即 gpuArray(single(x)))
.IM2COL = @im2col_gpu
.NONLINEARITY = @relu;
.OUT_ACT = @nonlinearity_nil;
config.COST_FUN = @L2_norm;
.misc下面放置的是杂项,其他的一些配置
.misc.current_layer = 1
初始化权值以及计算一些统计量
第一层
% Excerpt from init.m: set up counters and empty containers before the
% per-layer initialisation loop.
% Weight range taken from the configuration.
r = config.weight_range;
% Running counters, one per layer type
% (presumably incremented as layers of each type are encountered -- TODO confirm).
conv_layer_c = 0;
pool_layer_c = 0;
full_layer_c = 0;
% Number of layers is one less than the number of entries in forward_pass_scheme.
layer_num = length(config.forward_pass_scheme)-1;
config.layer_num = layer_num;
config.feature_map_sizes = {}; % original author's note: each entry is 3-D; the third dimension is the number ("thickness") of feature maps of that layer, i.e. conv_hidden_size -- confirm
config.weights = {};
for idx = 1:layer_num
if idx == 1
....
if strcmp(config.forward_pass_scheme{idx}, 'conv_v_sr')
...
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r);
if config.normalize_init_weights
config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
end
这里主要是针对不同类型的层分别进行设置。
对于第一层来说,如果是conv_v,则用 randn 生成随机数并乘以 weight_range 来初始化weights;如果开启了normalize_init_weights,则再除以 sqrt(kernel高 × kernel宽 × conv_hidden_size) 进行normalize。
其他层
如果是其他层(非第一层),那么
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
conv_layer_c = conv_layer_c + 1;
config.feature_map_sizes{idx} = [config.feature_map_sizes{idx-1}(1)-config.kernel_size(conv_layer_c,1)+1 ...
config.feature_map_sizes{idx-1}(2)-config