As Western techniques spread east, the students begin to outdo the masters: interested readers can head over to BUG1989/caffe-int8-convert-tools. Also recommended is an introduction to the underlying theory: "TensorRT(5)-INT8校准原理".
Intel's int8 calibration code derives from Ristretto. It is not as sophisticated as the Kullback-Leibler divergence (KL divergence) calibration used in "8-bit Inference with TensorRT" and [MXNET-133] Model Quantization with Calibration, but it is still a boon for Caffe users. Once int8 support in Tencent/ncnn matures, inference on edge devices will get another speed boost.
// Message for layers with reduced word width arithmetic
message QuantizationParameter {
  enum Precision {
    DYNAMIC_FIXED_POINT = 0;
  }
  optional Precision precision = 1 [default = DYNAMIC_FIXED_POINT];
  enum Rounding {
    NEAREST = 0;
  }
  optional Rounding rounding_scheme = 2 [default = NEAREST];
  // Dynamic fixed point word width
  optional uint32 bw_layer_in = 3 [default = 32];
  optional uint32 bw_layer_out = 4 [default = 32];
  optional uint32 bw_params = 5 [default = 32];
  repeated int32 fl_layer_in = 6;
  repeated int32 fl_layer_out = 7;
  repeated int32 fl_params = 8;
  repeated float scale_in = 20;
  repeated float scale_out = 21;
  repeated float scale_params = 22;
}
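For orientation, this is roughly what a quantized convolution entry looks like in the generated *_quantized.prototxt after calibration; the layer name and scale values below are invented for illustration:

layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  quantization_param {
    precision: DYNAMIC_FIXED_POINT
    bw_layer_in: 8
    bw_layer_out: 8
    bw_params: 8
    scale_in: 47.93
    scale_out: 25.6
    scale_params: 127.25
  }
}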
calibrator.py
Parse the command-line arguments.
if __name__ == '__main__':
    usage_string = 'Usage: 1.Build the caffe\n ' \
                   '2.cd /path/to/caffe/scripts\n ' \
                   '3.python calibrator.py ' \
                   ' -r /path/to/caffe/build ' \
                   ' -w pre-trained-fp32 weights ' \
                   ' -m topology ' \
                   ' -i iterations ' \
                   ' -l acceptable accuracy loss value, the default value is 0.01 which stands for one percent' \
                   ' -d 1 (0 means classification while 1 means detection, the default value is 0)' \
                   ' -n blob name which stands for accuracy' \
                   ' -c scaling mode, the default value is single' \
                   ' -s sampling iterations'
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h', '--help', action='help', help=usage_string)
    parser.add_argument('-i', '--iterations', action='store', dest='iterations', default=10,
                        help='equal to the number to complete one epoch.')
    parser.add_argument('-w', '--weights', action='store', dest='weights', default='',
                        help='pre-trained-fp32-weights.')
    parser.add_argument('-m', '--model', action='store', dest='model', default='',
                        help='topology definition prototxt.')
    parser.add_argument('-l', '--accuracy_loss', action='store', dest='loss', default=0.01,
                        help='the acceptable accuracy loss raised by 8-bit quantization, '
                             'default value is 0.01 (one percent).')
    parser.add_argument('-d', '--detection', action='store', dest='is_detection', default=0,
                        help='0 for classification while 1 for detection, default value is 0.')
    parser.add_argument('-r', '--root', action='store', dest='root', default='',
                        help='caffe build path.')
    parser.add_argument('-n', '--blob_name', action='store', dest='blob_name', default='',
                        help='top blob name which stands for accuracy.')
    parser.add_argument('-c', '--weights_channel', action='store', dest='scaling_mode', default='single',
                        help='the scaling mode for weights.')
    parser.add_argument('-s', '--sampling_iterations', action='store', dest='sampling_iterations', default=10,
                        help='iteration number of sampling, the default value is 10.')
    params = parser.parse_args()
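A typical invocation (all paths and file names here are hypothetical) looks like:

cd /path/to/caffe/scripts
python calibrator.py -r ../build -w resnet50.caffemodel -m resnet50.prototxt \
    -i 1000 -l 0.01 -d 0 -n accuracy -c single -s 10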
Exit if the caffe root path does not exist.
if not check_existence(params.root):
    print 'Please check the {} existence.'.format(params.root)
    sys.exit(-1)
Prepend the pycaffe path to sys.path and import caffe.
pycaffe_path = os.path.abspath(os.path.dirname(os.path.abspath(params.root))) + os.path.sep + 'python'
if not check_existence(pycaffe_path):
    print "Please check the pycaffe existence. Suggest to rebuild pycaffe via 'make pycaffe'."
sys.path.insert(0, pycaffe_path)
import caffe
from caffe.proto import caffe_pb2
Set the number of test iterations. Note that the fallback on a parse failure is 1000, not the argparse default of 10.
try:
    user_input_iterations = int(params.iterations)
except:
    print 'Set the iterations to the default value 1000.'
    user_input_iterations = 1000
else:
    if user_input_iterations < 1:
        print 'Invalid iterations! The value should be larger than zero.'
        sys.exit(-1)
Set the number of sampling iterations.
try:
    user_sampling_iteration = int(params.sampling_iterations)
except:
    print 'Set the sampling iteration to the default value 10.'
    user_sampling_iteration = 10
else:
    if user_sampling_iteration < 1:
        print 'Invalid sampling iteration! The value should be larger than zero.'
        sys.exit(-1)
Set the scaling mode for weights.
# note: 'multipe' is the spelling this script checks for the per-channel mode
if params.scaling_mode != 'multipe' and params.scaling_mode != 'single':
    user_scaling_mode = 'single'
else:
    user_scaling_mode = params.scaling_mode
Set the tolerated accuracy loss.
try:
    toleration = float(params.loss)
    if toleration >= 1 or toleration < 0:
        toleration = 0.01
except:
    print 'Set the toleration to 1%.'
    toleration = 0.01
Set the detection flag.
try:
    detection_flag = 1 if int(params.is_detection) == 1 else 0
except:
    print 'Set the test type to classification.'
    detection_flag = 0
Check that the model prototxt exists.
model = os.path.abspath(params.model)
if not check_existence(model):
    print 'Please check model: {} existence.'.format(model)
    sys.exit(-1)
Check that the weights file exists.
user_input_weights = os.path.abspath(params.weights)
if not check_existence(user_input_weights):
    print 'Please check weights: {} existence.'.format(user_input_weights)
    sys.exit(-1)
Check that the sampling binary under tools exists.
sample = os.path.abspath(params.root + os.path.sep + 'tools/sample')
if not check_existence(sample):
    print 'Please check sample: {} existence.'.format(sample)
    sys.exit(-1)
Check that the caffe binary exists.
caffe_bin_path = os.path.abspath(params.root + os.path.sep + 'tools/caffe')
if not check_existence(caffe_bin_path):
    print 'Please check caffe binary: {} existence.'.format(caffe_bin_path)
    sys.exit(-1)
setup_env() changes the current working directory to the given path and sets caffe to CPU mode.
setup_env()
If target_blob_name is not specified and this is not a detection task, call accuracy_blob_name_parser. For scoring, a blob name holding the expected accuracy (or the detection output value) is required; accuracy_blob_name_parser reads the model prototxt and tries to find the output of an Accuracy layer.
target_blob_name = params.blob_name
if not target_blob_name and not detection_flag:
    target_blob_name = accuracy_blob_name_parser(model)
Exit if target_blob_name is still unset or does not appear in the model prototxt.
if not target_blob_name or not check_blob_name_existence(model, target_blob_name):
    print 'Please specify valid blob name and rerun the script.'
    sys.exit(-1)
str.rsplit splits the string with sep as the delimiter and returns the resulting list of pieces; here it is used to derive the file names for the new network description and weights. generate_sample then invokes the sample binary through a shell command.
quantized_prototxt = model.rsplit('.')[0] + '_quantized.prototxt'
quantized_weights = user_input_weights.rsplit('.')[0] + '_quantized.caffemodel'
enable_power_of_2 = 0
print 'Sampling...'
generate_sample(sample, model, user_input_weights, quantized_prototxt, detection_flag, user_scaling_mode,
                user_sampling_iteration, 100 * toleration, enable_power_of_2)
print 'Sampling done'
get_the_accuracy runs the test binary through a shell command to obtain the model accuracy.
print 'Generating the FP32 accuracy...'
top_1 = get_the_accuracy(caffe_bin_path, model, user_input_weights, user_input_iterations, detection_flag,
                         target_blob_name)
print 'FP32 accuracy is: {}'.format(top_1)
tuning_quantized_topology strips quantization from the convolution layers one at a time, front to back, until the accuracy meets the requirement.
tuning_quantized_topology(top_1, quantized_prototxt, caffe_bin_path, user_input_weights, user_input_iterations,
                          enable_power_of_2, toleration, detection_flag, target_blob_name)
print 'Updated prototxt {} is generated.'.format(quantized_prototxt)
setup_env()
os.chdir() changes the current working directory to the given path. os.path.dirname(path) returns the directory component of path, i.e. the first element of the pair returned by os.path.split().
def setup_env():
    os.chdir(os.path.dirname(os.path.abspath(params.root)))
    caffe.set_mode_cpu()
accuracy_blob_name_parser
read_prototxt parses the model description file via google.protobuf.text_format and returns the network definition.
net = read_prototxt(prototxt)
if not net:
    print 'Please check the model prototxt integrity.'
    sys.exit(-1)
Look for the network's Accuracy layers: if a layer has an accuracy_param, its top_k is used as the key in res; otherwise the layer's output is stored under key 1.
res = {}
for i in net.layer:
    if i.type == 'Accuracy':
        if i.HasField('accuracy_param'):
            res[i.accuracy_param.top_k] = i.top[0]
        else:
            res[1] = i.top[0]
Sort the keys of res and return the value of the smallest key.
return res[sorted(res.keys())[0]] if res else ''
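As a concrete example, for a (hypothetical) model that ends with the two Accuracy layers below, res becomes {1: 'accuracy-top1', 5: 'accuracy-top5'} and the function returns 'accuracy-top1':

layer {
  name: "accuracy-top1"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracy-top1"
}
layer {
  name: "accuracy-top5"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracy-top5"
  accuracy_param {
    top_k: 5
  }
}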
generate_sample
os.system executes the command string in a subshell. It is implemented by calling the standard C function system() and has the same limitations; changes to sys.stdin and friends are not reflected in the environment of the executed command.
def generate_sample(sample_path, input_model, weights,
                    quantized_model, detection, scaling_mode, iterations=1, error_margin=1, power=0):
    cmd = '{0} quantize -model {1} -weights {2} -model_quantized {3} -iterations {4} -error_margin {5} -power {6}' \
          ' -scaling {7} -trimming_mode dynamic_fixed_point'.format(sample_path, input_model, weights,
                                                                    quantized_model, iterations, error_margin,
                                                                    power, scaling_mode)
    if detection:
        cmd += ' --detection=1'
    os.system(cmd)
get_the_accuracy
Invoke test in a subshell; the tee command duplicates the output into a log file.
output_log_name = 'calibrator_log.txt'
cmd = '{} test -model {} -weights {} -iterations {}'.format(caffe_bin, model_def, model_weights, iterations)
if detection:
    cmd += ' -detection'
cmd += ' 2>&1|tee {}'.format(output_log_name)
os.system(cmd)
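The excerpt stops at os.system; the part of get_the_accuracy that is not shown reads the score back out of calibrator_log.txt. A minimal hypothetical sketch of that step, assuming the usual caffe test output format blob_name = value:

import re

def parse_accuracy_from_log(log_path, blob_name):
    # 'caffe test' logs lines like 'accuracy = 0.7216'; the last match is the
    # score averaged over all test iterations.
    pattern = re.compile(r'{} = ([0-9.]+)'.format(re.escape(blob_name)))
    scores = []
    with open(log_path) as f:
        for line in f:
            m = pattern.search(line)
            if m:
                scores.append(float(m.group(1)))
    return scores[-1] if scores else None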
tuning_quantized_topology
If fixed point is used (is_floating_point == 0), call transform_convolutions. get_compiled_net invokes the test binary through a shell command to obtain the compiled-network dump.
if is_floating_point == 0:
    print 'Updating quantization parameter...'
    transform_convolutions(prototxt, get_compiled_net(caffe_bin, prototxt, model_weights, detection))
Measure the current accuracy.
current_top1_accuracy = get_the_accuracy(caffe_bin, prototxt, model_weights, iterations, detection, blob_name)
Call remove_top_quantized_parameter until the accuracy loss is within tolerance; each call clears the quantization parameters of the front-most remaining quantized convolution layer.
while abs(current_top1_accuracy - base_top1_accuracy) >= accuracy_loss:
    print 'Tuning... '
    print abs(current_top1_accuracy - base_top1_accuracy)
    remove_top_quantized_parameter(prototxt)
    current_top1_accuracy = get_the_accuracy(caffe_bin, prototxt, model_weights, iterations, detection, blob_name)
transform_convolutions
Merge the file contents into net (txtf is google.protobuf.text_format).
net = caffe_pb2.NetParameter()
with open(model_path) as f:
    s = f.read()
txtf.Merge(s, net)
get_fusion_conv_names returns the names of the convolution layers that have more than one input, together with their second input blob. Fusion is an optimization from intel/ideep.
fusion_layer = get_fusion_conv_names(compiled_model_path)
new_net = copy.deepcopy(net)
Collect the convolution layers together with their indices.
convolution_layers = [(value, index) for index, value in enumerate(net.layer) if value.type == 'Convolution']
interesting_layers = ['ReLU']
skip_layers = ['Convolution', 'Eltwise', 'Concat']
filter_fusion_layers filters out the layers already present in fusion_layer.
u8_max = 255
s8_max = 127
u8_layers = filter_fusion_layers(net, fusion_layer, convolution_layers)
get_all_bottom_layers collects the downstream ReLU layers that consume a given blob; get_all_top_layers collects the upstream ReLU layers that feed a given layer.
for (l, index) in convolution_layers:
    outputwith_relu = get_all_bottom_layers(net.layer[index].top[0], net, index + 1, skip_layers,
                                            interesting_layers)
    conv_relu_flag = check_relu_existence(net, index,
                                          convolution_layers[convolution_layers.index((l, index)) + 1][1]
                                          if (l, index) != convolution_layers[-1]
                                          else len(net.layer),
                                          [i[0] for i in fusion_layer])
    inputwith_relu = get_all_top_layers(l, net, index, skip_layers, interesting_layers)
For layer outputs: a convolution followed by a ReLU produces non-negative activations, so u8 is used, otherwise s8. Quantization::QuantizeNet() (below) records the absolute value ranges of inputs, outputs, and parameters; here those ranges are converted into scale factors, rounded to two decimal places.
    for si in range(0, len(new_net.layer[index].quantization_param.scale_out)):
        if len(outputwith_relu) > 0 or l.name in u8_layers or conv_relu_flag:  # u8
            new_net.layer[index].quantization_param.scale_out[si] = round(
                u8_max / new_net.layer[index].quantization_param.scale_out[si], 2)
        else:  # s8
            new_net.layer[index].quantization_param.scale_out[si] = round(
                s8_max / new_net.layer[index].quantization_param.scale_out[si], 2)
For layer inputs: if the layer is fed by a ReLU or is itself a convolution, u8 is used; otherwise the layer's quantization_param is cleared entirely. (Since every layer considered here is a convolution, the else branch is effectively dead.)
    for si in range(0, len(new_net.layer[index].quantization_param.scale_in)):
        if len(inputwith_relu) > 0 or l.type == 'Convolution':  # u8
            new_net.layer[index].quantization_param.scale_in[si] = round(
                u8_max / new_net.layer[index].quantization_param.scale_in[si], 2)
        else:
            new_net.layer[index].ClearField('quantization_param')
            continue
Layer parameters use s8.
    for si in range(0, len(new_net.layer[index].quantization_param.scale_params)):
        new_net.layer[index].quantization_param.scale_params[si] = round(
            s8_max / new_net.layer[index].quantization_param.scale_params[si], 2)
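As a worked example of the arithmetic above, suppose sampling recorded a maximum absolute output of 5.32 for a convolution followed by a ReLU, and a maximum absolute weight of 0.87 (both values invented):

u8_max, s8_max = 255, 127
max_out, max_param = 5.32, 0.87                 # hypothetical sampled maxima

scale_out = round(u8_max / max_out, 2)          # 47.93: fp32 activation * 47.93 -> u8
scale_params = round(s8_max / max_param, 2)     # 145.98: fp32 weight * 145.98 -> s8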
Write the updated network description back to the file.
with open(model_path, 'w') as f:
    f.write(str(new_net))
filter_fusion_layers
filter_fusion_layers receives the convolution layers of the original network together with the fused convolutions of the compiled network. If fusion_layer or conv_layer is empty, it returns an empty list.
if not fusion_layer or not conv_layer:
    return []
get_all_bottom_layers returns all downstream ReLU layers that consume a given blob; convolutions for which that list is empty are collected into output_with_relu_layer (the name notwithstanding).
interesting_layers = ['ReLU']
skip_layers = ['Convolution', 'Eltwise', 'Concat']
output_with_relu_layer = [(l.name, net.layer[index].top[0]) for l, index in conv_layer
                          if len(get_all_bottom_layers(net.layer[index].top[0], net, index + 1,
                                                       skip_layers, interesting_layers)) == 0]
Remove the entries already present in fusion_layer and return the remaining layer names.
output_without_dict = {v: k for (k, v) in output_with_relu_layer}
for layer_name, top_name in fusion_layer:
    if top_name in output_without_dict.keys():
        del output_without_dict[top_name]
return output_without_dict.values()
get_fusion_conv_names
Parse the compiled-network dump written while testing the quantized prototxt, and return the name and the second bottom blob of every convolution layer with more than one input.
compiled_net = caffe_pb2.NetParameter()
with open(compiled_model) as f:
    s = f.read()
txtf.Merge(s, compiled_net)
return [(layer.name, layer.bottom[1]) for _, layer in enumerate(compiled_net.layer)
        if layer.type == 'Convolution' and len(layer.bottom) > 1]
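In the compiled-network dump, a convolution into which an Eltwise sum has been fused (e.g. a ResNet residual connection) shows up with a second bottom. For a hypothetical entry like the following, get_fusion_conv_names would return ('res2a', 'res2a_branch1'):

layer {
  name: "res2a"
  type: "Convolution"
  bottom: "res2a_branch2b"
  bottom: "res2a_branch1"
  top: "res2a"
}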
get_all_bottom_layers
get_bottom_layers returns the layers, starting from index start, that take top_name as input. The loop stops when the worklist becomes empty.
all_bottom_layers = []
bottom_layers = get_bottom_layers(top_name, net, start)
while True:
    if len(bottom_layers) == 0:
        break
For each of these layers: if its type is in skip_layers, drop it; if its type is in interesting_layers, record it once in all_bottom_layers; otherwise drop it and append the layers that consume its output. An explicit if/elif/else would arguably be clearer. Also note that processed_layers is the same list object as bottom_layers, so removing from bottom_layers mutates the list being iterated (a worklist-based sketch follows this excerpt).
    processed_layers = bottom_layers  # sync bottom_layers change
    for (li, ln, lt) in processed_layers:
        if lt in skip_layers:
            bottom_layers.remove((li, ln, lt))
            continue
        if lt in interesting_layers:
            lp = (li, ln, lt)
            bottom_layers.remove(lp)
            if lp not in all_bottom_layers:
                all_bottom_layers.append(lp)
            continue
        new_bottom_layers = get_bottom_layers(ln, net, li + 1)
        bottom_layers.remove((li, ln, lt))
        bottom_layers.extend(new_bottom_layers)
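For comparison, here is a sketch of the same traversal with an explicit worklist, which avoids mutating the list being iterated; it reuses the get_bottom_layers helper shown in the next section and is intended to behave like the excerpt above:

from collections import deque

def get_all_bottom_layers_sketch(top_name, net, start, skip_layers, interesting_layers):
    all_bottom_layers = []
    worklist = deque(get_bottom_layers(top_name, net, start))
    while worklist:
        li, ln, lt = worklist.popleft()
        if lt in skip_layers:
            continue                                   # prune this branch
        elif lt in interesting_layers:
            if (li, ln, lt) not in all_bottom_layers:  # record each ReLU once
                all_bottom_layers.append((li, ln, lt))
        else:
            # keep walking downstream through uninteresting layers
            worklist.extend(get_bottom_layers(ln, net, li + 1))
    return all_bottom_layers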
get_bottom_layers
Return the layers, starting from index start, that take top_name as input.
bottom_layers = []
for index, value in enumerate(net.layer[start:]):
    for sub_index, sub_value in enumerate(value.bottom):
        if sub_value == top_name:
            bottom_layers.append((index, value.name, value.type))
return bottom_layers
get_all_top_layers
get_top_layers returns the list of layers whose outputs feed layer l.
all_top_layers = []
top_layers = get_top_layers(l, net, end)
while True:
    if len(top_layers) == 0:
        break
The processing pattern mirrors get_all_bottom_layers.
    processed_layers = top_layers  # sync top_layers change
    for (li, ln, lt) in processed_layers:
        if lt in skip_layers:
            top_layers.remove((li, ln, lt))
            continue
        if lt in interesting_layers:
            lp = (li, ln, lt)
            top_layers.remove(lp)
            if lp not in all_top_layers:
                all_top_layers.append(lp)
            continue
        new_top_layers = get_top_layers(net.layer[li], net, li)
        top_layers.remove((li, ln, lt))
        top_layers.extend(new_top_layers)
return all_top_layers
get_top_layers
get_top_layers walks backwards from layer end - 1; every layer whose output appears among l's inputs is added to top_layers.
top_layers = []
for layerIndex in range(0, end):
    reverse_layer_index = end - layerIndex - 1
    for blobIndex in range(0, len(net.layer[reverse_layer_index].top)):
        if net.layer[reverse_layer_index].top[blobIndex] in l.bottom:
            top_layers.append((reverse_layer_index, net.layer[reverse_layer_index].name,
                               net.layer[reverse_layer_index].type))
return top_layers
Quantization::QuantizeNet()
CheckWritePermissions(model_quantized_);
float accuracy;
Net<float>* net_test = new Net<float>(model_, caffe::TEST);
net_test->CopyTrainedLayersFrom(weights_);
RunForwardBatches(this->iterations_, net_test, &accuracy, true, this->score_number); // RangeInLayer during sampling
delete net_test;
// Do network quantization and scoring.
if (trimming_mode_ == "dynamic_fixed_point") {
  Quantize2DynamicFixedPoint();
} else {
  LOG(FATAL) << "Unknown trimming mode: " << trimming_mode_;
}
Quantization::Quantize2DynamicFixedPoint()
Find the integer length for the dynamic fixed point representation: it is chosen such that no saturation occurs, under the approximation of an infinitely long fractional part; for layer activations the integer length is reduced by one bit. For each layer, if power-of-two scaling is used, compute the integer lengths of the input and output, otherwise store the input and output magnitudes directly. Net::RangeInLayers (below) fills in layer_names_.
vector<int> lens;
vector<float> scales;
for (int i = 0; i < layer_names_.size(); ++i) {
  if (this->power) {
    il_in_.push_back((int)ceil(log2(max_in_[i])));
    il_out_.push_back((int)ceil(log2(max_out_[i])));
  } else {
    scale_in_.push_back(max_in_[i]);
    scale_out_.push_back(max_out_[i]);
  }
If the scaling mode is "single", derive the length from the overall maximum parameter; otherwise derive one length per kernel from each kernel's maximum.
  if (this->scaling == "single") {
    if (this->power)
      lens.push_back((int)ceil(log2(max_params_[i][0]) + 1));
    else
      scales.push_back(max_params_[i][0]);
  } else {
    for (int j = 0; j < max_params_[i].size(); j++) {
      if (this->power)
        lens.push_back((int)ceil(log2(max_params_[i][j]) + 1));
      else
        scales.push_back(max_params_[i][j] + 0.0);
    }
  }
Store the integer lengths (or scales) of the parameters.
  if (this->power) {
    il_params_.push_back(lens);
    lens.clear();
  } else {
    scale_params_.push_back(scales);
    scales.clear();
  }
}
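A worked example of the power-of-two branch, written in Python for brevity (the maxima are invented): an activation with maximum 6.0 needs ceil(log2(6.0)) = 3 integer bits, leaving 8 - 3 = 5 fractional bits of an 8-bit word; for weights, the +1 inside the ceil presumably reserves the sign bit:

import math

bw = 8                                   # word width in bits
max_out, max_param = 6.0, 0.87           # hypothetical sampled maxima

il_out = int(math.ceil(math.log(max_out, 2)))           # 3 integer bits
fl_out = bw - il_out                                    # 5 fractional bits
il_param = int(math.ceil(math.log(max_param, 2) + 1))   # 1 integer bit
fl_param = bw - il_param                                # 7 fractional bits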
Log the results.
// Debug
for (int k = 0; k < layer_names_.size(); ++k) {
  if (this->scaling != "single") {
    if (this->power)
      LOG(INFO) << "Layer " << layer_names_[k] << ", parameters channel=" << il_params_[k].size();
    else
      LOG(INFO) << "Layer " << layer_names_[k] << ", parameters channel=" << scale_params_[k].size();
  }
  if (this->power) {
    LOG(INFO) << "Integer length input=" << il_in_[k];
    LOG(INFO) << "Integer length output=" << il_out_[k];
  } else {
    LOG(INFO) << "Scale input=" << scale_in_[k];
    LOG(INFO) << "Scale output=" << scale_out_[k];
  }
  if (this->scaling == "single") {
    if (this->power)
      LOG(INFO) << "Integer length param=" << il_params_[k][0];
    else
      LOG(INFO) << "Scale param=" << scale_params_[k][0];
  } else {
    if (this->power) {
      for (int j = 0; j < il_params_[k].size(); j++) {
        LOG(INFO) << "Integer length params[" << j << "]=" << il_params_[k][j];
      }
    } else {
      for (int j = 0; j < scale_params_[k].size(); j++) {
        LOG(INFO) << "Scale params[" << j << "]=" << scale_params_[k][j];
      }
    }
  }
}
Set the bit widths of the convolution parameters and of the layer inputs/outputs.
// Choose bit-width for different network parts
bw_conv_params_ = 8;
bw_out_ = 8;
bw_in_ = bw_out_;
Read the network definition from the prototxt, call EditNetDescriptionDynamicFixedPoint, and save the quantized description to a file.
NetParameter param;
// Score dynamic fixed point network.
// This network combines dynamic fixed point parameters in convolutional and
// inner product layers, as well as dynamic fixed point activations.
caffe::ReadNetParamsFromTextFileOrDie(model_, &param);
EditNetDescriptionDynamicFixedPoint(&param, "Convolution",
                                    "Parameters_and_Activations", bw_conv_params_,
                                    bw_in_, bw_out_);
WriteProtoToTextFile(param, model_quantized_);
EditNetDescriptionDynamicFixedPoint
For each layer, check that layer_quantize mentions "Convolution" and that the layer itself is a convolution. std::string::npos is the maximum value of size_t; find() returns it when nothing is found.
int index = 0;
bool first_convolution = false;
for (int i = 0; i < param->layer_size(); ++i) {
  // TODO: move first convolution check to transform script
  if (layer_quantize.find("Convolution") != string::npos &&
      param->layer(i).type().find("Convolution") != string::npos) {
The first convolution layer is skipped, i.e. left unquantized; quantizing the first layer, which sees the raw input data, typically costs the most accuracy.
    if (!first_convolution) {
      first_convolution = true;
      continue;
    }
If net_part contains "Parameters", set the layer type to Convolution and set the parameter bit width. With power-of-two scaling, store the fractional lengths fl = bw_conv - il; otherwise copy the scale parameters over. GetIntegerLengthParams looks up a layer's entry in il_params_ by name; GetScaleParams does the same for scale_params_.
    // quantize parameters
    if (net_part.find("Parameters") != string::npos) {
      LayerParameter* param_layer = param->mutable_layer(i);
      param_layer->set_type("Convolution");
      if (trimming_mode_ == "dynamic_fixed_point") {
        param_layer->mutable_quantization_param()->set_bw_params(bw_conv);
        if (this->power) {
          vector<int> vals = GetIntegerLengthParams(param->layer(i).name());
          for (int j = 0; j < vals.size(); j++) {
            vals[j] = bw_conv - vals[j];
            param_layer->mutable_quantization_param()->add_fl_params(vals[j]);
          }
        } else {
          vector<float> vals = GetScaleParams(param->layer(i).name());
          for (int j = 0; j < vals.size(); j++) {
            param_layer->mutable_quantization_param()->add_scale_params(vals[j]);
          }
        }
      }
    }
If net_part contains "Activations", set the layer type to Convolution and set the input and output bit widths.
    // quantize activations
    if (net_part.find("Activations") != string::npos) {
      LayerParameter* param_layer = param->mutable_layer(i);
      param_layer->set_type("Convolution");
      if (trimming_mode_ == "dynamic_fixed_point") {
        param_layer->mutable_quantization_param()->set_bw_layer_in(bw_in);
        param_layer->mutable_quantization_param()->set_bw_layer_out(bw_out);
        if (this->power) {
          int val = GetIntegerLengthIn(param->layer(i).name());
          param_layer->mutable_quantization_param()->add_fl_layer_in(bw_in - val);
          val = GetIntegerLengthOut(param->layer(i).name());
          param_layer->mutable_quantization_param()->add_fl_layer_out(bw_out - val);
        } else {
          float val = GetScaleIn(param->layer(i).name());
          param_layer->mutable_quantization_param()->add_scale_in(val);
          val = GetScaleOut(param->layer(i).name());
          param_layer->mutable_quantization_param()->add_scale_out(val);
        }
      }
    }
Set the precision type in the layer's quantization parameters.
    LayerParameter* param_layer = param->mutable_layer(i);
    if (trimming_mode_ == "dynamic_fixed_point") {
      param_layer->mutable_quantization_param()->set_precision(caffe::QuantizationParameter_Precision(0));
    } else {
      LOG(FATAL) << "Unknown trimming mode: " << trimming_mode_;
    }
    index++;
  }
}
Net::RangeInLayers
If layer_name is empty, walk layers_, pick out the convolution layers, and initialize layer_name, max_in, max_out, and max_param.
// Initialize vector elements, if needed.
if (layer_name->size() == 0) {
  for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
    if (strcmp(layers_[layer_id]->type(), "Convolution") == 0) {
      layer_name->push_back(this->layer_names()[layer_id]);
      max_in->push_back(0);
      max_out->push_back(0);
      if (scaling == "single") {
        max_param->push_back(vector<Dtype>(1, 0));
      } else {
        int param_shape = (&(*layers_[layer_id]->blobs()[0]))->shape(0);
        max_param->push_back(vector<Dtype>(param_shape, 0));
      }
    }
  }
}
For each convolution layer, call FindMax to obtain the maximum value of the first bottom and top blobs.
// Find maximal values.
int index = 0;
vector<Dtype> max_vals;
for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
  if (strcmp(layers_[layer_id]->type(), "Convolution") == 0) {
    max_vals = FindMax(bottom_vecs_[layer_id][0]);
    max_in->at(index) = std::max(max_in->at(index), max_vals.at(0));
    max_vals = FindMax(top_vecs_[layer_id][0]);
    max_out->at(index) = std::max(max_out->at(index), max_vals.at(0));
If the scaling mode is "single", take a single maximum over all the weights; otherwise record one maximum per kernel.
    // Consider the weights only, ignore the bias
    if (scaling == "single") {
      max_vals = FindMax(&(*layers_[layer_id]->blobs()[0]));
      max_param->at(index).at(0) = std::max(max_param->at(index).at(0), max_vals.at(0));
    } else {
      max_vals = FindMax(&(*layers_[layer_id]->blobs()[0]), false);
      for (int i = 0; i < max_vals.size(); ++i)
        max_param->at(index).at(i) = std::max(max_param->at(index).at(i), max_vals.at(i));
    }
    index++;
  }
}
Net::FindMax
Get the blob data.
const Dtype* data = blob->cpu_data();
int cnt = blob->count();
vector<Dtype> max_vals;
Dtype max_val = (Dtype)(-10);
If the blob is 4-D, find the maxima with a plain loop. The variable name channel is misleading: it holds blob->shape(0), the number of output channels. Note also that in the per-channel branch max_val is never reset between channels, so each recorded maximum also covers all preceding channels.
int index = 0;
if (blob->shape().size() == 4) {
  if (is_single) {
    max_vals = vector<Dtype>(1, Dtype(-10));
    for (int i = 0; i < cnt; ++i) {
      max_val = std::max(max_val, (Dtype)fabs(data[i]));
    }
    max_vals.at(0) = max_val;
  } else {  // output_channel * input_channel * kernel_height * kernel_width
    int height = blob->shape(2);
    int width = blob->shape(3);
    int channel = blob->shape(0);
    max_vals = vector<Dtype>(channel, Dtype(-10));
    int step = blob->shape(1) * height * width;
    for (int i = 0; i < cnt; ++i) {
      if ((i + 1) % step == 0) {
        max_vals.at(index) = std::max(max_val, (Dtype)fabs(data[i]));
        ++index;
      } else {
        max_val = std::max(max_val, (Dtype)fabs(data[i]));
      }
    }
  }
Handle non-4-D blobs, e.g. 2-D inner-product weights (output_channel * input_channel).
} else {
  if (is_single) {
    max_vals = vector<Dtype>(1, Dtype(-10));
    for (int i = 0; i < cnt; ++i) {
      max_val = std::max(max_val, (Dtype)fabs(data[i]));
    }
    max_vals.at(0) = max_val;
  } else {  // output_channel * input_channel
    int channel = blob->shape(0);
    max_vals = vector<Dtype>(channel, Dtype(-10));
    int step = blob->shape(1);
    for (int i = 0; i < cnt; ++i) {
      if ((i + 1) % step == 0) {
        max_vals.at(index) = std::max(max_val, (Dtype)fabs(data[i]));
        ++index;
      } else {
        max_val = std::max(max_val, (Dtype)fabs(data[i]));
      }
    }
  }
}
return max_vals;
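What FindMax computes is easier to see in NumPy terms; here is a sketch for a 4-D weight blob (matching the intent of the code, i.e. ignoring the max_val carry-over noted above):

import numpy as np

def find_max_sketch(weights, is_single=True):
    # weights: array of shape (out_channels, in_channels, kernel_h, kernel_w)
    if is_single:
        return [float(np.abs(weights).max())]   # one maximum over the whole blob
    # one maximum per output channel, i.e. per kernel
    return list(np.abs(weights).reshape(weights.shape[0], -1).max(axis=1))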
Quantization::RunForwardBatches
Initialize the variables. bottom_vec stays empty, so the network uses the input defined in its prototxt.
LOG(INFO) << "Running for " << iterations << " iterations.";
vector<Blob<float>* > bottom_vec;
vector<int> test_score_output_id;
vector<float> test_score;
float loss = 0;
Each iteration runs a forward pass, gathers the value ranges of the convolution layers, and accumulates the loss.
for (int i = 0; i < iterations; ++i) {
  float iter_loss;
  // Do forward propagation.
  const vector<Blob<float>*>& result =
      caffe_net->Forward(bottom_vec, &iter_loss);
  // Find maximal values in network.
  if (do_stats) {
    caffe_net->RangeInLayers(&layer_names_, &max_in_, &max_out_, &max_params_, this->scaling);
  }
  // Keep track of network score over multiple batches.
  loss += iter_loss;
For detection, skip the scoring below.
  if (this->detection) continue;
Accumulate the per-iteration test scores.
  int idx = 0;
  for (int j = 0; j < result.size(); ++j) {
    const float* result_vec = result[j]->cpu_data();
    for (int k = 0; k < result[j]->count(); ++k, ++idx) {
      const float score = result_vec[k];
      if (i == 0) {
        test_score.push_back(score);
        test_score_output_id.push_back(j);
      } else {
        test_score[idx] += score;
      }
    }
  }
  LOG(INFO) << "Iteration: " << i;
}
Average the loss; for detection, return here.
loss /= iterations;
LOG(INFO) << "Loss: " << loss;
if (this->detection) return;
Compute the loss weight and mean score for each output blob.
for (int i = 0; i < test_score.size(); ++i) {
  const float loss_weight = caffe_net->blob_loss_weights()[
      caffe_net->output_blob_indices()[test_score_output_id[i]]];
  std::ostringstream loss_msg_stream;
  const float mean_score = test_score[i] / iterations;
  if (loss_weight) {
    loss_msg_stream << " (* " << loss_weight
                    << " = " << loss_weight * mean_score << " loss)";
  }
}
*accuracy = test_score[score_number] / iterations;