1、卷积的量化参数(权重、输出的min max由fakequant给定)
2、Pooling、Reshape层输出量化参数与输入一致
3、concat(ConcatenationWithScaling)(https://github.com/tensorflow/tensorflow/blob/f1f5dfc5b8f66f2d52594283583c568ec06c045b/tensorflow/lite/tools/optimize/quantize_model_test.cc)
// Excerpt: concatenates uint8 inputs into one uint8 output, rescaling any
// input whose zero point or scale differs from the output's.
// Reciprocal of the output scale, computed once so the per-element work below
// is a multiply.
const float inverse_output_scale = 1.f / output_scale;
// Write cursor into the output buffer; advanced after each input slice.
uint8* output_ptr = output_data;
// For every outer index k, emit one slice from each input, in input order.
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
// Elements input i contributes per outer index: its extent along `axis`
// times base_inner_size.
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
// Start of the k-th slice of input i.
const uint8* input_ptr = input_data[i] + k * copy_size;
if (input_zeropoint[i] == output_zeropoint &&
input_scale[i] == output_scale) {
// Zero point and scale both match the output, so the raw uint8 values are
// copied unchanged.
memcpy(output_ptr, input_ptr, copy_size);
} else {
// Requantize each element:
//   out = round(in * scale + bias) + output_zeropoint
// where scale = input_scale / output_scale and
// bias = -input_zeropoint * scale.
const float scale = input_scale[i] * inverse_output_scale;
const float bias = -input_zeropoint[i] * scale;
for (int j = 0; j < copy_size; ++j) {
const int32_t value =
static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) +
output_zeropoint;
// Clamp to [0, 255] before narrowing to uint8.
output_ptr[j] =
static_cast<uint8_t>(std::max(std::min(255, value), 0));
}
}
// Move the write cursor past the slice just emitted.
output_ptr += copy_size;
}
}
也就是说,如果某个输入的量化参数(zero_point/scale,对应它的min/max)和输出的不一样,就需要按输出的量化参数对该输入的数据重新量化,然后再做concat;参数一致的输入则直接memcpy。但是输出的min/max如何确定呢?还在找。
看到一部分代码,先抄一下:
// Fills in the min/max range of a concatenation output array.
//
// A range that is already present on `concatenated_array` (e.g. because of a
// previous pass in TOCO) is kept as-is. Otherwise the range becomes the
// smallest input min and the largest input max. If any input array carries no
// min/max, nothing is set.
void SetMinMaxForConcatenedArray(GraphTransformation* transformation,
                                 const std::vector<Array*>& input_arrays,
                                 Array* concatenated_array) {
  CHECK(concatenated_array->data_type == ArrayDataType::kFloat);

  // Respect a range that has already been decided.
  if (concatenated_array->minmax) {
    return;
  }

  double lowest = std::numeric_limits<double>::infinity();
  double highest = -std::numeric_limits<double>::infinity();
  for (Array* source : input_arrays) {
    // Bail out as soon as one input has no range.
    // TODO(ghodrat): shall we add the logic to compute the minmax?
    if (!source->minmax) {
      return;
    }
    const MinMax& source_range = source->GetMinMax();
    if (source_range.min < lowest) {
      lowest = source_range.min;
    }
    if (highest < source_range.max) {
      highest = source_range.max;
    }
  }

  MinMax& output_range = concatenated_array->GetOrCreateMinMax();
  output_range.min = lowest;
  output_range.max = highest;
  transformation->AddMessageF("Setting concatenated array min/max to %g,%g",
                              lowest, highest);
}
也就是说concat输出的min取所有输入min中的最小值,max取所有输入max中的最大值,但是这和实际看到的结果不一样,需要继续找。
如果change_concat_input_ranges设置为true的话,concat的输出就直接沿用第一个输入的量化参数(zero_point和scale,也就是第一个输入的min/max)?
// Excerpt: for the operator types listed below, the output array's
// quantization parameters are copied from one of the operator's inputs.
// Concatenation is included only when the change_concat_input_ranges flag is
// set.
if ((op.type == OperatorType::kConcatenation &&
model->flags.change_concat_input_ranges()) ||
op.type == OperatorType::kDepthToSpace ||
op.type == OperatorType::kSpaceToDepth ||
op.type == OperatorType::kReshape || op.type == OperatorType::kSplit ||
op.type == OperatorType::kRelu || op.type == OperatorType::kRelu1 ||
op.type == OperatorType::kRelu6 || op.type == OperatorType::kPRelu) {
// Index of the input whose parameters are copied: input 0 for every listed
// operator except Split, which uses input 1.
// NOTE(review): presumably Split's input 0 is the split dimension rather than
// the data - confirm.
int data_input_index = 0;
if (op.type == OperatorType::kSplit) {
data_input_index = 1;
}
// Copying and rearrangement ops should preserve the quantization parameters
// of the input array.
const auto& input_array = model->GetArray(op.inputs[data_input_index]);
const auto& input_quantization_params = input_array.GetQuantizationParams();
// The quantized type is resolved in two calls: first from the input array
// with kUint8 as the second argument, then from `array` with the first
// result as the second argument.
// NOTE(review): the second argument looks like a fallback default - confirm
// against GetQuantizedDataType.
*quantized_data_type =
GetQuantizedDataType(input_array, ArrayDataType::kUint8);
*quantized_data_type = GetQuantizedDataType(array, *quantized_data_type);
// Reuse the chosen input's zero point and scale unchanged.
quantization_params->zero_point = input_quantization_params.zero_point;
quantization_params->scale = input_quantization_params.scale;
transformation->AddMessageF(
"Output array %s is produced by a %s operator. Copying quantization "
"params from input array.",
output, OperatorTypeName(op.type));
return true;
}