TensorFlow Lite分析
Toco
Quantization
量化后数据类型
默认量化数据类型为:
// tensorflow-r1.12/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
// Default case: request the quantized type for `array`, passing kUint8 as the
// default. NOTE(review): GetQuantizedDataType presumably lets an explicit type
// on the array override this default -- confirm against its definition.
*quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8);
特殊 OP 的量化数据类型:
// Special-case for LSTM cells: when the input being processed is the
// previous-state input (PREV_STATE_INPUT), the quantized type is forced to
// int16 instead of going through GetQuantizedDataType.
if (op.type == OperatorType::kLstmCell) {
if (input_index == LstmCellOperator::PREV_STATE_INPUT) {
*quantized_data_type = ArrayDataType::kInt16;
// Choose the quantization params for this array using the int16 type
// selected just above.
ChooseQuantizationParamsForArrayAndQuantizedDataType(
array, *quantized_data_type, quantization_params);
// Leave the enclosing function immediately; the code after this excerpt is
// skipped for this input. NOTE(review): `true` presumably reports that
// quantization params were chosen -- confirm against the enclosing function.
return true;
}
}
Bias 量化的默认数据类型:
// Bias vectors: request the quantized type with kInt32 as the default instead
// of kUint8. The `...` lines mark code elided from this excerpt.
if (is_bias_vector) {
...
*quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kInt32);
...
}
量化过程
// Quantize a single source value `src_val` and store the result at index `i`
// of the quantized buffer.
if (quantization_params.scale == 0) {
// A zero scale is only valid when the value is 0 (enforced by the check);
// such a value maps directly onto the zero point.
CHECK_EQ(src_val, 0) << "The quantization scale for this array is 0, "
<< "so all its values should be 0.";
scaled_val = quantization_params.zero_point;
} else {
// Affine mapping: zero_point + inverse_scale * src_val.
// NOTE(review): inverse_scale is defined outside this excerpt; presumably
// 1 / quantization_params.scale -- confirm.
scaled_val = quantization_params.zero_point + inverse_scale * src_val;
}
// Round to the nearest integer, then convert to the quantized element type
// DataType<A> through tflite::SafeCast (its out-of-range handling is not
// visible in this excerpt).
auto integer_val = tflite::SafeCast<DataType<A>>(std::round(scaled_val));
// In addition to its effect on the choice of quantization params upstream
// of here, narrow_range also means nudge the min quantized value by +1,
// so e.g. uint8 values get constrained to [1, 255].
if (integer_val == std::numeric_limits<DataType<A>>::min() &&
array.narrow_range) {
integer_val++;
}
quantized_buffer->data[i] = integer_val;
QuantizationParams
scale 和 zero_point 的计算过程
// Picks the affine quantization parameters (a real-valued scale and an integer
// zero point) that map the real interval [rmin, rmax] onto the quantized range
// of the integer type T.
//
// The quantized range is [numeric_limits<T>::min(), numeric_limits<T>::max()],
// with the lower bound raised by one when `narrow_range` is set. The interval
// must satisfy rmin <= 0 <= rmax (enforced with TFLITE_CHECK_*). A degenerate
// interval (rmin == rmax, which must then be exactly {0}) yields
// zero_point = 0 and scale = 0.
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
                                            bool narrow_range) {
  using Limits = std::numeric_limits<T>;

  // Bounds of the quantized range, both as T and as double for the arithmetic.
  const T qmin = narrow_range ? Limits::min() + 1 : Limits::min();
  const T qmax = Limits::max();
  const double lowest_q = qmin;
  const double highest_q = qmax;

  // The real value 0 has to be representable, so the interval must contain it.
  TFLITE_CHECK_LE(rmin, 0.);
  TFLITE_CHECK_GE(rmax, 0.);

  if (rmin == rmax) {
    // The interval collapsed to a single point; the only legal point is 0.
    TFLITE_CHECK_EQ(rmin, 0.);
    TFLITE_CHECK_EQ(rmax, 0.);
    QuantizationParams degenerate;
    degenerate.zero_point = 0;
    degenerate.scale = 0.;
    return degenerate;
  }

  // General case: the scale is the real-valued width of one quantized step.
  const double scale = (rmax - rmin) / (highest_q - lowest_q);

  // The zero point follows from the affine relation applied to any known
  // (real, quantized) pair; both (rmin, qmin) and (rmax, qmax) are available.
  // The rounding error of each candidate is roughly machine_epsilon times the
  // sum of the magnitudes of its terms, so the candidate built from the
  // smaller terms is preferred.
  const double from_min = lowest_q - rmin / scale;
  const double from_max = highest_q - rmax / scale;
  const double from_min_error = std::abs(lowest_q) + std::abs(rmin / scale);
  const double from_max_error = std::abs(highest_q) + std::abs(rmax / scale);
  double real_zero_point = from_max;
  if (from_min_error < from_max_error) {
    real_zero_point = from_min;
  }

  // Zero points are integers so that the real value 0 is exactly
  // representable (needed e.g. by Im2col with SAME padding). Clamp the
  // candidate into [qmin, qmax], otherwise round it to the nearest integer.
  const T nudged_zero_point =
      real_zero_point < lowest_q
          ? qmin
          : (real_zero_point > highest_q
                 ? qmax
                 : static_cast<T>(std::round(real_zero_point)));

  // Sanity check: the zero point must lie inside the quantized range.
  TFLITE_CHECK_GE(nudged_zero_point, qmin);
  TFLITE_CHECK_LE(nudged_zero_point, qmax);

  QuantizationParams quantization_params;
  quantization_params.zero_point = nudged_zero_point;
  quantization_params.scale = scale;
  return quantization_params;
}