TensorFlow Lite分析

Toco

Quantization

量化后数据类型

默认量化数据类型为:

// tensorflow-r1.12/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
*quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8)

特殊 OP 的量化数据类型:

  if (op.type == OperatorType::kLstmCell) {
    if (input_index == LstmCellOperator::PREV_STATE_INPUT) {
      *quantized_data_type = ArrayDataType::kInt16;
      ChooseQuantizationParamsForArrayAndQuantizedDataType(
          array, *quantized_data_type, quantization_params);
      return true;
    }
  }

Bias 量化的默认数据类型:

  if (is_bias_vector) {
	...
    *quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kInt32);
	...
  }

量化过程

    if (quantization_params.scale == 0) {
      CHECK_EQ(src_val, 0) << "The quantization scale for this array is 0, "
                           << "so all its values should be 0.";
      scaled_val = quantization_params.zero_point;
    } else {
      scaled_val = quantization_params.zero_point + inverse_scale * src_val;
    }
    auto integer_val = tflite::SafeCast<DataType<A>>(std::round(scaled_val));
    // In addition to its effect on the choice of quantization params upstream
    // of here, narrow_range also means nudge the min quantized value by +1,
    // so e.g. uint8 values get constrained to [1, 255].
    if (integer_val == std::numeric_limits<DataType<A>>::min() &&
        array.narrow_range) {
      integer_val++;
    }
    quantized_buffer->data[i] = integer_val;

QuantizationParams

scale 和 zeropoint 的计算过程

template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
                                            bool narrow_range) {
  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
  const T qmax = std::numeric_limits<T>::max();
  const double qmin_double = qmin;
  const double qmax_double = qmax;
  // 0 should always be a representable value. Let's assume that the initial
  // min,max range contains 0.
  TFLITE_CHECK_LE(rmin, 0.);
  TFLITE_CHECK_GE(rmax, 0.);
  if (rmin == rmax) {
    // Special case where the min,max range is a point. Should be {0}.
    TFLITE_CHECK_EQ(rmin, 0.);
    TFLITE_CHECK_EQ(rmax, 0.);
    QuantizationParams quantization_params;
    quantization_params.zero_point = 0;
    quantization_params.scale = 0.;
    return quantization_params;
  }

  // General case.
  //
  // First determine the scale.
  const double scale = (rmax - rmin) / (qmax_double - qmin_double);

  // Zero-point computation.
  // First the initial floating-point computation. The zero-point can be
  // determined from solving an affine equation for any known pair
  // (real value, corresponding quantized value).
  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
  // The arithmetic error on the zero point computed from either pair
  // will be roughly machine_epsilon * (sum of absolute values of terms)
  // so we want to use the variant that adds the smaller terms.
  const double zero_point_from_min = qmin_double - rmin / scale;
  const double zero_point_from_max = qmax_double - rmax / scale;
  const double zero_point_from_min_error =
      std::abs(qmin_double) + std::abs(rmin / scale);
  const double zero_point_from_max_error =
      std::abs(qmax_double) + std::abs(rmax / scale);

  const double zero_point_double =
      zero_point_from_min_error < zero_point_from_max_error
          ? zero_point_from_min
          : zero_point_from_max;

  // Now we need to nudge the zero point to be an integer
  // (our zero points are integer, and this is motivated by the requirement
  // to be able to represent the real value "0" exactly as a quantized value,
  // which is required in multiple places, for example in Im2col with SAME
  // padding).
  T nudged_zero_point = 0;
  if (zero_point_double < qmin_double) {
    nudged_zero_point = qmin;
  } else if (zero_point_double > qmax_double) {
    nudged_zero_point = qmax;
  } else {
    nudged_zero_point = static_cast<T>(round(zero_point_double));
  }
  // The zero point should always be in the range of quantized value,
  // [qmin, qmax].
  TFLITE_CHECK_GE(nudged_zero_point, qmin);
  TFLITE_CHECK_LE(nudged_zero_point, qmax);

  // Finally, store the result nudged quantization params.
  QuantizationParams quantization_params;
  quantization_params.zero_point = nudged_zero_point;
  quantization_params.scale = scale;
  return quantization_params;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值