1.方式一: converter.post_training_quantize=True
def convert_model_qtv1():
    """Convert the frozen graph to TFLite with ``post_training_quantize`` enabled.

    Loads ``model_convert/ckpt_new/densenet_ocr.pb`` with a fixed
    ``[1, 32, 280, 1]`` input named ``Placeholder``, sets the converter's
    ``post_training_quantize`` flag, and writes the converted flatbuffer to
    ``model_convert/ckpt_new/densenet_ocr_metal_v1.tflite``.

    Returns:
        None. The only result is the ``.tflite`` file written to disk.
    """
    converter = tf.lite.TFLiteConverter.from_frozen_graph(
        'model_convert/ckpt_new/densenet_ocr.pb',
        input_arrays=["Placeholder"],
        input_shapes={"Placeholder": [1, 32, 280, 1]},
        output_arrays=["sequence_rnn_module/transpose_time_major"],
    )
    # NOTE(review): this is the legacy quantization switch; newer TensorFlow
    # releases document `converter.optimizations` as its replacement --
    # confirm against the installed TF version.
    converter.post_training_quantize = True
    tflite_model = converter.convert()
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("model_convert/ckpt_new/densenet_ocr_metal_v1.tflite", "wb") as out_file:
        out_file.write(tflite_model)
2.方式二:Dynamic range quantization
The simplest form of post-training quantization statically quantizes only the weights, converting them from floating point to integers with 8 bits of precision:
def convert_model_qtv1():
    """Convert the frozen graph to TFLite using dynamic range quantization.

    Loads ``model_convert/ckpt_new/densenet_ocr.pb`` with a fixed
    ``[1, 32, 280, 1]`` input named ``Placeholder``, enables
    ``tf.lite.Optimize.DEFAULT`` without a representative dataset, and writes
    the result to ``model_convert/ckpt_new/densenet_ocr_metal-DY_v1.tflite``.

    Returns:
        None. The only result is the ``.tflite`` file written to disk.
    """
    converter = tf.lite.TFLiteConverter.from_frozen_graph(
        'model_convert/ckpt_new/densenet_ocr.pb',
        input_arrays=["Placeholder"],
        input_shapes={"Placeholder": [1, 32, 280, 1]},
        output_arrays=["sequence_rnn_module/transpose_time_major"],
    )
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("model_convert/ckpt_new/densenet_ocr_metal-DY_v1.tflite", "wb") as out_file:
        out_file.write(tflite_model)
3.方式三:Full integer quantization
Integer with float fallback (using default float input/output):
def representative_dataset(num_samples=100, input_shape=(1, 32, 280, 1)):
    """Yield random float32 calibration batches for the TFLite converter.

    The defaults reproduce the original hard-coded behaviour, so the function
    can still be assigned directly as a zero-argument callable. For a model
    with a different input, bind other values (e.g. with
    ``functools.partial``) instead of editing the body.

    Args:
        num_samples: Number of batches to yield.
        input_shape: Shape of each generated array. It must match the input
            shape of the model being converted.

    Yields:
        A one-element list holding a ``float32`` array of ``input_shape``
        filled with uniform random values in ``[0, 1)``.
    """
    for _ in range(num_samples):
        # The data shape must agree with the model's input shape.
        data = np.random.rand(*input_shape)
        yield [data.astype(np.float32)]
def convert_model_qtv1():
    """Convert the frozen graph to TFLite with representative-dataset calibration.

    Loads ``model_convert/ckpt_new/densenet_ocr.pb`` with a fixed
    ``[1, 32, 280, 1]`` input named ``Placeholder``, enables
    ``tf.lite.Optimize.DEFAULT``, supplies ``representative_dataset`` as the
    calibration generator, and writes the result to
    ``model_convert/ckpt_new/densenet_ocr_metal-int_v1.tflite``.

    Returns:
        None. The only result is the ``.tflite`` file written to disk.
    """
    converter = tf.lite.TFLiteConverter.from_frozen_graph(
        'model_convert/ckpt_new/densenet_ocr.pb',
        input_arrays=["Placeholder"],
        input_shapes={"Placeholder": [1, 32, 280, 1]},
        output_arrays=["sequence_rnn_module/transpose_time_major"],
    )
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # The converter calls this generator itself to obtain calibration samples,
    # so the function object is assigned, not its result.
    converter.representative_dataset = representative_dataset
    tflite_model = converter.convert()
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("model_convert/ckpt_new/densenet_ocr_metal-int_v1.tflite", "wb") as out_file:
        out_file.write(tflite_model)
4.方式四:Float16 quantization
To enable float16 quantization of weights, use the following steps:
def convert_model_qtv1():
    """Convert the frozen graph to TFLite with float16 as the supported type.

    Loads ``model_convert/ckpt_new/densenet_ocr.pb`` with a fixed
    ``[1, 32, 280, 1]`` input named ``Placeholder``, enables
    ``tf.lite.Optimize.DEFAULT``, restricts
    ``target_spec.supported_types`` to ``tf.float16``, and writes the result
    to ``model_convert/ckpt_new/densenet_ocr_metal-fl16_v1.tflite``.

    Returns:
        None. The only result is the ``.tflite`` file written to disk.
    """
    converter = tf.lite.TFLiteConverter.from_frozen_graph(
        'model_convert/ckpt_new/densenet_ocr.pb',
        input_arrays=["Placeholder"],
        input_shapes={"Placeholder": [1, 32, 280, 1]},
        output_arrays=["sequence_rnn_module/transpose_time_major"],
    )
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_types = [tf.float16]
    tflite_model = converter.convert()
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("model_convert/ckpt_new/densenet_ocr_metal-fl16_v1.tflite", "wb") as out_file:
        out_file.write(tflite_model)
转换之后的tflite大小对比:
---没量化前
---方式一
---方式二
---方式三
---方式四