387_YuNetV2 TFLite integer models cannot allocate tensors
RyanHir opened this issue · comments
Issue Type
Bug
OS
Ubuntu, Other
OS architecture
x86_64
Programming Language
Python
Framework
TensorFlowLite
Model name and Weights/Checkpoints URL
face_detection_yunet_2023mar_float32.tflite
face_detection_yunet_2023mar_integer_quant.tflite
https://s3.ap-northeast-2.wasabisys.com/pinto-model-zoo/387_YuNetV2/resources.tar.gz
Description
When using the integer quantized model of 387_YuNetV2, TensorFlow Lite is unable to allocate the tensors, whereas with the float32 TFLite model the interpreter allocates them without error. When using 144_YuNet, both the float32 and int8 quantized models work.
Target Machine:
Arch Linux
Python 3.11
TensorFlow Lite 2.13.0
Relevant Log Output
### Float32
2023-07-27 11:26:10.113918: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-27 11:26:10.166771: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-27 11:26:10.167247: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-27 11:26:11.068221: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
Using TensorFlow 2.13.0
Using model saved_model/face_detection_yunet_2023mar_float32.tflite
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Allocated Tensors
### integer quant
2023-07-27 11:26:40.671847: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-27 11:26:40.722477: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-07-27 11:26:40.722923: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-27 11:26:41.599576: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
Using TensorFlow 2.13.0
Using model saved_model/face_detection_yunet_2023mar_integer_quant.tflite
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
Traceback (most recent call last):
File "/home/user/Downloads/resources/./test.py", line 34, in <module>
interpreter.allocate_tensors()
File "/home/user/Downloads/resources/env/lib/python3.11/site-packages/tensorflow/lite/python/interpreter.py", line 531, in allocate_tensors
return self._interpreter.AllocateTensors()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: tensorflow/lite/kernels/activations.cc:470 output->params.scale == 1. / 256 was not true.Node number 41 (LOGISTIC) failed to prepare.Failed to apply the default TensorFlow Lite delegate indexed at 0.
URL or source code for simple inference testing code
#! /usr/bin/env python3
import numpy as np
import cv2
import tensorflow as tf
print("Using TensorFlow", tf.__version__)

# Toggle between the float32 model (truthy) and the integer-quantized
# model (falsy).
FLOAT = 0
if FLOAT:
    model = "saved_model/face_detection_yunet_2023mar_float32.tflite"
else:
    model = "saved_model/face_detection_yunet_2023mar_integer_quant.tflite"
print("Using model", model)

interpreter = tf.lite.Interpreter(model_path=model)

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Read the expected frame size from the first input's shape, which is
# unpacked here as (batch, H, W, channels).
input_shape = input_details[0]['shape']
_, H, W, _ = input_shape
def splitter(fn: str) -> tuple[str, int]:
    """Split a name into its prefix and trailing integer index.

    The name is cut at the rightmost ``'_'``; everything before it is the
    prefix and everything after it is parsed as an integer, so the result
    can be used as a sort key that orders numerically by the suffix.

    Args:
        fn: Name of the form ``"<prefix>_<integer>"``.

    Returns:
        A ``(prefix, index)`` tuple.

    Raises:
        ValueError: If ``fn`` contains no ``'_'`` or the text after the
            rightmost ``'_'`` is not an integer.
    """
    # split at the rightmost '_'
    name, num = fn.rsplit('_', 1)
    return name, int(num)
# Order the outputs by (name prefix, numeric suffix) so they are visited in
# a stable, numerically sorted order.
output_details.sort(key=lambda x: splitter(x["name"]))
interpreter.allocate_tensors()
print("Allocated Tensors")

cap = cv2.VideoCapture(0)
try:
    while True:
        ret, img = cap.read()
        if not ret:
            # No frame was returned; stop rather than hand an invalid
            # image to cv2.resize.
            break
        # Resize to the model's input size, scale pixel values to [0, 1]
        # and add the leading batch dimension.
        blob = cv2.resize(img, (W, H))
        blob = np.float32(blob) / 255.
        blob = np.expand_dims(blob, axis=0)
        interpreter.set_tensor(input_details[0]['index'], blob)
        interpreter.invoke()
        # Collect every output tensor, keyed by its tensor name.
        output_data = {
            output_detail["name"]: interpreter.get_tensor(output_detail['index'])
            for output_detail in output_details
        }
        # print(output_data)
finally:
    # Always free the capture device, even if inference raises.
    cap.release()
// Preparation checks for the quantized sigmoid (LOGISTIC) op when the
// generic-optimized or reference kernel is selected. For 8-bit inputs the
// output scale must equal exactly 1/256 and a lookup table of sigmoid
// values is populated; for 16-bit inputs the output scale must equal
// 1/32768 with a zero point of 0.
if (kernel_type == kGenericOptimized || kernel_type == kReference) {
if (input->type == kTfLiteUInt8) {
// uint8: fail preparation unless the output scale is exactly 1/256.
TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
// Populate the uint8 lookup table with 1 / (1 + exp(-x)), using the
// input and output quantization parameters.
LUTPopulate<uint8_t>(
input->params.scale, input->params.zero_point, output->params.scale,
output->params.zero_point,
[](float value) { return 1.0f / (1.0f + std::exp(-value)); },
data->lut_uint8);
} else if (input->type == kTfLiteInt8) {
// int8: same exact-scale requirement. NOTE(review): presumably this is
// the check reported in the traceback for the int8 model, whose
// LOGISTIC output scale is 0.01 rather than 1/256 -- confirm against
// the line number in the error message.
TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
// Populate the int8 lookup table with the same sigmoid function.
LUTPopulate<int8_t>(
input->params.scale, input->params.zero_point, output->params.scale,
output->params.zero_point,
[](float value) { return 1.0f / (1.0f + std::exp(-value)); },
data->lut_int8);
} else if (input->type == kTfLiteInt16) {
// int16: output scale must be exactly 1/32768 and the zero point 0;
// no lookup table is populated in this branch.
TF_LITE_ENSURE(context, output->params.scale == 1. / 32768);
TF_LITE_ENSURE(context, output->params.zero_point == 0);
}
}
from pprint import pprint
# Dump the failing op (node 41) followed by the tensors around it, so the
# quantization scales can be compared with the 1/256 reference printed last.
print('')
pprint(interpreter._get_op_details(41))
for tensor_index in (159, 160, 161):
    print('')
    tensor_info = interpreter.get_tensor_details()[tensor_index]
    pprint(tensor_info)
    scales = tensor_info['quantization_parameters']['scales']
    print(f"TFLite quant param: {scales}")
print('')
print(f"1. / 256: {np.asarray([1./256.], dtype=np.float32)}")
{'index': 41,
'inputs': array([159], dtype=int32),
'op_name': 'LOGISTIC',
'outputs': array([160], dtype=int32)}
{'dtype': <class 'numpy.int8'>,
'index': 159,
'name': 'model/tf.math.add_53/Add;model/tf.nn.convolution_18/convolution;model/tf.nn.convolution_31/convolution;Const_6',
'quantization': (0.010518999770283699, -101),
'quantization_parameters': {'quantized_dimension': 0,
'scales': array([0.010519], dtype=float32),
'zero_points': array([-101], dtype=int32)},
'shape': array([ 1, 80, 80, 1], dtype=int32),
'shape_signature': array([ 1, 80, 80, 1], dtype=int32),
'sparsity_parameters': {}}
TFLite quant param: [0.010519]
{'dtype': <class 'numpy.int8'>,
'index': 160,
'name': 'model/cls_8/Sigmoid;model/tf.reshape_8/Reshape',
'quantization': (0.009999999776482582, -128),
'quantization_parameters': {'quantized_dimension': 0,
'scales': array([0.01], dtype=float32),
'zero_points': array([-128], dtype=int32)},
'shape': array([ 1, 80, 80, 1], dtype=int32),
'shape_signature': array([ 1, 80, 80, 1], dtype=int32),
'sparsity_parameters': {}}
TFLite quant param: [0.01]
{'dtype': <class 'numpy.int8'>,
'index': 161,
'name': 'PartitionedCall:51',
'quantization': (0.0039059999398887157, -128),
'quantization_parameters': {'quantized_dimension': 0,
'scales': array([0.003906], dtype=float32),
'zero_points': array([-128], dtype=int32)},
'shape': array([ 1, 6400, 1], dtype=int32),
'shape_signature': array([ 1, 6400, 1], dtype=int32),
'sparsity_parameters': {}}
TFLite quant param: [0.003906]
1. / 256: [0.00390625]