forked from alibaba/TinyNeuralNetwork
-
Notifications
You must be signed in to change notification settings - Fork 0
/
converter_qat_test.py
169 lines (126 loc) · 4.85 KB
/
converter_qat_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import torch
import gc
import os
import re
import sys
import unittest
import numpy as np
from tinynn.converter import TFLiteConverter
from tinynn.graph.tracer import model_tracer
from tinynn.graph.quantization.quantizer import QATQuantizer
from common_utils import IS_CI, collect_torchvision_models, prepare_inputs
# Optional TensorFlow dependency: when present, converted TFLite models are
# also executed through the TFLite interpreter as an output sanity check.
try:
    import tensorflow as tf

    HAS_TF = True
except ImportError:
    HAS_TF = False
def data_to_tf(inputs, input_transpose):
    """Convert torch input tensors to numpy arrays suitable for TFLite.

    Args:
        inputs: sequence of torch tensors (detached and moved to CPU here).
        input_transpose: sequence of bools parallel to ``inputs``; when True,
            the corresponding array is permuted from NCHW to NHWC — the layout
            TFLite expects. Assumes those tensors are 4-d (TODO confirm with
            the converter's ``input_transpose`` contract).

    Returns:
        list of numpy arrays, one per input, in the original order.
    """
    # zip the tensors with their transpose flags instead of indexing two
    # parallel lists by position.
    tf_inputs = []
    for tensor, transpose in zip(inputs, input_transpose):
        arr = tensor.cpu().detach().numpy()
        if transpose:
            # NCHW -> NHWC
            arr = np.transpose(arr, [0, 2, 3, 1])
        tf_inputs.append(arr)
    return tf_inputs
def get_tflite_out(model_path, inputs):
    """Run a TFLite model from disk and return all of its outputs.

    Args:
        model_path: filesystem path to the ``.tflite`` model.
        inputs: list of numpy arrays ordered to match the model's inputs.

    Returns:
        list of numpy arrays, one per model output.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Bind each provided array to the matching input tensor, in order.
    for detail, data in zip(input_details, inputs):
        interpreter.set_tensor(detail['index'], data)

    interpreter.invoke()

    # Fetch every output tensor after the run.
    return [interpreter.get_tensor(detail['index']) for detail in output_details]
# Unsupported models — each entry is a regex matched against the model class
# name with re.match (anchored at the start of the name), so tests for any
# matching model are skipped.
# resnext: group convs
# regnet: group convs
# yolov4: 5-d slices (presumably the 'Build_Model' entry below — TODO confirm)
# swin: roll
# vit: native_multi_head_attention
BLACKLIST = (
    'resnext.*',
    'Build_Model',
    'regnet.*',
    'swin.*',
    'vit.*',
)
class TestModelMeta(type):
    """Metaclass that synthesizes one QAT-conversion test per torchvision model.

    Test methods are generated in ``__prepare__`` (i.e. before the class body
    executes), so ``unittest`` discovers them as ordinary ``test_*`` members
    of the class using this metaclass.
    """

    @classmethod
    def __prepare__(mcls, name, bases):
        # Build the class namespace pre-populated with generated test methods.
        d = dict()
        test_classes = collect_torchvision_models()
        for test_class in test_classes:
            test_name = f'test_torchvision_model_{test_class.__name__}'
            simple_test_name = test_name + '_simple'
            d[simple_test_name] = mcls.build_model_test(test_class)
        # Custom-model tests are currently disabled; kept for reference.
        # test_classes = collect_custom_models()
        # for test_class in test_classes:
        #     test_name = f'test_custom_model_{test_class.__name__}'
        #     simple_test_name = test_name + '_simple'
        #     d[simple_test_name] = mcls.build_model_test(test_class)
        return d

    @classmethod
    def build_model_test(cls, model_class):
        """Return a test function that QAT-quantizes ``model_class`` and converts it to TFLite."""

        def prepare_q_model(model_name):
            """Instantiate the model, trace it, and return (QAT model, example inputs)."""
            args = ()
            kwargs = dict()
            # These torchvision models default to auxiliary heads; disable them
            # so the traced graph has a single output.
            if model_name in ('googlenet', 'inception_v3'):
                kwargs = {'aux_logits': False}
            with model_tracer():
                m = model_class(*args, **kwargs)
                m.cpu()
                m.eval()
                inputs = prepare_inputs(m)
                config = {'remove_weights_after_load': True, 'ignore_layerwise_config': True}
                # On Windows, use the fbgemm backend with per-channel
                # quantization (per_tensor disabled).
                if sys.platform == 'win32':
                    config.update({'backend': 'fbgemm', 'per_tensor': False})
                quantizer = QATQuantizer(m, inputs, work_dir='out', config=config)
                qat_model = quantizer.quantize()
            return qat_model, inputs

        def f(self):
            model_name = model_class.__name__
            model_file = model_name
            model_file += '_qat_simple'
            # Skip blacklisted (unconvertible) models and ones already tested
            # in a previous run (their .tflite artifact exists).
            for item in BLACKLIST:
                if re.match(item, model_name):
                    raise unittest.SkipTest('IN BLACKLIST')
            if os.path.exists(f'out/{model_file}.tflite'):
                raise unittest.SkipTest('TESTED')
            qat_model, inputs = prepare_q_model(model_name)
            with torch.no_grad():
                qat_model.eval()
                qat_model.cpu()
                # Convert the fake-quantized QAT model to a real quantized model.
                qat_model = torch.quantization.convert(qat_model)
                out_path = f'out/{model_file}.tflite'
                extra_kwargs = {}
                # On CI, dump/reload the JIT model to keep peak memory lower.
                if IS_CI:
                    out_pt = f'out/{model_file}.pt'
                    extra_kwargs.update({'dump_jit_model_path': out_pt, 'gc_when_reload': True})
                if sys.platform == 'win32':
                    extra_kwargs.update({'quantize_target_type': 'int8'})
                converter = TFLiteConverter(qat_model, inputs, out_path, **extra_kwargs)
                converter.convert()
                if IS_CI:
                    os.remove(out_pt)
                # When TensorFlow is available, run the converted model through
                # the TFLite interpreter and check the output count matches.
                if HAS_TF:
                    outputs = converter.get_outputs()
                    input_transpose = converter.input_transpose
                    input_tf = data_to_tf(inputs, input_transpose)
                    tf_outputs = get_tflite_out(out_path, input_tf)
                    self.assertTrue(len(outputs) == len(tf_outputs))
                # CI cleanup: remove artifacts and free memory between models.
                if IS_CI and os.path.exists(out_path):
                    os.remove(out_path)
            if IS_CI:
                # Lower memory usage
                del qat_model
                del converter
                gc.collect()

        return f
class TestModel(unittest.TestCase, metaclass=TestModelMeta):
    # Intentionally empty: all test_* methods are injected by
    # TestModelMeta.__prepare__ at class-creation time.
    pass
# Allow running this test module directly (outside a test runner).
if __name__ == '__main__':
    unittest.main()