1 from __future__
import absolute_import
2 from __future__
import division
3 from __future__
import print_function
4 from __future__
import unicode_literals
6 from caffe2.proto
import caffe2_pb2
10 from onnx.helper
import make_node, make_graph, make_tensor, make_tensor_value_info, make_model
11 from onnx.backend.base
import namedtupledict
25 from six.moves.urllib.request
import urlretrieve
28 for i
in net.external_input:
29 print(
"Input: {}".format(i))
30 for i
in net.external_output:
31 print(
"Output: {}".format(i))
33 print(
"Op {}".format(op.type))
35 print(
" input: {}".format(x))
37 print(
" output: {}".format(y))
def _base_url(opset_version):
    """Return the base download URL for ONNX models of one opset version.

    Args:
        opset_version: Opset version identifier; it is interpolated verbatim
            into the URL via ``str.format``.

    Returns:
        The URL prefix (no trailing slash) to which a model archive name is
        appended by the caller.
    """
    return 'https://s3.amazonaws.com/download.onnx/models/opset_{}'.format(opset_version)
45 def _download_onnx_model(model_name, opset_version):
46 onnx_home = os.path.expanduser(os.getenv(
'ONNX_HOME', os.path.join(
'~',
'.onnx')))
47 models_dir = os.getenv(
'ONNX_MODELS',
48 os.path.join(onnx_home,
'models'))
49 model_dir = os.path.join(models_dir, model_name)
50 if not os.path.exists(os.path.join(model_dir,
'model.onnx')):
51 if os.path.exists(model_dir):
54 dest =
'{}.old.{}'.format(model_dir, bi)
55 if os.path.exists(dest):
58 shutil.move(model_dir, dest)
60 os.makedirs(model_dir)
64 url =
'{}/{}.tar.gz'.format(_base_url(opset_version), model_name)
65 download_file = tempfile.NamedTemporaryFile(delete=
False)
68 print(
'Start downloading model {} from {}'.format(
70 urlretrieve(url, download_file.name)
72 with tarfile.open(download_file.name)
as t:
73 t.extractall(models_dir)
74 except Exception
as e:
75 print(
'Failed to prepare data for model {}: {}'.format(
79 os.remove(download_file.name)
83 def _test_relu_graph(self, X, batch_size, trt_max_batch_size):
84 node_def = make_node(
"Relu", [
"X"], [
"Y"])
85 Y_c2 = c2.run_node(node_def, {
"X": X})
86 graph_def = make_graph(
89 inputs=[make_tensor_value_info(
"X", onnx.TensorProto.FLOAT, [batch_size, 1, 3, 2])],
90 outputs=[make_tensor_value_info(
"Y", onnx.TensorProto.FLOAT, [batch_size, 1, 3, 2])])
91 model_def = make_model(graph_def, producer_name=
'relu-test')
92 op_outputs = [x.name
for x
in model_def.graph.output]
93 op = convert_onnx_model_to_trt_op(model_def, max_batch_size=trt_max_batch_size)
94 device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
95 op.device_option.CopyFrom(device_option)
98 with core.DeviceScope(device_option):
100 ws.RunOperatorsOnce([op])
101 output_values = [ws.FetchBlob(name)
for name
in op_outputs]
102 Y_trt = namedtupledict(
'Outputs', op_outputs)(*output_values)
103 np.testing.assert_almost_equal(Y_c2, Y_trt)
106 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
107 def test_relu_graph_simple(self):
108 X = np.random.randn(1, 1, 3, 2).astype(np.float32)
112 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
113 def test_relu_graph_big_batch(self):
114 X = np.random.randn(52, 1, 3, 2).astype(np.float32)
117 def _test_onnx_importer(self, model_name, data_input_index,
118 opset_version = onnx.defs.onnx_opset_version()):
119 model_dir = _download_onnx_model(model_name, opset_version)
120 model_def = onnx.load(os.path.join(model_dir,
'model.onnx'))
121 input_blob_dims = [int(x.dim_value)
for x
in model_def.graph.input[data_input_index].type.tensor_type.shape.dim]
122 op_inputs = [x.name
for x
in model_def.graph.input]
123 op_outputs = [x.name
for x
in model_def.graph.output]
124 print(
"{}".format(op_inputs))
125 data = np.random.randn(*input_blob_dims).astype(np.float32)
126 Y_c2 = c2.run_model(model_def, {op_inputs[data_input_index]: data})
127 op = convert_onnx_model_to_trt_op(model_def, verbosity=3)
128 device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
129 op.device_option.CopyFrom(device_option)
132 with core.DeviceScope(device_option):
133 ws.FeedBlob(op_inputs[data_input_index], data)
134 if opset_version >= 5:
136 ws.FeedBlob(
"data_0", data)
137 ws.RunOperatorsOnce([op])
138 output_values = [ws.FetchBlob(name)
for name
in op_outputs]
139 Y_trt = namedtupledict(
'Outputs', op_outputs)(*output_values)
140 np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
142 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
143 def test_resnet50(self):
146 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
147 def test_bvlc_alexnet(self):
150 @unittest.skip(
"Until fixing Unsqueeze op")
151 def test_densenet121(self):
154 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
155 def test_inception_v1(self):
158 @unittest.skip(
"Until fixing Unsqueeze op")
159 def test_inception_v2(self):
162 @unittest.skip(
'Need to revisit our ChannelShuffle exporter to avoid generating 5D tensor')
163 def test_shufflenet(self):
166 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
167 def test_squeezenet(self):
170 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
171 def test_vgg16(self):
174 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
175 def test_vgg19(self):
def _model_dir(self, model):
    """Return the local directory path for the Caffe2 model named ``model``.

    Lookup order for the directory that holds all models:
      1. The ``CAFFE2_MODELS`` environment variable, used as-is (no ``~``
         expansion is applied to it).
      2. ``<CAFFE2_HOME>/models``, where ``CAFFE2_HOME`` comes from the
         environment and defaults to ``~/.caffe2``; ``~`` is expanded.

    Args:
        model: Name of the model; used as the final path component.

    Returns:
        The joined path. The directory is not created or checked here.
    """
    caffe2_home = os.path.expanduser(os.getenv('CAFFE2_HOME', '~/.caffe2'))
    models_dir = os.getenv('CAFFE2_MODELS', os.path.join(caffe2_home, 'models'))
    return os.path.join(models_dir, model)
185 def _get_c2_model(self, model_name):
187 if not os.path.exists(model_dir):
189 c2_predict_pb = os.path.join(model_dir,
'predict_net.pb')
190 c2_predict_net = caffe2_pb2.NetDef()
191 with open(c2_predict_pb,
'rb')
as f:
192 c2_predict_net.ParseFromString(f.read())
193 c2_predict_net.name = model_name
195 c2_init_pb = os.path.join(model_dir,
'init_net.pb')
196 c2_init_net = caffe2_pb2.NetDef()
197 with open(c2_init_pb,
'rb')
as f:
198 c2_init_net.ParseFromString(f.read())
199 c2_init_net.name = model_name +
'_init' 201 with open(os.path.join(model_dir,
'value_info.json'))
as f:
202 value_info = json.load(f)
203 return c2_init_net, c2_predict_net, value_info
205 def _add_head_tail(self, pred_net, new_head, new_tail):
206 orig_head = pred_net.external_input[0]
207 orig_tail = pred_net.external_output[0]
210 head = caffe2_pb2.OperatorDef()
212 head.input.append(new_head)
213 head.output.append(orig_head)
214 dummy = caffe2_pb2.NetDef()
215 dummy.op.extend(pred_net.op)
217 pred_net.op.extend([head])
218 pred_net.op.extend(dummy.op)
219 pred_net.external_input[0] = new_head
222 tail = caffe2_pb2.OperatorDef()
224 tail.input.append(orig_tail)
225 tail.output.append(new_tail)
226 pred_net.op.extend([tail])
227 pred_net.external_output[0] = new_tail
230 @unittest.skipIf(
not workspace.C.use_trt,
"No TensortRT support")
231 def test_resnet50_core(self):
235 print(
"Batch size: {}, repeat inference {} times, warmup {} times".format(N, repeat, warmup))
238 input_blob_dims = (N, 3, 224, 224)
239 input_name =
"real_data" 241 device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
242 init_net.device_option.CopyFrom(device_option)
243 pred_net.device_option.CopyFrom(device_option)
244 for op
in pred_net.op:
245 op.device_option.CopyFrom(device_option)
247 net_outputs = pred_net.external_output
249 data = np.random.randn(*input_blob_dims).astype(np.float32)
251 workspace.SwitchWorkspace(
"gpu_test",
True)
252 with core.DeviceScope(device_option):
253 workspace.FeedBlob(input_name, data)
254 workspace.RunNetOnce(init_net)
255 workspace.CreateNet(pred_net)
256 for _
in range(warmup):
257 workspace.RunNet(pred_net.name)
259 for _
in range(repeat):
260 workspace.RunNet(pred_net.name)
262 c2_time = end - start
263 output_values = [workspace.FetchBlob(name)
for name
in net_outputs]
264 Y_c2 = namedtupledict(
'Outputs', net_outputs)(*output_values)
265 workspace.ResetWorkspace()
268 with core.DeviceScope(device_option):
269 workspace.RunNetOnce(init_net)
273 pred_net_cut = transform_caffe2_net(pred_net,
274 {input_name: input_blob_dims},
275 build_serializable_op=
False)
276 del init_net, pred_net
277 pred_net_cut.device_option.CopyFrom(device_option)
278 for op
in pred_net_cut.op:
279 op.device_option.CopyFrom(device_option)
283 input_name = pred_net_cut.external_input[0]
284 print(
"C2 runtime: {}s".format(c2_time))
285 with core.DeviceScope(device_option):
286 workspace.FeedBlob(input_name, data)
287 workspace.CreateNet(pred_net_cut)
289 print(
"Conversion time: {:.2f}s".format(end -start))
291 for _
in range(warmup):
292 workspace.RunNet(pred_net_cut.name)
294 for _
in range(repeat):
295 workspace.RunNet(pred_net_cut.name)
297 trt_time = end - start
298 print(
"TRT runtime: {}s, improvement: {}%".format(trt_time, (c2_time-trt_time)/c2_time*100))
299 output_values = [workspace.FetchBlob(name)
for name
in net_outputs]
300 Y_trt = namedtupledict(
'Outputs', net_outputs)(*output_values)
301 np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
def _test_relu_graph(self, X, batch_size, trt_max_batch_size)
def _download(self, model)
def _test_onnx_importer(self, model_name, data_input_index, opset_version=onnx.defs.onnx_opset_version())