Caffe2 - Python API
A deep learning, cross platform ML framework
test_trt.py
1 from __future__ import absolute_import
2 from __future__ import division
3 from __future__ import print_function
4 from __future__ import unicode_literals
5 
6 from caffe2.proto import caffe2_pb2
7 from caffe2.python import core, workspace
8 import onnx
9 import onnx.defs
10 from onnx.helper import make_node, make_graph, make_tensor, make_tensor_value_info, make_model
11 from onnx.backend.base import namedtupledict
12 from caffe2.python.models.download import downloadFromURLToFile, getURLFromName, deleteDirectory
13 import caffe2.python.onnx.backend as c2
14 from caffe2.python.onnx.workspace import Workspace
15 from caffe2.python.trt.transform import convert_onnx_model_to_trt_op, transform_caffe2_net
16 from caffe2.python.onnx.tests.test_utils import TestCase, DownloadingTestCase
17 import numpy as np
18 import os.path
19 import json
20 import time
21 import unittest
22 import tarfile
23 import tempfile
24 import shutil
25 from six.moves.urllib.request import urlretrieve
26 
27 def _print_net(net):
28  for i in net.external_input:
29  print("Input: {}".format(i))
30  for i in net.external_output:
31  print("Output: {}".format(i))
32  for op in net.op:
33  print("Op {}".format(op.type))
34  for x in op.input:
35  print(" input: {}".format(x))
36  for y in op.output:
37  print(" output: {}".format(y))
38 
39 
40 def _base_url(opset_version):
41  return 'https://s3.amazonaws.com/download.onnx/models/opset_{}'.format(opset_version)
42 
# TODO: This is copied from https://github.com/onnx/onnx/blob/master/onnx/backend/test/runner/__init__.py. Maybe we should
# expose a model retrieval API from ONNX
def _download_onnx_model(model_name, opset_version):
    """Download and unpack an ONNX Zoo model archive, returning its local dir.

    The cache root is $ONNX_MODELS, falling back to $ONNX_HOME/models
    (default ~/.onnx/models). If <model_dir>/model.onnx already exists the
    cached copy is reused and no network access happens.

    Raises:
        Whatever urlretrieve/tarfile raises on a failed download; the
        exception is logged and re-raised.
    """
    onnx_home = os.path.expanduser(os.getenv('ONNX_HOME', os.path.join('~', '.onnx')))
    models_dir = os.getenv('ONNX_MODELS',
                           os.path.join(onnx_home, 'models'))
    model_dir = os.path.join(models_dir, model_name)
    if not os.path.exists(os.path.join(model_dir, 'model.onnx')):
        if os.path.exists(model_dir):
            # A directory without model.onnx is a stale/partial download:
            # move it aside to the first free "<model_dir>.old.<N>" slot
            # instead of deleting it.
            bi = 0
            while True:
                dest = '{}.old.{}'.format(model_dir, bi)
                if os.path.exists(dest):
                    bi += 1
                    continue
                shutil.move(model_dir, dest)
                break
        os.makedirs(model_dir)

        # On Windows, NamedTemporaryFile can not be opened for a
        # second time
        url = '{}/{}.tar.gz'.format(_base_url(opset_version), model_name)
        download_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            # Close first (see Windows note above), then write to it by name.
            download_file.close()
            print('Start downloading model {} from {}'.format(
                model_name, url))
            urlretrieve(url, download_file.name)
            print('Done')
            # The tarball contains a top-level <model_name>/ folder, so it
            # is extracted into models_dir, not model_dir.
            with tarfile.open(download_file.name) as t:
                t.extractall(models_dir)
        except Exception as e:
            print('Failed to prepare data for model {}: {}'.format(
                model_name, e))
            raise
        finally:
            # delete=False above means we must remove the temp file ourselves.
            os.remove(download_file.name)
    return model_dir
81 
    def _test_relu_graph(self, X, batch_size, trt_max_batch_size):
        """Run a one-node Relu ONNX graph through both the Caffe2 reference
        backend and the converted TensorRT op, and assert the outputs match.

        Args:
            X: input ndarray; callers pass shape (batch_size, 1, 3, 2).
            batch_size: batch dimension declared in the graph's value infos.
            trt_max_batch_size: max batch size the TRT engine is built for
                (callers use values both above and below batch_size).
        """
        node_def = make_node("Relu", ["X"], ["Y"])
        # Reference result from the plain Caffe2 ONNX backend.
        Y_c2 = c2.run_node(node_def, {"X": X})
        graph_def = make_graph(
            [node_def],
            name="test",
            inputs=[make_tensor_value_info("X", onnx.TensorProto.FLOAT, [batch_size, 1, 3, 2])],
            outputs=[make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [batch_size, 1, 3, 2])])
        model_def = make_model(graph_def, producer_name='relu-test')
        op_outputs = [x.name for x in model_def.graph.output]
        # Wrap the whole ONNX model into a single Caffe2 TensorRT op.
        op = convert_onnx_model_to_trt_op(model_def, max_batch_size=trt_max_batch_size)
        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        op.device_option.CopyFrom(device_option)
        Y_trt = None
        # Fresh workspace so blobs from other tests cannot interfere.
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob("X", X)
            ws.RunOperatorsOnce([op])
            output_values = [ws.FetchBlob(name) for name in op_outputs]
            Y_trt = namedtupledict('Outputs', op_outputs)(*output_values)
        np.testing.assert_almost_equal(Y_c2, Y_trt)
104 
105 
106  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
107  def test_relu_graph_simple(self):
108  X = np.random.randn(1, 1, 3, 2).astype(np.float32)
109  self._test_relu_graph(X, 1, 50)
110 
111 
112  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
113  def test_relu_graph_big_batch(self):
114  X = np.random.randn(52, 1, 3, 2).astype(np.float32)
115  self._test_relu_graph(X, 52, 50)
116 
    def _test_onnx_importer(self, model_name, data_input_index,
                            opset_version = onnx.defs.onnx_opset_version()):
        """Download an ONNX Zoo model, run it through the Caffe2 backend and
        through the TensorRT-converted op, and compare outputs (rtol 1e-3).

        Args:
            model_name: ONNX Zoo archive name (e.g. 'resnet50').
            data_input_index: index into graph.input of the data blob;
                negative indices skip trailing weight inputs.
            opset_version: which opset archive to download.
                NOTE(review): this default is evaluated once at class
                definition time, not per call — fine here since
                onnx_opset_version() is a constant for a given onnx install.
        """
        model_dir = _download_onnx_model(model_name, opset_version)
        model_def = onnx.load(os.path.join(model_dir, 'model.onnx'))
        # Input shape is taken from the model's own value info for the data blob.
        input_blob_dims = [int(x.dim_value) for x in model_def.graph.input[data_input_index].type.tensor_type.shape.dim]
        op_inputs = [x.name for x in model_def.graph.input]
        op_outputs = [x.name for x in model_def.graph.output]
        print("{}".format(op_inputs))
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        # Reference result from the plain Caffe2 ONNX backend.
        Y_c2 = c2.run_model(model_def, {op_inputs[data_input_index]: data})
        op = convert_onnx_model_to_trt_op(model_def, verbosity=3)
        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        op.device_option.CopyFrom(device_option)
        Y_trt = None
        ws = Workspace()
        with core.DeviceScope(device_option):
            ws.FeedBlob(op_inputs[data_input_index], data)
            if opset_version >= 5:
                # Some newer models from ONNX Zoo come with pre-set "data_0" input
                ws.FeedBlob("data_0", data)
            ws.RunOperatorsOnce([op])
            output_values = [ws.FetchBlob(name) for name in op_outputs]
            Y_trt = namedtupledict('Outputs', op_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
141 
142  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
143  def test_resnet50(self):
144  self._test_onnx_importer('resnet50', 0, 9)
145 
146  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
147  def test_bvlc_alexnet(self):
148  self._test_onnx_importer('bvlc_alexnet', 0, 9)
149 
150  @unittest.skip("Until fixing Unsqueeze op")
151  def test_densenet121(self):
152  self._test_onnx_importer('densenet121', -1, 3)
153 
154  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
155  def test_inception_v1(self):
156  self._test_onnx_importer('inception_v1', -3, 9)
157 
158  @unittest.skip("Until fixing Unsqueeze op")
159  def test_inception_v2(self):
160  self._test_onnx_importer('inception_v2', 0, 9)
161 
162  @unittest.skip('Need to revisit our ChannelShuffle exporter to avoid generating 5D tensor')
163  def test_shufflenet(self):
164  self._test_onnx_importer('shufflenet', 0)
165 
166  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
167  def test_squeezenet(self):
168  self._test_onnx_importer('squeezenet', -1, 9)
169 
170  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
171  def test_vgg16(self):
172  self._test_onnx_importer('vgg16', 0, 9)
173 
174  @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
175  def test_vgg19(self):
176  self._test_onnx_importer('vgg19', -2, 9)
177 
178 
180  def _model_dir(self, model):
181  caffe2_home = os.path.expanduser(os.getenv('CAFFE2_HOME', '~/.caffe2'))
182  models_dir = os.getenv('CAFFE2_MODELS', os.path.join(caffe2_home, 'models'))
183  return os.path.join(models_dir, model)
184 
    def _get_c2_model(self, model_name):
        """Load a Caffe2 model from the local zoo, downloading it if absent.

        Returns:
            (init_net, predict_net, value_info): the two NetDef protobufs
            parsed from init_net.pb / predict_net.pb, with their .name set,
            and the dict parsed from value_info.json.
        """
        model_dir = self._model_dir(model_name)
        if not os.path.exists(model_dir):
            # _download is provided by the DownloadingTestCase base class.
            self._download(model_name)
        c2_predict_pb = os.path.join(model_dir, 'predict_net.pb')
        c2_predict_net = caffe2_pb2.NetDef()
        with open(c2_predict_pb, 'rb') as f:
            c2_predict_net.ParseFromString(f.read())
        # Serialized nets may have empty names; give them stable ones.
        c2_predict_net.name = model_name

        c2_init_pb = os.path.join(model_dir, 'init_net.pb')
        c2_init_net = caffe2_pb2.NetDef()
        with open(c2_init_pb, 'rb') as f:
            c2_init_net.ParseFromString(f.read())
        c2_init_net.name = model_name + '_init'

        with open(os.path.join(model_dir, 'value_info.json')) as f:
            value_info = json.load(f)
        return c2_init_net, c2_predict_net, value_info
204 
    def _add_head_tail(self, pred_net, new_head, new_tail):
        """Rename a net's first external input/output by splicing Copy ops.

        Prepends a Copy op new_head -> original first input and appends a
        Copy op original first output -> new_tail, mutating pred_net in
        place. This gives the net fixed, known blob names so test code can
        feed/fetch without depending on the model's internal naming.
        """
        orig_head = pred_net.external_input[0]
        orig_tail = pred_net.external_output[0]

        # Add head
        head = caffe2_pb2.OperatorDef()
        head.type = "Copy"
        head.input.append(new_head)
        head.output.append(orig_head)
        # Protobuf repeated fields don't support insert-at-front, so stash
        # the existing ops in a scratch NetDef, clear, then re-extend with
        # the new head first. Order of the original ops is preserved.
        dummy = caffe2_pb2.NetDef()
        dummy.op.extend(pred_net.op)
        del pred_net.op[:]
        pred_net.op.extend([head])
        pred_net.op.extend(dummy.op)
        pred_net.external_input[0] = new_head

        # Add tail
        tail = caffe2_pb2.OperatorDef()
        tail.type = "Copy"
        tail.input.append(orig_tail)
        tail.output.append(new_tail)
        pred_net.op.extend([tail])
        pred_net.external_output[0] = new_tail
228 
229 
    @unittest.skipIf(not workspace.C.use_trt, "No TensortRT support")
    def test_resnet50_core(self):
        """End-to-end resnet50: benchmark the plain CUDA/CUDNN net, then cut
        the graph into a TensorRT net, benchmark that, and assert both
        produce the same outputs (rtol 1e-3). Also prints the speedup.
        """
        N = 2            # batch size
        warmup = 20      # untimed runs before each measurement
        repeat = 100     # timed runs per measurement
        print("Batch size: {}, repeat inference {} times, warmup {} times".format(N, repeat, warmup))
        init_net, pred_net, _ = self._get_c2_model('resnet50')
        # Pin the net's boundary blob names so we can feed/fetch by name.
        self._add_head_tail(pred_net, 'real_data', 'real_softmax')
        input_blob_dims = (N, 3, 224, 224)
        input_name = "real_data"

        device_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
        init_net.device_option.CopyFrom(device_option)
        pred_net.device_option.CopyFrom(device_option)
        for op in pred_net.op:
            op.device_option.CopyFrom(device_option)
            op.engine = 'CUDNN'
        net_outputs = pred_net.external_output
        Y_c2 = None
        data = np.random.randn(*input_blob_dims).astype(np.float32)
        c2_time = 1
        # Isolated workspace for the baseline run.
        workspace.SwitchWorkspace("gpu_test", True)
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            workspace.RunNetOnce(init_net)
            workspace.CreateNet(pred_net)
            for _ in range(warmup):
                workspace.RunNet(pred_net.name)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net.name)
            end = time.time()
            c2_time = end - start
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_c2 = namedtupledict('Outputs', net_outputs)(*output_values)
        # Drop baseline state before building the TRT net.
        workspace.ResetWorkspace()

        # Fill the workspace with the weights
        with core.DeviceScope(device_option):
            workspace.RunNetOnce(init_net)

        # Cut the graph
        start = time.time()
        pred_net_cut = transform_caffe2_net(pred_net,
                                            {input_name: input_blob_dims},
                                            build_serializable_op=False)
        del init_net, pred_net
        pred_net_cut.device_option.CopyFrom(device_option)
        for op in pred_net_cut.op:
            op.device_option.CopyFrom(device_option)
        #_print_net(pred_net_cut)

        Y_trt = None
        # The cut net may rename its boundary input; re-read it.
        input_name = pred_net_cut.external_input[0]
        print("C2 runtime: {}s".format(c2_time))
        with core.DeviceScope(device_option):
            workspace.FeedBlob(input_name, data)
            # CreateNet triggers the TRT engine build; time it separately.
            workspace.CreateNet(pred_net_cut)
            end = time.time()
            print("Conversion time: {:.2f}s".format(end -start))

            for _ in range(warmup):
                workspace.RunNet(pred_net_cut.name)
            start = time.time()
            for _ in range(repeat):
                workspace.RunNet(pred_net_cut.name)
            end = time.time()
            trt_time = end - start
            print("TRT runtime: {}s, improvement: {}%".format(trt_time, (c2_time-trt_time)/c2_time*100))
            output_values = [workspace.FetchBlob(name) for name in net_outputs]
            Y_trt = namedtupledict('Outputs', net_outputs)(*output_values)
        np.testing.assert_allclose(Y_c2, Y_trt, rtol=1e-3)
302 
303 
def _add_head_tail(self, pred_net, new_head, new_tail)
Definition: test_trt.py:205
def _test_relu_graph(self, X, batch_size, trt_max_batch_size)
Definition: test_trt.py:83
def _test_onnx_importer(self, model_name, data_input_index, opset_version=onnx.defs.onnx_opset_version())
Definition: test_trt.py:118