Caffe2 - Python API
A deep learning, cross-platform ML framework
update-models-from-caffe2.py
#! /usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import glob
import json
import math
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile

import boto3
import numpy as np
import onnx
import onnx.backend

from six.moves.urllib.request import urlretrieve

import caffe2.python.onnx.backend
import caffe2.python.onnx.frontend
import caffe2.python.workspace as c2_workspace
from caffe2.python.models.download import downloadFromURLToFile, getURLFromName, deleteDirectory
from caffe2.proto import caffe2_pb2
from onnx import numpy_helper
from filechunkio import FileChunkIO
35 
36 """A script converting Caffe2 models to ONNX, and updating ONNX model zoos.
37 
38 Arguments:
39  -v, verbose
40  --local-dir, where we store the ONNX and Caffe2 models
41  --no-cache, ignore existing models in local-dir
42  --clean-test-data, delete all the existing test data when updating ONNX model zoo
43  --add-test-data, add add-test-data sets of test data for each ONNX model
44  --only-local, run locally (for testing purpose)
45 
46 Examples:
47  # store the data in /home/username/zoo-dir, delete existing test data, ignore local cache,
48  # and generate 3 sets of new test data
 49  python update-models-from-caffe2.py --local-dir /home/username/zoo-dir --clean-test-data --no-cache --add-test-data 3
50 
51 """
52 
53 # TODO: Add GPU support
54 
55 
def upload_onnx_model(model_name, zoo_dir, backup=False, only_local=False):
    """Tar-gzip one ONNX model directory and push the archive to the S3 zoo.

    Args:
        model_name: name of the model; also the directory name under zoo_dir.
        zoo_dir: local root directory holding the ONNX models.
        backup: if True, upload under a '-backup' suffix instead of the
            primary archive name.
        only_local: if True, skip the upload entirely (local testing mode).
    """
    if only_local:
        print('No uploading in local only mode.')
        return
    model_dir = os.path.join(zoo_dir, model_name)
    suffix = '-backup' if backup else ''
    if backup:
        print('Backing up the previous version of ONNX model {}...'.format(model_name))
    rel_file_name = '{}{}.tar.gz'.format(model_name, suffix)
    abs_file_name = os.path.join(zoo_dir, rel_file_name)
    print('Compressing {} model to {}'.format(model_name, abs_file_name))
    # arcname keeps the archive rooted at the model name, not the full path.
    with tarfile.open(abs_file_name, 'w:gz') as archive:
        archive.add(model_dir, arcname=model_name)
    size_mb = float(os.stat(abs_file_name).st_size) / 1024 / 1024
    print('Uploading {} ({} MB) to s3 cloud...'.format(abs_file_name, size_mb))
    s3_client = boto3.client('s3', 'us-east-1')
    uploader = boto3.s3.transfer.S3Transfer(s3_client)
    # Publicly readable so the zoo can be downloaded without credentials.
    uploader.upload_file(abs_file_name, 'download.onnx',
                         'models/latest/{}'.format(rel_file_name),
                         extra_args={'ACL': 'public-read'})

    print('Successfully uploaded {} to s3!'.format(rel_file_name))
77 
78 
def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
    """Fetch an ONNX model archive from the S3 zoo and extract it locally.

    When a cached copy exists and use_cache is True, the cached copy is kept
    (after being backed up to S3).  On download failure an empty model
    directory is created so later steps can still write into it.
    """
    model_dir = os.path.join(zoo_dir, model_name)
    if os.path.exists(model_dir):
        if use_cache:
            # Keep the local copy; back up the current zoo version first.
            upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
            return
        shutil.rmtree(model_dir)
    url = 'https://s3.amazonaws.com/download.onnx/models/latest/{}.tar.gz'.format(model_name)

    # delete=False so the closed file can be reused as a download target
    # and removed explicitly in the finally block.
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        tmp_file.close()
        print('Downloading ONNX model {} from {} and save in {} ...\n'.format(
            model_name, url, tmp_file.name))
        urlretrieve(url, tmp_file.name)
        with tarfile.open(tmp_file.name) as tarball:
            print('Extracting ONNX model {} to {} ...\n'.format(model_name, zoo_dir))
            tarball.extractall(zoo_dir)
    except Exception as e:
        # Best-effort: fall back to an empty model directory on any failure.
        print('Failed to download/backup data for ONNX model {}: {}'.format(model_name, e))
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
    finally:
        os.remove(tmp_file.name)

    if not only_local:
        upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
107 
108 
def download_caffe2_model(model_name, zoo_dir, use_cache=True):
    """Download a Caffe2 model's protobufs and value_info into zoo_dir.

    Fetches predict_net.pb, init_net.pb and value_info.json for model_name.
    An existing local copy is reused when use_cache is True; on any download
    failure the partially populated directory is removed and the error
    re-raised.
    """
    model_dir = os.path.join(zoo_dir, model_name)
    if os.path.exists(model_dir):
        if use_cache:
            return
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)

    required_files = ('predict_net.pb', 'init_net.pb', 'value_info.json')
    for file_name in required_files:
        url = getURLFromName(model_name, file_name)
        dest = os.path.join(model_dir, file_name)
        try:
            try:
                downloadFromURLToFile(url, dest,
                                      show_progress=False)
            except TypeError:
                # show_progress not supported prior to
                # Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
                # (Sep 17, 2017)
                downloadFromURLToFile(url, dest)
        except Exception as e:
            # Don't leave a half-downloaded model behind.
            print("Abort: {reason}".format(reason=e))
            print("Cleaning up...")
            deleteDirectory(model_dir)
            raise
135 
136 
def caffe2_to_onnx(caffe2_model_name, caffe2_model_dir):
    """Load a Caffe2 model from disk and convert it to an ONNX ModelProto.

    Args:
        caffe2_model_name: model name; used as the NetDef names and passed
            through to the converted ONNX graph.
        caffe2_model_dir: directory containing init_net.pb, predict_net.pb
            and value_info.json.

    Returns:
        Tuple of (onnx_model, caffe2_init_proto, caffe2_predict_proto).
    """
    caffe2_init_proto = caffe2_pb2.NetDef()
    caffe2_predict_proto = caffe2_pb2.NetDef()

    with open(os.path.join(caffe2_model_dir, 'init_net.pb'), 'rb') as f:
        caffe2_init_proto.ParseFromString(f.read())
        caffe2_init_proto.name = '{}_init'.format(caffe2_model_name)
    with open(os.path.join(caffe2_model_dir, 'predict_net.pb'), 'rb') as f:
        caffe2_predict_proto.ParseFromString(f.read())
        caffe2_predict_proto.name = caffe2_model_name
    with open(os.path.join(caffe2_model_dir, 'value_info.json'), 'rb') as f:
        # value_info maps input names to (elem_type, shape), required by the
        # converter to type the graph inputs.
        value_info = json.loads(f.read())

    print('Converting Caffe2 model {} in {} to ONNX format'.format(caffe2_model_name, caffe2_model_dir))
    # BUG FIX: the conversion call itself was missing (only its keyword
    # arguments remained in the source), leaving `onnx_model` undefined and
    # the function body a syntax error.
    onnx_model = caffe2.python.onnx.frontend.caffe2_net_to_onnx_model(
        init_net=caffe2_init_proto,
        predict_net=caffe2_predict_proto,
        value_info=value_info
    )

    return onnx_model, caffe2_init_proto, caffe2_predict_proto
158 
159 
160 def tensortype_to_ndarray(tensor_type):
161  shape = []
162  for dim in tensor_type.shape.dim:
163  shape.append(dim.dim_value)
164  if tensor_type.elem_type == onnx.TensorProto.FLOAT:
165  type = np.float32
166  elif tensor_type.elem_type == onnx.TensorProto.INT:
167  type = np.int32
168  else:
169  raise
170  array = np.random.rand(*shape).astype(type)
171  return array
172 
173 
174 def generate_test_input_data(onnx_model, scale):
175  real_inputs_names = list(set([input.name for input in onnx_model.graph.input]) - set([init.name for init in onnx_model.graph.initializer]))
176  real_inputs = []
177  for name in real_inputs_names:
178  for input in onnx_model.graph.input:
179  if name == input.name:
180  real_inputs.append(input)
181 
182  test_inputs = []
183  for input in real_inputs:
184  ndarray = tensortype_to_ndarray(input.type.tensor_type)
185  test_inputs.append((input.name, ndarray * scale))
186 
187  return test_inputs
188 
189 
190 def generate_test_output_data(caffe2_init_net, caffe2_predict_net, inputs):
191  p = c2_workspace.Predictor(caffe2_init_net, caffe2_predict_net)
192  inputs_map = {input[0]:input[1] for input in inputs}
193 
194  output = p.run(inputs_map)
195  c2_workspace.ResetWorkspace()
196  return output
197 
198 
199 def onnx_verify(onnx_model, inputs, ref_outputs):
200  prepared = caffe2.python.onnx.backend.prepare(onnx_model)
201  onnx_inputs = []
202  for input in inputs:
203  if isinstance(input, tuple):
204  onnx_inputs.append(input[1])
205  else:
206  onnx_inputs.append(input)
207  onnx_outputs = prepared.run(inputs=onnx_inputs)
208  np.testing.assert_almost_equal(onnx_outputs, ref_outputs, decimal=3)
209 
210 
211 model_mapping = {
212  'bvlc_alexnet': 'bvlc_alexnet',
213  'bvlc_googlenet': 'bvlc_googlenet',
214  'bvlc_reference_caffenet': 'bvlc_reference_caffenet',
215  'bvlc_reference_rcnn_ilsvrc13': 'bvlc_reference_rcnn_ilsvrc13',
216  'densenet121': 'densenet121',
217  #'finetune_flickr_style': 'finetune_flickr_style',
218  'inception_v1': 'inception_v1',
219  'inception_v2': 'inception_v2',
220  'resnet50': 'resnet50',
221  'shufflenet': 'shufflenet',
222  'squeezenet': 'squeezenet_old',
223  #'vgg16': 'vgg16',
224  'vgg19': 'vgg19',
225  'zfnet512': 'zfnet512',
226 }
227 
228 
229 
230 if __name__ == '__main__':
231  parser = argparse.ArgumentParser(description='Update the ONNX models.')
232  parser.add_argument('-v', action="store_true", default=False, help="verbose")
233  parser.add_argument("--local-dir", type=str, default=os.path.expanduser('~'),
234  help="local dir to store Caffe2 and ONNX models")
235  parser.add_argument("--no-cache", action="store_true", default=False,
236  help="whether use local ONNX models")
237  parser.add_argument('--clean-test-data', action="store_true", default=False,
238  help="remove the old test data")
239  parser.add_argument('--add-test-data', type=int, default=0,
240  help="add new test data")
241  parser.add_argument('--only-local', action="store_true", default=False,
242  help="no upload including backup")
243 
244  args = parser.parse_args()
245  delete_test_data = args.clean_test_data
246  add_test_data = args.add_test_data
247  use_cache = not args.no_cache
248  only_local = args.only_local
249 
250  root_dir = args.local_dir
251  caffe2_zoo_dir = os.path.join(root_dir, ".caffe2", "models")
252  onnx_zoo_dir = os.path.join(root_dir, ".onnx", "models")
253 
254  for onnx_model_name in model_mapping:
255  c2_model_name = model_mapping[onnx_model_name]
256 
257  print('####### Processing ONNX model {} ({} in Caffe2) #######'.format(onnx_model_name, c2_model_name))
258  download_caffe2_model(c2_model_name, caffe2_zoo_dir, use_cache=use_cache)
259  download_onnx_model(onnx_model_name, onnx_zoo_dir, use_cache=use_cache, only_local=only_local)
260 
261  onnx_model_dir = os.path.join(onnx_zoo_dir, onnx_model_name)
262 
263  if delete_test_data:
264  print('Deleting all the existing test data...')
265  # NB: For now, we don't delete the npz files.
266  #for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
267  # os.remove(f)
268  for f in glob.glob(os.path.join(onnx_model_dir, 'test_data_set*')):
269  shutil.rmtree(f)
270 
271  onnx_model, c2_init_net, c2_predict_net = caffe2_to_onnx(c2_model_name, os.path.join(caffe2_zoo_dir, c2_model_name))
272 
273  print('Deleteing old ONNX {} model...'.format(onnx_model_name))
274  for f in glob.glob(os.path.join(onnx_model_dir, 'model*'.format(onnx_model_name))):
275  os.remove(f)
276 
277  print('Serializing generated ONNX {} model ...'.format(onnx_model_name))
278  with open(os.path.join(onnx_model_dir, 'model.onnx'), 'wb') as file:
279  file.write(onnx_model.SerializeToString())
280 
281  print('Verifying model {} with ONNX model checker...'.format(onnx_model_name))
282  onnx.checker.check_model(onnx_model)
283 
284  total_existing_data_set = 0
285  print('Verifying model {} with existing test data...'.format(onnx_model_name))
286  for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
287  test_data = np.load(f, encoding='bytes')
288  inputs = list(test_data['inputs'])
289  ref_outputs = list(test_data['outputs'])
290  onnx_verify(onnx_model, inputs, ref_outputs)
291  total_existing_data_set += 1
292  for f in glob.glob(os.path.join(onnx_model_dir, 'test_data_set*')):
293  inputs = []
294  inputs_num = len(glob.glob(os.path.join(f, 'input_*.pb')))
295  for i in range(inputs_num):
296  tensor = onnx.TensorProto()
297  with open(os.path.join(f, 'input_{}.pb'.format(i)), 'rb') as pf:
298  tensor.ParseFromString(pf.read())
299  inputs.append(numpy_helper.to_array(tensor))
300  ref_outputs = []
301  ref_outputs_num = len(glob.glob(os.path.join(f, 'output_*.pb')))
302  for i in range(ref_outputs_num):
303  tensor = onnx.TensorProto()
304  with open(os.path.join(f, 'output_{}.pb'.format(i)), 'rb') as pf:
305  tensor.ParseFromString(pf.read())
306  ref_outputs.append(numpy_helper.to_array(tensor))
307  onnx_verify(onnx_model, inputs, ref_outputs)
308  total_existing_data_set += 1
309 
310  starting_index = 0
311  while os.path.exists(os.path.join(onnx_model_dir, 'test_data_set_{}'.format(starting_index))):
312  starting_index += 1
313 
314  if total_existing_data_set == 0 and add_test_data == 0:
315  add_test_data = 3
316  total_existing_data_set = 3
317 
318  print('Generating {} sets of new test data...'.format(add_test_data))
319  for i in range(starting_index, add_test_data + starting_index):
320  data_dir = os.path.join(onnx_model_dir, 'test_data_set_{}'.format(i))
321  os.makedirs(data_dir)
322  inputs = generate_test_input_data(onnx_model, 255)
323  ref_outputs = generate_test_output_data(c2_init_net, c2_predict_net, inputs)
324  onnx_verify(onnx_model, inputs, ref_outputs)
325  for index, input in enumerate(inputs):
326  tensor = numpy_helper.from_array(input[1])
327  with open(os.path.join(data_dir, 'input_{}.pb'.format(index)), 'wb') as file:
328  file.write(tensor.SerializeToString())
329  for index, output in enumerate(ref_outputs):
330  tensor = numpy_helper.from_array(output)
331  with open(os.path.join(data_dir, 'output_{}.pb'.format(index)), 'wb') as file:
332  file.write(tensor.SerializeToString())
333 
334  del onnx_model
335  del c2_init_net
336  del c2_predict_net
337 
338  upload_onnx_model(onnx_model_name, onnx_zoo_dir, backup=False, only_local=only_local)
339 
340  print('\n\n')