Caffe2 - Python API
A deep learning, cross-platform ML framework
core.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package core
17 # Module caffe2.python.core
18 from __future__ import absolute_import
19 from __future__ import division
20 from __future__ import print_function
21 from __future__ import unicode_literals
22 
23 from collections import namedtuple, OrderedDict, defaultdict
24 from past.builtins import basestring
25 from future.utils import viewitems, viewkeys, viewvalues
26 from itertools import chain
27 from six import binary_type, string_types, text_type
28 
29 from caffe2.proto import caffe2_pb2
30 from caffe2.python import scope, utils, workspace
31 from caffe2.python.control_ops_grad import \
32  gen_do_gradient, gen_if_gradient, gen_while_gradient
33 
34 import caffe2.python._import_c_extension as C
35 import pickle
36 import numpy as np
37 import sys
38 
39 # Mac OS specific message
40 if (sys.platform == 'darwin' and 'leveldb' in C.registered_dbs()):
41  print('If you are using homebrew leveldb on a Mac OS, you might see an '
42  'error warning you that malloc_zone_unregister() failed. This is '
43  'not a caffe2 issue but is due to the homebrew leveldb having an '
44  'incompatible memory allocator. It does not affect usage.')
45 
46 # Convenience redirections to functions inside scope.
47 DeviceScope = scope.DeviceScope
48 NameScope = scope.NameScope
49 
50 
51 # Bring datatype enums to the main namespace
52 class DataType:
53  pass
54 
55 
56 def _InitDataType():
57  for name, value in caffe2_pb2.TensorProto.DataType.items():
58  setattr(DataType, name, value)
59 
60 
61 _InitDataType()
62 
63 
64 def _GetRegisteredOperators():
65  return set(workspace.RegisteredOperators())
66 
67 
68 _REGISTERED_OPERATORS = _GetRegisteredOperators()
69 
70 
71 def RefreshRegisteredOperators():
72  global _REGISTERED_OPERATORS
73  _REGISTERED_OPERATORS = _GetRegisteredOperators()
74 
75 
76 _GLOBAL_INIT_ARGS = []
77 
78 
79 def GlobalInit(args):
80  _GLOBAL_INIT_ARGS.extend(args[1:])
81  C.global_init(args)
82 
83 
84 def GetGlobalInitArgs():
85  return _GLOBAL_INIT_ARGS[:]
86 
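# Illustrative sketch (not part of the original core.py): GlobalInit takes an
# argv-style list whose first element is the program name; only the remaining
# flags are recorded. '--caffe2_log_level' is a standard Caffe2 flag; the
# helper name below is hypothetical.
def _example_global_init():
    GlobalInit(['caffe2', '--caffe2_log_level=0'])
    return GetGlobalInitArgs()  # -> ['--caffe2_log_level=0'] on the first call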
87 
88 def IsOperator(op_type):
89  return IsOperatorWithEngine(op_type, engine='DEFAULT')
90 
91 
92 def IsOperatorWithEngine(op_type, engine):
93  return C.op_registry_key(op_type, engine) in _REGISTERED_OPERATORS
94 
95 
96 def DeviceOption(device_type, cuda_gpu_id=0, random_seed=None, node_name=None):
97  option = caffe2_pb2.DeviceOption()
98  option.device_type = device_type
99  option.cuda_gpu_id = cuda_gpu_id
100  if node_name is not None:
101  option.node_name = node_name
102  if random_seed is not None:
103  option.random_seed = random_seed
104  return option
105 
106 
107 def device_option_equal(opt1, opt2, ignore_node_name=True, ignore_random_seed=True):
108  if not opt1 or not opt2:
109  return opt1 == opt2
110  if not ignore_node_name and opt1.node_name != opt2.node_name:
111  return False
112  if not ignore_random_seed and opt1.random_seed != opt2.random_seed:
113  return False
114  if not opt1.device_type or not opt2.device_type:
115  # At least one option is for CPU, check if both are for CPU.
116  return not opt1.device_type and not opt2.device_type
117  return opt1.cuda_gpu_id == opt2.cuda_gpu_id
118 
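# Illustrative sketch (not part of the original core.py): building device
# options and comparing them. caffe2_pb2.CPU and caffe2_pb2.CUDA are the
# DeviceType enum values used throughout this module; the helper name is
# hypothetical.
def _example_device_options():
    cpu_opt = DeviceOption(caffe2_pb2.CPU)
    gpu_opt = DeviceOption(caffe2_pb2.CUDA, cuda_gpu_id=1, random_seed=42)
    # node_name and random_seed are ignored by default when comparing.
    assert not device_option_equal(cpu_opt, gpu_opt)
    assert device_option_equal(gpu_opt, DeviceOption(caffe2_pb2.CUDA, 1))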
119 
120 def InferBlobDevices(net):
121  '''
122  Compute a mapping from parameters (blobs) to devices by looking at the
123  device option of the op that creates each blob.
124  '''
125  mapping = {}
126  for op in net.Proto().op:
127  op_device = op.device_option
128  if op_device is None:
129  op_device = caffe2_pb2.DeviceOption(caffe2_pb2.CPU)
130  # TODO: T18892922, use device annotations
131  for b in op.output:
132  mapping[b] = op_device
133  return mapping
134 
135 
136 def InferOpBlobDevices(op):
137  device_info = C.infer_op_input_output_device(op.SerializeToString())
138  input_info = []
139  output_info = []
140  for dev_str in device_info[0]:
141  device_option = caffe2_pb2.DeviceOption()
142  device_option.ParseFromString(dev_str)
143  input_info.append(device_option)
144  for dev_str in device_info[1]:
145  device_option = caffe2_pb2.DeviceOption()
146  device_option.ParseFromString(dev_str)
147  output_info.append(device_option)
148  return input_info, output_info
149 
150 
151 GradientSlice = namedtuple('GradientSlice', ['indices', 'values'])
152 
153 
154 class BlobReference(object):
155  """A wrapper around a blob in a net.
156 
157  BlobReference gives us a way to refer to the network that the blob is
158  generated from. Note that blobs are, essentially, just strings in the
159  current workspace.
160  """
161 
162  def __init__(self, name, net=None):
163  """Initializes a blob reference.
164 
165  Note that this does not prepend the namescope. If needed, use
166  ScopedBlobReference() to prepend the current namescope.
167  """
168  if isinstance(name, string_types):
169  self._name = name
170  elif isinstance(name, binary_type):
171  self._name = name.decode('utf-8')
172  else:
173  self._name = str(name)
174  self._from_net = net
175  # meta allows helper functions to put whatever metainformation needed
176  # there.
177  self.meta = {}
178 
179  def __hash__(self):
180  return hash(self._name)
181 
182  def __eq__(self, other):
183  if isinstance(other, string_types):
184  return self._name == other
185  elif isinstance(other, binary_type):
186  return self._name == other.decode('utf-8')
187  elif isinstance(other, BlobReference):
188  return self._name == other._name
189  else:
190  return False
191 
192  def __ne__(self, other):
193  return not(self == other)
194 
195  def __str__(self):
196  return self._name
197 
198  def __repr__(self):
199  return 'BlobReference("{}")'.format(self._name)
200 
201  def __add__(self, other):
202  if not isinstance(other, string_types):
203  raise RuntimeError('Cannot add BlobReference to a non-string.')
204  return BlobReference(self._name + other, self._from_net)
205 
206  def __radd__(self, other):
207  if not isinstance(other, string_types):
208  raise RuntimeError('Cannot add a non-string to BlobReference.')
209  return BlobReference(other + self._name, self._from_net)
210 
211  def Net(self):
212  return self._from_net
213 
214  def GetNameScope(self):
215  return self._name[:self._name.rfind(scope._NAMESCOPE_SEPARATOR) + 1]
216 
217  def _CreateAndAddToNet(self, op_type, inputs=None, *args, **kwargs):
218  """Internal function that routes the operator generation to the
219  network's __getattr__ function.
220  """
221  inputs = [] if inputs is None else inputs
222  if isinstance(inputs, BlobReference) or isinstance(inputs, string_types):
223  inputs = [inputs]
224  # add self to the input list.
225  inputs.insert(0, self)
226  return self._from_net.__getattr__(op_type)(inputs, *args, **kwargs)
227 
228  def __getattr__(self, op_type):
229  """A wrapper allowing one to initiate operators from a blob reference.
230 
231  Example: for a blob reference b that comes from network n, doing
232  b.Relu(...)
233  is equivalent to doing
234  net.Relu([b], ...)
235  """
236  if op_type.startswith('__'):
237  raise AttributeError('Attribute {} not found.'.format(op_type))
238  if self._from_net is None:
239  raise RuntimeError(
240  'You cannot use a blob reference that does not have a net '
241  'source to create operators. Create the operator from an '
242  'explicit net object.')
243  if not IsOperator(op_type):
244  raise RuntimeError(
245  'Method ' + op_type + ' is not a registered operator.' +
246  ' Did you mean: [' +
247  ",".join(workspace.C.nearby_opnames(op_type)) + ']'
248  )
249  return lambda *args, **kwargs: self._CreateAndAddToNet(
250  op_type, *args, **kwargs)
251 
252  def __dir__(self):
253  additional_methods = [
254  op
255  for op in _REGISTERED_OPERATORS
256  if '_ENGINE_' not in op or '_ENGINE_CUDNN' in op]
257  return sorted(set(chain(
258  dir(type(self)),
259  viewkeys(self.__dict__),
260  additional_methods
261  )))
262 
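# Illustrative sketch (not part of the original core.py): operator chaining
# through BlobReference.__getattr__. The net and blob names are hypothetical.
def _example_blob_reference_chaining():
    net = Net('blobref_example')
    x = net.ConstantFill([], 'X', shape=[4], value=1.0)  # returns a BlobReference
    # The two calls below build equivalent operators:
    y1 = x.Relu([], 'Y1')       # routed through x._CreateAndAddToNet
    y2 = net.Relu([x], 'Y2')
    return y1, y2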
263 
264 def ScopedName(name):
265  """Prefixes the name with the current name scope."""
266  if isinstance(name, binary_type):
267  name = name.decode('ascii')
268  return scope.CurrentNameScope() + name
269 
270 
271 def ScopedBlobReference(name, *args, **kwargs):
272  """Returns a blob reference with scope prefixed."""
273  return BlobReference(ScopedName(name), *args, **kwargs)
274 
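# Illustrative sketch (not part of the original core.py): NameScope (re-exported
# above from caffe2.python.scope) prefixes blob names created inside the block.
def _example_scoped_blob_reference():
    with NameScope('encoder'):
        w = ScopedBlobReference('weights')
    assert str(w) == 'encoder/weights'
    return w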
275 
276 def _RectifyInputOutput(blobs, net=None):
277  """A helper function to rectify the input or output of the CreateOperator
278  interface.
279  """
280  if isinstance(blobs, string_types) or isinstance(blobs, binary_type):
281  # If blobs is a single string, prepend scope.CurrentNameScope()
282  # and put it as a list.
283  # TODO(jiayq): enforce using BlobReference instead of raw strings.
284  return [ScopedBlobReference(blobs, net=net)]
285  elif type(blobs) is BlobReference:
286  # If blob is a BlobReference, simply put it as a list.
287  return [blobs]
288  elif type(blobs) in (list, tuple):
289  # If blob is a list, we go through it and type check.
290  rectified = []
291  for blob in blobs:
292  if isinstance(blob, string_types) or isinstance(blob, binary_type):
293  rectified.append(ScopedBlobReference(blob, net=net))
294  elif type(blob) is BlobReference:
295  rectified.append(blob)
296  else:
297  raise TypeError(
298  "I/O blob #{} of unsupported type: {} of type {}"
299  .format(len(rectified), str(blob), type(blob)))
300  return rectified
301  else:
302  raise TypeError(
303  "Unknown input/output type: %s of type %s." %
304  (str(blobs), type(blobs))
305  )
306 
307 
308 def CreateOperator(
309  operator_type,
310  inputs,
311  outputs,
312  name='',
313  control_input=None,
314  device_option=None,
315  arg=None,
316  engine=None,
317  **kwargs
318 ):
319  """A function wrapper that allows one to create operators based on the
320  operator type. The type should be a string corresponding to an operator
321  registered with Caffe2.
322  """
323  operator = caffe2_pb2.OperatorDef()
324  operator.type = operator_type
325  operator.name = name
326  # Add rectified inputs and outputs
327  inputs = _RectifyInputOutput(inputs)
328  outputs = _RectifyInputOutput(outputs)
329  operator.input.extend([text_type(i) for i in inputs])
330  operator.output.extend([text_type(o) for o in outputs])
331  if control_input:
332  control_input = _RectifyInputOutput(control_input)
333  operator.control_input.extend([text_type(i) for i in control_input])
334  # Set device option:
335  # (1) If device_option is explicitly set, use device_option.
336  # (2) If not, but scope.CurrentDeviceScope() is set,
337  # then we use scope.CurrentDeviceScope().
338  # (3) Otherwise, do not set device option.
339  if device_option is not None:
340  operator.device_option.CopyFrom(device_option)
341  elif scope.CurrentDeviceScope() is not None:
342  operator.device_option.CopyFrom(scope.CurrentDeviceScope())
343  if engine is not None:
344  operator.engine = engine
345  # random seed is defined in the device option, so we need to take special
346  # care.
347 
348  if 'random_seed' in kwargs:
349  operator.device_option.random_seed = kwargs['random_seed']
350  del kwargs['random_seed']
351  # Add given arguments that do not need parsing
352  if arg is not None:
353  operator.arg.extend(arg)
354  # Add all other arguments
355  for key, value in viewitems(kwargs):
356  operator.arg.add().CopyFrom(utils.MakeArgument(key, value))
357 
358  if workspace.IsImmediate():
359  workspace.RunOperatorImmediate(operator)
360  return operator
361 
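# Illustrative sketch (not part of the original core.py): CreateOperator only
# builds an OperatorDef proto; nothing runs unless immediate mode is on. The
# result can be executed directly with workspace.RunOperatorOnce. Blob names
# and the helper name are hypothetical.
def _example_create_operator():
    relu_op = CreateOperator(
        'Relu', ['X'], ['Y'],
        device_option=DeviceOption(caffe2_pb2.CPU),
    )
    workspace.FeedBlob('X', np.random.randn(2, 3).astype(np.float32))
    workspace.RunOperatorOnce(relu_op)
    return workspace.FetchBlob('Y')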
362 
363 def _RegisterPythonImpl(
364  f, grad_f=None, python_func_type=None, pass_workspace=False
365 ):
366  if python_func_type:
367  func = python_func_type(f)
368  f = func.forward
369  grad_f = func.backward
370  else:
371  if isinstance(f, tuple):
372  f = f[0](*f[1], **f[2])
373  if isinstance(grad_f, tuple):
374  grad_f = grad_f[0](*grad_f[1], **grad_f[2])
375 
376  token = C.register_python_op(f, pass_workspace, '')
377  if grad_f:
378  C.register_python_gradient_op(token, grad_f)
379  return token
380 
381 
382 def CreatePythonOperator(
383  f, inputs,
384  outputs,
385  grad_f=None,
386  pass_workspace=False,
387  python_func_type=None,
388  *args,
389  **kwargs
390 ):
391  """
392  `f` should have a signature (inputs, outputs)
393 
394  If `pass_workspace` is True, the signature is changed to
395  (inputs, outputs, workspace) where `workspace` is the workspace the op
396  is going to run on. This is potentially dangerous (as the op can manipulate
397  the workspace directly), so use it at your own risk.
398  """
399  kwargs["token"] = _RegisterPythonImpl(
400  f, grad_f, python_func_type, pass_workspace=pass_workspace
401  )
402  return CreateOperator("Python", inputs, outputs, *args, **kwargs)
403 
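# Illustrative sketch (not part of the original core.py): a Python op whose
# forward function doubles its input. The (inputs, outputs) signature matches
# the contract described in the docstring above; blob names and the helper
# name are hypothetical.
def _example_python_operator():
    def double(inputs, outputs):
        outputs[0].feed(2.0 * inputs[0].data)

    op = CreatePythonOperator(double, ['X'], ['X_doubled'])
    workspace.FeedBlob('X', np.array([1.0, 2.0], dtype=np.float32))
    workspace.RunOperatorOnce(op)
    return workspace.FetchBlob('X_doubled')  # -> [2.0, 4.0]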
404 
405 def GetIndexFromGradientList(g_list, name):
406  """A helper function to get the index from a gradient list, None if not
407  matching."""
408  for i, g in enumerate(g_list):
409  if g == name:
410  return i
411  elif type(g) is GradientSlice:
412  if (g.indices == name or g.values == name):
413  return i
414  return None
415 
416 
417 OpSSA = namedtuple('OpSSA', ['op', 'in_versions', 'out_versions'])
418 GradGenMeta = namedtuple('GradGenMeta', ['grad_op', 'idx', 'gradient'])
419 SparseGradGenMeta = namedtuple('SparseGradGenMeta', [
420  'grad_op_indices', 'idx_indices',
421  'grad_op_values', 'idx_values',
422  'gradient',
423 ])
424 
425 
426 class IR(object):
427  """A simple IR class to keep track of all intermediate representations used
428  in the gradient computation.
429  """
430 
431  def __init__(self, operators):
432  # The IR class holds multiple metadata from the forward pass:
433  # a) ssa: a list of [op, in_versions, out_versions] recording the
434  # input and the output version of each operator, similar
435  # to a normal SSA form.
436  # b) input_count: a dictionary specifying for each blob and
437  # each of its version, how many times it is used as input for another
438  # op.
439  # c) frontier: maintaining the current versions of the blobs
440  # we are having in the workspace, after the execution of all the ops
441  # added to the IR so far. This is useful because if a gradient is
442  # trying to access an earlier version of a blob, we can sanity check
443  # that it is no longer there, and thus throw an error.
444  # d) gradient_frontier: maps the names of blobs to its version that the
445  # gradient corresponds to.
446  # e) gradient_generators: for each blob and each of its version, maps to
447  # a list of operators that generates its gradient together with the
448  # gradient name.
449  self.ssa = []
450  self.input_usages = defaultdict(lambda: defaultdict(list))
451  self.frontier = defaultdict(int)
452  self.gradient_frontier = {}
453  self.gradient_generators = defaultdict(lambda: defaultdict(list))
454  self.out_version_history = defaultdict(list)
455  self.in_version_history = defaultdict(list)
456 
457  for op in operators:
458  self.Play(op)
459 
460  self.SanityCheck(operators)
461 
462  def SanityCheck(self, operators):
463  # Validate StopGradient usage by checking that StopGradient's output
464  # is actually passed forward
465  for op in operators:
466  if op.type == 'StopGradient':
467  if op.output[0] not in self.input_usages:
468  raise ValueError("""StopGradient's output '{}' is orphan.
469 You typically want to specify same input and output for
470 StopGradient. Op:\n\n{}""".format(op.output[0], str(op)))
471 
472  def Play(self, op):
473  """Adds an op to the current IR, and updates the internal states to
474  reflect the blobs and versions after the execution of the op.
475  """
476  # For input, they are the current version in the dict.
477  in_versions = {}
478  for s in op.input:
479  in_versions[s] = self.frontier[s]
480  self.input_usages[s][self.frontier[s]].append(len(self.ssa))
481  self.in_version_history[s].append((op, self.frontier[s]))
482  # For output, they are the current version plus one. If this is a
483  # newly created blob, its version starts with zero.
484  out_versions = {}
485  for s in op.output:
486  if s in self.frontier:
487  self.frontier[s] += 1
488  out_versions[s] = self.frontier[s]
489  self.out_version_history[s].append((op, self.frontier[s]))
490  # Add to SSA for bookkeeping.
491  self.ssa.append(OpSSA(op, in_versions, out_versions))
492 
493  def CheckGradientOperatorInput(
494  self, grad_op_input, g_output, fwd_op_idx, locally_generated_blobs):
495  """Checks if the gradient operators can be correctly carried out."""
496  forward_op, in_versions, out_versions = self.ssa[fwd_op_idx]
497  original_index = GetIndexFromGradientList(g_output, grad_op_input)
498 
499  # Functions to generate debug help for version-mismatches
500  def versionMismatchInfoOut(name):
501  s = "DEBUG HELP:\n"
502  s += "Maybe you use same output blob twice for different ops?\n"
503  s += "== Version history of blob [{}]\n".format(name)
504  for (op, vers) in self.out_version_history[name]:
505  s += "Version (out) {} <-- {}".format(vers, op)
506  s += "\n"
507  return s
508 
509  def versionMismatchInfoIn(name):
510  s = "DEBUG HELP:\n"
511  s += "Maybe the blob was overwritten by another op?\n"
512  s += "== Version history of blob [{}]\n".format(name)
513  for (op, vers) in self.in_version_history[name]:
514  s += "version (in) {} <-- {}".format(vers, op)
515  s += "\n"
516  return s
517 
518  # If it is a dense or sparse gradient name, it should match the
519  # version of the corresponding output.
520  if original_index is not None:
521  original_name = forward_op.output[original_index]
522  if (out_versions[original_name] !=
523  self.gradient_frontier[original_name]):
524  raise RuntimeError(
525  'Gradient name "%s" is expected to correspond '
526  'to version %d of "%s", but currently we have '
527  'version %d.\n\n' % (
528  grad_op_input, out_versions[original_name],
529  original_name,
530  self.gradient_frontier[original_name]) +
531  versionMismatchInfoOut(original_name))
532  # If it is an output name, the current version should match the
533  # version when the operator was run.
534  elif grad_op_input in out_versions:
535  if self.frontier[grad_op_input] != out_versions[grad_op_input]:
536  raise RuntimeError(
537  'Gradient operator needs output "%s" at version'
538  ' %d, but currently we have version %d.\n\n' % (
539  grad_op_input, out_versions[grad_op_input],
540  self.frontier[grad_op_input]
541  ) + versionMismatchInfoOut(grad_op_input)
542  )
543  # If it is an input name, the current version should match the
544  # version when the operator was run.
545  elif grad_op_input in in_versions:
546  if (self.frontier[grad_op_input] != in_versions[grad_op_input]):
547  raise RuntimeError(
548  'Gradient operator needs input "%s" at version '
549  '%d, but currently we have version %d.\n\n' % (
550  grad_op_input, in_versions[grad_op_input],
551  self.frontier[grad_op_input]
552  ) + versionMismatchInfoIn(grad_op_input)
553  )
554  # If it is none of the above, it should be a blob that is
555  # generated locally by one of the previous gradient operators.
556  else:
557  if grad_op_input not in locally_generated_blobs:
558  raise RuntimeError(
559  'Blob name "%s" not in the scope of operator: '
560  '%s\nand is not generated by any of the local '
561  'gradient operators.' % (grad_op_input, str(forward_op))
562  )
563 
564  def AppendSparseGenerators(self, sparse_generators):
565  # merge indices and values generators for sparse gradients
566  for name, input_generators in viewitems(sparse_generators):
567  for version, generators in viewitems(input_generators):
568  if len(generators) == 1:
569  # either indices or values are generated (but not both)
570  generator = generators[0]
571  else:
572  # both indices and values are generated
573  assert(len(generators) == 2)
574  op1_i, idx1_i, op1_v, idx1_v, g1 = generators[0]
575  op2_i, idx2_i, op2_v, idx2_v, g2 = generators[1]
576  assert(g1 == g2)
577  assert(op1_i is None or op2_i is None)
578  assert(op1_v is None or op2_v is None)
579  assert(idx1_i == 0 or idx2_i == 0)
580  assert(idx1_v == 0 or idx2_v == 0)
581  generator = SparseGradGenMeta(
582  op1_i or op2_i, idx1_i + idx2_i,
583  op1_v or op2_v, idx1_v + idx2_v,
584  g1)
585  self.gradient_generators[name][version].append(generator)
586 
587  def BuildGradientGenerators( # NOQA
588  self, fwd_op_idx, gradient_ops, g_output, g_input):
589  """Updates gradient_generators and gradient_frontier"""
590  forward_op, in_versions, out_versions = self.ssa[fwd_op_idx]
591  locally_generated_blobs = []
592  sparse_generators = defaultdict(lambda: defaultdict(list))
593 
594  for grad_op in gradient_ops:
595  # (1) check that inputs are valid
596  for s in grad_op.input:
597  self.CheckGradientOperatorInput(
598  s, g_output, fwd_op_idx, locally_generated_blobs)
599 
600  # (2) add outputs to the locally generated blobs
601  # If an output corresponds to the gradient of an input, we also
602  # record it to gradient_generators
603  locally_generated_blobs.extend([str(s) for s in grad_op.output])
604  for i, output in enumerate(grad_op.output):
605  input_index = GetIndexFromGradientList(g_input, output)
606  if input_index is not None:
607  input_name = forward_op.input[input_index]
608  input_version = in_versions[input_name]
609  g = g_input[input_index]
610  if type(g) is GradientSlice:
611  # the output corresponds either to the indices or the
612  # values of the sparse gradient. In either case we
613  # create a (partial) SparseGradGenMeta. If necessary,
614  # we'll merge indices and values generators
615  # corresponding to the same gradient in step (3)
616  if g.indices == output:
617  m = SparseGradGenMeta(grad_op, i, None, 0, g)
618  else:
619  assert(g.values == output)
620  m = SparseGradGenMeta(None, 0, grad_op, i, g)
621  sparse_generators[input_name][input_version].append(m)
622  else:
623  self.gradient_generators[input_name][input_version] \
624  .append(GradGenMeta(
625  grad_op, i, g))
626 
627  # (3) merge indices and values generators for sparse gradients, and
628  # add them to gradient_generators
629  self.AppendSparseGenerators(sparse_generators)
630 
631  # (4) for ops (e.g., Add, Sum, Sub) which have gradient outputs directly
632  passed from inputs (not computed from gradient ops), we create a
633  # GradGenMeta with None grad_op and idx so that the gradient_generators
634  # knows where the gradients are coming from. This is needed for creating
635  # Sum op to accumulate the gradients from multiple parents.
636  for input_index, g in enumerate(g_input):
637  input_name = forward_op.input[input_index]
638  input_version = in_versions[input_name]
639  if not g:
640  continue
641  if type(g) is GradientSlice:
642  if str(g.indices) not in locally_generated_blobs and \
643  str(g.values) not in locally_generated_blobs:
644  self.gradient_generators[input_name][input_version].append(
645  SparseGradGenMeta(None, 0, None, 0, g))
646  else:
647  if str(g) not in locally_generated_blobs:
648  self.gradient_generators[input_name][input_version].append(
649  GradGenMeta(None, 0, g))
650 
651  # Finally, for the gradients specified in g_input, we update the
652  # gradient frontier to reflect the input versions that the gradients
653  # correspond to.
654  for i, g in enumerate(g_input):
655  if g is not None:
656  input_name = forward_op.input[i]
657  input_version = in_versions[input_name]
658  self.gradient_frontier[input_name] = input_version
659 
660  def _GetSumOpOutputName(self, generator, input_name):
661  def remove_suffix(s, suffix):
662  if s.endswith(suffix):
663  return s[:-len(suffix)]
664  return s
665 
666  for g in generator:
667  if type(g) is GradGenMeta:
668  grad_op, idx, _ = g
669  if grad_op:
670  return grad_op.output[idx]
671  else:
672  assert(type(g) is SparseGradGenMeta)
673  op_i, idx_i, op_v, idx_v, _ = g
674  if op_i:
675  return remove_suffix(op_i.output[idx_i], '_indices')
676  if op_v:
677  return remove_suffix(op_v.output[idx_v], '_values')
678 
679  return input_name + '_grad'
680 
681  def _SetSumOpsDeviceOption(self, sum_ops, generators):
682  # we already checked that device options are consistent so we can just
683  # use the first one we find
684  for generator in generators:
685  grad_op = generator.grad_op if type(generator) is GradGenMeta \
686  else generator.grad_op_values or generator.grad_op_indices
687  if grad_op:
688  if grad_op.HasField('device_option'):
689  for op in sum_ops:
690  op.device_option.CopyFrom(grad_op.device_option)
691  break
692 
693  def _DisambiguateGradOpOutput(self, grad_op, idx, cnt):
694  grad_op.output[idx] = (
695  '_' + grad_op.output[idx] + '_autosplit_{}'.format(cnt))
696  return grad_op.output[idx], cnt + 1
697 
698  def _CheckSumOpsConflict(self, out_base_name, g):
699  if str(out_base_name) == str(g):
700  # TODO not sure what this message really means
701  raise RuntimeError(
702  'The gradient output of empty gradient op can not '
703  'be the same as the normal name of the current '
704  'input gradient.')
705 
706  def _MakeDenseSumOps(self, generators, out_base_name):
707  sum_op_input = []
708  cnt = 0
709 
710  assert len(generators) > 1
711 
712  first_grad_op = True
713  for generator in generators:
714  grad_op, idx, g = generator
715  assert(type(g) is not GradientSlice)
716  if grad_op:
717  if first_grad_op:
718  first_grad_op = False
719  out = grad_op.output[idx]
720  else:
721  out, cnt = self._DisambiguateGradOpOutput(grad_op, idx, cnt)
722  sum_op_input.append(out)
723  else:
724  self._CheckSumOpsConflict(out_base_name, g)
725  sum_op_input.append(str(g))
726 
727  if out_base_name in sum_op_input:
728  # Sum inplace mode works only for the first input
729  # So we do a swap
730  idx = sum_op_input.index(out_base_name)
731  sum_op_input[0], sum_op_input[idx] = (
732  sum_op_input[idx], sum_op_input[0]
733  )
734  sum_ops = [CreateOperator(
735  "Sum",
736  [BlobReference(x) for x in sum_op_input],
737  BlobReference(out_base_name))]
738  return sum_ops, out_base_name
739 
740  def _MakeSparseSumOps(self, generators, out_base_name):
741  indices_concat_input = []
742  values_concat_input = []
743  cnt_i = 0
744  cnt_v = 0
745 
746  for generator in generators:
747  assert(type(generator) is SparseGradGenMeta)
748  op_i, idx_i, op_v, idx_v, g = generator
749  if op_i:
750  out, cnt_i = self._DisambiguateGradOpOutput(op_i, idx_i, cnt_i)
751  indices_concat_input.append(out)
752  else:
753  self._CheckSumOpsConflict(out_base_name, g.indices)
754  indices_concat_input.append(g.indices)
755  if op_v:
756  out, cnt_v = self._DisambiguateGradOpOutput(op_v, idx_v, cnt_v)
757  values_concat_input.append(out)
758  else:
759  self._CheckSumOpsConflict(out_base_name, g.values)
760  values_concat_input.append(g.values)
761 
762  indices_concat_output = out_base_name + '_indices_concat'
763  indices_concat_split = out_base_name + '_indices_concat_split'
764  values_concat_output = out_base_name + '_values_concat'
765  values_concat_split = out_base_name + '_values_concat_split'
766  # Sum the given sparse representations by simply concatenating the
767  # indices (resp. values) tensors together. We don't do any deduplication
768  # of indices at this point. This will be done as needed before the
769  # optimizer is called
770  sum_ops = [
771  CreateOperator(
772  "Concat",
773  [BlobReference(x) for x in indices_concat_input],
774  [BlobReference(x) for x in
775  [indices_concat_output, indices_concat_split]],
776  axis=0
777  ),
778  CreateOperator(
779  "Concat",
780  [BlobReference(x) for x in values_concat_input],
781  [BlobReference(x) for x in
782  [values_concat_output, values_concat_split]],
783  axis=0
784  ),
785  ]
786  sum_op_output = GradientSlice(
787  indices=indices_concat_output,
788  values=values_concat_output,
789  )
790  return sum_ops, sum_op_output
791 
792  def _MakeSumOps(self, input_name, input_version):
793  generators = self.gradient_generators[input_name][input_version]
794  out_base_name = self._GetSumOpOutputName(generators, input_name)
795  types = list(set(type(x) for x in generators))
796  assert(len(types) == 1)
797  if types[0] is GradGenMeta:
798  sum_ops, g = self._MakeDenseSumOps(generators, out_base_name)
799  else:
800  assert(types[0] is SparseGradGenMeta)
801  sum_ops, g = self._MakeSparseSumOps(generators, out_base_name)
802  self._SetSumOpsDeviceOption(sum_ops, generators)
803  return sum_ops, g
804 
805  def _VerifyGradientGenerators(self, generator):
806  # (1) check if all gradients are of the same type. Aggregating a mix of
807  # sparse and dense gradients is not supported yet
808  if len({type(g) for g in generator}) > 1:
809  raise RuntimeError(
810  'Automatic aggregation of a mix of sparse and dense gradients '
811  'is not supported yet')
812 
813  # If, among all the operators that used this blob, none or only one
814  # produced a gradient, then no additional sum needs to be carried
815  # out.
816  if len(generator) < 2:
817  return False
818 
819  all_gradient_names = []
820  all_device_options = []
821  for g in generator:
822  if type(g) is GradGenMeta:
823  if g.grad_op:
824  all_gradient_names.append(g.gradient)
825  all_device_options.append(g.grad_op.device_option)
826  else:
827  assert(type(g) is SparseGradGenMeta)
828  if g.grad_op_indices:
829  all_device_options.append(g.grad_op_indices.device_option)
830  if g.grad_op_values:
831  all_device_options.append(g.grad_op_values.device_option)
832  all_gradient_names.append(g.gradient.values)
833 
834  # Check if all grad op device options are the same.
835  if len(all_device_options) >= 2 and not all(
836  device_option_equal(d, all_device_options[0])
837  for d in all_device_options[1:]):
838  raise RuntimeError('Unexpected behavior: not all grad ops '
839  'have the same device option.')
840  return True
841 
842  def DoGradientAccumulation(self, fwd_op_idx):
843  """For each input name in the forward op, check if we will need to
844  add gradient accumulation. If so, do gradient accumulation and return
845  the list of gradient operators.
846 
847  The criteria for doing gradient accumulation are:
848  (1) the specific input version has been used by multiple operators.
849  (2) the current fwd_op_idx is the first to use that input, i.e. in the
850  backward pass, is the last to optionally generate the gradient for
851  the op.
852  (3) For the operators that used the input, their gradient operators
853  have generated more than 1 gradient.
854 
855  When accumulating operators, our current solution is to rename all the
856  created gradients with an internal intermediate name, and then add a
857  Sum() operator that adds up all the gradients. This may use more memory
858  due to intermediate storage, but is usually the fastest approach as one
859  can do one single sum for multiple intermediate gradients.
860  """
861  forward_op, in_versions, out_versions = self.ssa[fwd_op_idx]
862  additional_sum_ops = []
863  grad_map = {}
864  for _i, input_name in enumerate(set(forward_op.input)):
865  input_version = in_versions[input_name]
866  input_usage = self.input_usages[input_name][input_version]
867  if (len(input_usage) <= 1 or fwd_op_idx != input_usage[0]):
868  # We do not need to do gradient accumulation yet.
869  continue
870  generator = self.gradient_generators[input_name][input_version]
871  try:
872  if not self._VerifyGradientGenerators(generator):
873  continue
874  except RuntimeError as err:
875  raise RuntimeError(
876  "Gradients for param ''{}'' failed to verify: {}".format(
877  input_name,
878  err
879  )
880  )
881 
882  # Finally, let's create the sum operator.
883  sum_ops, g = self._MakeSumOps(input_name, input_version)
884  additional_sum_ops.extend(sum_ops)
885  grad_map[input_name] = g
886  return additional_sum_ops, grad_map
887 
888  def _AppendAutoGradGenerator(self, y, grad, autograd_op):
889  # Gradient here is not sparse as it was generated by
890  # a ConstantFill operator. Autogeneration for sparse gradients is
891  # not supported
892  generator = GradGenMeta(
893  autograd_op, 0 if autograd_op else None, str(grad))
894 
895  self.gradient_generators[str(y)][self.frontier[str(y)]].append(
896  generator)
897 
898 
899  def _GetInitGradients(self, ys):
900  input_to_grad = {}
901  gradient_ops = []
902 
903  for y, g in viewitems(ys):
904  autograd_op = None
905  if g is None:
906  autograd_op = CreateOperator(
907  "ConstantFill", [y], [str(y) + "_autogen_grad"],
908  value=1.0)
909  gradient_ops.append(autograd_op)
910  g = autograd_op.output[0]
911  # Since the C++ gradient registry does not have a notion of
912  # NameScopes, we will convert all references to strings.
913  input_to_grad[str(y)] = (
914  GradientSlice(str(g[0]), str(g[1]))
915  if isinstance(g, GradientSlice) else str(g))
916  # Autogenerated gradients are assumed to be provided for the last
917  # input version
918  if autograd_op is not None:
919  self._AppendAutoGradGenerator(y, g, autograd_op)
920 
921  return input_to_grad, gradient_ops
922 
923  def _GenerateGradientsForForwardOp(
924  self, forward_op_idx, input_to_grad):
925  new_input_to_grad = {}
926  gradient_ops = []
927  forward_op, in_versions, out_versions = self.ssa[forward_op_idx]
928  g_output = list(
929  input_to_grad.get(name, None) for name in forward_op.output)
930 
931  if not all(g is None for g in g_output) or (
932  forward_op.type == "ZeroGradient"):
933  gradient_ops, g_input = GradientRegistry.GetGradientForOp(
934  forward_op, g_output)
935  # Check if the gradient operators are legal, and update
936  # gradient_generators and gradient_frontier
937  self.BuildGradientGenerators(
938  forward_op_idx, gradient_ops, g_output, g_input)
939  # Record the gradient map to all_input_to_grad.
940  for name, grad in zip(forward_op.input, g_input):
941  # Do not overwrite an existing gradient with a None
942  # unless the input is also an output of the op, since
943  # we update the blob version when blob is output of an
944  # operator.
945  if grad is not None or \
946  name not in input_to_grad or \
947  name in list(forward_op.output):
948  new_input_to_grad[name] = grad
949 
950  return new_input_to_grad, gradient_ops
951 
952  def GetBackwardPass(self, ys):
953  """Gets the backward pass that computes the derivatives of given blobs.
954 
955  Inputs:
956  ys: a list or a dictionary specifying what blobs we want to compute
957  derivatives of. If the input is a list, we will automatically
958  generate their gradients with all-one values; if the input is a
959  dictionary, for any dictionary entries that are not None, we will
960  take the corresponding blobs as their gradients; for all those
961  that are None, we will auto-fill them with 1.
962  """
963  if isinstance(ys, list):
964  ys = dict((y, None) for y in ys)
965  elif not isinstance(ys, dict):
966  raise TypeError("ys should either be a list or a dict.")
967 
968  # Set the gradient frontier with the initialized external
969  # gradients.
970  for y in viewkeys(ys):
971  self.gradient_frontier[y] = self.frontier[y]
972  self.input_usages[str(y)][self.frontier[str(y)]].append(
973  len(self.ssa))
974 
975  all_input_to_grad, all_gradient_ops = self._GetInitGradients(ys)
976 
977  # (2) Now, after having the virtual play above, we now play the ops
978  # backwards, creating the gradients along the path. Note that although
979  # we are playing it backwards, we cannot refer to variables that are
980  # at a version older than current_versions because they have already been
981  # overwritten.
982  for forward_op_idx in reversed(range(len(self.ssa))):
983  input_to_grad, gradient_ops = self._GenerateGradientsForForwardOp(
984  forward_op_idx, all_input_to_grad)
985  all_input_to_grad.update(input_to_grad)
986  all_gradient_ops += gradient_ops
987 
988  # If a blob was used multiple times, do gradient accumulation.
989  additional_sum_ops, grad_map = self.DoGradientAccumulation(
990  forward_op_idx)
991  # This line is so that if in an accumulation some of the operators
992  # have not produced gradients, they still do not overwrite the
993  # general all_input_to_grad map.
994  all_input_to_grad.update(grad_map)
995  all_gradient_ops += additional_sum_ops
996 
997  # (3) Post-processing.
998  # After we have done computation for each op, we now have the gradient
999  # operators ready. For the output map, we will convert everything to
1000  # BlobReferences for easier handling in python.
1001  all_input_to_grad_out = {}
1002  for key, val in viewitems(all_input_to_grad):
1003  if val is not None:
1004  if (isinstance(val, string_types) or
1005  isinstance(val, binary_type)):
1006  grad_out = BlobReference(val)
1007  else:
1008  grad_out = GradientSlice(BlobReference(val[0]),
1009  BlobReference(val[1]))
1010  all_input_to_grad_out[BlobReference(key)] = grad_out
1011  return all_gradient_ops, all_input_to_grad_out
1012 
1013 
1014 class GradientRegistry(object):
1015  """GradientRegistry holds the mapping from operators to their gradients."""
1016  gradient_registry_ = {}
1017 
1018  @classmethod
1019  def RegisterGradient(cls, op_type):
1020  """A decorator for registering gradient mappings."""
1021 
1022  def Wrapper(func):
1023  cls.gradient_registry_[op_type] = func
1024  return func
1025 
1026  return Wrapper
1027 
1028  @classmethod
1029  def _GetGradientForOpCC(cls, op_def, g_output):
1030  # TODO(tulloch) - Propagate GradientWrapper up through the stack.
1031  def from_untyped(grad):
1032  if grad is None:
1033  w = C.GradientWrapper()
1034  assert w.is_empty()
1035  return w
1036  try:
1037  (indices, values) = grad
1038  w = C.GradientWrapper()
1039  w.indices = indices
1040  w.values = values
1041  assert w.is_sparse()
1042  return w
1043  except ValueError:
1044  w = C.GradientWrapper()
1045  w.dense = grad
1046  assert w.is_dense()
1047  return w
1048 
1049  g_output = [from_untyped(grad) for grad in g_output]
1050  grad_defs_str, g_input = C.get_gradient_defs(
1051  op_def.SerializeToString(), g_output)
1052 
1053  def to_untyped(grad_wrapper):
1054  if grad_wrapper.is_empty():
1055  return None
1056  if grad_wrapper.is_sparse():
1057  return GradientSlice(grad_wrapper.indices, grad_wrapper.values)
1058  assert grad_wrapper.is_dense()
1059  return grad_wrapper.dense
1060 
1061  g_input = [to_untyped(grad_wrapper) for grad_wrapper in g_input]
1062  grad_defs = []
1063  for grad_def_str in grad_defs_str:
1064  grad_def = caffe2_pb2.OperatorDef()
1065  grad_def.ParseFromString(grad_def_str)
1066  grad_defs.append(grad_def)
1067  return grad_defs, g_input
1068 
1069  @classmethod
1070  def GetGradientForOp(cls, op, g_output):
1071  try:
1072  gradient_ops, g_input = cls._GetGradientForOpCC(op, g_output)
1073  except Exception as e:
1074  # Not supported in C++; will try python registration next.
1075  if op.type in cls.gradient_registry_:
1076  gradient_ops, g_input = cls.gradient_registry_[op.type](
1077  op, g_output
1078  )
1079  else:
1080  raise Exception(
1081  "Exception when creating gradient for [{}]:{}.\nOp: \n{}".
1082  format(op.type, e, str(op))
1083  )
1084 
1085  if gradient_ops is None:
1086  return [], g_input
1087  if type(gradient_ops) is not list:
1088  gradient_ops = [gradient_ops]
1089  return gradient_ops, g_input
1090 
1091  @classmethod
1092  def GetBackwardPass(cls, operators, ys, ys_generate_gradient=False):
1093  """Gets the backward pass for the list of operators.
1094 
1095  Args:
1096  operators: a list of operators constituting the forward pass.
1097  ys: a list or a dictionary specifying what blobs we want to compute
1098  derivatives of. If the input is a list, we will automatically
1099  generate their gradients with all-one values; if the input is a
1100  dictionary, for any dictionary entries that are not None, we'll
1101  take the corresponding blobs as their gradients; for all those
1102  that are None, we will auto-fill them with 1.
1103  Returns:
1104  gradient_ops: a list of gradient operators to run.
1105  all_input_to_grads: a map from input to their corresponding
1106  gradients.
1107  """
1108  ir = IR(operators)
1109  return ir.GetBackwardPass(ys)
1110 
1111 
1112 GradientRegistry.RegisterGradient('Do')(gen_do_gradient)
1113 GradientRegistry.RegisterGradient('If')(gen_if_gradient)
1114 GradientRegistry.RegisterGradient('While')(gen_while_gradient)
1115 
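# Illustrative sketch (not part of the original core.py): asking the registry
# for a backward pass of a tiny forward net. Passing ['loss'] as `ys` makes the
# IR auto-generate an all-ones gradient for 'loss'. Blob names and the helper
# name are hypothetical.
def _example_get_backward_pass():
    fwd_ops = [
        CreateOperator('FC', ['X', 'W', 'b'], ['Y']),
        CreateOperator('AveragedLoss', ['Y'], ['loss']),
    ]
    grad_ops, input_to_grad = GradientRegistry.GetBackwardPass(fwd_ops, ['loss'])
    # input_to_grad maps e.g. BlobReference("W") -> BlobReference("W_grad");
    # grad_ops contains the autogenerated ConstantFill plus the gradient ops.
    return grad_ops, input_to_grad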
1116 
1117 def get_ssa(net, blob_versions=None):
1118  """
1119  Given a net, return a structure containing the version of each input and
1120  output blob used by each operator.
1121 
1122  Args:
1123  net: either a Net or a NetDef
1124  blob_versions: (optional) map with current version number for given
1125  blob names. If not provided or blob not found, start
1126  from version 0.
1127  Returns:
1128  Tuple (ssa, blob_versions)
1129  ssa: list of tuples (versioned_inputs, versioned_outputs)
1130  for each op in the net. A versioned input is a tuple
1131  (blob_name, version).
1132  blob_versions: updated map with latest version of each blob found in
1133  the net.
1134  """
1135  proto = net.Proto() if isinstance(net, Net) else net
1136  assert isinstance(proto, caffe2_pb2.NetDef)
1137  if blob_versions is None:
1138  blob_versions = {}
1139  if isinstance(net, list):
1140  return [get_ssa(n, blob_versions) for n in net], blob_versions
1141  for i in proto.external_input:
1142  if i not in blob_versions:
1143  blob_versions[str(i)] = 0
1144  ssa = []
1145  for op in proto.op:
1146  if not proto.external_input:
1147  for i in op.input:
1148  if i not in blob_versions:
1149  blob_versions[i] = 0
1150  inputs = [(str(i), blob_versions.get(str(i), 0)) for i in op.input]
1151  for o in op.output:
1152  blob_versions[str(o)] = blob_versions.get(str(o), 0) + 1
1153  outputs = [(str(o), blob_versions[str(o)]) for o in op.output]
1154  ssa.append((inputs, outputs))
1155  return ssa, blob_versions
1156 
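# Illustrative sketch (not part of the original core.py): blob versioning with
# get_ssa on a two-op NetDef whose first op writes 'X' in place.
def _example_get_ssa():
    netdef = caffe2_pb2.NetDef()
    netdef.op.extend([
        CreateOperator('Relu', ['X'], ['X']),   # in-place: bumps X to version 1
        CreateOperator('Relu', ['X'], ['Y']),
    ])
    ssa, versions = get_ssa(netdef)
    # ssa      == [([('X', 0)], [('X', 1)]), ([('X', 1)], [('Y', 1)])]
    # versions == {'X': 1, 'Y': 1}
    # get_undefined_blobs(ssa) == {'X'}, since 'X' is first used at version 0.
    return ssa, versions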
1157 
1158 def get_undefined_blobs(ssa):
1159  """
1160  Given a ssa in the format produced by get_ssa(), return a set of blobs that
1161  are used before they are defined, which corresponds to inputs at version 0.
1162  """
1163  undef_blobs = set()
1164  for inputs, _outputs in ssa:
1165  undef_blobs |= set(name for (name, ver) in inputs if ver == 0)
1166  return undef_blobs
1167 
1168 
1169 def get_output_producers(ssa):
1170  """
1171  Given a ssa in the format produced by get_ssa(), returns a map from
1172  versioned blob into the operator index that produces that version of
1173  the blob. A versioned blob is a tuple (blob_name, version).
1174  """
1175  producers = {}
1176  for i, (_inputs, outputs) in enumerate(ssa):
1177  for o in outputs:
1178  producers[o] = i
1179  return producers
1180 
1181 
1182 def get_op_ids_in_path(ssa, blob_versions, inputs, outputs):
1183  """
1184  Given a ssa and blob_versions as produced by get_ssa(), returns the list
1185  of op indices that are necessary in order to generate the blobs in
1186  `outputs`, given blobs in `inputs`.
1187  The `inputs` are assumed to be given at their latest version.
1188  """
1189  inputs_set = set((str(i), blob_versions[str(i)]) for i in inputs)
1190  producers = get_output_producers(ssa)
1191  queue = [(str(o), blob_versions[str(o)]) for o in outputs]
1192  used_op_ids = set()
1193  while len(queue) > 0:
1194  o = queue.pop()
1195  if (o not in inputs_set) and (o in producers):
1196  op_id = producers[o]
1197  if op_id not in used_op_ids:
1198  used_op_ids |= {op_id}
1199  inputs, _ = ssa[op_id]
1200  queue.extend(inputs)
1201  return sorted(used_op_ids)
1202 
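# Illustrative sketch (not part of the original core.py): selecting only the
# ops needed to produce 'Y' from 'X'; op 2 is unrelated and is dropped.
def _example_op_ids_in_path():
    netdef = caffe2_pb2.NetDef()
    netdef.op.extend([
        CreateOperator('Relu', ['X'], ['H']),
        CreateOperator('Relu', ['H'], ['Y']),
        CreateOperator('Relu', ['X'], ['Z']),
    ])
    ssa, versions = get_ssa(netdef)
    assert get_op_ids_in_path(ssa, versions, ['X'], ['Y']) == [0, 1]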
1203 
1204 def recurrent_network_op_remap(op, prefix, blob_remap):
1205  """
1206  Parameters
1207  ----------
1208  op : Caffe2 operator (RecurrentNetworkOp or RecurrentNetworkGradientOp).
1209  prefix: this argument is not used in this function, just for legacy support.
1210  blob_remap : Dictionary that represents the map from old blob name to new.
1211 
1212  Updates blob names in arguments of RecurrentNetworkOp and
1213  RecurrentNetworkGradientOp to conform to cloned input and output of both
1214  operators and also makes sure names of locally generated blobs in arguments
1215  have the same prefix as the input and output of the operators.
1216  """
1217 
1218  def get_remapped_str(blob_str):
1219  if isinstance(blob_str, binary_type):
1220  blob_str = blob_str.decode('utf-8')
1221  return blob_remap.get(blob_str, blob_str).encode('utf-8')
1222 
1223  for argument in op.arg:
1224  if len(argument.strings) > 0:
1225  for i in range(len(argument.strings)):
1226  argument.strings[i] = get_remapped_str(argument.strings[i])
1227  elif argument.name == 'timestep':
1228  argument.s = get_remapped_str(argument.s)
1229  elif argument.name.endswith('step_net'):
1230  # argument is a proto
1231  remap_proto(argument, blob_remap)
1232 
1233 
1234 def control_op_remap(op, prefix, blob_remap):
1235  net_arg_names = []
1236  if op.type == "If":
1237  net_arg_names = ['then_net', 'else_net']
1238  else:
1239  net_arg_names = ['loop_net', 'cond_net']
1240  for argument in op.arg:
1241  if argument.name in net_arg_names:
1242  assert argument.n, \
1243  "Expected non empty net in " + op.type + "'s " + argument.name + " argument"
1244  subnet = Net(argument.n)
1245  remapped_subnet = subnet.Clone(
1246  name=(subnet._net.name if subnet._net.name else '') + '_remapped',
1247  blob_remap=blob_remap)
1248  argument.n.CopyFrom(remapped_subnet.Proto())
1249 
1250 
1251 DEFAULT_REMAP_FUNCS = {
1252  'RecurrentNetwork': recurrent_network_op_remap,
1253  'RecurrentNetworkGradient': recurrent_network_op_remap,
1254  'If': control_op_remap,
1255  'While': control_op_remap,
1256 }
1257 
1258 
1259 def remap_proto(argument, blob_remap):
1260  subnet = Net(argument.n)
1261 
1262  cloned_sub_net = subnet.Clone(
1263  'cloned_sub_net',
1264  blob_remap,
1265  )
1266 
1267  argument.n.CopyFrom(cloned_sub_net.Proto())
1268 
1269 
1270 def clone_and_bind_net(net, name, prefix, blob_remap=None, inputs=None,
1271  keep_schema=True):
1272  """
1273  Clone the given Net, binding its input schema to the given `inputs` record.
1274  Blob names defined by the net are prepended with the given `prefix`.
1275 
1276  Args:
1277  net: the net to clone
1278  name: the name of the new net
1279  prefix: the prefix to prepend to local blobs
1280  blob_remap: (optional) dict with additional blob name remapping.
1281  inputs: (optional) input record that will provide actual input
1282  values for the cloned net. Must be compatible with the
1283  net's input schema or be a strict superset of it
1284  keep_schema: by default (True), the original schema will be kept and
1285  remapped accordingly. otherwise, the schema will be set as
1286  inputs or left empty if inputs is not given.
1287  Returns:
1288  Tuple (cloned_net, blob_remap)
1289  clone_net: the cloned Net
1290  blob_remap: a map from original blob names into remapped blob names
1291  """
1292  from caffe2.python import schema
1293  assert isinstance(net, Net)
1294  if blob_remap is None:
1295  blob_remap = {}
1296  if inputs is not None:
1297  assert isinstance(inputs, schema.Field)
1298  original = net.input_record()
1299  assert original is not None
1300  # TODO(azzolini): improve schema type checking
1301  diff = set(original.field_names()) - set(inputs.field_names())
1302  assert len(diff) == 0, (
1303  "Schemas don't match, extra fields {diff} found in the net {name}. "
1304  "original: {original}; inputs: {inputs}"
1305  .format(
1306  diff=diff, name=net.Name(), original=original.field_names(),
1307  inputs=inputs.field_names()
1308  )
1309  )
1310  original_mapping = dict(zip(original.field_names(),
1311  original.field_blobs()))
1312  for fn, fb in zip(inputs.field_names(), inputs.field_blobs()):
1313  if fn in original_mapping:
1314  blob_remap[str(original_mapping[fn])] = str(fb)
1315  proto = net.Proto()
1316  ssa, blob_versions = get_ssa(proto)
1317  undef_blobs = get_undefined_blobs(ssa)
1318 
1319  for blob in viewkeys(blob_versions):
1320  if blob in blob_remap:
1321  continue
1322  elif blob in undef_blobs:
1323  blob_remap[blob] = blob
1324  else:
1325  blob_remap[blob] = prefix + blob
1326  cloned_net = net.Clone(name, blob_remap, keep_schema=keep_schema)
1327  if not keep_schema and inputs:
1328  cloned_net.set_input_record(inputs)
1329  return cloned_net, blob_remap
1330 
1331 
1332 def _get_blob_ref(blob_name_or_ref):
1333  return (
1334  blob_name_or_ref if isinstance(blob_name_or_ref, BlobReference)
1335  else BlobReference(blob_name_or_ref)
1336  )
1337 
1338 
1339 def _recover_record_by_prefix(names, prefix=''):
1340  """
1341  Tries to recover record by taking a subset of blob names with
1342  a given prefix name and interpreting them as schema column names
1343  """
1344  from caffe2.python import schema
1345  column_names = [name[len(prefix):] for name in names
1346  if name.startswith(prefix)]
1347  if not column_names:
1348  return None
1349  return schema.from_column_list(
1350  column_names,
1351  col_blobs=[_get_blob_ref(prefix + name) for name in column_names])
1352 
1353 
1354 class Net(object):
1355  _net_names_used = set()
1356  operator_registry_ = {}
1357 
1358  @staticmethod
1359  def current_prefix():
1360  from caffe2.python.net_builder import NetBuilder
1361  builder = NetBuilder.current(required=False)
1362  return builder.name if builder else ''
1363 
1364  @staticmethod
1365  def _get_next_net_name(basename):
1366  name = basename = '/'.join(
1367  x for x in [Net.current_prefix(), basename] if x
1368  )
1369  next_idx = 1
1370  while name in Net._net_names_used:
1371  name = basename + '_' + str(next_idx)
1372  next_idx += 1
1373  Net._net_names_used |= set([name])
1374  return name
1375 
1376  def __init__(self, name_or_proto):
1377  """
1378  Create a Net.
1379  Args:
1380  name_or_proto: If a NetDef is provided, clone it. Otherwise,
1381  create an empty net with the given name.
1382  """
1383  self._input_record = None
1384  self._output_record = None
1385  # Register blobs so that it's guaranteed that different calls to
1386  # NextBlob/NextScopedBlob always return blobs with different names
1387  self._registered_blob_names = set()
1388  self._recreate_lookup_tables = False
1389  self._op_outputs = set()
1390  self._external_input_map = set()
1391  self._attr_dict = defaultdict(list)
1392  if type(name_or_proto) is caffe2_pb2.NetDef:
1393  proto = name_or_proto
1394  # We are initializing a network from a NetDef. In this case, we will
1395  # initialize our network with the given netdef.
1396  self._net = caffe2_pb2.NetDef()
1397  self._net.CopyFrom(proto)
1398 
1399  existing_outputs = [list(op.output) for op in self._net.op]
1400 
1401  self._external_input_map.update(list(self._net.external_input))
1402 
1403  # Set the next name index properly.
1404  existing_names = set(
1405  sum(
1406  [list(op.input) for op in self._net.op], []
1407  ) + sum(
1408  existing_outputs, []
1409  )
1410  )
1411  for outs in existing_outputs:
1412  self._op_outputs.update(outs)
1413 
1414  prefix_len = len(self._net.name + '_blob_')
1415  autogen_indices = []
1416  for s in existing_names:
1417  if s.startswith(self._net.name + '_blob_'):
1418  try:
1419  autogen_indices.append(int(s[prefix_len:]))
1420  except ValueError:
1421  pass
1422  if len(autogen_indices):
1423  self._next_name_index = max(autogen_indices) + 1
1424  else:
1425  self._next_name_index = 0
1426  name = self._net.name
1427  else:
1428  name = name_or_proto
1429  self._net = caffe2_pb2.NetDef()
1430  self._next_name_index = 0
1431 
1432  # make sure that this net name hasn't been used before
1433  self._net.name = Net._get_next_net_name(name)
1434 
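# Illustrative sketch (not part of the original core.py, kept as a comment so
# it does not become a method of Net): building a small Net and running it.
# Operator calls are routed through Net.__getattr__, defined further below in
# this file; blob and net names are hypothetical.
#
#   net = Net('example_net')    # the name is deduplicated if already in use
#   net.ConstantFill([], 'ones', shape=[2, 2], value=1.0)
#   net.Relu(['ones'], 'ones_relu')
#   workspace.RunNetOnce(net)
#   workspace.FetchBlob('ones_relu')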
1435  def AppendNet(self, net):
1436  assert isinstance(net, Net)
1437  for i in net.Proto().external_input:
1438  if (
1439  i not in self.Proto().external_input and
1440  i not in self._op_outputs
1441  ):
1442  self.Proto().external_input.append(i)
1443 
1444  self.Proto().external_output.extend(
1445  [
1446  o for o in net.Proto().external_output
1447  if o not in self.Proto().external_output
1448  ]
1449  )
1450  self._ExtendOps(net.Proto().op)
1451  return self
1452 
1453  def LogInfo(self, *msg_or_blobs):
1454  for msg_or_blob in msg_or_blobs:
1455  if not isinstance(msg_or_blob, BlobReference):
1456  blob = self.GivenTensorStringFill(
1457  [], self.NextName('log'),
1458  shape=[], values=[msg_or_blob])
1459  else:
1460  blob = msg_or_blob
1461  self.Print(blob, [])
1462 
1463  def add_attribute(self, name, obj):
1464  """
1465  Add `obj` to the list of attributes in this net under the given `name`.
1466  Attributes are user-defined objects and have no pre-defined semantics.
1467  """
1468  self._attr_dict[name].append(obj)
1469 
1470  def get_attributes(self, name):
1471  """
1472  Returns the list of attributes in this net for a given `name`.
1473  Attributes are user-defined objects added with `add_attribute'.
1474  """
1475  return self._attr_dict.get(name, [])
1476 
1477  def set_rand_seed(self, seed=100, sequence_seed=True, seed_on_op_def=False):
1478  """
1479  Adds a random seed to each op in the net.
1480  If sequence_seed is set, the i-th op has rand_seed=`seed + i`
1481  If seed_on_op_def is set, the op rand_seed=hash(str(op))
1482  sequence_seed and seed_on_op_def cannot be both set to True.
1483  """
1484  assert not (sequence_seed and seed_on_op_def), (
1485  'sequence_seed and seed_on_op_def cannot be both set to True.')
1486  for i, op in enumerate(self.Proto().op):
1487  if sequence_seed:
1488  curr_seed = seed + i
1489  elif seed_on_op_def:
1490  curr_seed = hash(str(op) + str(seed)) % np.iinfo(np.uint32).max
1491  else:
1492  curr_seed = seed
1493  op.device_option.random_seed = curr_seed
1494 
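# Illustrative sketch (not part of the original core.py, kept as a comment):
# with sequence_seed, op i receives random_seed = seed + i in its
# device_option. Net and blob names are hypothetical.
#
#   net = Net('seeded_net')
#   net.UniformFill([], 'u0', shape=[3], min=0.0, max=1.0)
#   net.UniformFill([], 'u1', shape=[3], min=0.0, max=1.0)
#   net.set_rand_seed(seed=7, sequence_seed=True)
#   [op.device_option.random_seed for op in net.Proto().op]  # -> [7, 8]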
1495  def Name(self):
1496  return self._net.name
1497 
1498  def __str__(self):
1499  return self.Name()
1500 
1501  def Const(self, array, blob_out=None, dtype=None):
1502  if isinstance(array, bool):
1503  return self.ConstantFill(
1504  [],
1505  blob_out or 1,
1506  dtype=DataType.BOOL,
1507  value=array)
1508 
1509  if dtype is None:
1510  array = np.array(array)
1511  else:
1512  array = np.array(array, dtype=dtype)
1513 
1514  def do_set(operator):
1515  return operator(
1516  [],
1517  blob_out or 1,
1518  shape=array.shape,
1519  values=array.flatten().tolist())
1520 
1521  if array.dtype == np.int32:
1522  return do_set(self.GivenTensorIntFill)
1523  elif array.dtype == np.int64:
1524  return do_set(self.GivenTensorInt64Fill)
1525  elif array.dtype == np.str:
1526  return do_set(self.GivenTensorStringFill)
1527  elif array.dtype == np.bool:
1528  return do_set(self.GivenTensorBoolFill)
1529  else:
1530  return do_set(self.GivenTensorFill)
1531 
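# Illustrative sketch (not part of the original core.py, kept as a comment):
# Const picks the GivenTensor*Fill variant based on the numpy dtype of the
# value, and falls back to ConstantFill for a plain bool. Names are
# hypothetical.
#
#   net = Net('const_net')
#   w = net.Const(np.array([[1.0, 2.0], [3.0, 4.0]]), 'w')    # GivenTensorFill
#   idx = net.Const(np.array([0, 1], dtype=np.int32), 'idx')  # GivenTensorIntFill
#   flag = net.Const(True, 'flag')                            # ConstantFill, BOOL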
1532  def BlobIsDefined(self, blob):
1533  """
1534  Returns true if the given BlobReference is produced as output of
1535  an operator in this net, or if it is provided as an external input.
1536  """
1537  if self._recreate_lookup_tables:
1538  self._RecreateLookupTables()
1539  name = str(blob)
1540  return (name in self._op_outputs) or (name in self._external_input_map)
1541 
1542  def UsesBlob(self, blob):
1543  """
1544  Returns true iff the given BlobReference is used by any operator
1545  of this net, or if it is one of the external inputs of the net.
1546  """
1547  blob_name = str(blob)
1548  for op in self._net.op:
1549  for input in op.input:
1550  if input == blob_name:
1551  return True
1552  return blob_name in self._external_input_map
1553 
1554  def UsedBlobNames(self):
1555  """
1556  Returns a set of blob names used in the net
1557  """
1558  blob_names = set()
1559  for op in self._net.op:
1560  blob_names |= set(op.input)
1561  blob_names |= set(op.output)
1562  if self._net.external_input:
1563  blob_names |= set(self._net.external_input)
1564  if self._net.external_output:
1565  blob_names |= set(self._net.external_output)
1566  return blob_names
1567 
1568  def GetBlobRef(self, blob_name):
1569  """
1570  Given the name of a blob produced by this net, return a BlobReference
1571  to it. If the blob is not produced by any op in this net,
1572  raises KeyError.
1573  """
1574  blob_name = str(blob_name)
1575  if not self.BlobIsDefined(blob_name):
1576  raise KeyError('Net does not define blob %s' % blob_name)
1577  return BlobReference(blob_name, self)
1578 
1579  def Clone(
1580  self,
1581  name,
1582  blob_remap=None,
1583  op_id_mask=None,
1584  remap_funcs=None,
1585  keep_schema=True
1586  ):
1587  """
1588  Clone this net.
1589  Args:
1590  name: name of the cloned net
1591  blob_remap: optional map from original blob names to replacement names
1592  op_id_mask: optional list of operator indices to include in
1593  the cloned net. If not provided, all ops are included.
1594  """
1595  orig_remap_funcs = {} if remap_funcs is None else remap_funcs
1596  # by default we want to put RecurrentNetworkOp and
1597  # RecurrentNetworkGradientOp into remap_funcs, as these two operators
1598  # also take blobs and proto into the arguments.
1599  remap_funcs = DEFAULT_REMAP_FUNCS.copy()
1600  remap_funcs.update(orig_remap_funcs)
1601  proto = self._net
1602  new_proto = caffe2_pb2.NetDef()
1603  new_proto.CopyFrom(proto)
1604  new_proto.name = name
1605 
1606  if blob_remap is None:
1607  blob_remap = {}
1608  if op_id_mask is None:
1609  op_id_mask = list(range(0, len(proto.op)))
1610 
1611  def get_remapped_str(blob):
1612  blob_str = str(blob)
1613  return str(blob_remap.get(blob_str, blob_str))
1614 
1615  def remap_list(proto_list):
1616  new_list = [get_remapped_str(b) for b in proto_list]
1617  del proto_list[:]
1618  proto_list.extend(new_list)
1619 
1620  def remap_op(op):
1621  new_op = caffe2_pb2.OperatorDef()
1622  new_op.CopyFrom(op)
1623  remap_list(new_op.input)
1624  remap_list(new_op.output)
1625  if new_op.type in remap_funcs:
1626  remap_funcs[new_op.type](
1627  new_op,
1628  (name + '/') if name else '',
1629  blob_remap,
1630  )
1631  return new_op
1632 
1633  del new_proto.op[:]
1634  new_proto.op.extend([remap_op(proto.op[op_id]) for op_id in op_id_mask])
1635  remap_list(new_proto.external_input)
1636  remap_list(new_proto.external_output)
1637  new_net = Net(new_proto)
1638 
1639  if keep_schema:
1640  from caffe2.python import schema
1641  if self._input_record:
1642  new_net._input_record = schema.from_blob_list(
1643  self._input_record,
1644  [
1645  BlobReference(get_remapped_str(blob), net=new_net)
1646  for blob in self._input_record.field_blobs()
1647  ],
1648  )
1649  if self._output_record:
1650  new_net._output_record = schema.from_blob_list(
1651  self._output_record,
1652  [
1653  BlobReference(get_remapped_str(blob), net=new_net)
1654  for blob in self._output_record.field_blobs()
1655  ],
1656  )
1657 
1658  new_net._attr_dict.update(self._attr_dict)
1659  return new_net
1660 
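# [Editorial example - illustrative sketch, not part of the original source]
# Clone() copies every op of the net under a new name; blob_remap renames the
# listed blobs in ops, external_inputs and external_outputs of the clone:
#
#     from caffe2.python import core
#
#     net = core.Net('original')
#     net.FC(['x', 'w', 'b'], 'y')
#     cloned = net.Clone('copy', blob_remap={'x': 'x2'})
#     # cloned's FC op now reads 'x2' instead of 'x'; 'w', 'b', 'y' are unchanged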
1661  def ClonePartial(self, name, inputs, outputs, remap_funcs=None):
1662  """
1663  Clone this net, including only ops that are necessary in order to
1664  compute `outputs` given `inputs`. Return references to the cloned
1665  outputs. Internal blobs (blobs that are produced and consumed inside
1666  the net but not used as outputs) will be remapped to avoid name
1667  conflict.
1668 
1669  Args:
1670  name: the name of the cloned net
1671  inputs: map where the keys correspond to BlobReferences in the
1672  original net, and the values correspond to external inputs
1673  in the partially cloned net. If `inputs` is a list, don't
1674  remap input names.
1675  outputs: outputs to be produced by the cloned net.
1676 
1677  Returns:
1678  Tuple (new_net, new_outputs)
1679  new_net: a new Net object.
1680  new_outputs: list of BlobReferences corresponding to the
1681  outputs produced by new_net.
1682  """
1683  input_is_pair_list = isinstance(inputs, list) and all(
1684  isinstance(i, tuple) and len(i) == 2 for i in inputs)
1685  inputs = (
1686  inputs if isinstance(inputs, (dict, OrderedDict)) else
1687  OrderedDict(inputs) if input_is_pair_list else
1688  OrderedDict(zip(inputs, inputs)))
1689  for output in outputs:
1690  assert self.BlobIsDefined(output)
1691  input_names = {str(k): str(v) for k, v in viewitems(inputs)}
1692  output_names = [str(o) for o in outputs]
1693  proto = self._net
1694  blob_versions = {str(i): 0 for i in inputs}
1695  ssa, blob_versions = get_ssa(proto, blob_versions)
1696  used_op_ids = get_op_ids_in_path(ssa, blob_versions, inputs, outputs)
1697  disallowed_op_ids = get_op_ids_in_path(ssa, blob_versions, [], inputs)
1698  assert len(set(used_op_ids) & set(disallowed_op_ids)) == 0, (
1699  'Cannot partially clone net: some of the ops required would ' +
1700  'generate the given input.')
1701 
1702  sub_ssa = [op for i, op in enumerate(ssa) if i in used_op_ids]
1703  undef_blobs = get_undefined_blobs(sub_ssa) - set(viewkeys(input_names))
1704  prefix = (name + '/') if name else ''
1705 
1706  def remap(blob_name):
1707  if blob_name in input_names:
1708  return input_names[blob_name]
1709  elif blob_name in undef_blobs:
1710  return blob_name
1711  else:
1712  return prefix + blob_name
1713 
1714  blob_mapping = {b: remap(b) for b in viewkeys(blob_versions)}
1715  new_net = self.Clone(name, blob_mapping, used_op_ids, remap_funcs)
1716  new_in = [
1717  blob_mapping[i] for i in viewkeys(input_names)] + list(undef_blobs)
1718  new_out = [blob_mapping[o] for o in output_names]
1719  del new_net.Proto().external_input[:]
1720  new_net.Proto().external_input.extend(new_in)
1721  new_net._external_input_map = set(list(new_in))
1722  del new_net.Proto().external_output[:]
1723  new_net.Proto().external_output.extend(new_out)
1724  return new_net, [new_net.GetBlobRef(o) for o in new_out]
1725 
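# [Editorial example - illustrative sketch, not part of the original source]
# ClonePartial() keeps only the ops on the path from `inputs` to `outputs`;
# internal blobs are prefixed with the new net name to avoid collisions:
#
#     from caffe2.python import core
#
#     net = core.Net('full')
#     net.Relu(['x'], 'y')
#     net.Relu(['y'], 'z')
#     sub_net, (z_ref,) = net.ClonePartial('sub', inputs={'x': 'x_in'},
#                                          outputs=['z'])
#     # sub_net reads external input 'x_in' and produces 'sub/z' (== z_ref)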
1726  def Proto(self):
1728  return self._net
1729 
1730  def PopulateProtoWithFileName(self):
1731  net_tb = workspace.operator_tracebacks.get(self.Name(), None)
1732  if net_tb is not None:
1733  for idx, op in enumerate(self.Proto().op):
1734  if idx in net_tb:
1735  op.name = ':'.join(map(str, net_tb[idx][0]))
1736 
1737  def NextScopedBlob(self, prefix='unnamed'):
1738  """Return a blob that has not been defined or registered in the
1739  current net. It returns `ScopedBlobReference(prefix)` if that name is
1740  not yet taken, otherwise `ScopedBlobReference(prefix) + '_auto_' + ?`.
1741  Different calls are guaranteed to return blobs with different names.
1742  """
1743  output_blob_base = ScopedName(prefix)
1744  return self.NextBlob(output_blob_base)
1745 
1746  def NextBlob(self, prefix='unnamed'):
1747  """Return a blob that has not been defined or registered in the
1748  current net. It returns `BlobReference(prefix)` if that name is not
1749  yet taken, otherwise `BlobReference(prefix) + '_auto_' + ?`. Different
1750  calls are guaranteed to return blobs with different names."""
1751  output_blob_base = BlobReference(prefix)
1752  output_blob = output_blob_base
1753  index = 0
1754  while str(output_blob) in self._registered_blob_names or (
1755  self.BlobIsDefined(output_blob)):
1756  output_blob = output_blob_base + '_auto_' + str(index)
1757  index += 1
1758 
1759  self._registered_blob_names.add(str(output_blob))
1760  return output_blob
1761 
1762  def NextName(self, prefix=None, output_id=None):
1763  """Returns the next name to be used, if you do not want to explicitly
1764  name your blob. [Deprecated, use NextBlob, NextScopedBlob instead]"""
1765  if prefix:
1766  output_name_base = self._net.name + '/' + prefix
1767  output_name = output_name_base
1768  if output_id is not None:
1769  output_name += ':' + str(output_id)
1770  index = 2
1771  while self.BlobIsDefined(str(ScopedBlobReference(output_name))):
1772  output_name = output_name_base + '_' + str(index)
1773  if output_id is not None:
1774  output_name += ':' + str(output_id)
1775  index += 1
1776  else:
1777  output_name = self._net.name + '_blob_' + str(self._next_name_index)
1778  self._next_name_index += 1
1779  return str(output_name)
1780 
1781  def _ExtendOps(self, new_ops):
1782  self._net.op.extend(new_ops)
1783  for op in new_ops:
1784  self._op_outputs.update([text_type(o) for o in op.output])
1785 
1786  def _CheckLookupTables(self):
1787  '''
1788  Called from unit tests to validate the internal lookup tables
1789  match the protobuf contents.
1790  '''
1791  test_op_outputs = set()
1792  for op in self._net.op:
1793  for o in op.output:
1794  test_op_outputs.add(o)
1795 
1796  test_external_inp = set()
1797  for inp in self._net.external_input:
1798  test_external_inp.add(inp)
1799 
1800  assert test_op_outputs.difference(self._op_outputs) == set()
1801  assert test_external_inp.difference(self._external_input_map) == set()
1802 
1803  def _InvalidateLookupTables(self):
1804  self._recreate_lookup_tables = True
1805 
1806  def _RecreateLookupTables(self):
1807  self._op_outputs = set()
1808  for op in self._net.op:
1809  for o in op.output:
1810  self._op_outputs.add(o)
1811 
1812  self._external_input_map = set()
1813  for inp in self._net.external_input:
1814  self._external_input_map.add(inp)
1815 
1816  self._recreate_lookup_tables = False
1817 
1818  def AddGradientOperators(self, ys, skip=0):
1819  """Add the gradient for operators in the net.
1820 
1821  Inputs:
1822  ys: a list or a dictionary specifying what blobs we want to compute
1823  derivatives of. If the input is a list, we will automatically
1824  generate their gradients with all-one values; if the input is a
1825  dictionary, for any dictionary entries that are not None, we will
1826  take the corresponding blobs as their gradients; for all those
1827  that are None, we will auto-fill them with 1.
1828  skip: skips the first n operators. This is provided mainly because a
1829  lot of nets may use the first few operators for data-generation-like
1830  work that does not need gradients.
1831 
1832  Outputs:
1833  returns a map from each blob name in the input network to a blob
1834  containing its gradient (a GradientSlice in the sparse case).
1835 
1836  Currently, this is hard-coded for float operators if there are branches
1837  (i.e. a blob is used as input to multiple operators). This is because
1838  the gradient accumulation (Sum) is float only right now.
1839  """
1840 
1841  grad_ops, input_to_grad = GradientRegistry.GetBackwardPass(
1842  self._net.op[skip:], ys)
1843  # Check if in immediate mode: the grad_ops are actually being produced
1844  # by C++ and bypasses the CreateOperator() call, so in immediate mode
1845  # we will have to explicitly run them.
1846  if workspace.IsImmediate():
1847  for op in grad_ops:
1848  workspace.RunOperatorImmediate(op)
1849  self._ExtendOps(grad_ops)
1850  return input_to_grad
1851 
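# [Editorial example - illustrative sketch, not part of the original source]
# AddGradientOperators() appends the backward ops for the listed blobs and
# returns a map from forward blob names to their gradient blobs:
#
#     from caffe2.python import core
#
#     net = core.Net('grad_example')
#     net.SquaredL2Distance(['y_pred', 'y_true'], 'dist')
#     net.AveragedLoss(['dist'], 'loss')
#     grad_map = net.AddGradientOperators(['loss'])
#     # grad_map['y_pred'] is a BlobReference to the gradient of 'y_pred'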
1852  def AddExternalInput(self, *inputs):
1853  assert len(inputs) > 0
1854  refs = []
1855  for input in inputs:
1856  input_name = str(input)
1857  assert str(input) not in self._external_input_map, (
1858  'Net already contains an input named %s' % input_name)
1859  for input in inputs:
1860  input_name = str(input)
1861  self._net.external_input.extend([input_name])
1862  self._external_input_map.update([input_name])
1863  refs.append(_get_blob_ref(input_name))
1864 
1865  return refs[0] if len(refs) == 1 else refs
1866 
1867  def AddExternalOutput(self, *outputs):
1868  for output in outputs:
1869  assert isinstance(output, BlobReference)
1870  assert self.BlobIsDefined(output)
1871  for output in outputs:
1872  self.Proto().external_output.extend([str(output)])
1873 
1874  def AddScopedExternalInputs(self, *inputs):
1875  res = self.AddExternalInput(
1876  * [ScopedBlobReference(b) for b in inputs]
1877  )
1878  if not isinstance(res, list):
1879  res = [res]
1880  return res
1881 
1882  def AddScopedExternalOutputs(self, *outputs):
1883  return self.AddExternalOutput(
1884  * [ScopedBlobReference(b) for b in outputs]
1885  )
1886 
1887  # This returns a reference to the observer
1888  def AddObserver(self, observer_type):
1889  return C.add_observer_to_net(self._net.name, observer_type)
1890 
1891  def RemoveObserver(self, observer):
1892  C.remove_observer_from_net(self._net.name, observer)
1893 
1894  def NumObservers(self):
1895  return C.num_observers_on_net(self._net.name)
1896 
1897  @property
1898  def external_inputs(self):
1899  return [_get_blob_ref(x) for x in self._net.external_input]
1900 
1901  @property
1902  def external_outputs(self):
1903  return [_get_blob_ref(x) for x in self._net.external_output]
1904 
1905  def set_input_record(self, input_record):
1906  from caffe2.python import schema
1907  assert self._input_record is None or (input_record.has_blobs() and
1908  set(input_record.field_blobs()) ==
1909  set(self._input_record.field_blobs())), (
1910  'Input schema cannot be reset')
1911  if not input_record.has_blobs():
1912  with NameScope(self.Name()):
1913  self._input_record = schema.NewRecord(self, input_record)
1914  else:
1915  self._input_record = input_record
1916  for blob in input_record.field_blobs():
1917  if blob not in self.external_inputs:
1918  self.AddExternalInput(blob)
1919  return self._input_record
1920 
1921  def recover_input_record_by_prefix(self, prefix):
1922  """
1923  Tries to recover the input record by taking a subset of external_inputs with
1924  a given prefix name and interpreting them as schema column names
1925  """
1926  record = _recover_record_by_prefix(self._net.external_input, prefix)
1927  if record:
1928  self.set_input_record(record)
1929 
1930  def set_output_record(self, record):
1931  assert self._output_record is None or (record.has_blobs() and
1932  set(record.field_blobs()) ==
1933  set(self._output_record.field_blobs())), (
1934  'Output schema cannot be reset')
1935  for blob in record.field_blobs():
1936  assert self.BlobIsDefined(blob), "{} is not defined".format(blob)
1937  for blob in record.field_blobs():
1938  self.AddExternalOutput(blob)
1939  self._output_record = record
1940 
1941  def recover_output_record_by_prefix(self, prefix):
1942  """
1943  Tries to recover the output record by taking a subset of external_outputs with
1944  a given prefix name and interpreting them as schema column names
1945  """
1946  record = _recover_record_by_prefix(self._net.external_output, prefix)
1947  if record:
1948  self.set_output_record(record)
1949 
1950  def AppendOutputRecordField(self, field_name, record):
1951  from caffe2.python import schema
1952  assert self._output_record is not None, (
1953  'Tried to append to missing output record'
1954  )
1955  for blob in record.field_blobs():
1956  assert self.BlobIsDefined(blob)
1957  for blob in record.field_blobs():
1958  self.AddExternalOutput(blob)
1959  self._output_record = self._output_record + schema.Struct(
1960  (field_name, record)
1961  )
1962 
1963  def input_record(self):
1964  return self._input_record
1965 
1966  def output_record(self):
1967  return self._output_record
1968 
1969  def AddExternalInputs(self, *inputs):
1970  return self.AddExternalInput(*inputs)
1971 
1972  def AddExternalOutputs(self, *outputs):
1973  self.AddExternalOutput(*outputs)
1974 
1975  def DeduplicateGradientSlices(self, g, aggregator='sum'):
1976  assert isinstance(g, GradientSlice)
1977  unique, remapping = self.Unique([g.indices], 2, engine='SparseHash')
1978  if aggregator.lower() == 'sum':
1979  new_g = self.UnsortedSegmentSum([g.values, remapping], 1)
1980  elif aggregator.lower() == 'mean':
1981  new_g = self.UnsortedSegmentMean([g.values, remapping], 1)
1982  else:
1983  raise ValueError('{} is not supported'.format(aggregator))
1984  return GradientSlice(indices=unique, values=new_g)
1985 
1986  def RunAllOnGPU(self, gpu_id=0, use_cudnn=False):
1987  """A convenient function to run everything on the GPU."""
1988  device_option = caffe2_pb2.DeviceOption()
1989  device_option.device_type = caffe2_pb2.CUDA
1990  device_option.cuda_gpu_id = gpu_id
1991  self._net.device_option.CopyFrom(device_option)
1992  if use_cudnn:
1993  for op in self._net.op:
1994  op.engine = "CUDNN"
1995  def RunAllOnMKL(self):
1996  """A convenient function to run everything using MKLDNN."""
1997  device_option = caffe2_pb2.DeviceOption()
1998  device_option.device_type = caffe2_pb2.MKLDNN
1999  self._net.device_option.CopyFrom(device_option)
2000 
2001  def _CreateAndAddToSelf(self, op_type, inputs, outputs=None, **kwargs):
2002  """A helper function to create an operator and add it to self.
2003  """
2004  inputs = _RectifyInputOutput(inputs)
2005  for input in inputs:
2006  if not self.BlobIsDefined(input):
2007  assert input.Net() != self
2008  self.AddExternalInput(input)
2009  if outputs is None:
2010  # If we do not specify an output, we will assume that this op
2011  # produces one output in this case.
2012  outputs = self.NextName(prefix=op_type)
2013  elif type(outputs) is int:
2014  # In this case, we will auto-fill the given number of outputs
2015  # with auto-generated names.
2016  outputs = [
2017  self.NextName(prefix=op_type, output_id=i)
2018  for i in range(outputs)]
2019  outputs = _RectifyInputOutput(outputs, net=self)
2020  op = CreateOperator(op_type, inputs, outputs, **kwargs)
2021  self._ExtendOps([op])
2022 
2023  workspace.operator_tracebacks[self.Name()][
2024  len(self._net.op) - 1] = _extract_stacktrace()
2025 
2026  if len(op.output) == 0:
2027  return
2028  elif len(op.output) == 1:
2029  return BlobReference(op.output[0], self)
2030  else:
2031  return tuple(BlobReference(o, self) for o in op.output)
2032 
2033  def __getattr__(self, op_type):
2034  if op_type.startswith('__'):
2035  raise AttributeError('Attribute {} not found.'.format(op_type))
2036  if not IsOperator(op_type) and not IsOperatorWithEngine(op_type, "CUDNN"):
2037  raise AttributeError(
2038  'Method ' + op_type + ' is not a registered operator.' +
2039  ' Did you mean: [' +
2040  ",".join(workspace.C.nearby_opnames(op_type)) + ']'
2041  )
2042  return lambda *args, **kwargs: self._CreateAndAddToSelf(
2043  op_type, *args, **kwargs)
2044 
2045  def __dir__(self):
2046  additional_methods = [
2047  op
2048  for op in _REGISTERED_OPERATORS
2049  if '_ENGINE_' not in op]
2050  return sorted(set(chain(
2051  dir(type(self)),
2052  viewkeys(self.__dict__),
2053  additional_methods
2054  )))
2055 
2056  def Python(
2057  self,
2058  f,
2059  grad_f=None,
2060  python_func_type=None,
2061  pass_workspace=False,
2062  grad_output_indices=None,
2063  grad_input_indices=None
2064  ):
2065  """
2066  Registers and returns a python operator.
2067 
2068  `f` and `grad_f` can be one of the following:
2069  - a function with signature (inputs, outputs), where inputs and
2070  outputs are lists of CPUTensor objects. This function will be
2071  called from C++ every time the operator is executed.
2072  - a tuple (func, args, kwargs), where `func` is a callable, `args` an
2073  argument list, and `kwargs` a dict of keyword arguments. The call:
2074  f = func(*args, **kwargs)
2075  will be performed locally at node initialization time, on all of
2076  the nodes of the job, returning `f`, a callable that will be used
2077  as the python operator function to be called during Net execution.
2078  This is to be used when using the python operator in a distributed
2079  context, and allows one to create and keep local python state across
2080  calls to the operator.
2081 
2082  `python_func_type` is the type of an object that is constructed as
2083  python_func_type(f) and provides implementations of the forward and
2084  backward functions. It is useful when users need a stateful PythonOp
2085  (e.g. using autograd to compute grad_f).
2086 
2087  If `pass_workspace` is True, the signature is changed to
2088  (inputs, outputs, workspace) where `workspace` is the workspace the op
2089  is going to run on. This is potentially dangerous (as the op can
2090  manipulate the workspace directly), so use at your own risk.
2091 
2092  If a gradient function is specified (`grad_f`), by default its inputs
2093  will be: (1) all inputs to `f`, (2) followed by all outputs of `f`, (3)
2094  and then all gradient outputs of `f`. The outputs of `grad_f` will be
2095  (by default) all gradient inputs to `f`. If a subset of the gradient
2096  outputs or gradient inputs is desired instead, then the subsets can be
2097  specified by providing `grad_output_indices` and/or `grad_input_indices`
2098  which identify the indices of `f`'s inputs and outputs which have
2099  gradients.
2100  """
2101  assert(IsOperator('Python'))
2102 
2103  def make_builder(t):
2104  if not isinstance(t, tuple):
2105  return ''
2106  assert len(t) == 3, 'Expected builder tuple (func, args, kwargs)'
2107  func, args, kwargs = t
2108  normalized = (func, tuple(args), dict(kwargs))
2109  return pickle.dumps(normalized)
2110 
2111  f_builder = make_builder(f)
2112  grad_f_builder = make_builder(grad_f)
2113 
2114  assert (not grad_f) or ((not f_builder) == (not grad_f_builder)), (
2115  'A tuple has to be passed to both f and grad_f or neither.')
2116 
2117  core_kwargs = {}
2118  if f_builder:
2119  core_kwargs['pickled_builder'] = f_builder
2120  core_kwargs['pickled_grad_builder'] = grad_f_builder
2121  core_kwargs['pass_workspace'] = pass_workspace
2122  else:
2123  core_kwargs['token'] = _RegisterPythonImpl(
2124  f, grad_f, python_func_type, pass_workspace=pass_workspace)
2125 
2126  grad_output_indices = grad_output_indices or []
2127  grad_input_indices = grad_input_indices or []
2128  return lambda *args, **kwargs: self._CreateAndAddToSelf(
2129  'Python',
2130  grad_output_indices=grad_output_indices,
2131  grad_input_indices=grad_input_indices,
2132  *args,
2133  **dict(chain(viewitems(kwargs), viewitems(core_kwargs)))
2134  )
2135 
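# [Editorial example - illustrative sketch, not part of the original source]
# Registers a Python operator whose forward function doubles its input. The
# tensor interface (.shape, .data, .reshape) shown here mirrors the usual
# PythonOp examples and is an assumption of this sketch:
#
#     from caffe2.python import core
#
#     def double(inputs, outputs):
#         outputs[0].reshape(inputs[0].shape)
#         outputs[0].data[...] = 2 * inputs[0].data
#
#     net = core.Net('python_op_example')
#     net.Python(double)(['x'], ['x_times_two'])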
2136  def is_external_input(self, blob):
2137  name = str(blob)
2138  return name in self._external_input_map
2139 
2140  def extend_ops(self, new_ops):
2141  return self._ExtendOps(new_ops)
2142 
2143 
2144 def copy_func_between_devices(src, dst):
2145  CPU = caffe2_pb2.CPU
2146  CUDA = caffe2_pb2.CUDA
2147 
2148  if src.device_type == CPU and dst.device_type == CPU:
2149  return None
2150 
2151  if src.device_type == CUDA and dst.device_type == CUDA:
2152  if src.cuda_gpu_id == dst.cuda_gpu_id:
2153  return None
2154  else:
2155  def fun(net, *args, **kw):
2156  with DeviceScope(dst):
2157  return net.Copy(*args, **kw)
2158  return fun
2159 
2160  if src.device_type == CUDA and dst.device_type == CPU:
2161  def fun(net, *args, **kw):
2162  with DeviceScope(src):
2163  return net.CopyGPUToCPU(*args, **kw)
2164  return fun
2165 
2166  if src.device_type == CPU and dst.device_type == CUDA:
2167  def fun(net, *args, **kw):
2168  with DeviceScope(dst):
2169  return net.CopyCPUToGPU(*args, **kw)
2170  return fun
2171 
2172  raise ValueError('Non-supported devices: %s and %s' % (src, dst))
2173 
2174 
2175 def device_equal(src, dst):
2176  '''
2177  We are using this function instead of the == operator because optional-value
2178  comparison between an empty device_option and {device_type:0, cuda_gpu_id:0}
2179  returns not-equal in some cases.
2180  '''
2181  return src.device_type == dst.device_type and src.cuda_gpu_id == dst.cuda_gpu_id
2182 
2183 
2184 class RemapEntry:
2185  def __init__(self, blob, device):
2186  self.blob = blob
2187  self.device = device
2188 
2189  def __eq__(self, other):
2190  return self.blob == other.blob and self.device == other.device
2191 
2192  def __hash__(self):
2193  return hash(self.blob + str(self.device))
2194 
2195 
2196 def InjectCrossDeviceCopies(net, blob_to_device=None, blob_remap=None):
2197  '''
2198  Injects Copy functions between devices within a net. Users can provide
2199  a net in which some operators use different device_options. This method
2200  will automatically create a new net with Copy ops inserted in it.
2201 
2202  Inputs:
2203  blob_to_device: If not None, it is a map of blobs and their device locations.
2204  blob_remap: If not None, it is a map from a pair (blob, device) to
2205  the name of the blob in the given device. Blobs found in this
2206  map are assumed to be cached and don't need to be copied.
2207  Outputs:
2208  new_net: A new net with Copy ops inserted with the correct device options
2209 
2210  required_external_to_device:
2211  A mapping between unresolved external inputs and their
2212  required device options.
2213  Assumptions:
2214  1. every external input of this net is already in blob_to_device!
2215  2. if not, this function will use the net's device option
2216  '''
2217  new_net = net.Clone(net._net.name + '_cross_device', keep_schema=True)
2218  del new_net._net.op[:]
2219  if blob_to_device is None:
2220  blob_to_device = {}
2221  # remapping of input blobs for each op.
2222  if blob_remap is None:
2223  blob_remap = {}
2224  temp_remap = {}
2225  net_option = net._net.device_option or caffe2_pb2.DeviceOption()
2226 
2227  # if external_inputs have device remappings generated by previous nets,
2228  # then add those remappings as external inputs as well.
2229  all_remaps = defaultdict(list)
2230  for entry, mapped_blob in blob_remap.items():
2231  all_remaps[entry.blob].append(mapped_blob)
2232  mapped_external_inputs = []
2233  for input in new_net._net.external_input:
2234  mapped_external_inputs.extend(all_remaps.get(input) or [])
2235  new_net._net.external_input.extend(mapped_external_inputs)
2236 
2237  for op in net._net.op:
2238  temp_remap.clear()
2239  # Get where inputs and outputs should be
2240  input_dev, output_dev = InferOpBlobDevices(op)
2241 
2242  for dev, input in zip(input_dev, op.input):
2243  assert net.BlobIsDefined(input), \
2244  "input {} should be defined in the net.".format(input)
2245  if input not in blob_to_device:
2246  if net.is_external_input(input):
2247  blob_to_device[input] = net_option
2248  else:
2249  raise AttributeError(
2250  "No device information found for blob {}.".
2251  format(input)
2252  )
2253 
2254  if not device_equal(blob_to_device[input], dev):
2255  # reuse already moved input
2256  if (RemapEntry(input, dev) in blob_remap and
2257  blob_to_device[blob_remap[RemapEntry(input, dev)]] == dev):
2258  temp_remap[input] = blob_remap[RemapEntry(input, dev)]
2259  else:
2260  # need to make input on correct device.
2261  copy_func = copy_func_between_devices(
2262  blob_to_device[input], dev
2263  )
2264 
2265  def _gen_new_name(blob, device_option):
2266  CPU = caffe2_pb2.CPU
2267  CUDA = caffe2_pb2.CUDA
2268  if device_option.device_type == CPU:
2269  suffix = '_cpu'
2270  elif device_option.device_type == CUDA:
2271  suffix = '_cuda_' + str(device_option.cuda_gpu_id)
2272  else:
2273  raise RuntimeError(
2274  "Unknown device type: {}".
2275  format(device_option.device_type)
2276  )
2277  return blob + suffix
2278 
2279  new_name = _gen_new_name(input, dev)
2280  copy_func(new_net, input, new_name)
2281  blob_remap[RemapEntry(input, dev)] = new_name
2282  temp_remap[input] = new_name
2283  blob_to_device[new_name] = dev
2284 
2285  # Enforcing no blob reuse between operators. In-place blob usage within
2286  # an op is allowed. This is based on the assumption that an in-place op
2287  # has the same device info
2288  for out_blob, device in zip(op.output, output_dev):
2289  if out_blob in blob_to_device and (
2290  out_blob not in op.input and
2291  not device_equal(blob_to_device[out_blob], device)
2292  ):
2293  raise RuntimeError(
2294  "In-place blob: {} is not supported between operators "
2295  "with different device option previous:{} now: {}. "
2296  "Failed op:\n {}".format(
2297  out_blob, blob_to_device[out_blob], device, op
2298  )
2299  )
2300  new_op = caffe2_pb2.OperatorDef()
2301  new_op.CopyFrom(op)
2302 
2303  new_list = [temp_remap.get(b, b) for b in new_op.input]
2304  del new_op.input[:]
2305  new_op.input.extend(new_list)
2306 
2307  # keep inplace blobs inplace
2308  original_inputs = list(op.input)
2309  for i, out in enumerate(new_op.output):
2310  try:
2311  input_idx = original_inputs.index(out)
2312  new_op.output[i] = new_op.input[input_idx]
2313  except ValueError:
2314  pass
2315 
2316  blob_to_device.update(
2317  {o: d for d, o in zip(output_dev, new_op.output)})
2318  new_net.extend_ops([new_op])
2319 
2320  return new_net, blob_to_device
2321 
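# [Editorial example - illustrative sketch, not part of the original source]
# Two ops pinned to different devices; the helper returns a new net in which a
# CopyCPUToGPU op feeds a renamed blob (e.g. 'y_cuda_0') to the CUDA op:
#
#     from caffe2.proto import caffe2_pb2
#     from caffe2.python import core
#
#     net = core.Net('multi_device')
#     with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
#         net.Relu(['x'], 'y')
#     with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
#         net.Relu(['y'], 'z')
#     new_net, blob_to_device = core.InjectCrossDeviceCopies(net)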
2322 
2323 def InjectDeviceCopiesAmongNets(nets, blob_to_device_init=None):
2324  """
2325  Takes in a list of nets. They usually represent your whole execution graph.
2326  This function will insert cross-device copy functions into all nets, and resolve
2327  inter-net external input dependencies. It will insert Copy functions if
2328  an external input of a net is produced on a different device than required.
2329  Inputs:
2330  nets: a list of nets
2331  Outputs:
2332  new_nets: a list of new nets with device differences resolved.
2333 
2334  Some notes from wyiming:
2335  1. You MUST pass nets in execution order. e.g. [train_init, train]
2336  """
2337  assert isinstance(nets, list), \
2338  "nets {} should be a list of nets.".format(str(nets))
2339  assert all(isinstance(net, Net) for net in nets), \
2340  "nets {} should be a list of nets.".format(str(nets))
2341  # A holistic blob to device mapping.
2342  blob_to_device = blob_to_device_init or {}
2343  blob_remap = {}
2344  new_nets = []
2345 
2346  for net in nets:
2347  new_net, blob_to_device = InjectCrossDeviceCopies(
2348  net,
2349  blob_to_device=blob_to_device,
2350  blob_remap=blob_remap,
2351  )
2352  new_nets.append(new_net)
2353 
2354  return new_nets, blob_to_device
2355 
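# [Editorial example - illustrative sketch, not part of the original source]
# Nets must be passed in execution order so that blobs produced by earlier
# nets are known when later nets are processed. `init_net` and `train_net`
# are assumed to be existing core.Net objects:
#
#     new_nets, blob_to_device = core.InjectDeviceCopiesAmongNets(
#         [init_net, train_net])  # init_net runs before train_net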
2356 
2357 def InjectDeviceCopiesAmongNetsWithoutB2D(nets, blob_to_device_init=None):
2358  new_nets, _ = InjectDeviceCopiesAmongNets(nets, blob_to_device_init)
2359  return new_nets
2360 
2361 
2362 def get_net_name(netlike):
2363  if isinstance(netlike, Net):
2364  return netlike.Proto().name
2365  elif isinstance(netlike, caffe2_pb2.NetDef):
2366  return netlike.name
2367  else:
2368  return netlike
2369 
2370 
2371 def output_to_list(op_output):
2372  """
2373  Ensures that the output of an operator is a list.
2374  Use when an operator has a variable number of outputs, but a list of
2375  outputs is desired even when number of outputs is 1.
2376 
2377  Args:
2378  op_output: Either a BlobReference or an iterable of BlobReferences.
2379 
2380  Returns:
2381  A list of BlobReferences.
2382  """
2383  assert type(op_output) in (list, tuple, BlobReference)
2384  return (
2385  [op_output]
2386  if isinstance(op_output, BlobReference) else list(op_output))
2387 
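# [Editorial example - illustrative sketch, not part of the original source]
# Useful when an op may return either one BlobReference or a tuple of them;
# `net` is assumed to be an existing core.Net:
#
#     y = net.Relu(['x'], 'y')    # a single BlobReference
#     outs = output_to_list(y)    # -> [y], always a list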
2388 
2389 def _add_net_to_dict(net_dict, net):
2390  name = get_net_name(net)
2391  if name in net_dict:
2392  assert net_dict[name] is None or net == net_dict[name], (
2393  'Different nets with same name: ' + name)
2394  return False
2395  else:
2396  net_dict[name] = net if isinstance(net, Net) else None
2397  return True
2398 
2399 
2400 class ExecutionStep(object):
2401  _step_names_used = set()
2402 
2403  @staticmethod
2404  def _get_next_step_name(basename):
2405  name = basename
2406  next_idx = 1
2407  while name in ExecutionStep._step_names_used:
2408  name = basename + '_' + str(next_idx)
2409  next_idx += 1
2410  ExecutionStep._step_names_used |= set([name])
2411  return name
2412 
2413  def __init__(self, name, nets=None, num_iter=None):
2414  self._step = caffe2_pb2.ExecutionStep()
2415  self._step.name = name or ExecutionStep._get_next_step_name('step')
2416  self._net_dict = OrderedDict()
2417  self._is_used = False
2418  self._substeps = []
2419  if nets is not None:
2420  if type(nets) is Net:
2421  nets = [nets]
2422  for net in nets:
2423  if _add_net_to_dict(self._net_dict, net):
2424  self._step.network.extend([get_net_name(net)])
2425  if num_iter is not None:
2426  self._step.num_iter = num_iter
2427 
2428  def get_net(self, name):
2429  return self._net_dict[name]
2430 
2431  def Name(self):
2432  return self._step.name
2433 
2434  def __str__(self):
2435  return self._step.name
2436 
2437  def _assert_can_mutate(self):
2438  assert not self._is_used, (
2439  'Cannot mutate a step that has already been added to a plan/step.')
2440 
2441  def _notify_is_used(self):
2442  self._is_used = True
2443 
2444  def Proto(self):
2445  return self._step
2446 
2447  def HasNets(self):
2448  return self._step.network is not None and (
2449  len(self._step.network) > 0)
2450 
2451  def HasSubsteps(self):
2452  return self._step.substep is not None and (
2453  len(self._step.substep) > 0)
2454 
2455  def Nets(self):
2456  return list(viewvalues(self._net_dict))
2457 
2458  def Substeps(self):
2459  return self._substeps
2460 
2461  def SetIter(self, num_iter):
2462  self._assert_can_mutate()
2463  self._step.num_iter = num_iter
2464 
2465  def SetCreateWorkspace(self, create_workspace):
2466  self._assert_can_mutate()
2467  self._step.create_workspace = create_workspace
2468 
2469  def SetNumConcurrentInstances(self, num_concurrent_instances):
2470  self._assert_can_mutate()
2471  self._step.num_concurrent_instances = num_concurrent_instances
2472 
2473  def SetOnlyOnce(self, only_once):
2474  self._assert_can_mutate()
2475  self._step.only_once = only_once
2476 
2477  def SetShouldStopBlob(self, should_stop_blob):
2478  assert isinstance(should_stop_blob, BlobReference), (
2479  "expects BlobReference here, got {}".format(type(should_stop_blob)))
2480  self._assert_can_mutate()
2481  self._step.should_stop_blob = str(should_stop_blob)
2482 
2483  def RunEveryMillis(self, interval):
2484  """
2485  Run this step every `interval` milliseconds, as long as its
2486  siblings are still running. It is guaranteed that, after all
2487  siblings finish, this step will run at least once.
2488 
2489  This property is ignored for top-level ExecutionSteps.
2490  """
2491  self._step.run_every_ms = interval
2492 
2493  def SetReportNet(self, report_net, report_interval):
2494  """ DEPRECATED. Use RunEveryMillis instead. """
2495  self._assert_can_mutate()
2496  _add_net_to_dict(self._net_dict, report_net)
2497  self._step.report_net = get_net_name(report_net)
2498  self._step.report_interval = report_interval
2499 
2500  def AddSubstep(self, substep):
2501  self._assert_can_mutate()
2502  assert not self.HasNets(), 'Cannot have both network and substeps.'
2503  if isinstance(substep, ExecutionStep):
2504  substep._notify_is_used()
2505  if not substep.HasNets() and not substep.HasSubsteps():
2506  return self
2507  for net in substep.Nets():
2508  _add_net_to_dict(self._net_dict, net)
2509  self._substeps.append(substep)
2510  proto = substep.Proto()
2511  else:
2512  proto = substep
2513  self._step.substep.add().CopyFrom(proto)
2514  return self
2515 
2516  def SetConcurrentSubsteps(self, concurrent_substeps):
2517  self._assert_can_mutate()
2518  assert not self.HasNets(), 'Cannot have both network and substeps.'
2519  self._step.concurrent_substeps = concurrent_substeps
2520 
2521  def AddNet(self, net):
2522  self._assert_can_mutate()
2523  assert not self.HasSubsteps(), 'Cannot have both network and substeps.'
2524  assert isinstance(net, Net)
2525  _add_net_to_dict(self._net_dict, net)
2526  self._step.network.extend([get_net_name(net)])
2527  return self
2528 
2529  def get_all_attributes(self, name):
2530  """
2531  Return the list of all attributes under the given `name`, present in
2532  all of the nets used in this execution step and its children.
2533  """
2534  return [
2535  attr
2536  for net in viewvalues(self._net_dict)
2537  for attr in net.get_attributes(name)
2538  ]
2539 
2540  @classmethod
2541  def create_from_proto(cls, step_proto, net_obj_dict, net_proto_dict):
2542  """
2543  Create ExecutionStep from ExecutionStep protobuf recursively
2544  """
2545  assert isinstance(step_proto, caffe2_pb2.ExecutionStep)
2546  assert (len(step_proto.network) > 0 and len(step_proto.substep) == 0) or \
2547  (len(step_proto.network) == 0 and len(step_proto.substep) > 0)
2548 
2549  steps_or_nets = []
2550  if len(step_proto.substep) > 0:
2551  for substep_proto in step_proto.substep:
2552  steps_or_nets.append(ExecutionStep.create_from_proto(
2553  substep_proto, net_obj_dict, net_proto_dict))
2554  else:
2555  for net_name in step_proto.network:
2556  if net_name not in net_obj_dict:
2557  assert net_name in net_proto_dict
2558  net = Net(net_proto_dict[net_name])
2559  net_obj_dict[net_name] = net
2560  net = net_obj_dict[net_name]
2561  assert isinstance(net, Net)
2562  steps_or_nets.append(net)
2563 
2564  num_iter = step_proto.num_iter if step_proto.HasField('num_iter') else None
2565  concurrent_substeps = step_proto.concurrent_substeps if\
2566  step_proto.HasField('concurrent_substeps') else None
2567  should_stop_blob = BlobReference(step_proto.should_stop_blob) if\
2568  step_proto.HasField('should_stop_blob') else None
2569  only_once = step_proto.only_once if\
2570  step_proto.HasField('only_once') else None
2571  num_concurrent_instances = step_proto.num_concurrent_instances if\
2572  step_proto.HasField('num_concurrent_instances') else None
2573  create_workspace = step_proto.create_workspace if\
2574  step_proto.HasField('create_workspace') else None
2575  run_every_ms = step_proto.run_every_ms if\
2576  step_proto.HasField('run_every_ms') else None
2577 
2578  return execution_step(
2579  step_proto.name,
2580  steps_or_nets,
2581  num_iter=num_iter,
2582  report_net=None, # DEPRECATED
2583  report_interval=None, # DEPRECATED
2584  concurrent_substeps=concurrent_substeps,
2585  should_stop_blob=should_stop_blob,
2586  only_once=only_once,
2587  num_concurrent_instances=num_concurrent_instances,
2588  create_workspace=create_workspace,
2589  run_every_ms=run_every_ms)
2590 
2591 
2592 def add_nets_in_order(step, net_list):
2593  proto = step.Proto()
2594  for substep in step.Substeps():
2595  add_nets_in_order(substep, net_list)
2596  for net in proto.network:
2597  if net not in net_list:
2598  net_list.append(net)
2599  # FIXME(azzolini): This is actually wrong. Report nets should be
2600  # instantiated first since they may run before any substep is run.
2601  # However, currently, Reporter depends on this behavior.
2602  if proto.report_net and proto.report_net not in net_list:
2603  net_list.append(proto.report_net)
2604 
2605 
2606 class Plan(object):
2607 
2608  def __init__(self, name_or_step):
2609  self._plan = caffe2_pb2.PlanDef()
2610  self._net_dict = OrderedDict()
2611  self._steps = [] # A list of ExecutionStep
2612  if isinstance(name_or_step, ExecutionStep):
2613  self._plan.name = name_or_step.Name()
2614  self.AddStep(name_or_step)
2615  elif isinstance(name_or_step, basestring):
2616  self._plan.name = name_or_step
2617  else:
2618  raise ValueError('name_or_step must be a string or ExecutionStep')
2619 
2620  def __str__(self):
2621  return self._plan.name
2622 
2623  def Proto(self):
2624  return self._plan
2625 
2626  def AddNets(self, nets):
2627  for net in nets:
2628  if _add_net_to_dict(self._net_dict, net):
2629  assert isinstance(net, Net)
2630  self._plan.network.add().CopyFrom(net.Proto())
2631 
2632  def Nets(self):
2633  return list(viewvalues(self._net_dict))
2634 
2635  def AddStep(self, step):
2636  assert isinstance(step, ExecutionStep)
2637  step._notify_is_used()
2638  if not step.HasNets() and not step.HasSubsteps():
2639  return
2640  self._plan.execution_step.add().CopyFrom(step.Proto())
2641  self._steps.append(step)
2642  # nets need to be added to the plan in order of usage
2643  net_list = []
2644  add_nets_in_order(step, net_list)
2645  self.AddNets([step.get_net(n) for n in net_list])
2646 
2647  def Steps(self):
2648  return self._steps
2649 
2650  def get_all_attributes(self, name):
2651  """
2652  Return the list of all attributes under the given `name`, present in
2653  all of the nets used in this plan.
2654  """
2655  return [
2656  attr
2657  for net in viewvalues(self._net_dict)
2658  for attr in net.get_attributes(name)
2659  ]
2660 
2661  @classmethod
2662  def create_from_proto(cls, plan_proto):
2663  assert isinstance(plan_proto, caffe2_pb2.PlanDef)
2664  plan = Plan(plan_proto.name)
2665  plan._plan.CopyFrom(plan_proto)
2666 
2667  net_obj_dict = {}
2668  net_proto_dict = {}
2669  for net_proto in plan_proto.network:
2670  assert net_proto.name not in net_proto_dict
2671  net_proto_dict[net_proto.name] = net_proto
2672 
2673  for step_proto in plan_proto.execution_step:
2674  step = ExecutionStep.create_from_proto(
2675  step_proto, net_obj_dict, net_proto_dict)
2676  plan.AddStep(step)
2677 
2678  return plan
2679 
2680 
2681 def to_execution_step(step_or_nets, default_name=None):
2682  from caffe2.python.net_builder import NetBuilder
2683  if isinstance(step_or_nets, ExecutionStep):
2684  return step_or_nets
2685 
2686  stop_blob = None
2687  if not default_name and hasattr(step_or_nets, 'name'):
2688  default_name = step_or_nets.name
2689  if isinstance(step_or_nets, NetBuilder):
2690  stop_blob = step_or_nets._stop_blob
2691  step_or_nets = step_or_nets.get()
2692  return execution_step(
2693  default_name, step_or_nets, should_stop_blob=stop_blob)
2694 
2695 
2696 def execution_step(default_name,
2697  steps_or_nets,
2698  num_iter=None,
2699  report_net=None,
2700  report_interval=None,
2701  concurrent_substeps=None,
2702  should_stop_blob=None,
2703  only_once=None,
2704  num_concurrent_instances=None,
2705  create_workspace=False,
2706  run_every_ms=None):
2707  """
2708  Helper for creating an ExecutionStep.
2709  - steps_or_nets can be:
2710  - None
2711  - Net
2712  - ExecutionStep
2713  - list<Net>
2714  - list<ExecutionStep>
2715  - should_stop_blob is either None or a scalar boolean blob.
2716  - This blob is checked AFTER every substep/subnet.
2717  - If specified and true, then this step will return immediately.
2718  - Be sure to handle race conditions if setting from concurrent threads.
2719  - if no should_stop_blob or num_iter is provided, defaults to num_iter=1
2720  """
2721  assert should_stop_blob is None or num_iter is None, (
2722  'Cannot set both should_stop_blob and num_iter.')
2723  if should_stop_blob is None and num_iter is None:
2724  num_iter = 1
2725 
2726  step = ExecutionStep(default_name)
2727  if should_stop_blob is not None:
2728  step.SetShouldStopBlob(should_stop_blob)
2729  if num_iter is not None:
2730  step.SetIter(num_iter)
2731  if only_once is not None:
2732  step.SetOnlyOnce(only_once)
2733  if concurrent_substeps is not None:
2734  step.SetConcurrentSubsteps(concurrent_substeps)
2735  if report_net is not None:
2736  assert report_interval is not None
2737  step.SetReportNet(report_net, report_interval)
2738  if num_concurrent_instances is not None:
2739  step.SetNumConcurrentInstances(num_concurrent_instances)
2740  if create_workspace:
2741  step.SetCreateWorkspace(True)
2742  if run_every_ms:
2743  step.RunEveryMillis(run_every_ms)
2744 
2745  if isinstance(steps_or_nets, ExecutionStep):
2746  step.AddSubstep(steps_or_nets)
2747  elif isinstance(steps_or_nets, Net):
2748  step.AddNet(steps_or_nets)
2749  elif isinstance(steps_or_nets, list):
2750  if all(isinstance(x, Net) for x in steps_or_nets):
2751  for x in steps_or_nets:
2752  step.AddNet(x)
2753  else:
2754  for x in steps_or_nets:
2755  step.AddSubstep(to_execution_step(x))
2756  elif steps_or_nets:
2757  raise ValueError(
2758  'steps_or_nets must be a step, a net, or a list of nets or steps.')
2759  return step
2760 
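# [Editorial example - illustrative sketch, not part of the original source]
# Builds a plan that runs an init net once and then a loop net ten times:
#
#     from caffe2.python import core, workspace
#
#     init_net = core.Net('init')
#     init_net.ConstantFill([], 'x', shape=[1], value=1.0)
#     loop_net = core.Net('loop')
#     loop_net.Print(['x'], [])
#
#     step = core.execution_step('main', [
#         core.execution_step('init_step', init_net, num_iter=1),
#         core.execution_step('loop_step', loop_net, num_iter=10),
#     ])
#     plan = core.Plan(step)
#     workspace.RunPlan(plan)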
2761 
2762 def scoped_execution_step(name, *args, **kwargs):
2763  """Same as execution_step() except that the step name is scoped."""
2764  default_name = ScopedName(name) if name else name
2765  return execution_step(default_name, *args, **kwargs)
2766 
2767 
2768 def _extract_stacktrace():
2769  '''
2770  This function extracts the stacktrace without file system access,
2771  purely using sys._getframe(), and removes the part that belongs to
2772  this file (core.py). We are not using the inspect module because
2773  it is just a wrapper on top of sys._getframe() whose
2774  logic is based on accessing source files on disk - exactly what
2775  we are trying to avoid here. The same holds for the traceback module.
2776 
2777  The reason for avoiding file system access is that
2778  if the code is located on an NFS, file access might be slow.
2779 
2780  Function returns a list of tuples (file_name, line_number)
2781  '''
2782 
2783  current_file_name = __name__.replace('.', '/') + ".py"
2784  result = []
2785  frame = sys._getframe(1)
2786  # We just go down the frame stack in a loop
2787  while frame:
2788  if current_file_name not in frame.f_code.co_filename:
2789  # It's important to extract information from the frame here
2790  # as frame's current line most probably will change later.
2791  result.append((frame.f_code.co_filename, frame.f_lineno))
2792  frame = frame.f_back
2793  return result
2794 
2795 
2796 SetPerOpEnginePref = C.set_per_op_engine_pref
2797 SetGlobalEnginePref = C.set_global_engine_pref
2798 SetEnginePref = C.set_engine_pref
2799 SetOpEnginePref = C.set_op_engine_pref