Caffe2 - Python API
A deep learning, cross-platform ML framework
control_ops_grad.py
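This module generates the gradient counterparts of Caffe2's control-flow operators. gen_do_gradient, gen_if_gradient and gen_while_gradient take a forward Do, If or While operator together with the gradient blobs of its outputs and return the gradient operator(s) plus the list of input-gradient blobs; the remaining helpers build the gradient subnets, blob bindings and zero-initialization ops they need.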
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package control_ops_grad
# Module caffe2.python.control_ops_grad
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.proto import caffe2_pb2


def gen_do_gradient(op, g_output):
    """
    Generates gradient Do operator, given forward Do op and a list
    of gradient blobs corresponding to forward op's outputs
    Returns a gradient op and a list of blobs corresponding to input gradients
    """
    from caffe2.python.core import BlobReference
    subnet, outer_to_inner_map, inner_to_outer_map, workspace_blob_name = \
        _do_op_sanity_check_and_process(op)

    assert len(g_output) == len(op.output), \
        "Different number of gradient blobs and Do op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    # From the outer net point of view:
    #   Do is an operator that has some number of inputs and outputs;
    #   we have to generate a gradient operator that writes into
    #   corresponding input gradient blobs and has access to inputs, outputs
    #   and gradient output blobs
    # From the inner net point of view:
    #   Do is an operator with a subnet and blob bindings,
    #   we need to forward Do's output blob gradients into inner workspace,
    #   use them to run backward pass generation and forward Do's input blob
    #   gradients back into outer workspace
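    #
    # As an illustrative sketch of the naming used below: if the forward Do
    # op binds outer input "x" to inner blob "x_inner" and outer output "y"
    # to inner blob "y_inner", then the outer gradient of "y" is exposed
    # inside the subnet as "y_inner/_DO_OPERATOR_INNER_GRAD_", the inner
    # backward pass is generated from it, and the resulting gradient of
    # "x_inner" is exported back to the outer workspace as "x_grad".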

    op_output = [str(o) for o in op.output]
    op_output = op_output[:-1]  # remove workspace pointer blob
    op_input = [str(i) for i in op.input]
    op_input = op_input[:-1]  # remove workspace pointer blob

    ordered_inner_output_blob_names = [outer_to_inner_map[o] for o in op_output]

    backward_pass_initial_grad_map = {}
    initial_grad_map = {}
    for inner_output_name, outer_grad_output_name in \
            zip(ordered_inner_output_blob_names, g_output):
        # link inner_output_name to corresponding inner_grad_output_name for
        # backward pass generation;
        if outer_grad_output_name:
            inner_grad_output_name = inner_output_name + "/_DO_OPERATOR_INNER_GRAD_"
            backward_pass_initial_grad_map[BlobReference(inner_output_name)] = \
                BlobReference(inner_grad_output_name)
            initial_grad_map[inner_grad_output_name] = str(outer_grad_output_name)
    assert len(initial_grad_map) > 0, "Empty initial gradient map for Do op"

    inner_grad_ops, inner_grad_names_map = _gen_subgradient_pass(
        subnet, backward_pass_initial_grad_map)

    if len(inner_grad_ops) == 0:
        return [], []

    grad_copy_ops = []
    g_input = []
    new_op_outputs = []
    new_blob_bindings = {}
    for outer_input_name in op_input:
        inner_input_name = outer_to_inner_map[outer_input_name]
        if inner_input_name in inner_grad_names_map:
            inner_grad_input_name = inner_grad_names_map[inner_input_name]
            outer_grad_input_name = outer_input_name + "_grad"

            # It is possible that inner_grad_input_name will need to be
            # linked to another outer blob. For example:
            #
            #   // y - param initialized in init_net
            #   x = ...
            #   z = ...
            #   with ops.IfNet(...):
            #       ops.Add([z, x], y)  # inner Do block
            #   loss = f(..., y, ...)
            #
            # In this case x, y and z are external for the inner Do block,
            # the inputs of the Do block are z and x and the output is y.
            # When computing the gradient of input x given the gradient
            # of output y it's easy to see that they are equal.
            # During the generation of gradient Do operator, we link
            # external gradient y (y_grad) to the internal name
            # (y/_DO_OPERATOR_INNER_GRAD_) and generate the backward pass
            # for the internal Do net. As a result we get gradient operators
            # for the gradient Do and gradient map that maps internal Do
            # blobs to their computed gradients.
            # In this example, gradient map may have blob x linked to
            # gradient blob y/_DO_OPERATOR_INNER_GRAD_.
            # We should export gradient for x outside of Do, so
            # we add a blob mapping from inner gradient blob
            # (y/_DO_OPERATOR_INNER_GRAD_) to a new outer name (x_grad).
            #
            # (Note: since we use transparent blob mapping between outer and
            # inner (Do's) workspace, these operations do not involve copying
            # but are merely using blobs in outer workspace in the Do's operator
            # workspace under (possibly) different names)
            #
            # At the same time, we need to add a blob mapping from inner name
            # y/_DO_OPERATOR_INNER_GRAD_ to the outer blob y_grad
            # Hence in this case, we cannot use existing blob mapping scheme
            # that requires a bijection between subset of inner blob names and
            # a set of all (Do's input and output) outer blob names

            # TODO(iliacher): Remove unnecessary blob copying

            new_inner_grad_input_name = \
                inner_input_name + "/_DO_OPERATOR_INNER_GRAD_COPY_"
            grad_copy_ops.append(_prepare_blob_copy_op(
                inner_grad_input_name, new_inner_grad_input_name))

            new_blob_bindings[new_inner_grad_input_name] = outer_grad_input_name
            new_op_outputs.append(outer_grad_input_name)
            g_input.append(outer_grad_input_name)
        else:
            g_input.append(None)

    new_op_inputs = []
    overwritten_names = set()
    saved_local_blob_names = set()
    for grad_op in inner_grad_ops:
        grad_op_input = [str(i) for i in grad_op.input]
        grad_op_output = [str(o) for o in grad_op.output]
        for grad_op_input_name in grad_op_input:
            if grad_op_input_name in overwritten_names:
                continue
            # check if this is an external blob
            outer_name = inner_to_outer_map.get(grad_op_input_name, None)
            if not outer_name:
                # check if this is an external gradient blob
                outer_name = initial_grad_map.get(grad_op_input_name, None)
            if outer_name:
                outer_name = str(outer_name)
                if outer_name not in new_op_inputs:
                    new_op_inputs.append(outer_name)

                new_blob_bindings[grad_op_input_name] = outer_name
            else:
                # this is a local blob, we'll get its value from
                # a saved forward op workspace
                saved_local_blob_names.add(grad_op_input_name)
        overwritten_names.update(grad_op_output)

    # add inner gradient copy ops
    inner_grad_ops += grad_copy_ops

    gradient_do_def = _prepare_gradient_do_op(
        fwd_op=op,
        fwd_net=subnet,
        grad_ops=inner_grad_ops,
        inputs=new_op_inputs,
        outputs=new_op_outputs,
        blob_bindings=new_blob_bindings,
        saved_fwd_blobs=saved_local_blob_names,
        workspace_blob_name=workspace_blob_name)
    grad_ops.append(gradient_do_def)

    _do_op_sanity_check_and_process(gradient_do_def)

    return grad_ops, g_input


def dedupe_g_output(op, g_output):
    # When generating a gradient op it's possible to receive the same gradient
    # blob corresponding to different forward op output blobs; the Do operator
    # requires a bijection between inner and outer names, so make sure we
    # deduplicate
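    #
    # For example (illustrative names): if outputs "a" and "b" both arrive
    # with the same gradient blob "g", the second occurrence is renamed to
    # "b_g_DEDUP" and a Copy op from "g" to "b_g_DEDUP" is emitted, so each
    # output ends up with a distinct gradient blob.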
    grad_ops = []
    deduped_g_output = []
    init_grad_map = {}
    for output_name, grad_name in zip(op.output, g_output):
        if not grad_name:
            deduped_g_output.append(grad_name)
            continue

        if output_name in init_grad_map:
            deduped_g_output.append(init_grad_map[output_name])
        else:
            if grad_name not in init_grad_map.values():
                init_grad_map[output_name] = grad_name
                deduped_g_output.append(grad_name)
            else:
                deduped_grad_name = output_name + "_" + grad_name + "_DEDUP"
                assert deduped_grad_name not in init_grad_map.values()
                grad_copy_op = caffe2_pb2.OperatorDef()
                grad_copy_op.type = "Copy"
                grad_copy_op.input.extend([grad_name])
                grad_copy_op.output.extend([deduped_grad_name])
                grad_ops.append(grad_copy_op)
                deduped_g_output.append(deduped_grad_name)
                init_grad_map[output_name] = deduped_grad_name
    return grad_ops, deduped_g_output


def gen_while_gradient(op, g_output):
    """
    Generates gradient While operator
    """
    from caffe2.python.core import BlobReference
    assert op.type == "While", "Expected While op"
    assert len(op.input) > 0, "Expected at least one input in While op"

    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and While op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    init_grad_map = {}
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    assert len(init_grad_map) > 0, "Empty initial gradient map for While op"

    loop_net = _get_net_argument(op, "loop_net")
    assert loop_net, "Expected loop subnet in While op"
    assert len(loop_net.op) == 1 and loop_net.op[0].type == "Do", \
        "Gradient While op requires single Do op as a loop body"
    do_op = loop_net.op[0]
    do_args = _get_do_arguments(do_op)
    assert "reuse_workspace" not in do_args or not do_args["reuse_workspace"], \
        "Gradient While op requires Do loop body op without reuse_workspace set"

    assert len(do_op.output) > 0, "Expected Do op with at least one output"
    workspace_blob = do_op.output[-1]

    loop_grad_net, loop_grad_map, loop_input_names, loop_output_names = \
        _gen_subnet_gradient(loop_net, init_grad_map)
    assert loop_grad_net, "Failed to get gradient net for loop body in While op"

    grad_ops += _prepare_gradient_while_ops(
        fwd_op=op,
        input_names=loop_input_names,
        output_names=loop_output_names,
        loop_grad_net=loop_grad_net,
        workspace_blob=workspace_blob,
        init_grad_map=init_grad_map,
        loop_grad_map=loop_grad_map)

    op_input = [str(i) for i in op.input]
    g_input = [loop_grad_map.get(i, None) for i in op_input]
    return grad_ops, g_input


# Constructs gradient While op, arguments:
# fwd_op - forward While op
# input_names - input blob names for a gradient op
# output_names - output blob names for a gradient op
# loop_grad_net - gradient loop body net
# workspace_blob - blob that holds forward workspaces stack
# init_grad_map - initial gradient to forward blob map
# loop_grad_map - gradient blob map for loop's body
def _prepare_gradient_while_ops(
        fwd_op, input_names, output_names, loop_grad_net, workspace_blob,
        init_grad_map, loop_grad_map):
    gradient_while_def = caffe2_pb2.OperatorDef()
    gradient_while_def.CopyFrom(fwd_op)
    if gradient_while_def.name:
        gradient_while_def.name += "_grad"

    loop_net_arg = caffe2_pb2.Argument()
    loop_net_arg.name = "loop_net"
    loop_net_arg.n.CopyFrom(loop_grad_net)

    cond_net_arg = caffe2_pb2.Argument()
    cond_net_arg.name = "cond_net"
    from caffe2.python.core import Net, BlobReference
    # Construct condition net - check that there are still forward workspaces
    # left, using the HasScope op
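    # (workspace_blob holds the stack of saved forward workspaces; the
    # gradient loop keeps iterating while HasScope reports that a saved
    # workspace is still available)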
    cond_net = Net('gradient_loop_cond_net')
    cond_init_net = Net('gradient_loop_cond_net_init')
    cond_blob = cond_net.NextScopedBlob(cond_net.Name() + '/cond')
    cond_init_net.HasScope(workspace_blob, cond_blob)
    cond_net.HasScope(workspace_blob, cond_blob)
    for blob, init_grad_blob in init_grad_map.items():
        blob_name = str(blob)
        init_grad_blob_name = str(init_grad_blob)
        if blob_name in loop_grad_map and \
                loop_grad_map[blob_name] != init_grad_blob_name:
            cond_net.Copy(
                BlobReference(loop_grad_map[blob_name]), init_grad_blob)
            cond_init_net.Copy(
                init_grad_blob, BlobReference(loop_grad_map[blob_name]))
    cond_net_arg.n.CopyFrom(cond_net.Proto())

    del gradient_while_def.arg[:]
    gradient_while_def.arg.extend([loop_net_arg, cond_net_arg])

    del gradient_while_def.control_input[:]
    del gradient_while_def.input[:]
    gradient_while_def.input.extend(
        [str(cond_blob).encode('utf-8')] + list(input_names))
    del gradient_while_def.output[:]
    gradient_while_def.output.extend(output_names)
    gradient_while_def.is_gradient_op = True
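    # the one-time initialization ops from cond_init_net are returned first,
    # followed by the gradient While op itself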
    return [o for o in cond_init_net.Proto().op] + [gradient_while_def]


def _get_do_arguments(do_op):
    assert do_op.type == "Do", "Expected Do op"
    args = {}
    for arg in do_op.arg:
        if not arg.name:
            continue
        if arg.name == "net":
            assert arg.n, "Expected non empty net argument"
            args["net"] = arg.n
        elif arg.name == "reuse_workspace":
            assert arg.i, "Expected non empty reuse_workspace argument"
            args["reuse_workspace"] = bool(arg.i)
        elif arg.name == "inner_blobs":
            assert arg.strings, "Expected non empty inner_blobs argument"
            args["inner_blobs"] = arg.strings
        elif arg.name == "outer_blobs_idx":
            assert arg.ints, "Expected non empty outer_blobs_idx argument"
            args["outer_blobs_idx"] = arg.ints
    return args


def gen_if_gradient(op, g_output):
    """
    Generates gradient If operator, given forward If op and a list
    of gradient blobs corresponding to forward op's outputs
    Returns a gradient op and a list of blobs corresponding to input gradients
    """
    from caffe2.python.core import BlobReference
    assert op.type == "If", "Expected If op"
    # first input is the condition blob
    assert len(op.input) > 0, "Expected at least one input in If op"

    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and If op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    init_grad_map = {}  # map from if's output blob to output gradient blob
    op_input = [str(i) for i in op.input]
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    # shouldn't call without at least one output gradient available
    assert len(init_grad_map) > 0, "Empty initial gradient map for If op"

    grad_map = {}  # map from blob to gradient blob
    then_net = _get_net_argument(op, "then_net")
    assert then_net, "Expected then subnet in If op"
    then_grad_net, then_grad_map, then_input_names, then_output_names = \
        _gen_subnet_gradient(then_net, init_grad_map)
    assert then_grad_net, "Failed to get gradient net for then in If op"
    grad_map.update(then_grad_map)

    else_input_names = set()
    else_output_names = set()
    else_grad_map = {}
    else_grad_net = None
    else_net = _get_net_argument(op, "else_net")
    if else_net:
        else_grad_net, else_grad_map, else_input_names, else_output_names = \
            _gen_subnet_gradient(else_net, init_grad_map)
        assert else_grad_net, "Failed to get gradient net for else in If op"
        # consider the case where the else branch doesn't update a blob's
        # gradient and keeps the original from init_grad_map, while the then
        # branch does update the gradient
        for else_blob, else_grad_blob in else_grad_map.items():
            if else_blob in then_grad_map:
                then_grad_blob = then_grad_map[else_blob]
                # if both then and else branches have a grad blob name for the
                # same blob and the grad names are different, then one of the
                # branches doesn't use the blob and has the original grad blob
                # name in its grad map, and the other branch uses the blob and
                # has a <blob_name>_grad name in its grad map (which might be
                # different from the original grad blob)
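                #
                # For example (illustrative names): if only the then branch
                # recomputes y's gradient, then_grad_map maps y to a new
                # "y_grad" blob while else_grad_map still maps y to the
                # initial gradient blob; the branch that produced the new
                # name wins below.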
                if then_grad_blob != else_grad_blob:
                    init_grad_name = init_grad_map[else_blob] \
                        if else_blob in init_grad_map else None

                    if then_grad_blob == init_grad_name:
                        grad_map[else_blob] = else_grad_blob
                    elif else_grad_blob == init_grad_name:
                        grad_map[else_blob] = then_grad_blob
                    else:
                        raise Exception(
                            "Unexpected grad blob name " + else_blob + ", " +
                            else_grad_blob + ", " + then_grad_blob)
            else:
                grad_map[else_blob] = else_grad_blob

    # make sure gradients of blobs that were not computed
    # by the selected if's branch are initialized with zeros
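    # (e.g. if only the then branch produces a gradient for some blob, the
    # else gradient net gets a ConstantFill with value 0 for that gradient,
    # or a Copy from the initial gradient blob when one exists)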
    then_other_output_names = \
        then_output_names - (then_output_names & else_output_names)
    then_other_grad_output_names = set(
        [o for o in then_other_output_names if o in then_grad_map.values()])
    zero_then = _gen_grad_zero_init_ops(
        init_grad_map, then_grad_map, then_other_grad_output_names)
    if else_grad_net:
        else_grad_net.op.extend(zero_then)
    elif len(zero_then) > 0:
        else_grad_net = caffe2_pb2.NetDef()
        else_grad_net.CopyFrom(then_grad_net)
        if else_grad_net.name:
            else_grad_net.name += "_auto_else_zero_blobs_"
        del else_grad_net.op[:]
        else_grad_net.op.extend(zero_then)
        del else_grad_net.external_input[:]
        del else_grad_net.external_output[:]

    else_other_output_names = \
        else_output_names - (then_output_names & else_output_names)
    else_other_grad_output_names = set(
        [o for o in else_other_output_names if o in else_grad_map.values()])
    zero_else = _gen_grad_zero_init_ops(
        init_grad_map, else_grad_map, else_other_grad_output_names)
    then_grad_net.op.extend(zero_else)

    output_names = list(then_output_names | else_output_names)
    input_names = then_input_names | else_input_names
    # make sure condition blob is the first in the list
    input_names = [op_input[0]] + list(input_names - {op_input[0]})
    gradient_if_def = _prepare_gradient_if_op(
        fwd_op=op,
        input_names=input_names,
        output_names=output_names,
        then_grad_net=then_grad_net,
        else_grad_net=else_grad_net)
    g_input = [grad_map.get(i, None) for i in op_input]
    return grad_ops + [gradient_if_def], g_input


def _gen_subnet_gradient(subnet, init_grad):
    grad_ops, grad_names_map = _gen_subgradient_pass(
        subnet, init_grad)

    output_names = set()
    input_names = set()
    for grad_op in grad_ops:
        for grad_op_input in grad_op.input:
            if str(grad_op_input) not in output_names:
                input_names.add(str(grad_op_input))
        for grad_op_output in grad_op.output:
            output_names.add(str(grad_op_output))

    gradient_net_def = caffe2_pb2.NetDef()
    gradient_net_def.CopyFrom(subnet)
    if gradient_net_def.name:
        gradient_net_def.name += "_grad"
    del gradient_net_def.op[:]
    gradient_net_def.op.extend(grad_ops)
    del gradient_net_def.external_input[:]
    del gradient_net_def.external_output[:]

    return gradient_net_def, grad_names_map, input_names, output_names


def _get_net_argument(op, net_name):
    for arg in op.arg:
        if arg.name and arg.name == net_name:
            assert arg.n, "Expected non empty net argument " + net_name
            return arg.n
    return None


def _gen_subgradient_pass(subnet, init_grad):
    from caffe2.python.core import IR
    subnet_ir = IR(subnet.op)
    grad_ops, grad_blob_map = \
        subnet_ir.GetBackwardPass(init_grad)
    grad_names_map = {}
    for b, g in grad_blob_map.items():
        grad_names_map[str(b)] = str(g)
    return grad_ops, grad_names_map


def _do_op_sanity_check_and_process(op):
    assert op.type == "Do", "Expected Do op"

    subnet = _get_net_argument(op, "net")
    assert subnet, "No net argument found in Do op"

    inner_blobs = None
    outer_blobs_idx = None
    for arg in op.arg:
        if arg.name and arg.name == "inner_blobs":
            assert not inner_blobs, "inner_blobs redefinition"
            assert arg.strings and len(arg.strings) > 0, \
                "Empty inner_blobs argument in Do op"
            inner_blobs = [s.decode('utf-8') for s in arg.strings]
        if arg.name and arg.name == "outer_blobs_idx":
            assert not outer_blobs_idx, "outer_blobs_idx redefinition"
            assert arg.ints and len(arg.ints) > 0, \
                "Empty outer_blobs_idx argument in Do op"
            outer_blobs_idx = arg.ints
        if inner_blobs and outer_blobs_idx:
            break

    assert inner_blobs, "No inner_blobs argument found in Do op"
    assert outer_blobs_idx, "No outer_blobs_idx argument found in Do op"

    assert len(inner_blobs) == len(outer_blobs_idx), \
        "Arguments inner_blobs and outer_blobs_idx of different length in Do op"

    all_inner_blobs = set(inner_blobs)
    assert len(all_inner_blobs) == len(inner_blobs), \
        "Found duplicates in inner_blobs in Do op"

    op_input = [str(i) for i in op.input]
    assert len(op_input) > 0, "Expected at least one input blob"
    # remove last input blob that holds pointer to workspace
    input_workspace_blob_name = op_input[-1]
    op_input = op_input[:-1]

    op_output = [str(o) for o in op.output]
    assert len(op_output) > 0, "Expected at least one output blob"
    # remove last output blob that holds pointer to workspace
    workspace_blob_name = op_output[-1]
    assert input_workspace_blob_name == workspace_blob_name, \
        "Expected same input/output workspace blob"
    op_output = op_output[:-1]

    all_op_input_blob_names = set(op_input)
    assert len(all_op_input_blob_names) == len(op_input), \
        "Found duplicates in Do op inputs"
    all_op_output_blob_names = set(op_output)
    assert len(all_op_output_blob_names) == len(op_output), \
        "Found duplicates in Do op outputs"

    ordered_outer_blob_names = op_input + op_output
    all_outer_blob_names = set(ordered_outer_blob_names)
    used_outer_blob_names = set()
    outer_to_inner_map = {}
    inner_to_outer_map = {}
    for inner_name, outer_blob_idx in zip(inner_blobs, outer_blobs_idx):
        assert outer_blob_idx >= 0 and \
            outer_blob_idx < len(ordered_outer_blob_names), \
            "Outer blob index is out of bounds in Do op"
        outer_name = ordered_outer_blob_names[outer_blob_idx]
        assert outer_name not in used_outer_blob_names, \
            "Reuse of outer blob name " + outer_name + " in Do op"
        used_outer_blob_names.add(outer_name)
        outer_to_inner_map[outer_name] = inner_name
        inner_to_outer_map[inner_name] = outer_name

    assert len(used_outer_blob_names) == len(all_outer_blob_names), \
        "Not all outer blob names are used in blob bindings in Do op"

    return subnet, outer_to_inner_map, inner_to_outer_map, workspace_blob_name


def _prepare_blob_copy_op(from_name, to_name):
    copy_op_def = caffe2_pb2.OperatorDef()
    copy_op_def.type = "Copy"
    copy_op_def.input.extend([from_name])
    copy_op_def.output.extend([to_name])
    return copy_op_def


def _prepare_gradient_do_op(
        fwd_op, fwd_net, grad_ops, inputs, outputs, blob_bindings, saved_fwd_blobs,
        workspace_blob_name):
    gradient_net_def = caffe2_pb2.NetDef()
    gradient_net_def.CopyFrom(fwd_net)
    if gradient_net_def.name:
        gradient_net_def.name += "_grad"
    del gradient_net_def.op[:]
    gradient_net_def.op.extend(grad_ops)
    del gradient_net_def.external_input[:]
    del gradient_net_def.external_output[:]

    gradient_do_def = caffe2_pb2.OperatorDef()
    gradient_do_def.CopyFrom(fwd_op)
    if gradient_do_def.name and len(gradient_do_def.name) > 0:
        gradient_do_def.name += "_grad"

    del gradient_do_def.input[:]
    gradient_do_def.input.extend(inputs)
    # workspace pointer blob
    gradient_do_def.input.append(workspace_blob_name)
    del gradient_do_def.output[:]
    gradient_do_def.output.extend(outputs)
    # workspace pointer blob
    gradient_do_def.output.append(workspace_blob_name)

    net_arg = caffe2_pb2.Argument()
    net_arg.name = "net"
    net_arg.n.CopyFrom(gradient_net_def)

    ordered_new_outer_names = inputs + outputs
    inner_blobs = blob_bindings.keys()
    new_outer_blobs_idx = [ordered_new_outer_names.index(blob_bindings[b])
                           for b in inner_blobs]

    inner_blobs_arg = caffe2_pb2.Argument()
    inner_blobs_arg.name = "inner_blobs"
    inner_blobs_arg.strings.extend([b.encode('utf-8') for b in inner_blobs])

    outer_blobs_idx_arg = caffe2_pb2.Argument()
    outer_blobs_idx_arg.name = "outer_blobs_idx"
    outer_blobs_idx_arg.ints.extend(new_outer_blobs_idx)

    saved_blobs_arg = caffe2_pb2.Argument()
    saved_blobs_arg.name = "saved_fwd_blobs"
    saved_blobs_arg.strings.extend(
        [b.encode('utf-8') for b in saved_fwd_blobs])

    del gradient_do_def.arg[:]
    gradient_do_def.arg.extend([
        net_arg, inner_blobs_arg, outer_blobs_idx_arg, saved_blobs_arg])
    del gradient_do_def.control_input[:]

    gradient_do_def.is_gradient_op = True

    return gradient_do_def


def _gen_grad_zero_init_ops(init_grad_map, grad_map, grad_output_names):
    grad_init_ops = []
    for grad_output in grad_output_names:
        # get the corresponding output name blob and use it in ConstantFill
        # so that grad_output has the same shape
        output_name = None
        for o, g in grad_map.items():
            if g == grad_output:
                output_name = o
                break
        assert output_name, "Unknown gradient output " + grad_output

        grad_init_op = None
        # make sure that we do not overwrite existing gradients with zeros
        if output_name in init_grad_map:
            init_grad_name = init_grad_map[output_name]
            # in case we use a different gradient blob name, copy gradient
            if init_grad_name != grad_output:
                grad_init_op = caffe2_pb2.OperatorDef()
                grad_init_op.type = "Copy"
                grad_init_op.input.extend([str(init_grad_name)])
                grad_init_op.output.extend([str(grad_output)])
        else:
            grad_init_op = caffe2_pb2.OperatorDef()
            grad_init_op.type = "ConstantFill"
            grad_init_op.input.extend([output_name])
            grad_init_op.output.extend([grad_output])
            value_arg = caffe2_pb2.Argument()
            value_arg.name = "value"
            value_arg.f = 0.0
            grad_init_op.arg.extend([value_arg])

        if grad_init_op:
            grad_init_ops.append(grad_init_op)
    return grad_init_ops


def _prepare_gradient_if_op(
        fwd_op, input_names, output_names, then_grad_net, else_grad_net):
    gradient_if_def = caffe2_pb2.OperatorDef()
    gradient_if_def.CopyFrom(fwd_op)
    del gradient_if_def.input[:]
    gradient_if_def.input.extend(input_names)
    del gradient_if_def.output[:]
    gradient_if_def.output.extend(output_names)

    then_net_arg = caffe2_pb2.Argument()
    then_net_arg.name = "then_net"
    then_net_arg.n.CopyFrom(then_grad_net)
    gradient_args = [then_net_arg]
    if else_grad_net:
        else_net_arg = caffe2_pb2.Argument()
        else_net_arg.name = "else_net"
        else_net_arg.n.CopyFrom(else_grad_net)
        gradient_args.append(else_net_arg)

    del gradient_if_def.arg[:]
    gradient_if_def.arg.extend(gradient_args)
    if gradient_if_def.name:
        gradient_if_def.name += "_grad"
    del gradient_if_def.control_input[:]
    gradient_if_def.is_gradient_op = True
    return gradient_if_def
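
Example: the deduplication helper above can be exercised on its own. A minimal, illustrative sketch (blob names are made up; it only assumes Caffe2 and its protobufs are importable):

from caffe2.proto import caffe2_pb2
from caffe2.python.control_ops_grad import dedupe_g_output

# forward Do op with two data outputs plus the trailing workspace blob
op = caffe2_pb2.OperatorDef()
op.type = "Do"
op.output.extend(["out_a", "out_b", "workspace_ptr"])

# both data outputs received the same upstream gradient blob "g";
# the workspace pointer blob has no gradient
grad_ops, deduped = dedupe_g_output(op, ["g", "g", None])

# deduped == ["g", "out_b_g_DEDUP", None]; grad_ops holds a single Copy op
# that writes "g" into "out_b_g_DEDUP"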