Caffe2 - Python API
A deep learning, cross-platform ML framework
gradient_checker.py
## @package gradient_checker
# Module caffe2.python.gradient_checker
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from caffe2.python import core, workspace, net_drawer
from caffe2.proto import caffe2_pb2


def getGradientForOp(op):
    return core.GradientRegistry.GetGradientForOp(
        op, [s + '_grad' for s in op.output])

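For orientation: getGradientForOp asks the gradient registry for the gradient operators of a single op, naming each output gradient '&lt;output&gt;_grad'. A hedged sketch of how it might be called (the op and blob names are illustrative):

op = core.CreateOperator('Relu', ['X'], ['Y'])
grad_ops, g_input = getGradientForOp(op)
# grad_ops: the gradient operators for `op`
# g_input:  one gradient entry per input, e.g. a blob reference like 'X_grad'
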
def _get_grad_blob(grad_map, input_to_check):
    grad_blob = grad_map[input_to_check]

    if isinstance(grad_blob, core.BlobReference):
        return workspace.blobs[grad_blob]

    # If grad_blob is not a single blob, it should be a gradient slice.
    # To make it comparable with the estimated gradient, which is dense,
    # we need to first convert grad_blob to a dense gradient.
    assert isinstance(grad_blob, core.GradientSlice)
    dense_grad = 'tmp_dense_grad'
    sparse_to_dense_op = core.CreateOperator(
        'SparseToDense',
        [grad_blob.indices, grad_blob.values, input_to_check],
        dense_grad,
    )
    workspace.RunOperatorOnce(sparse_to_dense_op)
    return workspace.blobs[dense_grad]

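For intuition, the SparseToDense step above can be pictured in plain NumPy. The sketch below is illustrative only (the helper name is hypothetical, not caffe2 API): the sparse value rows are written into a dense zero tensor at the given indices, with duplicate indices accumulating.

def _sparse_to_dense_sketch(indices, values, dense_shape):
    # Rough NumPy equivalent of the SparseToDense operator used above.
    dense = np.zeros(dense_shape, dtype=values.dtype)
    np.add.at(dense, indices, values)  # duplicate indices accumulate
    return dense
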
def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
    grad_net = net.Clone(net.Name() + "_copy")
    grad_map = grad_net.AddGradientOperators(outputs_with_grad)

    for name, value in (input_values or {}).items():
        workspace.blobs[name] = value

    for input_to_check in inputs_with_grads:
        assert input_to_check in grad_map, (
            '{} has no gradient, cannot check net gradient.'.format(
                input_to_check))
        assert str(input_to_check) in workspace.blobs

    workspace.RunNetOnce(grad_net)
    forward_results = [(output, workspace.blobs[output]) for output in outputs]
    grads = {input_to_check: _get_grad_blob(grad_map, input_to_check)
             for input_to_check in inputs_with_grads}

    return forward_results, grads, grad_net

def _assert_close(value1, value2, threshold, err_msg=''):
    np.testing.assert_allclose(
        value1, value2,
        atol=threshold, rtol=threshold,
        err_msg=err_msg,
    )

    delta = np.abs(value1 - value2).flatten()
    return np.mean(delta), np.max(delta)

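Note that np.testing.assert_allclose applies both tolerances at once: it requires |value1 - value2| &lt;= atol + rtol * |value2|, so with atol = rtol = threshold the effective per-element bound is threshold * (1 + |value2|). For example:

# With threshold = 0.05, an element whose reference value is 1.0 may be
# off by up to 0.05 * (1 + 1.0) = 0.10 before the check fails:
_assert_close(np.array([1.08]), np.array([1.0]), threshold=0.05)   # passes
# _assert_close(np.array([1.2]), np.array([1.0]), 0.05)            # would raise
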
class NetGradientChecker(object):
    @staticmethod
    def CompareNets(nets, outputs, outputs_with_grad_ids,
                    inputs_with_grads, input_values=None,
                    threshold=0.0000001, print_net_images=False):
        def _get_output_with_grad_names(net_outputs):
            return [net_outputs[i] for i in outputs_with_grad_ids]

        if print_net_images:
            for i, net in enumerate(nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
                          'wb') as f:
                    f.write(png)

        results = [
            _get_grad(net, net_outputs,
                      _get_output_with_grad_names(net_outputs),
                      input_values, inputs_with_grads)
            for net, net_outputs in zip(nets, outputs)
        ]

        if print_net_images:
            _, _, backward_nets = zip(*results)
            for i, net in enumerate(backward_nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_" + str(i) + net.Name() + ".png",
                          'wb') as f:
                    f.write(png)

        first_net_results, first_net_grads, _ = results[0]
        for net_results, net_grads, _ in results[1:]:
            assert len(net_results) == len(first_net_results)
            for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
                    zip(first_net_results, net_results)):
                _assert_close(
                    blob_value1, blob_value2, threshold,
                    err_msg="Different forward pass results for output id {}. "
                    "Corresponding output blobs: {} and {}".format(
                        idx, blob1, blob2))

            assert net_grads.keys() == first_net_grads.keys()
            for blob, blob_grad_value in net_grads.items():
                _assert_close(
                    first_net_grads[blob], blob_grad_value, threshold,
                    err_msg="Different gradients for input {}".format(blob))

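As a usage sketch, CompareNets runs each net forward and backward and asserts that corresponding outputs and input gradients agree within the threshold. The nets, blob names, and shapes below are made up for illustration:

net_a = core.Net("net_a")
net_a.FC(["x", "w", "b"], "y_a")
net_b = core.Net("net_b")
net_b.FC(["x", "w", "b"], "y_b")

NetGradientChecker.CompareNets(
    [net_a, net_b],
    outputs=[["y_a"], ["y_b"]],      # one output list per net
    outputs_with_grad_ids=[0],       # back-propagate from output 0 of each net
    inputs_with_grads=["x", "w", "b"],
    input_values={
        "x": np.random.randn(4, 3).astype(np.float32),
        "w": np.random.randn(2, 3).astype(np.float32),
        "b": np.random.randn(2).astype(np.float32),
    },
)
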
    @staticmethod
    def Check(net, outputs_with_grad, input_values,
              input_to_check, step_size=0.0001,
              threshold=0.05, print_net=True):

        net_results, net_grads, full_net = _get_grad(
            net, [], outputs_with_grad, input_values, [input_to_check])
        analytic_grad = net_grads[input_to_check]

        def GetLoss(new_value):
            workspace.blobs[input_to_check] = new_value
            workspace.RunNetOnce(full_net)
            return sum([
                workspace.blobs[output]
                for output in outputs_with_grad
            ]).sum()

        def GetValue(dim, delta):
            input_value = input_values[input_to_check].copy()
            input_value.flat[dim] += delta
            return input_value

        # Estimate the gradient with central differences, one element at a
        # time.
        grad_estimate = np.zeros_like(input_values[input_to_check])
        for dim in range(input_values[input_to_check].size):
            pos_loss = GetLoss(GetValue(dim, step_size))
            neg_loss = GetLoss(GetValue(dim, -step_size))
            grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

        err_msg = "Error in gradient check for net_copy {}".format(
            net.Name())
        if print_net:
            err_msg += ": {}".format(net.Proto())

        return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)

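Both Check above and CheckSimple further down rely on the same central-difference estimate: for a scalar loss L and input element x_i, dL/dx_i is approximated by (L(x_i + h) - L(x_i - h)) / (2 * h), where h is the step size. A hedged usage sketch of Check (the net and blob names are illustrative only):

net = core.Net("softmax_net")
net.Softmax(["x"], "prob")

mean_delta, max_delta = NetGradientChecker.Check(
    net,
    outputs_with_grad=["prob"],
    input_values={"x": np.random.randn(2, 5).astype(np.float32)},
    input_to_check="x",
)
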

class GradientChecker(object):
    """A gradient checker in Python.

    This is not the most efficient way to check gradients, as the Python
    interface involves a lot of copying back and forth. Use at your own
    risk.
    """

    def __init__(
        self,
        stepsize,
        threshold,
        device_option=None,
        workspace_name="gradient_check",
        input_device_options=None,
    ):
        self._stepsize = stepsize
        self._threshold = threshold
        self._device_option = device_option or caffe2_pb2.DeviceOption()
        self._workspace_name = workspace_name
        if input_device_options is None:
            self._input_device_options = {}
        else:
            self._input_device_options = input_device_options

    def GetLossAndGrad(
        self, op, grad_ops, inputs, input_names, input_to_check, grad_name,
        outputs_with_grads
    ):
        for i in range(len(inputs)):
            workspace.FeedBlob(input_names[i], inputs[i],
                               self._input_device_options.get(
                                   input_names[i], self._device_option))
        x = inputs[input_to_check]
        # Run.
        workspace.RunOperatorOnce(op)
        loss = 0.
        # Get the loss and feed in the gradients, then run the gradient ops.
        # The loss is the sum of squared outputs over 2, so the gradient fed
        # into each output is the output value itself.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr, self._device_option)
        loss /= 2.
        # Run gradient ops.
        workspace.RunOperatorsOnce(grad_ops)
        # Get gradients. A sparse gradient is first copied to CPU and then
        # densified with weight one (cf. _get_grad_blob above).
        if isinstance(grad_name, core.GradientSlice):
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('ones', np.ones(1, dtype=np.float32))
            gv_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.values,
                grad_name.values + '_cpu',
                device_option=self._device_option
            )
            gi_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.indices,
                grad_name.indices + '_cpu',
                device_option=self._device_option
            )
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                [
                    'zeros', 'ones', grad_name.indices + '_cpu',
                    grad_name.values + '_cpu', 'ones'
                ],
                'zeros',
            )
            workspace.RunOperatorOnce(gv_cpu_op)
            workspace.RunOperatorOnce(gi_cpu_op)
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad

    def CheckSimple(
        self,
        op,
        inputs,
        input_to_check,
        outputs_with_grads,
        grad_ops=None,
        input_device_options=None
    ):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs we need
              to check gradients for. For these outputs, we will collect a
              squared sum and also feed in their gradients.
          grad_ops: the gradient operators. If not given, we will obtain them
              from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption).
        Outputs:
          a (boolean, grad, grad_estimate) tuple: the boolean is True if the
          check passes, and grad and grad_estimate are the analytic and
          numerically estimated gradients.
        (See the usage sketch after this class definition.)
        """
        # Entering the checker workspace.
        old_ws_name = workspace.CurrentWorkspace()
        if self._workspace_name != old_ws_name:
            workspace.SwitchWorkspace(self._workspace_name, True)

        op.device_option.CopyFrom(self._device_option)
        if grad_ops is None:
            # TODO(jiayq): use the gradient registration instead of the old
            # hack.
            grad_ops, g_input = getGradientForOp(op)

        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                _input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original.
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs, op.input, input_to_check, grad_name,
            outputs_with_grads
        )
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        dims_to_check = inputs[input_to_check].size
        for current_dim in range(dims_to_check):
            # Positive perturbation.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads
            )
            # Negative perturbation.
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads
            )
            # Recover the original value.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2
        # Now, check correctness.
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        # After finishing, clean things up.
        if self._workspace_name != old_ws_name:
            # We reset the workspace to make sure everything intermediate is
            # cleaned up. Note that there is no need to delete a workspace -
            # when empty it takes a very limited amount of memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
        return ret, grad, grad_estimate
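A hedged end-to-end sketch of CheckSimple (the op, shapes, and thresholds below are illustrative, not a prescribed configuration):

checker = GradientChecker(stepsize=0.05, threshold=0.05)
op = core.CreateOperator('Relu', ['X'], ['Y'])
X = np.random.randn(3, 4).astype(np.float32)

passed, grad, grad_estimate = checker.CheckSimple(
    op, [X],
    input_to_check=0,        # index into `inputs`
    outputs_with_grads=[0],  # back-propagate from output 0
)
assert passed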