Caffe2 - Python API
A deep learning, cross platform ML framework
gradient_checker.py
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package gradient_checker
# Module caffe2.python.gradient_checker
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from caffe2.python import core, workspace, net_drawer
from caffe2.proto import caffe2_pb2


def _get_grad_blob(grad_map, input_to_check):
    grad_blob = grad_map[input_to_check]

    if isinstance(grad_blob, core.BlobReference):
        return workspace.blobs[grad_blob]

    # If grad_blob is not a single blob, it should be a gradient slice.
    # To make it comparable with the estimated gradient, which is dense,
    # we need to first convert grad_blob to a dense gradient.
    assert isinstance(grad_blob, core.GradientSlice)
    dense_grad = 'tmp_dense_grad'
    sparse_to_dense_op = core.CreateOperator(
        'SparseToDense',
        [grad_blob.indices, grad_blob.values, input_to_check],
        dense_grad,
    )
    workspace.RunOperatorOnce(sparse_to_dense_op)
    return workspace.blobs[dense_grad]


def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
    grad_net = net.Clone(net.Name() + "_copy")
    grad_map = grad_net.AddGradientOperators(outputs_with_grad)

    for name, value in (input_values or {}).items():
        workspace.blobs[name] = value

    for input_to_check in inputs_with_grads:
        assert input_to_check in grad_map, (
            '{} has no gradient, cannot check net gradient.'.format(
                input_to_check))
        assert str(input_to_check) in workspace.blobs

    workspace.RunNetOnce(grad_net)
    forward_results = [(output, workspace.blobs[output]) for output in outputs]
    grads = {input_to_check: _get_grad_blob(grad_map, input_to_check)
             for input_to_check in inputs_with_grads}

    return forward_results, grads, grad_net


def _assert_close(value1, value2, threshold, err_msg=''):
    np.testing.assert_allclose(
        value1, value2,
        atol=threshold, rtol=threshold,
        err_msg=err_msg,
    )

    delta = np.abs(value1 - value2).flatten()
    return np.mean(delta), max(delta)


class NetGradientChecker(object):
    @staticmethod
    def CompareNets(nets, outputs, outputs_with_grad_ids,
                    inputs_with_grads, input_values=None,
                    threshold=0.0000001, print_net_images=False):
        def _get_output_with_grad_names(net_outputs):
            return [net_outputs[i] for i in outputs_with_grad_ids]

        if print_net_images:
            for i, net in enumerate(nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
                          'wb') as f:
                    f.write(png)

        results = [
            _get_grad(net, net_outputs,
                      _get_output_with_grad_names(net_outputs),
                      input_values, inputs_with_grads)
            for net, net_outputs in zip(nets, outputs)
        ]

        if print_net_images:
            _, _, backward_nets = zip(*results)
            for i, net in enumerate(backward_nets):
                png = net_drawer.GetPydotGraph(net).create_png()
                with open("caffe2_net_" + str(i) + net.Name() + ".png",
                          'wb') as f:
                    f.write(png)

        first_net_results, first_net_grads, _ = results[0]
        for net_results, net_grads, _ in results[1:]:
            assert len(net_results) == len(first_net_results)
            for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
                    zip(first_net_results, net_results)):
                _assert_close(
                    blob_value1, blob_value2, threshold,
                    err_msg="Different forward pass results for output id {}. "
                    "Corresponding output blobs: {} and {}".format(
                        idx, blob1, blob2))

            assert net_grads.keys() == first_net_grads.keys()
            for blob, blob_grad_value in net_grads.items():
                _assert_close(
                    first_net_grads[blob], blob_grad_value, threshold,
                    err_msg="Different gradients for input {}".format(blob))

    @staticmethod
    def Check(net, outputs_with_grad, input_values,
              input_to_check, step_size=0.0001,
              threshold=0.05, print_net=True):

        net_results, net_grads, full_net = _get_grad(
            net, [], outputs_with_grad, input_values, [input_to_check])
        analytic_grad = net_grads[input_to_check]

        def GetLoss(new_value):
            workspace.blobs[input_to_check] = new_value
            workspace.RunNetOnce(full_net)
            return sum([
                workspace.blobs[output]
                for output in outputs_with_grad
            ]).sum()

        def GetValue(dim, delta):
            input_value = input_values[input_to_check].copy()
            input_value.flat[dim] += delta
            return input_value

        grad_estimate = np.zeros_like(input_values[input_to_check])
        for dim in range(input_values[input_to_check].size):
            pos_loss = GetLoss(GetValue(dim, step_size))
            neg_loss = GetLoss(GetValue(dim, -step_size))
            grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

        err_msg = "Error in gradient check for net_copy {}".format(
            net.Name())
        if print_net:
            err_msg += ": {}".format(net.Proto())

        return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)

class GradientChecker(object):
    """A gradient checker in Python.

    This is not the most efficient way to check gradients, as the Python
    interface will involve a lot of copying back and forth. Use at your
    own risk.
    """

    def __init__(
        self,
        stepsize,
        threshold,
        device_option=caffe2_pb2.DeviceOption(),
        workspace_name="gradient_check"
    ):
        self._stepsize = stepsize
        self._threshold = threshold
        self._device_option = device_option
        self._workspace_name = workspace_name

    def GetLossAndGrad(
        self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
    ):
        # First, feed in the current input. Note that we are not changing
        # anything else, so we don't need to feed in others.
        workspace.FeedBlob(input_name, x, self._device_option)
        # Run.
        workspace.RunOperatorOnce(op)
        loss = 0.
        # Compute the loss, feed in the output gradients, and then run the
        # gradient ops.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr, self._device_option)
        loss /= 2.
        # Run gradient ops
        workspace.RunOperatorsOnce(grad_ops)
        # Get gradients
        if isinstance(grad_name, core.GradientSlice):
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('ones', np.ones(1, dtype=np.float32))
            gv_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.values, grad_name.values + '_cpu',
                device_option=self._device_option
            )
            gi_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.indices, grad_name.indices + '_cpu',
                device_option=self._device_option
            )
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                [
                    'zeros', 'ones', grad_name.indices + '_cpu',
                    grad_name.values + '_cpu', 'ones'
                ],
                'zeros',
            )
            workspace.RunOperatorOnce(gv_cpu_op)
            workspace.RunOperatorOnce(gi_cpu_op)
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad

    def CheckSimple(
        self,
        op,
        inputs,
        input_to_check,
        outputs_with_grads,
        grad_ops=None,
        input_device_options=None
    ):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs we will
              need to check gradients with. For these outputs, we will collect
              a squared sum and also feed in their gradients.
          grad_ops: the gradient operators. If not given, we will get them
              from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption).
        Outputs:
          a tuple of (boolean indicating whether the check passed, the
          analytic gradient, and the estimated gradient).
        """
        if input_device_options is None:
            input_device_options = {}
        # Enter the checker workspace.
        old_ws_name = workspace.CurrentWorkspace()
        if self._workspace_name != old_ws_name:
            workspace.SwitchWorkspace(self._workspace_name, True)

        op.device_option.CopyFrom(self._device_option)
        if grad_ops is None:
            # TODO(jiayq): use the gradient registration instead of the old
            # hack.
            grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
                op, [s + '_grad' for s in op.output])

        dims_to_check = inputs[input_to_check].size
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original input.
        input_name = op.input[input_to_check]
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs[input_to_check], input_name, grad_name,
            outputs_with_grads
        )
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        for current_dim in range(dims_to_check):
            # Perturb the current dimension in the positive direction.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads
            )
            # Perturb it in the negative direction.
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads
            )
            # Recover the original value.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            # Central-difference estimate of the gradient.
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2
        # Now, check correctness.
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        # After finishing, clean things up.
        if self._workspace_name != old_ws_name:
            # We reset the workspace to make sure everything intermediate is
            # cleaned up. Note that there is no need to delete a workspace -
            # when empty it takes a very limited amount of memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
        return ret, grad, grad_estimate
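Both checkers estimate gradients numerically with a central difference: each coordinate of the input is nudged by plus and minus the step size, a scalar loss (half the squared sum of the selected outputs in GradientChecker, the plain sum of the outputs in NetGradientChecker.Check) is re-evaluated, and the slope (pos_loss - neg_loss) / (2 * stepsize) is compared against the analytic gradient. The numpy-only sketch below illustrates that estimate outside of Caffe2; the function names and the tanh example are illustrative and are not part of this module.

import numpy as np

def numeric_grad(f, x, stepsize=1e-4):
    # Scalar loss mirroring GetLossAndGrad: half the squared sum of f(x).
    def loss(v):
        return 0.5 * (f(v) ** 2).sum()

    grad = np.zeros_like(x)
    for i in range(x.size):
        x_pos, x_neg = x.copy(), x.copy()
        x_pos.flat[i] += stepsize
        x_neg.flat[i] -= stepsize
        # Central difference: (L(x + h) - L(x - h)) / (2h).
        grad.flat[i] = (loss(x_pos) - loss(x_neg)) / (2 * stepsize)
    return grad

# For f(x) = tanh(x), dL/dx = tanh(x) * (1 - tanh(x)**2); the estimate should
# agree with the analytic gradient to roughly stepsize**2.
x = np.random.randn(3, 4)
analytic = np.tanh(x) * (1 - np.tanh(x) ** 2)
assert np.allclose(numeric_grad(np.tanh, x), analytic, atol=1e-4, rtol=1e-4)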
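A minimal usage sketch of GradientChecker.CheckSimple on a single operator. It assumes a Caffe2 build with the Tanh operator and its registered gradient; the blob names, step size, and threshold are illustrative choices rather than values prescribed by this module.

import numpy as np
from caffe2.python import core
from caffe2.python.gradient_checker import GradientChecker

# A single smooth operator keeps the finite-difference estimate well behaved.
op = core.CreateOperator('Tanh', ['X'], ['Y'])
X = np.random.randn(4, 5).astype(np.float32)

checker = GradientChecker(stepsize=0.005, threshold=0.05)
# Check the gradient w.r.t. input index 0 ('X'), collecting the squared-sum
# loss from output index 0 ('Y').
passed, analytic_grad, grad_estimate = checker.CheckSimple(op, [X], 0, [0])
assert passed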
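NetGradientChecker.Check works at the net level rather than the operator level: it clones the net, adds gradient operators for the listed outputs, and compares the analytic gradient of an input against the central-difference estimate. The sketch below is illustrative only; the net, blob names, and shapes are assumptions, and the same caveat about a Tanh-enabled build applies.

import numpy as np
from caffe2.python import core
from caffe2.python.gradient_checker import NetGradientChecker

net = core.Net("toy_net")
net.Tanh(["X"], ["Y"])

# Raises if the analytic and estimated gradients differ beyond the threshold;
# otherwise returns the mean and max absolute difference between them.
mean_delta, max_delta = NetGradientChecker.Check(
    net,
    outputs_with_grad=["Y"],
    input_values={"X": np.random.randn(2, 3).astype(np.float32)},
    input_to_check="X",
)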