from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from caffe2.python import core, net_drawer, workspace
from caffe2.proto import caffe2_pb2


def getGradientForOp(op):
    return core.GradientRegistry.GetGradientForOp(
        op, [s + '_grad' for s in op.output])


def _get_grad_blob(grad_map, input_to_check):
    grad_blob = grad_map[input_to_check]

    if isinstance(grad_blob, core.BlobReference):
        return workspace.blobs[grad_blob]

    # If the gradient is not a plain blob it must be a GradientSlice; convert
    # it to a dense gradient so it can be compared with the estimated one.
    assert isinstance(grad_blob, core.GradientSlice)
    dense_grad = 'tmp_dense_grad'
    sparse_to_dense_op = core.CreateOperator(
        'SparseToDense',
        [grad_blob.indices, grad_blob.values, input_to_check],
        dense_grad,
    )
    workspace.RunOperatorOnce(sparse_to_dense_op)
    return workspace.blobs[dense_grad]


def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
    grad_net = net.Clone(net.Name() + "_copy")
    grad_map = grad_net.AddGradientOperators(outputs_with_grad)

    for name, value in (input_values or {}).items():
        workspace.blobs[name] = value

    for input_to_check in inputs_with_grads:
        assert input_to_check in grad_map, (
            '{} has no gradient, cannot check net gradient.'.format(
                input_to_check))
        assert str(input_to_check) in workspace.blobs

    workspace.RunNetOnce(grad_net)
    forward_results = [(output, workspace.blobs[output]) for output in outputs]
    grads = {input_to_check: _get_grad_blob(grad_map, input_to_check)
             for input_to_check in inputs_with_grads}

    return forward_results, grads, grad_net


def _assert_close(value1, value2, threshold, err_msg=''):
    np.testing.assert_allclose(
        value1, value2,
        atol=threshold, rtol=threshold,
        err_msg=err_msg,
    )

    delta = np.abs(value1 - value2).flatten()
    return np.mean(delta), max(delta)


def CompareNets(nets, outputs, outputs_with_grad_ids,
                inputs_with_grads, input_values=None,
                threshold=0.0000001, print_net_images=False):
    def _get_output_with_grad_names(net_outputs):
        return [net_outputs[i] for i in outputs_with_grad_ids]

    if print_net_images:
        for i, net in enumerate(nets):
            png = net_drawer.GetPydotGraph(net).create_png()
            with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
                      'wb') as f:
                f.write(png)

    results = [
        _get_grad(net, net_outputs,
                  _get_output_with_grad_names(net_outputs),
                  input_values, inputs_with_grads)
        for net, net_outputs in zip(nets, outputs)
    ]

    if print_net_images:
        _, _, backward_nets = zip(*results)
        for i, net in enumerate(backward_nets):
            png = net_drawer.GetPydotGraph(net).create_png()
            with open("caffe2_net_" + str(i) + net.Name() + ".png", 'wb') \
                    as f:
                f.write(png)

    first_net_results, first_net_grads, _ = results[0]
    for net_results, net_grads, _ in results[1:]:
        assert len(net_results) == len(first_net_results)
        for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
                zip(first_net_results, net_results)):
            _assert_close(
                blob_value1, blob_value2, threshold,
                err_msg="Different forward pass results for output id {}. "
                        "Corresponding output blobs: {} and {}".format(
                            idx, blob1, blob2))

        assert net_grads.keys() == first_net_grads.keys()
        for blob, blob_grad_value in net_grads.items():
            _assert_close(
                first_net_grads[blob], blob_grad_value, threshold,
                err_msg="Different gradients for input {}".format(blob))


def Check(net, outputs_with_grad, input_values,
          input_to_check, step_size=0.0001,
          threshold=0.05, print_net=True):

    net_results, net_grads, full_net = _get_grad(
        net, [], outputs_with_grad, input_values, [input_to_check])
    analytic_grad = net_grads[input_to_check]

    def GetLoss(new_value):
        workspace.blobs[input_to_check] = new_value
        workspace.RunNetOnce(full_net)
        return sum([
            workspace.blobs[output]
            for output in outputs_with_grad
        ]).sum()

    def GetValue(dim, delta):
        input_value = input_values[input_to_check].copy()
        input_value.flat[dim] += delta
        return input_value

    # Central-difference estimate of the gradient for every element of the
    # input blob.
    grad_estimate = np.zeros_like(input_values[input_to_check])
    for dim in range(input_values[input_to_check].size):
        pos_loss = GetLoss(GetValue(dim, step_size))
        neg_loss = GetLoss(GetValue(dim, -step_size))
        grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

    err_msg = "Error in gradient check for net_copy {}".format(net.Name())
    if print_net:
        err_msg += ": {}".format(net.Proto())

    return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)
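

# Hypothetical usage sketch for Check: numerically verify the gradient of the
# input blob "X" of a tiny one-operator net against its analytic gradient. The
# net, blob name and shape are illustrative only.
def _example_check():
    X = np.random.rand(2, 3).astype(np.float32)
    net = core.Net("tanh_net")
    out = net.Tanh("X", "out")
    # Returns the mean and max absolute difference between the analytic and
    # the central-difference gradient (and raises if they are not close).
    return Check(net, [out], {"X": X}, "X")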
156 """A gradient checker in Python. 158 This is not the most efficient way to check gradients, as the Python 159 interface will involve a lot of copies back and forth operations. Use at your 168 workspace_name=
"gradient_check",
169 input_device_options=
None,
175 if input_device_options
is None:

    def GetLossAndGrad(
        self, op, grad_ops, inputs, input_names, input_to_check, grad_name,
        outputs_with_grads
    ):
        for i in range(len(inputs)):
            workspace.FeedBlob(input_names[i], inputs[i],
                               self._input_device_options.get(
                                   input_names[i], self._device_option))
        x = inputs[input_to_check]

        # Run the forward operator.
        workspace.RunOperatorOnce(op)

        # Compute a sum-of-squares loss over the selected outputs and feed in
        # the corresponding output gradients.
        loss = 0.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr.astype(np.float32))
        # The fed gradient is arr itself, which matches d(0.5 * ||arr||^2).
        loss /= 2.

        # Run the gradient operators.
        workspace.RunOperatorsOnce(grad_ops)

        if isinstance(grad_name, core.GradientSlice):
            # Densify the sparse gradient so it can be compared directly.
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('ones', np.ones(1, dtype=np.float32))
            gv_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.values, grad_name.values + '_cpu',
                device_option=self._device_option
            )
            gi_cpu_op = core.CreateOperator(
                'EnsureCPUOutput', grad_name.indices,
                grad_name.indices + '_cpu',
                device_option=self._device_option
            )
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                [
                    'zeros', 'ones', grad_name.indices + '_cpu',
                    grad_name.values + '_cpu', 'ones'
                ],
                'zeros',
            )
            workspace.RunOperatorOnce(gv_cpu_op)
            workspace.RunOperatorOnce(gi_cpu_op)
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad

    def CheckSimple(self, op, inputs, input_to_check, outputs_with_grads,
                    grad_ops=None, input_device_options=None):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on the top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs we need
              to check gradients with. For these outputs, we will collect a
              squared sum and also feed in their gradients.
          grad_ops: the gradient operators. If not given, we will get them
              from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption).
        Outputs:
          boolean: True if it passes, False if it does not pass.
        """
        # Enter the checker workspace.
        old_ws_name = workspace.CurrentWorkspace()
        if self._workspace_name != old_ws_name:
            workspace.SwitchWorkspace(self._workspace_name, True)

        op.device_option.CopyFrom(self._device_option)
        if grad_ops is None:
            grad_ops, g_input = getGradientForOp(op)

        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        # Feed in the inputs.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                _input_device_options.get(op.input[i], self._device_option))

        # Get the loss and the analytic gradient for the original input.
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs, op.input, input_to_check, grad_name,
            outputs_with_grads)
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        dims_to_check = inputs[input_to_check].size
        for current_dim in range(dims_to_check):
            # Positive perturbation.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads)
            # Negative perturbation.
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs, op.input, input_to_check, grad_name,
                outputs_with_grads)
            # Recover the original value.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2

        # Compare the analytic gradient with the central-difference estimate.
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True

        # Clean up intermediate blobs and switch back to the original
        # workspace.
        if self._workspace_name != old_ws_name:
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
        return ret, grad, grad_estimate
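

# Hypothetical usage sketch for the operator-level checker: verify the gradient
# of a single Tanh operator. The step size and threshold values are
# illustrative only.
def _example_check_simple():
    X = np.random.rand(5, 4).astype(np.float32)
    op = core.CreateOperator("Tanh", ["X"], ["Y"])
    checker = GradientChecker(stepsize=0.005, threshold=0.005)
    ok, grad, grad_estimate = checker.CheckSimple(op, [X], 0, [0])
    assert ok, "Tanh gradient check failed"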