import collections.abc as container_abcs
import warnings
from itertools import product

import torch
import torch.testing


def zero_gradients(x):
    # Recursively reset accumulated gradients on a tensor or a container of tensors.
    if isinstance(x, torch.Tensor):
        if x.grad is not None:
            x.grad.detach_()
            x.grad.zero_()
    elif isinstance(x, container_abcs.Iterable):
        for elem in x:
            zero_gradients(elem)
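

# Illustrative sketch (not part of the original module): clearing gradients that
# a backward pass has accumulated, before reusing the same tensors.
def _demo_zero_gradients():
    x = torch.randn(3, requires_grad=True)
    x.sum().backward()
    assert x.grad is not None
    zero_gradients((x,))
    assert bool(x.grad.eq(0).all())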


def make_jacobian(input, num_out):
    # Allocate zero-filled Jacobian storage: one matrix of shape
    # (input.nelement(), num_out) per differentiable floating-point tensor,
    # mirroring the structure of `input`.
    if isinstance(input, torch.Tensor):
        if not input.is_floating_point():
            return None
        if not input.requires_grad:
            return None
        return torch.zeros(input.nelement(), num_out, dtype=input.dtype)
    elif isinstance(input, container_abcs.Iterable) and not isinstance(input, str):
        jacobians = list(filter(
            lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input)))
        if not jacobians:
            return None
        return type(input)(jacobians)
    else:
        return None


def iter_tensors(x, only_requiring_grad=False):
    # Yield every tensor in a (possibly nested) structure, optionally only
    # those that require grad.
    if isinstance(x, torch.Tensor):
        if x.requires_grad or not only_requiring_grad:
            yield x
    elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str):
        for elem in x:
            for result in iter_tensors(elem, only_requiring_grad):
                yield result
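

# Illustrative sketch (not part of the original module): how the two helpers
# above are combined. `_demo_jacobian_storage` is a hypothetical name used only
# for demonstration.
def _demo_jacobian_storage():
    x = torch.randn(2, 3, dtype=torch.double, requires_grad=True)
    y = torch.randn(4, dtype=torch.double, requires_grad=True)
    # One (nelement x num_out) zero matrix is allocated per differentiable input.
    jacobians = make_jacobian((x, y), num_out=5)
    # iter_tensors walks the same structure, so inputs and storage stay aligned.
    for inp, jac in zip(iter_tensors((x, y), True), iter_tensors(jacobians)):
        assert jac.shape == (inp.nelement(), 5)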


def get_numerical_jacobian(fn, input, target=None, eps=1e-3):
    """
    input: input to `fn`
    target: the Tensors wrt whom Jacobians are calculated (default=`input`)

    Note that `target` may not even be part of `input` to `fn`, so please be
    **very careful** in this function not to clone `target`.
    """
    if target is None:
        target = input
    output_size = fn(input).numel()
    jacobian = make_jacobian(target, output_size)

    # It's much easier to iterate over flattened lists of tensors.
    # These are references to the same objects in jacobian, so any changes
    # will be reflected in it as well.
    x_tensors = [t for t in iter_tensors(target, True)]
    j_tensors = [t for t in iter_tensors(jacobian)]

    for x_tensor, d_tensor in zip(x_tensors, j_tensors):
        if x_tensor.is_sparse:
            def get_stride(size):
                # Contiguous (row-major) strides for the given size.
                dim = len(size)
                tmp = 1
                stride = [0] * dim
                for i in reversed(range(dim)):
                    stride[i] = tmp
                    tmp *= size[i]
                return stride

            x_nnz = x_tensor._nnz()
            x_size = list(x_tensor.size())
            x_indices = x_tensor._indices().t()
            x_values = x_tensor._values().data
            x_stride = get_stride(x_size)

            # Perturb only the stored (nnz) values of the sparse tensor.
            for i in range(x_nnz):
                x_value = x_values[i]
                for x_idx in product(*[range(m) for m in x_values.size()[1:]]):
                    indices = x_indices[i].tolist() + list(x_idx)
                    d_idx = sum(indices[k] * x_stride[k] for k in range(len(x_size)))
                    orig = x_value[x_idx].item()
                    x_value[x_idx] = orig - eps
                    outa = fn(input).clone()
                    x_value[x_idx] = orig + eps
                    outb = fn(input).clone()
                    x_value[x_idx] = orig
                    r = (outb - outa) / (2 * eps)
                    d_tensor[d_idx] = r.detach().reshape(-1)
        else:
            # Use .data to perturb values in place without tripping autograd's
            # version counter.
            x_tensor = x_tensor.data
            for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
                orig = x_tensor[x_idx].item()
                x_tensor[x_idx] = orig - eps
                outa = fn(input).clone()
                x_tensor[x_idx] = orig + eps
                outb = fn(input).clone()
                x_tensor[x_idx] = orig
                r = (outb - outa) / (2 * eps)
                d_tensor[d_idx] = r.detach().reshape(-1)

    return jacobian
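

# Illustrative sketch (not part of the original module): central differences on
# a simple elementwise function. The numerical Jacobian of sin(x) should be
# close to diag(cos(x)).
def _demo_numerical_jacobian():
    x = torch.randn(3, dtype=torch.double, requires_grad=True)
    (jacobian,) = get_numerical_jacobian(lambda inp: torch.sin(inp[0]), (x,), eps=1e-6)
    assert torch.allclose(jacobian, torch.diag(torch.cos(x)).detach(), atol=1e-5)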


def get_analytical_jacobian(input, output):
    # It is easier to call to_dense() on a sparse output than to modify the
    # analytical Jacobian computation, so sparse outputs are rejected here.
    if output.is_sparse:
        raise ValueError('Sparse output is not supported at gradcheck yet. '
                         'Please call to_dense() on the output of fn for gradcheck.')
    diff_input_list = list(iter_tensors(input, True))
    jacobian = make_jacobian(input, output.numel())
    jacobian_reentrant = make_jacobian(input, output.numel())
    grad_output = torch.zeros_like(output)
    flat_grad_output = grad_output.view(-1)
    reentrant = True
    correct_grad_sizes = True

    # Backpropagate one basis vector of grad_output at a time; each pass fills
    # in one column of the Jacobian. The second pass (jacobian_reentrant) is
    # used to detect non-deterministic backward functions.
    for i in range(flat_grad_output.numel()):
        flat_grad_output.zero_()
        flat_grad_output[i] = 1
        for jacobian_c in (jacobian, jacobian_reentrant):
            grads_input = torch.autograd.grad(output, diff_input_list, grad_output,
                                              retain_graph=True, allow_unused=True)
            for jacobian_x, d_x, x in zip(jacobian_c, grads_input, diff_input_list):
                if d_x is not None and d_x.size() != x.size():
                    correct_grad_sizes = False
                elif jacobian_x.numel() != 0:
                    if d_x is None:
                        jacobian_x[:, i].zero_()
                    else:
                        d_x_dense = d_x.to_dense() if d_x.is_sparse else d_x
                        assert jacobian_x[:, i].numel() == d_x_dense.numel()
                        jacobian_x[:, i] = d_x_dense.contiguous().view(-1)

    for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant):
        if jacobian_x.numel() != 0 and (jacobian_x - jacobian_reentrant_x).abs().max() != 0:
            reentrant = False

    return jacobian, reentrant, correct_grad_sizes
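

# Illustrative sketch (not part of the original module): the analytical Jacobian
# obtained from autograd should agree with the finite-difference one above.
def _demo_analytical_vs_numerical():
    x = torch.randn(3, dtype=torch.double, requires_grad=True)
    out = torch.sin(x)
    (analytical,), reentrant, correct_sizes = get_analytical_jacobian((x,), out)
    (numerical,) = get_numerical_jacobian(lambda inp: torch.sin(inp[0]), (x,))
    assert reentrant and correct_sizes
    assert torch.allclose(analytical, numerical, atol=1e-5)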


def _as_tuple(x):
    if isinstance(x, tuple):
        return x
    elif isinstance(x, list):
        return tuple(x)
    else:
        return x,


def _differentiable_outputs(x):
    return tuple(o for o in _as_tuple(x) if o.requires_grad)


def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True, check_sparse_nnz=False):
    r"""Check gradients computed via small finite differences against analytical
    gradients w.r.t. tensors in :attr:`inputs` that are of floating point type
    and with ``requires_grad=True``.

    The check between numerical and analytical gradients uses :func:`~torch.allclose`.

    .. note::
        The default values are designed for :attr:`input` of double precision.
        This check will likely fail if :attr:`input` is of less precision, e.g.,
        ``FloatTensor``.

    .. warning::
        If any checked tensor in :attr:`input` has overlapping memory, i.e.,
        different indices pointing to the same memory address (e.g., from
        :func:`torch.expand`), this check will likely fail because the numerical
        gradients computed by point perturbation at such indices will change
        values at all other indices that share the same memory address.

    Args:
        func (function): a Python function that takes Tensor inputs and returns
            a Tensor or a tuple of Tensors
        inputs (tuple of Tensor or Tensor): inputs to the function
        eps (float, optional): perturbation for finite differences
        atol (float, optional): absolute tolerance
        rtol (float, optional): relative tolerance
        raise_exception (bool, optional): indicating whether to raise an exception if
            the check fails. The exception gives more information about the
            exact nature of the failure. This is helpful when debugging gradchecks.
        check_sparse_nnz (bool, optional): if True, gradcheck allows for SparseTensor input,
            and for any SparseTensor at input, gradcheck will perform its check at nnz
            positions only.

    Returns:
        True if all differences satisfy allclose condition
    """
    def fail_test(msg):
        if raise_exception:
            raise RuntimeError(msg)
        return False

    tupled_inputs = _as_tuple(inputs)

    if any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)) and not check_sparse_nnz:
        return fail_test('gradcheck expects all tensor inputs '
                         'to be dense when check_sparse_nnz is set to False.')

    # Make sure at least one input actually requires gradient.
    any_input_requiring_grad = False
    for inp in tupled_inputs:
        if isinstance(inp, torch.Tensor):
            if inp.requires_grad:
                if inp.dtype != torch.float64:
                    warnings.warn(
                        'At least one of the inputs that requires gradient '
                        'is not of double precision floating point. '
                        'This check will likely fail if all the inputs are '
                        'not of double precision floating point. ')
                any_input_requiring_grad = True
    if not any_input_requiring_grad:
        raise ValueError(
            'gradcheck expects at least one input tensor to require gradient, '
            'but none of them have requires_grad=True.')

    output = _differentiable_outputs(func(*tupled_inputs))

    for i, o in enumerate(output):
        if not o.requires_grad:
            continue

        def fn(input):
            return _as_tuple(func(*input))[i]

        analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(tupled_inputs, o)
        numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)

        if not correct_grad_sizes:
            return fail_test('Analytical gradient has incorrect size')

        for j, (a, n) in enumerate(zip(analytical, numerical)):
            if a.numel() != 0 or n.numel() != 0:
                if not torch.allclose(a, n, rtol, atol):
                    return fail_test('Jacobian mismatch for output %d with respect to input %d,\n'
                                     'numerical:%s\nanalytical:%s\n' % (i, j, n, a))

        if not reentrant:
            return fail_test('Backward is not reentrant, i.e., running backward with same '
                             'input and grad_output multiple times gives different values, '
                             'although analytical gradient matches numerical gradient')

    # Check that the backward pass actually multiplies by grad_output: with an
    # all-zero grad_output, every returned gradient must be zero.
    output = _differentiable_outputs(func(*tupled_inputs))
    if any([o.requires_grad for o in output]):
        diff_input_list = list(iter_tensors(tupled_inputs, True))
        if not diff_input_list:
            raise RuntimeError("no Tensors requiring grad found in input")
        grads_input = torch.autograd.grad(output, diff_input_list, [torch.zeros_like(o) for o in output],
                                          allow_unused=True)
        for gi, i in zip(grads_input, diff_input_list):
            if gi is None:
                continue
            if isinstance(gi, torch.Tensor) and gi.is_sparse:
                if gi.layout != i.layout:
                    return fail_test('grad is sparse tensor, but has incorrect layout')
                if gi.sparse_dim() != i.sparse_dim():
                    return fail_test('grad is sparse tensor, but has incorrect sparse_dim')
                if gi.dense_dim() != i.dense_dim():
                    return fail_test('grad is sparse tensor, but has incorrect dense_dim')
                gi = gi.to_dense()
                i = i.to_dense()
            if not gi.eq(0).all():
                return fail_test('backward not multiplied by grad_output')
            if gi.type() != i.type():
                return fail_test("grad is incorrect type")
            if gi.size() != i.size():
                return fail_test('grad is incorrect size')

    return True
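

# Illustrative usage sketch (not part of the original module), mirroring how
# gradcheck is typically called in tests: double-precision inputs with
# requires_grad=True.
def _demo_gradcheck_usage():
    x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
    w = torch.randn(3, 2, dtype=torch.double, requires_grad=True)
    assert gradcheck(lambda a, b: (a @ b).sum(), (x, w), eps=1e-6, atol=1e-4)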


def gradgradcheck(func, inputs, grad_outputs=None, eps=1e-6, atol=1e-5, rtol=1e-3,
                  gen_non_contig_grad_outputs=False, raise_exception=True):
    r"""Check gradients of gradients computed via small finite differences
    against analytical gradients w.r.t. tensors in :attr:`inputs` and
    :attr:`grad_outputs` that are of floating point type and with
    ``requires_grad=True``.

    This function checks that backpropagating through the gradients computed
    to the given :attr:`grad_outputs` is correct.

    The check between numerical and analytical gradients uses :func:`~torch.allclose`.

    .. note::
        The default values are designed for :attr:`input` and
        :attr:`grad_outputs` of double precision. This check will likely fail if
        they are of less precision, e.g., ``FloatTensor``.

    .. warning::
        If any checked tensor in :attr:`input` and :attr:`grad_outputs` has
        overlapping memory, i.e., different indices pointing to the same memory
        address (e.g., from :func:`torch.expand`), this check will likely fail
        because the numerical gradients computed by point perturbation at such
        indices will change values at all other indices that share the same
        memory address.

    Args:
        func (function): a Python function that takes Tensor inputs and returns
            a Tensor or a tuple of Tensors
        inputs (tuple of Tensor or Tensor): inputs to the function
        grad_outputs (tuple of Tensor or Tensor, optional): The gradients with
            respect to the function's outputs.
        eps (float, optional): perturbation for finite differences
        atol (float, optional): absolute tolerance
        rtol (float, optional): relative tolerance
        gen_non_contig_grad_outputs (bool, optional): if :attr:`grad_outputs` is
            ``None`` and :attr:`gen_non_contig_grad_outputs` is ``True``, the
            randomly generated gradient outputs are made to be noncontiguous
        raise_exception (bool, optional): indicating whether to raise an exception if
            the check fails. The exception gives more information about the
            exact nature of the failure. This is helpful when debugging gradchecks.

    Returns:
        True if all differences satisfy allclose condition
    """
    tupled_inputs = _as_tuple(inputs)

    if grad_outputs is None:
        # If grad_outputs is not specified, create random Tensors of the same
        # shape, type, and device as the outputs.
        def randn_like(x):
            y = torch.randn_like(x if x.is_floating_point() else x.double())
            if gen_non_contig_grad_outputs:
                y = torch.testing.make_non_contiguous(y)
            return y.requires_grad_()
        outputs = _as_tuple(func(*tupled_inputs))
        tupled_grad_outputs = tuple(randn_like(x) for x in outputs)
    else:
        tupled_grad_outputs = _as_tuple(grad_outputs)

    num_outputs = len(tupled_grad_outputs)

    def new_func(*args):
        # Split the flattened argument list back into inputs and grad_outputs,
        # then return the first-order gradients so that gradcheck can verify
        # their gradients (i.e., the second-order gradients of `func`).
        input_args = args[:-num_outputs]
        grad_outputs = args[-num_outputs:]
        outputs = _differentiable_outputs(func(*input_args))
        input_args = tuple(x for x in input_args if isinstance(x, torch.Tensor) and x.requires_grad)
        grads = torch.autograd.grad(outputs, input_args, grad_outputs, create_graph=True)
        return grads

    return gradcheck(new_func, tupled_inputs + tupled_grad_outputs, eps, atol, rtol, raise_exception)
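

# Illustrative usage sketch (not part of the original module): second-order
# check for a smooth elementwise function; grad_outputs are generated randomly
# because none are supplied.
def _demo_gradgradcheck_usage():
    x = torch.randn(5, dtype=torch.double, requires_grad=True)
    assert gradgradcheck(lambda a: torch.tanh(a), (x,), eps=1e-6, atol=1e-4)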


# Referenced helpers (signatures kept here for context):
#   torch.testing.make_non_contiguous(tensor)
#   torch.autograd.grad(outputs, inputs, grad_outputs=None, retain_graph=None,
#                       create_graph=False, only_inputs=True, allow_unused=False)