import torch
import torch.testing
import warnings
from itertools import product
from torch._six import container_abcs


def zero_gradients(x):
    if isinstance(x, torch.Tensor):
        if x.grad is not None:
            x.grad.detach_()
            x.grad.zero_()
    elif isinstance(x, container_abcs.Iterable):
        for elem in x:
            zero_gradients(elem)


def make_jacobian(input, num_out):
    if isinstance(input, torch.Tensor):
        if not input.is_floating_point():
            return None
        if not input.requires_grad:
            return None
        return torch.zeros(input.nelement(), num_out, dtype=input.dtype)
    elif isinstance(input, container_abcs.Iterable) and not isinstance(input, str):
        jacobians = list(filter(
            lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input)))
        if not jacobians:
            return None
        return type(input)(jacobians)
    else:
        return None
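

# Note on layout: each Jacobian block allocated above has shape
# (input.nelement(), num_out). Entry [r, c] ends up holding
# d output_flat[c] / d input_flat[r]: get_numerical_jacobian fills it row by
# row and get_analytical_jacobian fills it column by column.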


def iter_tensors(x, only_requiring_grad=False):
    if isinstance(x, torch.Tensor):
        if x.requires_grad or not only_requiring_grad:
            yield x
    elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str):
        for elem in x:
            for result in iter_tensors(elem, only_requiring_grad):
                yield result


def get_numerical_jacobian(fn, input, target=None, eps=1e-3):
    """
    input: the input passed to `fn`
    target: the Tensors with respect to which Jacobians are calculated
        (default=`input`)

    Note that `target` may not even be part of `input` to `fn`, so please be
    **very careful** here not to clone `target`.
    """
    if target is None:
        target = input
    output_size = fn(input).numel()
    jacobian = make_jacobian(target, output_size)

    # It is much easier to iterate over flattened lists of tensors. These are
    # references to the same objects stored in `jacobian`, so any in-place
    # change is reflected there as well.
    x_tensors = [t for t in iter_tensors(target, True)]
    j_tensors = [t for t in iter_tensors(jacobian)]

    for x_tensor, d_tensor in zip(x_tensors, j_tensors):
        if x_tensor.is_sparse:
            def get_stride(size):
                # Row-major strides of a dense tensor with this size.
                dim = len(size)
                tmp = 1
                stride = [0] * dim
                for i in reversed(range(dim)):
                    stride[i] = tmp
                    tmp *= size[i]
                return stride

            x_nnz = x_tensor._nnz()
            x_size = list(x_tensor.size())
            x_indices = x_tensor._indices().t()
            x_values = x_tensor._values().data
            x_stride = get_stride(x_size)

            # Perturb only the stored (nnz) values; d_idx is the position the
            # perturbed element would have in the dense layout.
            for i in range(x_nnz):
                x_value = x_values[i]
                for x_idx in product(*[range(m) for m in x_values.size()[1:]]):
                    indices = x_indices[i].tolist() + list(x_idx)
                    d_idx = sum(indices[k] * x_stride[k] for k in range(len(x_size)))
                    orig = x_value[x_idx].item()
                    x_value[x_idx] = orig - eps
                    outa = fn(input).clone()
                    x_value[x_idx] = orig + eps
                    outb = fn(input).clone()
                    x_value[x_idx] = orig
                    r = (outb - outa) / (2 * eps)
                    d_tensor[d_idx] = r.detach().reshape(-1)
        else:
            # Use .data to get around the version check: the in-place
            # perturbations below should not count as autograd-visible writes.
            x_tensor = x_tensor.data
            for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
                orig = x_tensor[x_idx].item()
                x_tensor[x_idx] = orig - eps
                outa = fn(input).clone()
                x_tensor[x_idx] = orig + eps
                outb = fn(input).clone()
                x_tensor[x_idx] = orig
                r = (outb - outa) / (2 * eps)
                d_tensor[d_idx] = r.detach().reshape(-1)

    return jacobian
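

# Illustrative sketch (hypothetical helper, not part of this module's API):
# calling get_numerical_jacobian directly on a simple elementwise function,
# mirroring how gradcheck wraps `func` below.
def _example_numerical_jacobian():
    inputs = (torch.randn(3, dtype=torch.double, requires_grad=True),)

    def fn(inp):
        return torch.sin(inp[0])

    # Returns a tuple with a single (3, 3) matrix whose diagonal approximates
    # cos(inputs[0]) up to O(eps ** 2).
    return get_numerical_jacobian(fn, inputs, eps=1e-6)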


def get_analytical_jacobian(input, output):
    # It is easier for the caller to densify a sparse output than for this
    # routine to build a sparse analytical Jacobian.
    if output.is_sparse:
        raise ValueError('Sparse output is not supported at gradcheck yet. '
                         'Please call to_dense() on the output of fn for gradcheck.')
    diff_input_list = list(iter_tensors(input, True))
    jacobian = make_jacobian(input, output.numel())
    jacobian_reentrant = make_jacobian(input, output.numel())
    grad_output = torch.zeros_like(output)
    flat_grad_output = grad_output.view(-1)
    reentrant = True
    correct_grad_sizes = True

    for i in range(flat_grad_output.numel()):
        flat_grad_output.zero_()
        flat_grad_output[i] = 1
        # Compute each column twice to detect a non-reentrant backward.
        for jacobian_c in (jacobian, jacobian_reentrant):
            grads_input = torch.autograd.grad(output, diff_input_list, grad_output,
                                              retain_graph=True, allow_unused=True)
            for jacobian_x, d_x, x in zip(jacobian_c, grads_input, diff_input_list):
                if d_x is not None and d_x.size() != x.size():
                    correct_grad_sizes = False
                elif jacobian_x.numel() != 0:
                    if d_x is None:
                        jacobian_x[:, i].zero_()
                    else:
                        d_x_dense = d_x.to_dense() if d_x.is_sparse else d_x
                        assert jacobian_x[:, i].numel() == d_x_dense.numel()
                        jacobian_x[:, i] = d_x_dense.contiguous().view(-1)

    for jacobian_x, jacobian_reentrant_x in zip(jacobian, jacobian_reentrant):
        if jacobian_x.numel() != 0 and (jacobian_x - jacobian_reentrant_x).abs().max() != 0:
            reentrant = False

    return jacobian, reentrant, correct_grad_sizes
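

# Illustrative sketch (hypothetical helper, not part of this module's API):
# the core step above is that autograd.grad with a one-hot grad_output
# recovers the gradient of a single output element, i.e. one column of the
# analytical Jacobian block.
def _example_analytical_column(i=0):
    x = torch.randn(3, dtype=torch.double, requires_grad=True)
    y = torch.sin(x)
    grad_output = torch.zeros_like(y)
    grad_output.view(-1)[i] = 1
    # d y_flat[i] / d x, the values written into jacobian_x[:, i] above.
    (column,) = torch.autograd.grad(y, (x,), grad_output)
    return column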


def _as_tuple(x):
    if isinstance(x, tuple):
        return x
    elif isinstance(x, list):
        return tuple(x)
    else:
        return x,


def _differentiable_outputs(x):
    return tuple(o for o in _as_tuple(x) if o.requires_grad)


def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True, check_sparse_nnz=False):
    r"""Check gradients computed via small finite differences against analytical
    gradients w.r.t. tensors in :attr:`inputs` that are of floating point type
    and with ``requires_grad=True``.

    The check between numerical and analytical gradients uses :func:`~torch.allclose`.

    .. note::
        The default values are designed for :attr:`input` of double precision.
        This check will likely fail if :attr:`input` is of less precision, e.g.,
        ``FloatTensor``.

    .. warning::
       If any checked tensor in :attr:`input` has overlapping memory, i.e.,
       different indices pointing to the same memory address (e.g., from
       :func:`torch.expand`), this check will likely fail because the numerical
       gradients computed by point perturbation at such indices will change
       values at all other indices that share the same memory address.

    Args:
        func (function): a Python function that takes Tensor inputs and returns
            a Tensor or a tuple of Tensors
        inputs (tuple of Tensor or Tensor): inputs to the function
        eps (float, optional): perturbation for finite differences
        atol (float, optional): absolute tolerance
        rtol (float, optional): relative tolerance
        raise_exception (bool, optional): indicating whether to raise an exception if
            the check fails. The exception gives more information about the
            exact nature of the failure. This is helpful when debugging gradchecks.
        check_sparse_nnz (bool, optional): if True, gradcheck allows SparseTensor
            input, and the check is performed only at the nnz positions of any
            SparseTensor input.

    Returns:
        True if all differences satisfy the allclose condition
    """
    def fail_test(msg):
        if raise_exception:
            raise RuntimeError(msg)
        return False

    tupled_inputs = _as_tuple(inputs)
    if any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)) and not check_sparse_nnz:
        fail_test('gradcheck expects all tensor inputs to be dense when '
                  'check_sparse_nnz is set to False.')

    # Make sure that at least one input requires gradient, and warn about
    # sub-double precision.
    any_input_requiring_grad = False
    for inp in tupled_inputs:
        if isinstance(inp, torch.Tensor):
            if inp.requires_grad:
                if inp.dtype != torch.float64:
                    warnings.warn(
                        'At least one of the inputs that requires gradient '
                        'is not of double precision floating point. '
                        'This check will likely fail if the inputs are '
                        'not of double precision floating point.')
                any_input_requiring_grad = True
    if not any_input_requiring_grad:
        raise ValueError(
            'gradcheck expects at least one input tensor to require gradient, '
            'but none of them have requires_grad=True.')

    output = _differentiable_outputs(func(*tupled_inputs))

    for i, o in enumerate(output):
        if not o.requires_grad:
            continue

        def fn(input):
            return _as_tuple(func(*input))[i]

        analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(tupled_inputs, o)
        numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)

        if not correct_grad_sizes:
            return fail_test('Analytical gradient has incorrect size')

        for j, (a, n) in enumerate(zip(analytical, numerical)):
            if a.numel() != 0 or n.numel() != 0:
                if not torch.allclose(a, n, rtol, atol):
                    return fail_test('Jacobian mismatch for output %d with respect to input %d,\n'
                                     'numerical:%s\nanalytical:%s\n' % (i, j, n, a))

        if not reentrant:
            return fail_test('Backward is not reentrant, i.e., running backward with same '
                             'input and grad_output multiple times gives different values, '
                             'although analytical gradient matches numerical gradient')

    # Check that backward is not silently ignoring grad_output: a zero
    # grad_output must produce zero gradients of matching type and size.
    output = _differentiable_outputs(func(*tupled_inputs))
    if any([o.requires_grad for o in output]):
        diff_input_list = list(iter_tensors(tupled_inputs, True))
        if not diff_input_list:
            raise RuntimeError("no Tensors requiring grad found in input")
        grads_input = torch.autograd.grad(output, diff_input_list,
                                          [torch.zeros_like(o) for o in output],
                                          allow_unused=True)
        for gi, i in zip(grads_input, diff_input_list):
            if gi is None:
                continue
            if isinstance(gi, torch.Tensor) and gi.is_sparse:
                if gi.layout != i.layout:
                    return fail_test('grad is sparse tensor, but has incorrect layout')
                if gi.sparse_dim() != i.sparse_dim():
                    return fail_test('grad is sparse tensor, but has incorrect sparse_dim')
                if gi.dense_dim() != i.dense_dim():
                    return fail_test('grad is sparse tensor, but has incorrect dense_dim')
                gi = gi.to_dense()
                i = i.to_dense()
            if not gi.eq(0).all():
                return fail_test('backward not multiplied by grad_output')
            if gi.type() != i.type():
                return fail_test("grad is incorrect type")
            if gi.size() != i.size():
                return fail_test('grad is incorrect size')

    return True
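

# Usage sketch (illustrative; the function and tensors here are made up for
# the example): gradcheck is typically run on a small double-precision problem
# whose inputs have requires_grad=True.
def _example_gradcheck():
    x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
    w = torch.randn(3, 5, dtype=torch.double, requires_grad=True)

    def func(x, w):
        return torch.sigmoid(x @ w)

    # Returns True when numerical and analytical Jacobians agree; raises
    # RuntimeError on mismatch since raise_exception defaults to True.
    return gradcheck(func, (x, w), eps=1e-6, atol=1e-4)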


def gradgradcheck(func, inputs, grad_outputs=None, eps=1e-6, atol=1e-5, rtol=1e-3,
                  gen_non_contig_grad_outputs=False, raise_exception=True):
    r"""Check gradients of gradients computed via small finite differences
    against analytical gradients w.r.t. tensors in :attr:`inputs` and
    :attr:`grad_outputs` that are of floating point type and with
    ``requires_grad=True``.

    This function checks that backpropagating through the gradients computed
    to the given :attr:`grad_outputs` is correct.

    The check between numerical and analytical gradients uses :func:`~torch.allclose`.

    .. note::
        The default values are designed for :attr:`input` and
        :attr:`grad_outputs` of double precision. This check will likely fail if
        they are of less precision, e.g., ``FloatTensor``.

    .. warning::
       If any checked tensor in :attr:`input` and :attr:`grad_outputs` has
       overlapping memory, i.e., different indices pointing to the same memory
       address (e.g., from :func:`torch.expand`), this check will likely fail
       because the numerical gradients computed by point perturbation at such
       indices will change values at all other indices that share the same
       memory address.

    Args:
        func (function): a Python function that takes Tensor inputs and returns
            a Tensor or a tuple of Tensors
        inputs (tuple of Tensor or Tensor): inputs to the function
        grad_outputs (tuple of Tensor or Tensor, optional): The gradients with
            respect to the function's outputs.
        eps (float, optional): perturbation for finite differences
        atol (float, optional): absolute tolerance
        rtol (float, optional): relative tolerance
        gen_non_contig_grad_outputs (bool, optional): if :attr:`grad_outputs` is
            ``None`` and :attr:`gen_non_contig_grad_outputs` is ``True``, the
            randomly generated gradient outputs are made to be noncontiguous
        raise_exception (bool, optional): indicating whether to raise an exception if
            the check fails. The exception gives more information about the
            exact nature of the failure. This is helpful when debugging gradchecks.

    Returns:
        True if all differences satisfy the allclose condition
    """
    tupled_inputs = _as_tuple(inputs)

    if grad_outputs is None:
        # If grad_outputs is not specified, create random Tensors of the same
        # shape, type, and device as the outputs.
        def randn_like(x):
            y = torch.testing.randn_like(x if x.is_floating_point() else x.double())
            if gen_non_contig_grad_outputs:
                y = torch.testing.make_non_contiguous(y)
            return y.requires_grad_()
        outputs = _as_tuple(func(*tupled_inputs))
        tupled_grad_outputs = tuple(randn_like(x) for x in outputs)
    else:
        tupled_grad_outputs = _as_tuple(grad_outputs)

    num_outputs = len(tupled_grad_outputs)

    def new_func(*args):
        # The last num_outputs arguments are the grad_outputs; everything
        # before them is the original inputs.
        input_args = args[:-num_outputs]
        grad_outputs = args[-num_outputs:]
        outputs = _differentiable_outputs(func(*input_args))
        input_args = tuple(x for x in input_args if isinstance(x, torch.Tensor) and x.requires_grad)
        grads = torch.autograd.grad(outputs, input_args, grad_outputs, create_graph=True)
        return grads

    return gradcheck(new_func, tupled_inputs + tupled_grad_outputs, eps, atol, rtol, raise_exception)
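

# Usage sketch (illustrative; the function and tensor here are made up for the
# example): gradgradcheck exercises double backward, so the checked function
# must be twice differentiable. grad_outputs is omitted, so random
# double-precision grad_outputs are generated internally.
def _example_gradgradcheck():
    x = torch.randn(5, dtype=torch.double, requires_grad=True)

    def func(x):
        return (x ** 3).sum()

    return gradgradcheck(func, (x,))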
 

# Referenced elsewhere in this file (defined in torch.testing / torch.autograd):
#   make_non_contiguous(tensor)
#   grad(outputs, inputs, grad_outputs=None, retain_graph=None,
#        create_graph=False, only_inputs=True, allow_unused=False)