"""
``torch.autograd`` provides classes and functions implementing automatic
differentiation of arbitrary scalar valued functions. It requires minimal
changes to the existing code - you only need to declare :class:`Tensor` s
for which gradients should be computed with the ``requires_grad=True`` keyword.
"""
import torch
import warnings

from .variable import Variable
from .function import Function, NestedIOFunction
from .gradcheck import gradcheck, gradgradcheck
from .grad_mode import no_grad, enable_grad, set_grad_enabled
from .anomaly_mode import detect_anomaly, set_detect_anomaly
from . import profiler
__all__ = ['Variable', 'Function', 'backward', 'grad_mode']
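# The module docstring above describes the basic ``requires_grad`` workflow; the
# following is a minimal, illustrative sketch (names and values are arbitrary):
#
#     >>> x = torch.ones(2, 2, requires_grad=True)    # leaf tensor that tracks gradients
#     >>> y = (x * x).sum()                           # scalar output built from x
#     >>> y.backward()                                # populates x.grad with dy/dx
#     >>> x.grad                                      # equal to 2 * x, i.e. all twos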
def _make_grads(outputs, grads):
    new_grads = []
    for out, grad in zip(outputs, grads):
        if isinstance(grad, torch.Tensor):
            new_grads.append(grad)
        elif grad is None:
            if out.requires_grad:
                # A gradient can only be created implicitly (as ones) for scalar outputs.
                if out.numel() != 1:
                    raise RuntimeError("grad can be implicitly created only for scalar outputs")
                new_grads.append(torch.ones_like(out))
            else:
                new_grads.append(None)
        else:
            raise TypeError("gradients can be either Tensors or None, but got " +
                            type(grad).__name__)
    return tuple(new_grads)
def backward(tensors, grad_tensors=None, retain_graph=None, create_graph=False, grad_variables=None):
    r"""Computes the sum of gradients of given tensors w.r.t. graph leaves.

    The graph is differentiated using the chain rule. If any of ``tensors``
    are non-scalar (i.e. their data has more than one element) and require
    gradient, then the Jacobian-vector product is computed; in this case
    the function additionally requires specifying ``grad_tensors``. It should
    be a sequence of matching length that contains the "vector" in the
    Jacobian-vector product, usually the gradient of the differentiated
    function w.r.t. the corresponding tensors (``None`` is an acceptable value
    for all tensors that don't need gradient tensors).

    This function accumulates gradients in the leaves - you might need to zero
    them before calling it.

    Arguments:
        tensors (sequence of Tensor): Tensors of which the derivative will be
            computed.
        grad_tensors (sequence of (Tensor or None)): The "vector" in the Jacobian-vector
            product, usually gradients w.r.t. each element of corresponding tensors.
            None values can be specified for scalar Tensors or ones that don't require
            grad. If a None value would be acceptable for all grad_tensors, then this
            argument is optional.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing higher order derivative products to be computed.
            Defaults to ``False``.
    """
    if grad_variables is not None:
        warnings.warn("'grad_variables' is deprecated. Use 'grad_tensors' instead.")
        if grad_tensors is None:
            grad_tensors = grad_variables
        else:
            raise RuntimeError("'grad_tensors' and 'grad_variables' (deprecated) "
                               "arguments both passed to backward(). Please only "
                               "use 'grad_tensors'.")

    tensors = (tensors,) if isinstance(tensors, torch.Tensor) else tuple(tensors)

    if grad_tensors is None:
        grad_tensors = [None] * len(tensors)
    elif isinstance(grad_tensors, torch.Tensor):
        grad_tensors = [grad_tensors]
    else:
        grad_tensors = list(grad_tensors)

    grad_tensors = _make_grads(tensors, grad_tensors)
    if retain_graph is None:
        retain_graph = create_graph

    Variable._execution_engine.run_backward(
        tensors, grad_tensors, retain_graph, create_graph,
        allow_unreachable=True)
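# Illustrative sketch of a ``backward`` call (values are arbitrary). For a
# non-scalar output, ``grad_tensors`` supplies the "vector" of the
# Jacobian-vector product described in the docstring above:
#
#     >>> x = torch.randn(3, requires_grad=True)
#     >>> y = x * 2                                                  # non-scalar output
#     >>> torch.autograd.backward([y], grad_tensors=[torch.ones(3)])
#     >>> x.grad                                                     # tensor([2., 2., 2.])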
def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False,
         only_inputs=True, allow_unused=False):
    r"""Computes and returns the sum of gradients of outputs w.r.t. the inputs.

    ``grad_outputs`` should be a sequence of length matching ``output``
    containing the "vector" in the Jacobian-vector product, usually the pre-computed
    gradients w.r.t. each of the outputs. If an output doesn't require gradient,
    then the gradient can be ``None``.

    If ``only_inputs`` is ``True``, the function will only return a list of gradients
    w.r.t. the specified inputs. If it's ``False``, then gradients w.r.t. all remaining
    leaves will still be computed, and will be accumulated into their ``.grad``
    attribute.

    Arguments:
        outputs (sequence of Tensor): outputs of the differentiated function.
        inputs (sequence of Tensor): Inputs w.r.t. which the gradient will be
            returned (and not accumulated into ``.grad``).
        grad_outputs (sequence of Tensor): The "vector" in the Jacobian-vector product.
            Usually gradients w.r.t. each output. None values can be specified for scalar
            Tensors or ones that don't require grad. If a None value would be acceptable
            for all grad_tensors, then this argument is optional. Default: None.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing higher order derivative products to be computed.
            Default: ``False``.
        allow_unused (bool, optional): If ``False``, specifying inputs that were not
            used when computing outputs (and therefore their grad is always zero)
            is an error. Defaults to ``False``.
    """
    if not only_inputs:
        warnings.warn("only_inputs argument is deprecated and is ignored now "
                      "(defaults to True). To accumulate gradient for other "
                      "parts of the graph, please use torch.autograd.backward.")

    outputs = (outputs,) if isinstance(outputs, torch.Tensor) else tuple(outputs)
    inputs = (inputs,) if isinstance(inputs, torch.Tensor) else tuple(inputs)
    if grad_outputs is None:
        grad_outputs = [None] * len(outputs)
    elif isinstance(grad_outputs, torch.Tensor):
        grad_outputs = [grad_outputs]
    else:
        grad_outputs = list(grad_outputs)

    grad_outputs = _make_grads(outputs, grad_outputs)
    if retain_graph is None:
        retain_graph = create_graph

    return Variable._execution_engine.run_backward(
        outputs, grad_outputs, retain_graph, create_graph,
        inputs, allow_unused)
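# Illustrative sketch of a ``grad`` call (names and shapes are arbitrary). Unlike
# ``backward``, gradients are returned rather than accumulated into ``.grad``;
# ``create_graph=True`` keeps the graph so the result can be differentiated again:
#
#     >>> x = torch.randn(3, requires_grad=True)
#     >>> y = (x ** 2).sum()
#     >>> (dy_dx,) = torch.autograd.grad(y, x, create_graph=True)   # equals 2 * x
#     >>> (d2,) = torch.autograd.grad(dy_dx.sum(), x)               # equals 2 * ones(3)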
def _is_checkpoint_valid():
    return Variable._execution_engine.is_checkpoint_valid()
def variable(*args, **kwargs):
    warnings.warn("torch.autograd.variable(...) is deprecated, use torch.tensor(...) instead")
    return torch.tensor(*args, **kwargs)
if not torch._C._autograd_init():
    raise RuntimeError("autograd initialization failed")