Caffe2 - C++ API
A deep learning, cross-platform ML framework
accumulate_grad.cpp
#include <torch/csrc/autograd/functions/accumulate_grad.h>

#include <torch/csrc/autograd/grad_mode.h>
#include <torch/csrc/autograd/variable.h>
#include <torch/csrc/autograd/functions/basic_ops.h>
#include <torch/csrc/autograd/functions/tensor.h>
#include <torch/csrc/autograd/functions/utils.h>

#include <cstdint>
#include <stdexcept>
#include <utility>

using at::Tensor;

namespace torch { namespace autograd {

// AccumulateGrad sets sequence_nr to the max value so it is always called
// as early as possible during the backward pass.
AccumulateGrad::AccumulateGrad(Variable variable_)
    : Function(/*sequence_nr=*/UINT64_MAX),
      variable(std::move(variable_)) {
  add_input_metadata(variable);
}

auto AccumulateGrad::apply(variable_list&& grads) -> variable_list {
  // XXX: this method is not thread-safe!
  check_input_variables("AccumulateGrad", grads, 1, 0);

  if (!grads[0].defined())
    return {};
  if (variable.grad_fn())
    throw std::logic_error("leaf variable has been moved into the graph interior");
  if (!variable.requires_grad())
    return {};

  auto new_grad = std::move(grads[0]);
  for (auto& hook : variable.hooks()) {
    new_grad = (*hook)({new_grad})[0];
  }

  at::Tensor& grad = variable.grad();
  if (!grad.defined()) {
    // Under the following conditions we can avoid clone():
    // first, check that we are in first-order-grad-only mode;
    // then, check that the new grad is not sparse (before checking is_contiguous);
    // then, check that it is contiguous, otherwise the later in-place
    // accumulation may fail;
    // and lastly, check that it is the last reference before we grab it.
    // If the function has post hooks (for example, a DDP allreduce hook),
    // call_function in Engine.cpp will temporarily bump the refcount by one,
    // hence the addition of !post_hooks().empty().
    if (!GradMode::is_enabled()
        && !new_grad.is_sparse()
        && new_grad.is_contiguous()
        && new_grad.use_count() <= 1 + !post_hooks().empty()) {
      variable.grad() = new_grad.detach();
    } else {
      variable.grad() = new_grad.clone();
    }
  } else if (!GradMode::is_enabled()) {
    Variable& grad_variable = as_variable_ref(grad);
    // This case is not strictly necessary, but it makes the first-order-only
    // case slightly more efficient and, more importantly, more predictable for
    // users. Thanks to this case we can avoid changing the grad tensor,
    // something never promised or documented, but relied on by some hacks
    // seen on the internet.
    if (grad_variable.is_sparse() && !new_grad.is_sparse()) {
      grad_variable.set_data(new_grad.data() + grad_variable.data());
    } else {
      grad_variable.data() += new_grad.data();
    }
  } else {
    variable.grad() = grad + new_grad;
  }

  return variable_list();
}

}} // namespace torch::autograd
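
Example (not part of the file above): a minimal sketch using the libtorch C++ frontend that shows the user-visible effect of AccumulateGrad. Gradients flowing into a leaf variable are summed into its .grad() across successive backward() calls rather than overwritten. The tensor name and values are illustrative, and the sketch assumes a standard libtorch installation providing <torch/torch.h>.

#include <torch/torch.h>
#include <iostream>

int main() {
  // x is a leaf variable: it has no grad_fn, so the autograd engine routes
  // its incoming gradients through an AccumulateGrad node.
  auto x = torch::ones({2, 2}, torch::requires_grad());

  (x * 2).sum().backward();  // first pass: x.grad() becomes 2 everywhere
  (x * 3).sum().backward();  // second pass: accumulated to 2 + 3 = 5

  std::cout << x.grad() << std::endl;  // prints a 2x2 tensor filled with 5
  return 0;
}

This accumulate-by-default behavior is why optimizers zero gradients between steps; resetting the grad before the second backward() call would leave x.grad() at 3 instead of 5.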