#include <torch/optim/adam.h>

#include <torch/csrc/autograd/variable.h>
#include <torch/nn/module.h>
#include <torch/serialize/archive.h>
#include <torch/utils.h>

#include <ATen/ATen.h>

#include <cmath>
#include <functional>

namespace torch {
namespace optim {

AdamOptions::AdamOptions(double learning_rate)
    : learning_rate_(learning_rate) {}
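
// What step() below computes, in the notation of Kingma & Ba (2015),
// with the bias corrections folded into the step size exactly as the
// code does:
//
//   m_t = beta1 * m_{t-1} + (1 - beta1) * g_t        (exp_average)
//   v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2      (exp_average_sq)
//   step_size = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
//   p_t = p_{t-1} - step_size * m_t / (sqrt(v_t) + eps)
//
// With amsgrad_ set, sqrt(v_t) is replaced by the square root of the
// elementwise running maximum of v_t.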

void Adam::step() {
  // parameters_ holds the parameters this optimizer optimizes;
  // buffer_at accesses the per-parameter state buffer at the given index.
  for (size_t i = 0; i < parameters_.size(); ++i) {
    Tensor p = parameters_.at(i);
    // Skip parameters that received no gradient this iteration.
    if (!p.grad().defined()) {
      continue;
    }

    // L2 penalty, folded directly into the gradient.
    if (options.weight_decay_ > 0) {
      p.grad() = p.grad() + options.weight_decay_ * p;
    }

    auto& exp_average = buffer_at(exp_average_buffers, i);
    auto& exp_average_sq = buffer_at(exp_average_sq_buffers, i);

    buffer_at(step_buffers, i) += 1;

    // Biased first and second moment estimates of the gradient.
    exp_average.mul_(options.beta1_).add_(p.grad(), 1 - options.beta1_);
    exp_average_sq.mul_(options.beta2_)
        .addcmul_(p.grad(), p.grad(), 1 - options.beta2_);

    Tensor denom = exp_average_sq;
    if (options.amsgrad_) {
      // AMSGrad: normalize by the running maximum of the second moment.
      auto& max_exp_average_sq = buffer_at(max_exp_average_sq_buffers, i);
      max_exp_average_sq = torch::max(max_exp_average_sq, exp_average_sq);
      denom = max_exp_average_sq;
    }

    const auto bias_correction1 =
        1 - std::pow(options.beta1_, buffer_at(step_buffers, i));
    const auto bias_correction2 =
        1 - std::pow(options.beta2_, buffer_at(step_buffers, i));
    const auto step_size =
        options.learning_rate_ * std::sqrt(bias_correction2) / bias_correction1;

    // The parameter update itself must not be recorded by autograd.
    NoGradGuard guard;
    // p -= step_size * exp_average / (sqrt(denom) + eps)
    p.addcdiv_(exp_average, denom.sqrt() + options.eps_, -step_size);
  }
}
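
// Usage sketch (illustrative, not part of this file; it assumes the Adam
// constructor declared in torch/optim/adam.h, which takes a parameter
// container plus AdamOptions, and a model/input/target defined elsewhere):
//
//   torch::nn::Linear model(4, 2);
//   torch::optim::Adam optimizer(model->parameters(), AdamOptions(1e-3));
//
//   optimizer.zero_grad();
//   auto loss = torch::mse_loss(model(input), target);
//   loss.backward();
//   optimizer.step();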

// Serializes the optimizer state into the given archive.
void Adam::save(serialize::OutputArchive& archive) const {
  serialize(*this, archive);
}

// Deserializes the optimizer state from the given archive.
void Adam::load(serialize::InputArchive& archive) {
  serialize(*this, archive);
}

} // namespace optim
} // namespace torch
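
// Round-trip sketch (hedged: torch::save and torch::load are the generic
// archive helpers from torch/serialize.h and dispatch to the save()/load()
// overrides above; the file name is illustrative):
//
//   torch::save(optimizer, "adam_state.pt");
//   torch::load(optimizer, "adam_state.pt");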