#include <torch/optim/adam.h>

#include <torch/csrc/autograd/variable.h>
#include <torch/nn/module.h>
#include <torch/serialize/archive.h>
#include <torch/utils.h>

#include <ATen/ATen.h>

#include <cmath>

namespace torch {
namespace optim {

AdamOptions::AdamOptions(double learning_rate)
    : learning_rate_(learning_rate) {}

void Adam::step() {
  for (size_t i = 0; i < parameters_.size(); ++i) {
    Tensor p = parameters_.at(i);
    if (!p.grad().defined()) {
      continue;
    }
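
    // Classic L2 regularization, folded directly into the gradient rather
    // than decoupled from the adaptive step (Adam-with-L2, not AdamW).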
    if (options.weight_decay_ > 0) {
      p.grad() = p.grad() + options.weight_decay_ * p;
    }
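
    // Per-parameter state: exponential moving averages of the gradient
    // (first moment) and of its elementwise square (second moment). The
    // step counter feeds the bias corrections below and must be bumped
    // before they are computed, since 1 - beta^0 would be zero.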
    auto& exp_average = buffer_at(exp_average_buffers, i);
    auto& exp_average_sq = buffer_at(exp_average_sq_buffers, i);

    buffer_at(step_buffers, i) += 1;

    exp_average.mul_(options.beta1_).add_(p.grad(), 1 - options.beta1_);
    exp_average_sq.mul_(options.beta2_)
        .addcmul_(p.grad(), p.grad(), 1 - options.beta2_);
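
    // AMSGrad (Reddi et al., 2018) divides by the running maximum of all
    // second-moment estimates seen so far instead of the current one, so
    // the effective per-coordinate step size can never grow over time.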
    Tensor denom = exp_average_sq;
    if (options.amsgrad_) {
      auto& max_exp_average_sq = buffer_at(max_exp_average_sq_buffers, i);
      max_exp_average_sq = torch::max(max_exp_average_sq, exp_average_sq);
      denom = max_exp_average_sq;
    }
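
    // Both moment buffers start at zero and are therefore biased towards
    // zero early in training; dividing by 1 - beta^t corrects for this
    // (Kingma & Ba, 2015).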
    const auto bias_correction1 =
        1 - std::pow(options.beta1_, buffer_at(step_buffers, i));
    const auto bias_correction2 =
        1 - std::pow(options.beta2_, buffer_at(step_buffers, i));
    const auto step_size =
        options.learning_rate_ * std::sqrt(bias_correction2) / bias_correction1;
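
    // In-place update: p <- p - step_size * exp_average / (sqrt(denom) + eps).
    // The guard keeps the update itself out of the autograd graph.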
    NoGradGuard guard;
    p.addcdiv_(exp_average, denom.sqrt() + options.eps_, -step_size);
  }
}

void Adam::save(serialize::OutputArchive& archive) const {
  serialize(*this, archive);
}

void Adam::load(serialize::InputArchive& archive) {
  serialize(*this, archive);
}

} // namespace optim
} // namespace torch
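
The save and load overrides delegate to a shared serialize helper that writes and reads the step counters and moment buffers. A minimal round-trip sketch, assuming an existing optimizer instance and using the archive types included above (the file name is a placeholder):

  torch::serialize::OutputArchive output;
  optimizer.save(output);           // writes step counts and moment buffers
  output.save_to("adam_state.pt");  // placeholder path

  torch::serialize::InputArchive input;
  input.load_from("adam_state.pt");
  optimizer.load(input);            // restores the state saved above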
void load(serialize::InputArchive& archive) override
Deserializes the optimizer state from the given archive.

std::vector<Tensor> parameters_
The parameters this optimizer optimizes.

void save(serialize::OutputArchive& archive) const override
Serializes the optimizer state into the given archive.

T& buffer_at(std::vector<T>& buffers, size_t index)
Accesses a buffer at the given index.
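
For context, here is a minimal training-loop sketch showing where step() fits in; the model, data, and hyperparameters are illustrative placeholders rather than anything from this file:

  #include <torch/torch.h>

  int main() {
    // Placeholder model; any module's parameters() works the same way.
    torch::nn::Linear model(4, 1);

    torch::optim::Adam optimizer(
        model->parameters(), torch::optim::AdamOptions(/*learning_rate=*/1e-3));

    for (int epoch = 0; epoch < 100; ++epoch) {
      auto input = torch::randn({8, 4});
      auto target = torch::randn({8, 1});

      optimizer.zero_grad();  // clear stale gradients
      auto loss = torch::mse_loss(model->forward(input), target);
      loss.backward();        // populate p.grad() for every parameter
      optimizer.step();       // runs the Adam update shown above
    }
  }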