Caffe2 - C++ API
A deep learning, cross platform ML framework
adadelta_op.cc
1 #include "caffe2/sgd/adadelta_op.h"
2 
3 namespace caffe2 {
4 
// CPU registration and schema for the dense Adadelta operator.
// REGISTER_CPU_OPERATOR pairs the operator name `Adadelta` with
// AdadeltaOp<CPUContext>; that class is not defined in this file
// (presumably it comes from the included adadelta_op.h -- confirm).
5 REGISTER_CPU_OPERATOR(Adadelta, AdadeltaOp<CPUContext>);
// Schema: 5 inputs (param, moment, moment_delta, grad, lr) and 3 outputs
// (output_param, output_moment, output_moment_delta), as named by the
// Input/Output entries below.
6 OPERATOR_SCHEMA(Adadelta)
7  .NumInputs(5)
8  .NumOutputs(3)
// Each pair lists an input index with the same-numbered output index:
// param/output_param, moment/output_moment, moment_delta/output_moment_delta.
9  .AllowInplace({{0, 0}, {1, 1}, {2, 2}})
10  .SetDoc(R"DOC(
11 
12 Computes the AdaDelta update (https://arxiv.org/abs/1212.5701) for an input
13 gradient and accumulated history of squared gradients. Concretely, given
14 inputs (param, moment, moment_delta, grad, learning_rate), computes:
15 
16  new_moment = moment * decay + square(grad) * (1 - decay)
17  new_grad = sqrt(moment_delta + epsilon) / sqrt(new_moment + epsilon) * grad
18  new_param = param + learning_rate * new_grad
19  new_moment_delta = moment_delta * decay + square(new_grad) * (1 - decay)
20 
21 and returns (new_param, new_moment, new_moment_delta).
22 
23 )DOC")
24  .Input(0, "param", "Parameters to be updated")
25  .Input(1, "moment", "Average of squared gradients")
26  .Input(2, "moment_delta", "Average of squared parameter updates")
27  .Input(3, "grad", "Gradient computed")
28  .Input(4, "lr", "Learning rate")
29  .Output(0, "output_param", "Updated parameters")
30  .Output(1, "output_moment", "Updated average squared gradient")
31  .Output(
32  2,
33  "output_moment_delta",
34  "Updated average of squared parameter updates")
// The defaults quoted in the Arg descriptions are description text only; no
// default value for epsilon or decay is set anywhere in this file.
35  .Arg("epsilon", "Default 1e-5")
36  .Arg(
37  "decay",
38  "Default 0.95, the squared gradient sum is decayed by this factor.");
39 
// CPU registration and schema for the sparse (index-based) Adadelta operator.
// REGISTER_CPU_OPERATOR pairs the operator name `SparseAdadelta` with
// SparseAdadeltaOp<CPUContext>; that class is not defined in this file
// (presumably it comes from the included adadelta_op.h -- confirm).
40 REGISTER_CPU_OPERATOR(SparseAdadelta, SparseAdadeltaOp<CPUContext>);
// Schema: 6 inputs and 3 outputs. Compared with the dense Adadelta schema,
// an extra "indices" input sits at position 3, which moves grad to 4 and lr
// to 5.
41 OPERATOR_SCHEMA(SparseAdadelta)
42  .NumInputs(6)
43  .NumOutputs(3)
// NOTE(review): unlike the dense schema, which lists explicit AllowInplace
// pairs, this one calls EnforceOneToOneInplace(). Presumably that requires
// output i to be written in place over input i -- confirm against
// OperatorSchema.
44  .EnforceOneToOneInplace()
45  .SetDoc(R"DOC(
46 
47 Given inputs (param, moment, moment_delta, indices, grad, lr),
48 runs the dense AdaDelta update on (param, grad, moment[indices],
49  moment_delta[indices], lr), and returns (new_param, new_moment,
50  new_moment_delta) as in the dense case.
51 
52 )DOC")
53  .Input(0, "param", "Parameters to be updated")
54  .Input(1, "moment", "Average of squared gradients")
55  .Input(2, "moment_delta", "Average of squared parameter updates")
56  .Input(3, "indices", "Sparse indices")
57  .Input(4, "grad", "Gradient computed")
58  .Input(5, "lr", "learning rate")
59  .Output(0, "output_param", "Updated parameters")
60  .Output(1, "output_moment", "Updated average squared gradient")
61  .Output(
62  2,
63  "output_moment_delta",
64  "Updated average of squared parameter updates")
// The defaults quoted in the Arg descriptions are description text only; no
// default value for epsilon or decay is set anywhere in this file.
65  .Arg("epsilon", "Default 1e-5")
66  .Arg(
67  "decay",
68  "Default 0.95, the squared gradient sum is decayed by this factor.");
69 
// Applies SHOULD_NOT_DO_GRADIENT to both operators registered above.
// NOTE(review): going by the macro name, this marks them as operators for
// which no gradient should be generated; the macro is defined outside this
// file -- confirm its exact effect there.
70 SHOULD_NOT_DO_GRADIENT(Adadelta);
71 SHOULD_NOT_DO_GRADIENT(SparseAdadelta);
72 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13