Caffe2 - C++ API
A deep learning, cross platform ML framework
momentum_sgd_op.cc
1 #include "momentum_sgd_op.h"
2 
3 namespace caffe2 {
4 
5 REGISTER_CPU_OPERATOR(MomentumSGD, MomentumSGDOp<float, CPUContext>);
6 OPERATOR_SCHEMA(MomentumSGD)
7  .NumInputs(3)
8  .NumOutputs(2)
9  .AllowInplace({{0, 0}, {1, 1}})
10  .TensorInferenceFunction(
11  [](const OperatorDef& /* unused */, const vector<TensorShape>& in) {
12  vector<TensorShape> out(2);
13  out[0] = in[0];
14  out[1] = in[1];
15  return out;
16  })
17  .SetDoc(R"DOC(
18 
19 Computes a momentum SGD update for an input gradient and momentum
20 parameters. Concretely, given inputs (grad, m, lr) and parameters
21 (momentum, nesterov), computes:
22 
23  if not nesterov:
24  adjusted_gradient = lr * grad + momentum * m
25  return (adjusted_gradient, adjusted_gradient)
26  else:
27  m_new = momentum * m + lr * grad
28  return ((1 + momentum) * m_new - momentum * m, m_new)
29 
30 Output is (grad, momentum)
31 
32 Note the difference to MomemtumSGDUpdate, which actually performs the
33 parameter update (and is thus faster).
34 )DOC");
35 SHOULD_NOT_DO_GRADIENT(MomentumSGD);
36 
37 REGISTER_CPU_OPERATOR(
38  MomentumSGDUpdate,
39  MomentumSGDUpdateOp<float, CPUContext>);
40 OPERATOR_SCHEMA(MomentumSGDUpdate)
41  .NumInputs(4)
42  .NumOutputs(3)
43  .AllowInplace({{0, 0}, {1, 1}, {3, 2}})
44  .TensorInferenceFunction(
45  [](const OperatorDef& /* unused */, const vector<TensorShape>& in) {
46  vector<TensorShape> out(3);
47  out[0] = in[0];
48  out[1] = in[1];
49  out[2] = in[3];
50  return out;
51  })
52  .SetDoc(R"DOC(
53 
54 Performs a momentum SGD update for an input gradient and momentum
55 parameters. Concretely, given inputs (grad, m, lr, param) and arguments
56 (momentum, nesterov), computes:
57 
58  if not nesterov:
59  adjusted_gradient = lr * grad + momentum * m
60  param = param - adjusted_gradient
61  return (adjusted_gradient, adjusted_gradient, param)
62  else:
63  m_new = momentum * m + lr * grad
64  param = param - ((1 + momentum) * m_new - momentum * m),
65  return ((1 + momentum) * m_new - momentum * m, m_new, param)
66 
67 Output is (grad, momentum, parameter).
68 
69 Note the difference to MomentumSGD, which returns a new gradient
70 but does not perform the parameter update.
71 
72 )DOC");
73 SHOULD_NOT_DO_GRADIENT(MomentumSGDUpdate);
74 
75 REGISTER_CPU_OPERATOR(
76  SparseMomentumSGDUpdate,
77  SparseMomentumSGDUpdateOp<float, CPUContext>);
78 OPERATOR_SCHEMA(SparseMomentumSGDUpdate)
79  .NumInputs(5)
80  .NumOutputs(3)
81  .AllowInplace({{0, 0}})
82  .EnforceInplace({{1, 1}, {3, 2}})
83  .TensorInferenceFunction(
84  [](const OperatorDef& /* unused */, const vector<TensorShape>& in) {
85  vector<TensorShape> out(3);
86  out[0] = in[0];
87  out[1] = in[1];
88  out[2] = in[3];
89  return out;
90  })
91  .SetDoc(R"DOC(
92 
93 Performs a momentum SGD update analogous to MomentumSGDUpdate, but using a
94 GradientSlice and indices into the full param and momentum tables. Both param
95 and momentum should be in-place (corresponding inputs and outputs should be the
96 same blobs).
97 
98 
99 
100 )DOC")
101  .Input(0, "grad", "GradientSlice with gradients for updated indices.")
102  .Input(1, "moment", "Momentum blob, same shape as param.")
103  .Input(2, "lr", "Learning rate.")
104  .Input(3, "param", "Full parameter blob.")
105  .Input(
106  4,
107  "indices",
108  "Indices (in first dimension of param) where updates are performed.")
109  .Output(0, "output_grad", "Adjusted gradient.")
110  .Output(1, "output_moment", "Updated momentum.")
111  .Output(2, "output_param", "Updated parameter")
112  .Arg("momentum", "Momentum hyperparameter.")
113  .Arg("nesterov", "(boolean) Whether to use Nesterov Accelerated Gradient.");
114 SHOULD_NOT_DO_GRADIENT(SparseMomentumSGDUpdate);
115 }
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13