// adam_ideep_update (listing fragment): the parameter list and the closing
// braces are not part of this excerpt, and the leading integers on each line
// are source-line numbers carried over from the listing.
// Visible behavior: an OpenMP parallel-for (static schedule) over i in [0, N)
// that writes the new first/second moment buffers (nm, nv) and a scaled step
// into ng. No weight buffer is written in the visible lines.
1 #include <caffe2/ideep/ideep_utils.h> 5 void adam_ideep_update(
19 #pragma omp parallel for schedule(static) 21 for (
auto i = 0; i < N; ++i) {
// NOTE(review): gi is not defined in the visible lines; presumably the i-th
// gradient element -- confirm against the full source.
// nm[i] = m[i] scaled by beta1 plus gi scaled by (1 - beta1); kept in mi.
23 float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
// nv[i] = v[i] scaled by beta2 plus gi*gi scaled by (1 - beta2); kept in vi.
24 float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
// The stored step already includes lr[0] and correction; only element 0 of
// lr is read.
25 ng[i] = lr[0] * correction * mi / (std::sqrt(vi) + eps_hat);
// adam_ideep_compute (listing fragment): the parameter list and the closing
// braces are not part of this excerpt.
// Visible behavior: an OpenMP parallel-for (static schedule) over i in [0, N)
// that writes the new moment buffers (nm, nv) and the new weights (nw).
// No gradient output is written in the visible lines.
29 void adam_ideep_compute(
44 #pragma omp parallel for schedule(static) 46 for (
auto i = 0; i < N; ++i) {
// NOTE(review): gi is not defined in the visible lines; presumably the i-th
// gradient element -- confirm against the full source.
// nm[i] = m[i] scaled by beta1 plus gi scaled by (1 - beta1); kept in mi.
48 float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
// nv[i] = v[i] scaled by beta2 plus gi*gi scaled by (1 - beta2); kept in vi.
49 float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
// The step is ADDED to w[i]. NOTE(review): this implies the sign of the
// update comes from lr[0] (e.g. a negative learning rate) -- confirm with
// the callers.
50 nw[i] = w[i] + lr[0] * correction * mi / (std::sqrt(vi) + eps_hat);
// adam_ideep_compute_output_grad (listing fragment): the parameter list and
// the closing braces are not part of this excerpt.
// Visible behavior: an OpenMP parallel-for (static schedule) over i in [0, N)
// that writes the new moment buffers (nm, nv), an effective gradient (ng) and
// the new weights (nw).
54 void adam_ideep_compute_output_grad(
71 #pragma omp parallel for schedule(static) 73 for (
auto i = 0; i < N; ++i) {
// NOTE(review): gi is not defined in the visible lines; presumably the i-th
// gradient element -- confirm against the full source.
// nm[i] = m[i] scaled by beta1 plus gi scaled by (1 - beta1); kept in mi.
75 float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
// nv[i] = v[i] scaled by beta2 plus gi*gi scaled by (1 - beta2); kept in vi.
76 float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
// Unlike adam_ideep_update, the value stored in ng here does NOT include
// the lr[0] factor; lr[0] is applied only when forming nw below.
77 float ngi = ng[i] = correction * mi / (std::sqrt(vi) + eps_hat);
// The step is added to w[i]; only element 0 of lr is read.
78 nw[i] = w[i] + lr[0] * ngi;
// Class-interior fragment: the class header and the constructor signature are
// not part of this excerpt.
// NOTE(review): the two macros below presumably pull iDEEP type aliases and
// operator helper functions into the class scope -- confirm in ideep_utils.h.
85 USE_IDEEP_DEF_ALIASES();
86 USE_IDEEP_OPERATOR_FUNCTIONS();
// Constructor member initializers: each hyper-parameter is read from the
// operator's arguments as a float, with the default shown when it is absent
// (beta1 = 0.9, beta2 = 0.999, epsilon = 1e-5).
90 beta1_(OperatorBase::GetSingleArgument<float>(
"beta1", 0.9f)),
91 beta2_(OperatorBase::GetSingleArgument<float>(
"beta2", 0.999f)),
92 epsilon_(OperatorBase::GetSingleArgument<float>(
"epsilon", 1e-5f)) {}
// RunOnDevice (listing fragment): validates the inputs, prepares the outputs,
// derives the iteration-dependent correction factor and dispatches to one of
// the element-wise kernels. Several source lines are missing from this
// excerpt (start of the `iter` statement, the body of the three-output branch,
// the kernel call arguments, the return statement and the closing braces).
93 bool RunOnDevice()
override {
// The iteration counter must be supplied as a CPU tensor.
95 CAFFE_ENFORCE(OperatorBase::InputIsTensorType(ITER, CPU));
96 const auto& params = Input(PARAM);
97 const auto& moment_1 = Input(MOMENT_1);
98 const auto& moment_2 = Input(MOMENT_2);
99 const auto& grad = Input(GRAD);
// The learning rate is fetched as a TensorCPU, unlike the inputs above.
101 const auto& lr = OperatorBase::Input<TensorCPU>(LR, CPU);
102 auto* out_params = Output(OUTPUT_PARAM);
103 auto* out_moment1 = Output(OUTPUT_MOMENT_1);
104 auto* out_moment2 = Output(OUTPUT_MOMENT_2);
// Shape checks: lr holds exactly one element, and grad has the same element
// count as params and both moments.
106 CAFFE_ENFORCE(lr.size() == 1);
107 CAFFE_ENFORCE(grad.get_nelems() == params.get_nelems());
108 CAFFE_ENFORCE(grad.get_nelems() == moment_1.get_nelems());
109 CAFFE_ENFORCE(grad.get_nelems() == moment_2.get_nelems());
// Each output is re-initialized with its input's descriptor only when the
// two compare unequal. NOTE(review): presumably this skips the reinit for
// in-place updates; the exact semantics of the tensor != are not visible here.
110 if (params != *out_params)
111 out_params->reinit(params.get_descriptor());
112 if (moment_1 != *out_moment1)
113 out_moment1->reinit(moment_1.get_descriptor());
114 if (moment_2 != *out_moment2)
115 out_moment2->reinit(moment_2.get_descriptor());
// Raw float views of the tensor buffers. The data handles are cast to float*
// regardless of T. NOTE(review): this assumes the tensors hold float data --
// confirm.
116 const auto w =
static_cast<float *
>(params.get_data_handle());
117 const auto g =
static_cast<float *
>(grad.get_data_handle());
118 const auto m =
static_cast<float *
>(moment_1.get_data_handle());
119 const auto v =
static_cast<float *
>(moment_2.get_data_handle());
120 auto nw =
static_cast<float *
>(out_params->get_data_handle());
121 auto nm =
static_cast<float *
>(out_moment1->get_data_handle());
122 auto nv =
static_cast<float *
>(out_moment2->get_data_handle());
123 const auto nlr = lr.template data<T>();
// NOTE(review): the start of this statement is missing from the excerpt;
// given the use of `iter` below it presumably initializes `iter` from
// element 0 of the int64 ITER tensor -- confirm.
125 OperatorBase::Input<TensorCPU>(ITER, CPU).
template data<int64_t>()[0];
// Step index used in the exponents below is iter + 1.
126 const auto t = iter + 1;
// correction = sqrt(1 - beta2^t) / (1 - beta1^t)
127 const auto correction =
128 std::sqrt(
T(1.) - std::pow(beta2_, t)) / (
T(1.) - std::pow(beta1_, t));
// NOTE(review): the body of the three-output branch is missing from the
// excerpt; the lines that follow presumably belong to the alternative
// branch that also produces OUTPUT_GRAD -- confirm.
129 if (OutputSize() == 3) {
145 auto* out_grad = Output(OUTPUT_GRAD);
// Same conditional reinit as for the other outputs, keyed on grad.
146 if (grad != *out_grad)
147 out_grad->reinit(grad.get_descriptor());
148 auto ng =
static_cast<float *
>(out_grad->get_data_handle());
// Kernel that writes ng in addition to nm, nv and nw; its argument list is
// not part of this excerpt.
149 adam_ideep_compute_output_grad(
// Symbolic names used with Input(...) / Output(...) in RunOnDevice: six
// inputs and four outputs. NOTE(review): presumably the macros bind each name
// to its position in the listed order -- confirm in the operator base header.
173 INPUT_TAGS(PARAM, MOMENT_1, MOMENT_2, GRAD, LR, ITER);
174 OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1, OUTPUT_MOMENT_2, OUTPUT_GRAD);
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...