Caffe2 - C++ API
A deep learning, cross platform ML framework
recurrent_op_cudnn.cc
1 #include "caffe2/operators/rnn/recurrent_op_cudnn.h"
2 #include "caffe2/utils/math.h"
3 
4 #include <map>
5 
6 namespace caffe2 {
7 
8 namespace detail {
9 
10 template <typename T>
11 TensorDescriptors<T>::TensorDescriptors(
12  size_t n,
13  const std::vector<int>& dim,
14  const std::vector<int>& stride) {
15  descs_.resize(n);
16  CAFFE_ENFORCE_EQ(dim.size(), stride.size());
17  for (auto i = 0; i < n; ++i) {
18  CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&descs_[i]));
19  CUDNN_ENFORCE(cudnnSetTensorNdDescriptor(
20  descs_[i],
21  cudnnTypeWrapper<T>::type,
22  dim.size(),
23  dim.data(),
24  stride.data()));
25  }
26 }
27 
28 template <typename T>
29 TensorDescriptors<T>::~TensorDescriptors() {
30  for (auto desc : descs_) {
31  cudnnDestroyTensorDescriptor(desc);
32  }
33 }
34 }
35 
// Tears down the cuDNN descriptors owned by this op. Note that cxDesc_,
// hyDesc_ and cyDesc_ are aliases of hxDesc_ (assigned in initialize()),
// so only hxDesc_ is destroyed here; xDesc_/yDesc_ are unique_ptr-managed
// TensorDescriptors and clean up via their own destructor.
// NOTE(review): CUDNN_ENFORCE throws on failure; throwing from a destructor
// is normally avoided — presumably a deliberate fail-loud choice here,
// consistent with the rest of this file. Confirm before changing.
template <typename T>
RecurrentBaseOp<T>::~RecurrentBaseOp() {
  CUDNN_ENFORCE(cudnnDestroyDropoutDescriptor(dropoutDesc_));
  CUDNN_ENFORCE(cudnnDestroyRNNDescriptor(rnnDesc_));
  CUDNN_ENFORCE(cudnnDestroyFilterDescriptor(wDesc_));
  CUDNN_ENFORCE(cudnnDestroyTensorDescriptor(hxDesc_));
}
43 
// Configures every cuDNN descriptor needed to run the RNN for a given input
// shape, and optionally resizes the output blobs to match. Called lazily by
// the Run methods whenever the cached input dims change.
//
// input:         expected layout (seqLength, batchSize, inputDim) — the
//                first three dims are read positionally below.
// dropoutStates: if non-null, resized and used as cuDNN dropout RNG state.
// output / hiddenOutput / cellOutput: if non-null, resized to the shapes
//                implied by the op arguments (not filled with data here).
template <typename T>
void RecurrentBaseOp<T>::initialize(
    const Tensor& input,
    Tensor* dropoutStates,
    Tensor* output,
    Tensor* hiddenOutput,
    Tensor* cellOutput) {
  static_assert(sizeof(T) == 4, ""); // workaround clang bug
  CAFFE_ENFORCE_GE(input.dim(), 3);
  const int seqLength = input.size(0);
  const int batchSize = input.size(1);
  const int inputDim = input.size(2);
  const int hiddenSize = OperatorBase::GetSingleArgument<int>("hidden_size", 0);
  CAFFE_ENFORCE_GT(hiddenSize, 0);
  const auto bidirectional =
      OperatorBase::GetSingleArgument<int>("bidirectional", 0);
  CAFFE_ENFORCE(bidirectional == 0 || bidirectional == 1);
  const auto numDirections = bidirectional == 1 ? 2 : 1;
  // Bidirectional RNNs concatenate forward and backward hidden states,
  // doubling the per-step output width.
  const auto outputDim = hiddenSize * numDirections;
  const auto rnnDirection =
      bidirectional == 1 ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL;
  const auto numLayers = OperatorBase::GetSingleArgument<int>("num_layers", 0);
  CAFFE_ENFORCE_GT(numLayers, 0);
  // Only LSTM and GRU cells are supported by this wrapper.
  const auto& rnnModeStr =
      OperatorBase::GetSingleArgument<string>("rnn_mode", "");
  CAFFE_ENFORCE(rnnModeStr == "lstm" || rnnModeStr == "gru");
  const auto rnnMode = rnnModeStr == "lstm" ? CUDNN_LSTM : CUDNN_GRU;
  const auto& rnnInputStr =
      OperatorBase::GetSingleArgument<string>("input_mode", "");
  CAFFE_ENFORCE(rnnInputStr == "linear" || rnnInputStr == "skip");
  const auto rnnInput =
      rnnInputStr == "linear" ? CUDNN_LINEAR_INPUT : CUDNN_SKIP_INPUT;

  // Dropout setup
  {
    if (dropoutStates) {
      size_t stateSize;
      // "dropout" defaults to 1.0, i.e. dropout disabled; the state buffer
      // and descriptor are only set up when a real dropout ratio was given.
      float dropout_param =
          OperatorBase::GetSingleArgument<float>("dropout", 1.0);
      if (dropout_param < 1.0) {
        CUDNN_ENFORCE(cudnnDropoutGetStatesSize(
            cudnn_wrapper_.inline_cudnn_handle(), &stateSize));
        // Size is in bytes; the blob stores T elements, hence the divide.
        dropoutStates->Resize(std::vector<int>{static_cast<int>(
            stateSize / 4 /* sizeof(T) - workaround clang bug */)});
        CUDNN_ENFORCE(cudnnSetDropoutDescriptor(
            dropoutDesc_,
            cudnn_wrapper_.inline_cudnn_handle(),
            dropout_param,
            dropoutStates->template mutable_data<T>(),
            stateSize,
            OperatorBase::GetSingleArgument<int>("seed", 0)));
      }
    }
  }

  // RNN setup
  {
// cuDNN 7 changed the cudnnSetRNNDescriptor signature (handle + explicit
// algorithm selection); keep both call forms for older builds.
#if CUDNN_VERSION_MIN(7, 0, 0)
    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
        cudnn_wrapper_.inline_cudnn_handle(),
        rnnDesc_,
        hiddenSize,
        numLayers,
        dropoutDesc_,
        rnnInput,
        rnnDirection,
        rnnMode,
        CUDNN_RNN_ALGO_STANDARD, // TODO: verify correctness / efficiency.
        cudnnTypeWrapper<T>::type));
#else
    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
        rnnDesc_,
        hiddenSize,
        numLayers,
        dropoutDesc_,
        rnnInput,
        rnnDirection,
        rnnMode,
        cudnnTypeWrapper<T>::type));
#endif
  }
  // X setup: one descriptor per time step, as the cuDNN RNN API requires.
  {
    xDesc_.reset(new detail::TensorDescriptors<T>(
        seqLength,
        // Third dimension is unused
        {batchSize, inputDim, 1},
        // Fully-packed
        {inputDim, 1, 1}));
  }
  // Y setup
  {
    yDesc_.reset(new detail::TensorDescriptors<T>(
        seqLength,
        // Third dimension is unused
        {batchSize, hiddenSize * numDirections, 1},
        // Fully-packed
        {numDirections * hiddenSize, 1, 1}));

    if (output) {
      output->Resize(std::vector<int>{seqLength, batchSize, outputDim});
    }
  }

  // Hidden/Cell setup
  {
    const std::array<int, 3> dim{
        numLayers * numDirections, batchSize, hiddenSize};
    const std::array<int, 3> stride{batchSize * hiddenSize, hiddenSize, 1};
    CUDNN_ENFORCE(cudnnSetTensorNdDescriptor(
        hxDesc_, cudnnTypeWrapper<T>::type, 3, dim.data(), stride.data()));
    // h/c inputs and outputs all share the same shape, so the remaining
    // descriptors simply alias hxDesc_ (only hxDesc_ is destroyed later).
    cxDesc_ = hxDesc_;
    hyDesc_ = hxDesc_;
    cyDesc_ = hxDesc_;

    if (hiddenOutput) {
      hiddenOutput->Resize(
          std::vector<int>{numLayers * numDirections, batchSize, hiddenSize});
    }

    if (cellOutput) {
      cellOutput->Resize(
          std::vector<int>{numLayers * numDirections, batchSize, hiddenSize});
    }
  }

  // Weights setup: cuDNN dictates the (opaque) weight blob size; we only
  // describe it to cuDNN as a flat 3-d filter of that many elements.
  {
    size_t weightsSize;
    CUDNN_ENFORCE(cudnnGetRNNParamsSize(
        cudnn_wrapper_.inline_cudnn_handle(),
        rnnDesc_,
        xDesc_->descs()[0],
        &weightsSize,
        cudnnTypeWrapper<T>::type));
    const std::array<int, 3> dims{
        static_cast<int>(
            weightsSize / 4 /* sizeof(T) - workaround clang bug */),
        1,
        1};
    CUDNN_ENFORCE(cudnnSetFilterNdDescriptor(
        wDesc_, cudnnTypeWrapper<T>::type, CUDNN_TENSOR_NCHW, 3, dims.data()));
  }

  // RNN workspace size: cached so Run methods can request a scratch
  // workspace of exactly this many bytes.
  {
    CUDNN_ENFORCE(cudnnGetRNNWorkspaceSize(
        cudnn_wrapper_.inline_cudnn_handle(),
        rnnDesc_,
        seqLength,
        xDesc_->descs(),
        &cudnnWsNbytes_));
  }
}
198 
// Forward pass. Lazily (re)initializes all cuDNN descriptors when the input
// shape changes, validates the weight blob size against what cuDNN expects,
// then dispatches to cudnnRNNForwardInference (is_test) or
// cudnnRNNForwardTraining (which additionally fills RNN_SCRATCH, the
// reserve space consumed later by the gradient op).
template <typename T>
bool RecurrentOp<T>::RunOnDevice() {
  const int seqLength = Input(INPUT).dim32(0);
  // Descriptor setup is expensive; only redo it when the input dims differ
  // from the cached ones.
  if (Input(INPUT).sizes() != cachedInputDims_) {
    initialize(
        Input(INPUT),
        Output(DROPOUT_STATES),
        Output(OUTPUT),
        Output(HIDDEN_OUTPUT),
        Output(CELL_OUTPUT));
    cachedInputDims_ = Input(INPUT).sizes().vec();
  }

  // Validation checks: the caller-supplied weight blob must be exactly the
  // size cuDNN dictates for this descriptor configuration.
  size_t weightsSize;
  CUDNN_ENFORCE(cudnnGetRNNParamsSize(
      cudnn_wrapper_.inline_cudnn_handle(),
      rnnDesc_,
      xDesc_->descs()[0],
      &weightsSize,
      cudnnTypeWrapper<T>::type));
  CAFFE_ENFORCE_EQ(Input(WEIGHT).nbytes(), weightsSize);

  // Training reserve size: resize RNN_SCRATCH (in T elements) to hold the
  // reserve space cuDNN needs to carry state from forward to backward.
  CUDNN_ENFORCE(cudnnGetRNNTrainingReserveSize(
      cudnn_wrapper_.inline_cudnn_handle(),
      rnnDesc_,
      seqLength,
      xDesc_->descs(),
      &reserveNbytes_));
  Output(RNN_SCRATCH)
      ->Resize(std::vector<int>{static_cast<int>(
          reserveNbytes_ / 4)}); // sizeof(T) - workaround clang bug
  // Force allocation now so the training path below has a valid pointer.
  Output(RNN_SCRATCH)->template mutable_data<T>();

  // Small helpers to keep the long cuDNN argument lists readable.
  auto InputData = [this](int i) { return this->Input(i).template data<T>(); };
  auto OutputData = [this](int i) {
    return this->Output(i)->template mutable_data<T>();
  };

  if (OperatorBase::GetSingleArgument<int>(OpSchema::Arg_IsTest, 0)) {
    // Inference: no reserve space needed.
    cudnn_wrapper_.with_cudnn_state(0, [&](CuDNNState* state) {
      CUDNN_ENFORCE(cudnnRNNForwardInference(
          state->cudnn_handle(),
          rnnDesc_,
          seqLength,
          xDesc_->descs(),
          InputData(INPUT), //.template data<T>(),
          hxDesc_,
          InputData(HIDDEN_INPUT), //.template data<T>(),
          cxDesc_,
          InputData(CELL_INPUT), //.template data<T>(),
          wDesc_,
          InputData(WEIGHT), //.template data<T>(),
          yDesc_->descs(),
          OutputData(OUTPUT), //->template mutable_data<T>(),
          hyDesc_,
          OutputData(HIDDEN_OUTPUT), //->template mutable_data<T>(),
          cyDesc_,
          OutputData(CELL_OUTPUT), //->template mutable_data<T>(),
          state->workspace().get(cudnnWsNbytes_),
          cudnnWsNbytes_));
    });
  } else {
    // Training: identical call plus the reserve buffer (RNN_SCRATCH),
    // which the backward pass will read.
    cudnn_wrapper_.with_cudnn_state(0, [&](CuDNNState* state) {
      CUDNN_ENFORCE(cudnnRNNForwardTraining(
          state->cudnn_handle(),
          rnnDesc_,
          seqLength,
          xDesc_->descs(),
          InputData(INPUT), //.template data<T>(),
          hxDesc_,
          InputData(HIDDEN_INPUT), //.template data<T>(),
          cxDesc_,
          InputData(CELL_INPUT), //.template data<T>(),
          wDesc_,
          InputData(WEIGHT), //.template data<T>(),
          yDesc_->descs(),
          OutputData(OUTPUT), //->template mutable_data<T>(),
          hyDesc_,
          OutputData(HIDDEN_OUTPUT), //->template mutable_data<T>(),
          cyDesc_,
          OutputData(CELL_OUTPUT), //->template mutable_data<T>(),
          state->workspace().get(cudnnWsNbytes_),
          cudnnWsNbytes_,
          OutputData(RNN_SCRATCH), //->template mutable_data<T>(),
          reserveNbytes_));
    });
  }

  return true;
}
291 
// Backward pass: runs cudnnRNNBackwardData then cudnnRNNBackwardWeights,
// consuming the reserve space (RNN_SCRATCH) produced by the forward
// training pass.
template <typename T>
bool RecurrentGradientOp<T>::RunOnDevice() {
  const int seqLength = Input(INPUT).dim32(0);
  // Re-derive descriptors on input shape change, mirroring the forward op.
  if (Input(INPUT).sizes() != cachedInputDims_) {
    initialize(Input(INPUT), Output(DROPOUT_STATES));
    cachedInputDims_ = Input(INPUT).sizes().vec();
  }
  // The reserve size derived here must match the RNN_SCRATCH blob the
  // forward pass produced, otherwise the descriptors are out of sync.
  CUDNN_ENFORCE(cudnnGetRNNTrainingReserveSize(
      cudnn_wrapper_.inline_cudnn_handle(),
      rnnDesc_,
      seqLength,
      xDesc_->descs(),
      &reserveNbytes_));
  CAFFE_ENFORCE_EQ(reserveNbytes_, Input(RNN_SCRATCH).nbytes());
  Output(GRAD_INPUT)->ResizeLike(Input(INPUT));
  Output(GRAD_HIDDEN_INPUT)->ResizeLike(Input(HIDDEN_INPUT));
  Output(GRAD_CELL_INPUT)->ResizeLike(Input(CELL_INPUT));

  Output(GRAD_WEIGHT)->ResizeLike(Input(WEIGHT));
  // Zero the weight gradient up front — presumably because
  // cudnnRNNBackwardWeights accumulates into dw rather than overwriting it;
  // confirm against the cuDNN documentation before removing.
  math::Set<T, CUDAContext>(
      Output(GRAD_WEIGHT)->numel(),
      0.0,
      Output(GRAD_WEIGHT)->template mutable_data<T>(),
      &context_);

// cuDNN 6 changed the reserve-space parameter from const to mutable.
#if CUDNN_VERSION_MIN(6,0,0)
  auto * reserve = Output(RNN_SCRATCH_OUT)->template mutable_data<T>();
#else
  const auto * reserve = Output(RNN_SCRATCH_OUT)->template data<T>();
#endif
  // Small helpers to keep the long cuDNN argument lists readable.
  auto InputData = [this](int i) { return this->Input(i).template data<T>(); };
  auto OutputData = [this](int i) {
    return this->Output(i)->template mutable_data<T>();
  };

  cudnn_wrapper_.with_cudnn_state(0, [&](CuDNNState* state) {
    CUDNN_ENFORCE(cudnnRNNBackwardData(
        state->cudnn_handle(),
        rnnDesc_,
        seqLength,
        yDesc_->descs(),
        InputData(OUTPUT), // Input(OUTPUT).template data<T>(),
        yDesc_->descs(),
        InputData(GRAD_OUTPUT), // Input(GRAD_OUTPUT).template data<T>(),
        hyDesc_,
        // Note: like CNTK, ignore these gradient inputs. t16675365 to
        // reconsider.
        nullptr,
        cyDesc_,
        nullptr,
        wDesc_,
        InputData(WEIGHT), // Input(WEIGHT).template data<T>(),
        hxDesc_,
        InputData(HIDDEN_INPUT), // Input(HIDDEN_INPUT).template data<T>(),
        cxDesc_,
        InputData(CELL_INPUT),
        xDesc_->descs(),
        OutputData(GRAD_INPUT),
        hxDesc_,
        OutputData(GRAD_HIDDEN_INPUT),
        cxDesc_,
        OutputData(GRAD_CELL_INPUT),
        state->workspace().get(cudnnWsNbytes_),
        cudnnWsNbytes_,
        reserve,
        reserveNbytes_));
    // Weight gradients are computed second, reusing the same workspace and
    // reserve space.
    CUDNN_ENFORCE(cudnnRNNBackwardWeights(
        state->cudnn_handle(),
        rnnDesc_,
        seqLength,
        xDesc_->descs(),
        InputData(INPUT), // Input(INPUT).template data<T>(),
        hxDesc_,
        InputData(HIDDEN_INPUT), // Input(HIDDEN_INPUT).template data<T>(),
        yDesc_->descs(),
        InputData(OUTPUT), // Input(OUTPUT).template data<T>(),
        state->workspace().get(cudnnWsNbytes_),
        cudnnWsNbytes_,
        wDesc_,
        OutputData(
            GRAD_WEIGHT), // Output(GRAD_WEIGHT)->template mutable_data<T>(),
        reserve,
        reserveNbytes_));
  });

  return true;
}
379 
380 template <typename T, RecurrentParamOpMode mode>
381 bool RecurrentParamAccessOp<T, mode>::RunOnDevice() {
382  initialize(Input(0));
383 
384  if (mode == SET_PARAM) {
385  size_t paramsSize;
386  CUDNN_ENFORCE(cudnnGetRNNParamsSize(
387  cudnn_wrapper_.inline_cudnn_handle(),
388  rnnDesc_,
389  xDesc_->descs()[0],
390  &paramsSize,
391  cudnnTypeWrapper<T>::type));
392 
393  CAFFE_ENFORCE_EQ(
394  paramsSize / 4, Input(1).numel(), "Incorrect weight initialization");
395  }
396 
397  int layer = OperatorBase::GetSingleArgument<int>("layer", 0);
398  std::string param_type =
399  OperatorBase::GetSingleArgument<string>("param_type", "");
400  std::string input_type =
401  OperatorBase::GetSingleArgument<string>("input_type", "");
402 
403  // Mapping to CUDNN constants
404  std::map<string, int> weight_constants = {{"input_gate_w", 0},
405  {"forget_gate_w", 1},
406  {"cell_w", 2},
407  {"output_gate_w", 3}};
408  std::map<string, int> bias_constants = {{"input_gate_b", 0},
409  {"forget_gate_b", 1},
410  {"cell_b", 2},
411  {"output_gate_b", 3}};
412  if (bias_constants.find(param_type) != bias_constants.end()) {
413  int param_id = bias_constants[param_type] + 4 * (input_type == "recurrent");
414 
415  cudnnFilterDescriptor_t biasDesc;
416  CUDNN_ENFORCE(cudnnCreateFilterDescriptor(&biasDesc));
417  void* bias;
418 
419  CUDNN_ENFORCE(cudnnGetRNNLinLayerBiasParams(
420  cudnn_wrapper_.inline_cudnn_handle(),
421  rnnDesc_,
422  layer,
423  xDesc_->descs()[0],
424  wDesc_,
425  Input(1).template data<T>(),
426  param_id, // Forget gate bias for recurrent input
427  biasDesc,
428  &bias));
429  int numBiasDims;
430  std::vector<int> biasDims(3);
431  cudnnDataType_t dt;
432  cudnnTensorFormat_t tf;
433  // For some reason, the CuDNN Bias tensor is 3 dimensional
434  CUDNN_ENFORCE(cudnnGetFilterNdDescriptor(
435  biasDesc, 3, &dt, &tf, &numBiasDims, biasDims.data()));
436  CAFFE_ENFORCE_EQ(numBiasDims, 3);
437 
438  if (mode == SET_PARAM) {
439  CAFFE_ENFORCE_EQ(
440  biasDims[0] * biasDims[1] * biasDims[2], Input(2).numel());
441  this->context_.template CopySameDevice<T>(
442  biasDims[0] * biasDims[1] * biasDims[2],
443  Input(2).template data<T>(),
444  static_cast<T*>(bias));
445  } else {
446  Output(0)->Resize(biasDims);
447  this->context_.template CopySameDevice<T>(
448  biasDims[0] * biasDims[1] * biasDims[2],
449  static_cast<T*>(bias),
450  Output(0)->template mutable_data<T>());
451  }
452  } else if (weight_constants.find(param_type) != weight_constants.end()) {
453  int param_id =
454  weight_constants[param_type] + 4 * (input_type == "recurrent");
455  cudnnFilterDescriptor_t matrixParamDesc;
456  CUDNN_ENFORCE(cudnnCreateFilterDescriptor(&matrixParamDesc));
457  void* pmatrix;
458  CUDNN_ENFORCE(cudnnGetRNNLinLayerMatrixParams(
459  cudnn_wrapper_.inline_cudnn_handle(),
460  rnnDesc_,
461  layer,
462  xDesc_->descs()[0],
463  wDesc_,
464  Input(1).template data<T>(),
465  param_id, // Forget gate bias for recurrent input
466  matrixParamDesc,
467  &pmatrix));
468  int numDims;
469  std::vector<int> matDims(3);
470  cudnnDataType_t dt;
471  cudnnTensorFormat_t tf;
472 
473  CUDNN_ENFORCE(cudnnGetFilterNdDescriptor(
474  matrixParamDesc, 3, &dt, &tf, &numDims, matDims.data()));
475  CAFFE_ENFORCE_EQ(numDims, 3);
476  if (mode == SET_PARAM) {
477  CAFFE_ENFORCE_EQ(matDims[0] * matDims[1] * matDims[2], Input(2).numel());
478  this->context_.template CopySameDevice<T>(
479  matDims[0] * matDims[1] * matDims[2],
480  Input(2).template data<T>(),
481  static_cast<T*>(pmatrix));
482  } else {
483  Output(0)->Resize(matDims);
484  this->context_.template CopySameDevice<T>(
485  matDims[0] * matDims[1] * matDims[2],
486  static_cast<T*>(pmatrix),
487  Output(0)->template mutable_data<T>());
488  }
489  } else {
490  CAFFE_ENFORCE(false, "Unknown param type:", param_type);
491  }
492 
493  return true;
494 }
495 
// Operator registrations and schemas for the cuDNN-backed RNN ops.
REGISTER_CUDNN_OPERATOR(Recurrent, RecurrentOp<float>);
OPERATOR_SCHEMA(Recurrent).NumInputs(4).NumOutputs(5).SetDoc(R"DOC(

Recurrent wraps the CuDNN R5 RNN implementation. See the CuDNN R5
documentation for more information.

In general, the implementation takes an input (TxNxD) tensor, the
hidden state input (NxD), the cell input (NxD), and a weight tensor
(effectively an opaque blob, where the size and layout is dictated by
CuDNN).

The outputs are the output (again, TxNxD), the final hidden/cell
states (NxD). These can be reset (at sequence boundaries across
minibatches) by multiplying by zero.

The CuDNN arguments (hidden_size, bidirectional, num_layers, rnn_mode,
input_mode) are passed directly through to CuDNN.

)DOC");
REGISTER_CUDNN_OPERATOR(RecurrentGradient, RecurrentGradientOp<float>);
// Inputs 4/5 (RNN_SCRATCH / OUTPUT in the gradient maker) may share storage.
OPERATOR_SCHEMA(RecurrentGradient)
    .NumInputs(7)
    .NumOutputs(6)
    .AllowInplace({{4, 5}});

REGISTER_CUDNN_OPERATOR(
    RecurrentParamSet,
    RecurrentParamAccessOp<float, SET_PARAM>);
// Output 0 is the same blob as input 1: the op writes into the packed
// weight blob in place.
OPERATOR_SCHEMA(RecurrentParamSet)
    .NumInputs(3)
    .NumOutputs(1)
    .EnforceInplace({{1, 0}})
    .SetDoc("Set individual parameters of a recurrent net.")
    .Arg("param_type", R"DOC(Type of param to be set:
  "input_gate_w", "forget_gate_w", "cell_w", "output_gate_w"
  "input_gate_b", "forget_gate_b", "cell_b", "output_gate_b"
  )DOC")
    .Arg("input_type", "'recurrent' or 'input'")
    .Arg("layer", "layer index (starting from 0)")
    .Input(0, "input", R"DOC(Input blob. Needed for inferring the shapes.
  A dummy tensor matching the input shape is ok.)DOC")
    .Input(1, "all_params", "Blob holding all the parameters")
    .Input(2, "param", "Values for the specified parameter")
    .Output(
        0,
        "all_params",
        "Blob holding all the parameters (same as input(1))");

REGISTER_CUDNN_OPERATOR(
    RecurrentParamGet,
    RecurrentParamAccessOp<float, GET_PARAM>);
OPERATOR_SCHEMA(RecurrentParamGet)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc("Retrieve individual parameters of a recurrent net op.")
    .Arg("param_type", R"DOC(Type of param to be set:
  "input_gate_w", "forget_gate_w", "cell_w", "output_gate_w"
  "input_gate_b", "forget_gate_b", "cell_b", "output_gate_b"
  )DOC")
    .Arg("input_type", "'recurrent' or 'input'")
    .Arg("layer", "layer index (starting from 0)")
    .Input(0, "input", R"DOC(Input blob. Needed for inferring the shapes.
  A dummy tensor matching the input shape is ok.)DOC")
    .Input(1, "all_params", "Blob holding all the parameters")
    .Output(0, "param", "Blob holding the requested values");
561 
563  using GradientMakerBase::GradientMakerBase;
564  vector<OperatorDef> GetGradientDefs() override {
565  return SingleGradientDef(
566  "RecurrentGradient",
567  "",
568  vector<string>{I(0), // INPUT
569  I(1), // HIDDEN_INPUT
570  I(2), // CELL_INPUT
571  I(3), // WEIGHT
572  O(3), // RNN_SCRATCH
573  O(0), // OUTPUT
574  GO(0)}, // GRAD_OUTPUT
575  // TODO: not currently using these gradients, investigate t16675365
576  // GO(1), // GRAD_HIDDEN_OUTPUT
577  // GO(2)}, // GRAD_CELL_OUTPUT
578  vector<string>{
579  GI(0), // GRAD_INPUT
580  GI(1), // GRAD_HIDDEN_INPUT
581  GI(2), // GRAD_CELL_INPUT
582  GI(3), // GRAD_WEIGHT
583  O(4), // DROPOUT_STATES
584  O(3) // RNN_SCRATCH
585  });
586  }
587 };
588 REGISTER_GRADIENT(Recurrent, GetRecurrentGradient);
589 }
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13