// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// quant_decode_op.h
17 #ifndef QUANT_DECODE_OP_H_
18 #define QUANT_DECODE_OP_H_
19 
#include <algorithm>
#include <functional>
#include <map>
#include <utility>

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/typeid.h"
24 
25 namespace caffe2 {
26 
27 namespace {
28 
29 template <class CodebookT, class CodeT>
30 void Decode(
31  const TensorCPU& codebook,
32  const TensorCPU& codes,
33  /* optional */ const TensorCPU* const decoded_grad,
34  TensorCPU* const output,
35  bool resizeOnly) {
36  CAFFE_ENFORCE(codebook.IsType<CodebookT>());
37 
38  auto* cb_ptr = codebook.data<CodebookT>();
39  int cb_size = codebook.size();
40 
41  CAFFE_ENFORCE(codes.IsType<CodeT>());
42  auto* code_ptr = codes.data<CodeT>();
43 
44  if (decoded_grad == nullptr) {
45  // Forward pass: decode and store codebook values in output.
46  output->ResizeLike(codes);
47  auto* out_ptr = output->mutable_data<CodebookT>();
48  if (resizeOnly) {
49  return;
50  }
51 
52  int sz = output->size();
53  for (int i = 0; i < sz; i++) {
54  DCHECK_LE(*code_ptr, cb_size);
55  *out_ptr++ = cb_ptr[*code_ptr++];
56  }
57  } else {
58  // Backward pass: decode and accumulate gradient w.r.t. codebook values.
59  CAFFE_ENFORCE_EQ(codes.size(), decoded_grad->size());
60  auto* gradient_ptr = decoded_grad->data<CodebookT>();
61  auto* const gradient_end = gradient_ptr + decoded_grad->size();
62 
63  CAFFE_ENFORCE_EQ(cb_size, output->size());
64  auto* out_ptr = output->mutable_data<CodebookT>();
65  while (gradient_ptr < gradient_end) {
66  DCHECK_LE(*code_ptr, cb_size);
67  out_ptr[*code_ptr++] += *gradient_ptr++;
68  }
69  }
70 }
71 
// Expands to one entry of the decoder dispatch map used by DecodeGeneral:
// the key is the pair of TypeMeta ids for (codebook dtype, codes dtype) and
// the value is a lambda forwarding to the matching Decode<> instantiation.
// (Comments must stay outside the macro body: line splicing of the trailing
// backslashes happens before comment removal.)
#define REGISTER_DECODER(codebookType, codesType) \
  { \
    {TypeMeta::Id<codebookType>(), TypeMeta::Id<codesType>()}, \
        [](const TensorCPU& codebook_, \
           const TensorCPU& codes_, \
           const TensorCPU* gradient_, \
           TensorCPU* outDecoded_, \
           bool resizeOnly_) { \
          Decode<codebookType, codesType>( \
              codebook_, codes_, gradient_, outDecoded_, resizeOnly_); \
        } \
  }
84 
85 inline void DecodeGeneral(
86  const TensorCPU& codebook,
87  const TensorCPU& codes,
88  const TensorCPU* gradient,
89  TensorCPU* outDecoded,
90  bool resizeOnly) {
91  const static std::map<
92  std::pair<CaffeTypeId, CaffeTypeId>,
93  std::function<void(
94  const TensorCPU& codebook,
95  const TensorCPU& codes,
96  const TensorCPU* gradient,
97  TensorCPU* outDecoded,
98  bool resizeOnly)>>
99  gDecoderMapper = {REGISTER_DECODER(float, uint8_t),
100  REGISTER_DECODER(float, uint16_t),
101  REGISTER_DECODER(float, int32_t)};
102 
103  gDecoderMapper.at({codebook.meta().id(), codes.meta().id()})(
104  codebook, codes, gradient, outDecoded, resizeOnly);
105 }
106 
107 } // namespace
108 
109 // Decode tensors based on given codebook,
110 // The codebook is generated by model_quantize.py
111 
// Controls how often QuantDecodeOp actually decodes its inputs:
//  RUN_ALWAYS - decode on every invocation.
//  RUN_ONCE   - decode on the first invocation only; subsequent runs pass
//               resizeOnly=true to DecodeGeneral, which resizes the outputs
//               and skips the decoding loop.
enum class QuantDecodeRunTy {
  RUN_ALWAYS,
  RUN_ONCE,
};
116 
117 template <QuantDecodeRunTy QuantDecodeRun>
118 class QuantDecodeOp final : public Operator<CPUContext> {
119  public:
120  USE_OPERATOR_FUNCTIONS(CPUContext);
121  QuantDecodeOp(const OperatorDef& operator_def, Workspace* ws)
122  : Operator<CPUContext>(operator_def, ws) {}
123 
124  ~QuantDecodeOp() {}
125 
126  bool RunOnDevice() override {
127  CAFFE_ENFORCE_GT(InputSize(), 1);
128  // first input is the codebook
129  CAFFE_ENFORCE_EQ(InputSize(), OutputSize() + 1);
130 
131  const auto& codebook = Input(0);
132  CAFFE_ENFORCE(codebook.template IsType<float>(), codebook.meta().name());
133 
134  for (int i = 0; i < OutputSize(); i++) {
135  auto& ci = Input(i + 1);
136  auto* co = Output(i);
137 
138  DecodeGeneral(
139  codebook,
140  ci,
141  nullptr,
142  co,
143  /*resizeOnly=*/QuantDecodeRun == QuantDecodeRunTy::RUN_ONCE &&
144  hasRun_);
145  }
146  hasRun_ = true;
147  return true;
148  }
149 
150  private:
151  bool hasRun_{false};
152 };
153 
154 class QuantDecodeGradientOp final : public Operator<CPUContext> {
155  public:
156  USE_OPERATOR_FUNCTIONS(CPUContext);
157  QuantDecodeGradientOp(const OperatorDef& operator_def, Workspace* ws)
158  : Operator<CPUContext>(operator_def, ws) {}
160 
161  bool RunOnDevice() override {
162  // Inputs: 1 codebook, n tensors of codes, and n corresponding gradients.
163  CAFFE_ENFORCE(InputSize() >= 3 && InputSize() % 2 == 1);
164  const int num_code_tensors = (InputSize() - 1) / 2;
165  CAFFE_ENFORCE_EQ(OutputSize(), 1);
166 
167  const auto& codebook = Input(0);
168  CAFFE_ENFORCE(codebook.template IsType<float>(), codebook.meta().name());
169 
170  auto* gradient = Output(0);
171  gradient->ResizeLike(codebook);
172  auto* gradient_ptr = gradient->mutable_data<float>();
173  std::fill(gradient_ptr, gradient_ptr + gradient->size(), 0);
174 
175  for (int i = 0; i < num_code_tensors; i++) {
176  auto& codes_i = Input(i + 1);
177  auto& output_gradient_i = Input(i + num_code_tensors + 1);
178  DecodeGeneral(codebook, codes_i, &output_gradient_i, gradient, false);
179  }
180  return true;
181  }
182 };
183 
184 } // namespace caffe2
185 #endif // QUANT_DECODE_OP_H_
// CPUContext: the bare minimum of what a Context class in Caffe2 should
// implement (defined in caffe2/core/context.h).
// Workspace: a class that holds all the related objects created during
// runtime — e.g. all blobs (defined in caffe2/core/workspace.h).
// Copyright (c) 2016-present, Facebook, Inc.