Caffe2 - C++ API
A deep learning, cross-platform ML framework
gather_fused_8bit_rowwise_op.h
1 #pragma once
2 
3 #include "caffe2/core/operator.h"
4 #include "caffe2/utils/eigen_utils.h"
5 #include "caffe2/utils/math.h"
6 
7 namespace caffe2 {
8 
9 template <class Context>
10 class GatherFused8BitRowwiseOp : public Operator<Context> {
11  public:
12  USE_OPERATOR_CONTEXT_FUNCTIONS;
13  USE_SIMPLE_CTOR_DTOR(GatherFused8BitRowwiseOp);
14 
15  bool RunOnDevice() override {
17  this, this->template Input<Tensor>(INDICES, CPU));
18  }
19 
20  template <typename Index>
21  bool DoRunWithType() {
22  const auto& data = Input(DATA);
23  const auto& indices = Input(INDICES);
24 
25  CAFFE_ENFORCE_EQ(data.dim(), 2, "DATA must be a matrix");
26  CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES must be a vector");
27  CAFFE_ENFORCE_GT(data.size(1), 8, "DATA must have more than 8 columns");
28  // Subtract 8 from the #columns of data for the 4 bytes for scale and 4
29  // bytes for bias that we use in the fused representation (per row).
30  const std::vector<int64_t> shape = {indices.size(0), data.size(1) - 8};
31  auto* output = Output(0, shape, at::dtype<float>());
32 
33  int block_size = shape[1];
34  auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
35  int N = indices.numel();
36 
37  const uint8_t* src_base = data.template data<uint8_t>();
38  const Index* idxs = indices.template data<Index>();
39  auto out = output->template mutable_data<float>();
40 
41  for (int i = 0; i < N; ++i) {
42  auto idx = idxs[i];
43  CAFFE_ENFORCE(
44  0 <= idx && idx < data.size(0),
45  "INDICES element is out of DATA bounds, id=",
46  idx,
47  " data_dim=",
48  data.size(0));
49  const uint8_t* src = src_base + idx * block_bytesize;
50  ConstEigenVectorArrayMap<uint8_t> input_row_values(src, shape[1]);
51  ConstEigenVectorArrayMap<float> input_row_scale_bias(
52  reinterpret_cast<const float*>(src + shape[1]), 2);
53 
54  EigenVectorArrayMap<float> output_row(out + i * shape[1], shape[1]);
55 
56  output_row = input_row_values.cast<float>() * input_row_scale_bias(0) +
57  input_row_scale_bias(1);
58  }
59  return true;
60  }
61 
62  INPUT_TAGS(DATA, INDICES);
63 };
64 
65 } // namespace caffe2
const Tensor & Input(int idx, DeviceType type=Context::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator. ...
Definition: operator.h:702
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13