Caffe2 - C++ API
A deep learning, cross-platform ML framework
tt_linear_op.h
1 #ifndef CAFFE2_OPERATORS_TT_LINEAR_OP_H_
2 #define CAFFE2_OPERATORS_TT_LINEAR_OP_H_
3 
4 #ifdef CAFFE2_USE_MKL
5 #include <mkl.h>
6 #endif // CAFFE2_USE_MKL
7 
8 #include "Eigen/Core"
9 #include "Eigen/Dense"
10 #include "caffe2/core/context.h"
11 #include "caffe2/core/operator.h"
12 #include "caffe2/utils/eigen_utils.h"
13 #include "caffe2/utils/math.h"
14 
15 namespace caffe2 {
16 
17 template <typename T, class Context, class Engine = DefaultEngine>
18 class TTLinearOp final : public Operator<Context> {
19  public:
20  USE_OPERATOR_CONTEXT_FUNCTIONS;
21  template <class... Args>
22  explicit TTLinearOp(Args&&... args)
23  : Operator<Context>(std::forward<Args>(args)...),
24  inp_sizes_(this->template GetRepeatedArgument<int>("inp_sizes")),
25  out_sizes_(this->template GetRepeatedArgument<int>("out_sizes")),
26  tt_ranks_(this->template GetRepeatedArgument<int>("tt_ranks")),
27  Y_temp_(unique_ptr<Blob>(new Blob())) {}
28  ~TTLinearOp() {}
29 
30  bool RunOnDevice() override {
31  const auto& X = Input(0); // Input array
32  const auto& b = Input(1); // Bias array
33  const auto& cores = Input(2); // 1D array containing the TT-cores
34 
35  CAFFE_ENFORCE(X.dim() > 1, "Number of dimensions in X: ", X.dim());
36  CAFFE_ENFORCE(b.dim() == 1, "Number of dimensions in b: ", b.dim());
37  CAFFE_ENFORCE(
38  inp_sizes_.size() == out_sizes_.size(),
39  "inp_sizes has size: ",
40  inp_sizes_.size(),
41  ", out_sizes has size: ",
42  out_sizes_.size());
43  CAFFE_ENFORCE(
44  cores.dim() == 1, "Number of dimensions in cores: ", cores.dim());
45  // batch size
46  const int batch_size = X.dim() > 1 ? X.dim32(0) : 1;
47 
48  // dimension d of tensors
49  const int d = inp_sizes_.size();
50 
51  // Keep track of index of current core in multiplication
52  int cores_idx = 0;
53 
54  // Temporary buffer to facilitate multiplication of TT-cores with input
55  auto Y_buf = BlobGetMutableTensor(Y_temp_.get(), Context::GetDeviceType());
56  Y_buf->ResizeLike(X);
57  Y_buf->CopyFrom(X);
58  Tensor* Y;
59 
60  // The overall forward pass involves multiplication with each core, where
61  // each core has sizes dictated by inp_sizes_ and out_sizes_. Each core thus
62  // has size inp_sizes_[i] * tt_ranks_[i] * tt_ranks_[i + 1] * out_sizes_[i].
63  for (int i = (d - 1); i >= 0; --i) {
64  int curr_rows = inp_sizes_[i] * tt_ranks_[i + 1];
65  int curr_cols = tt_ranks_[i] * out_sizes_[i];
66 
67  // TODO Replace by Reshape(), once wrappers are written
68  Y_buf->Resize(Y_buf->numel() / curr_rows, curr_rows);
69  Y = Output(
70  0, {Y_buf->numel() / curr_rows, curr_cols}, at::dtype<float>());
71 
72  // Defensive checks
73  CAFFE_ENFORCE(Y_buf->numel() % curr_rows == 0, Y_buf->numel(), curr_rows);
74  CAFFE_ENFORCE(
75  cores_idx + curr_rows * curr_cols <= cores.numel(),
76  cores_idx + curr_rows * curr_cols,
77  cores.numel());
78 
79  // Multiply ith core with the intermediate output
80  math::Gemm<float, Context, Engine>(
81  CblasNoTrans,
82  CblasNoTrans,
83  Y_buf->numel() / curr_rows,
84  curr_cols,
85  curr_rows,
86  1,
87  Y_buf->template data<float>(),
88  cores.template data<float>() + cores_idx,
89  0,
90  Y->template mutable_data<float>(),
91  &context_);
92 
93  CAFFE_ENFORCE(Y->numel() % out_sizes_[i] == 0, Y->numel(), out_sizes_[i]);
94 
95  // TODO Add GPU support by writing a generic wrapper.
96  auto Y_mat = EigenMatrixMap<float>(
97  Y->template mutable_data<float>(),
98  Y->numel() / out_sizes_[i],
99  out_sizes_[i]);
100  Y_mat = ConstEigenMatrixMap<float>(
101  Y->template data<float>(),
102  out_sizes_[i],
103  Y->numel() / out_sizes_[i])
104  .transpose()
105  .eval();
106 
107  // Resize operation
108  Y_buf->Resize(Y->dim32(0), Y->dim32(1));
109  context_.template CopyFromCPU<float>(
110  Y->numel(),
111  Y->template data<float>(),
112  Y_buf->template mutable_data<float>());
113 
114  cores_idx += curr_rows * curr_cols;
115  }
116 
117  // TODO Add GPU support by writing a generic wrapper.
118  auto Y_mat = EigenMatrixMap<float>(
119  Y->template mutable_data<float>(), batch_size, Y->numel() / batch_size);
120  Y_mat = ConstEigenMatrixMap<float>(
121  Y->template data<float>(), Y->numel() / batch_size, batch_size)
122  .transpose()
123  .eval();
124  // TODO Replace by Reshape(), once wrappers are written
125  Y = Output(0, {batch_size, Y->numel() / batch_size}, at::dtype<float>());
126 
127  // Check that output size of Y is the element-wise product of out_sizes
128  int prod_out_sizes = 1;
129  for (int i = 0; i < out_sizes_.size(); i++) {
130  prod_out_sizes *= out_sizes_[i];
131  }
132  CAFFE_ENFORCE(
133  Y->dim32(1) == prod_out_sizes,
134  "Output dimension of Y: ",
135  Y->dim32(1),
136  ", product of out_sizes: ",
137  prod_out_sizes);
138 
139  // Add bias term
140  if (bias_multiplier_.numel() != batch_size) {
141  // If the helper bias multiplier is not M, reshape and fill it with one.
143  &bias_multiplier_,
144  {batch_size},
145  at::dtype<T>().device(Context::GetDeviceType()));
146  math::Set<T, Context>(
147  batch_size,
148  static_cast<T>(1),
149  bias_multiplier_.template mutable_data<T>(),
150  &context_);
151  }
152  math::Gemm<T, Context, Engine>(
153  CblasNoTrans,
154  CblasNoTrans,
155  Y->dim32(0),
156  Y->dim32(1),
157  1,
158  1,
159  bias_multiplier_.template data<T>(),
160  b.template data<T>(),
161  1,
162  Y->template mutable_data<T>(),
163  &context_);
164  return true;
165  }
166 
167  protected:
168  Tensor bias_multiplier_;
169  std::vector<int> inp_sizes_;
170  std::vector<int> out_sizes_;
171  std::vector<int> tt_ranks_;
172  std::unique_ptr<Blob> Y_temp_;
173 };
174 
175 // TODO: Complete after verifying utility of TT-layer's forward pass.
176 template <typename T, class Context, class Engine = DefaultEngine>
177 class TTLinearGradientOp : public Operator<Context> {
178  public:
179  USE_OPERATOR_CONTEXT_FUNCTIONS;
180  template <class... Args>
181  explicit TTLinearGradientOp(Args&&... args)
182  : Operator<Context>(std::forward<Args>(args)...) {}
183  ~TTLinearGradientOp() {}
184 
185  bool RunOnDevice() override {
186  return false;
187  }
188 
189  protected:
190  Tensor bias_multiplier_{Context::GetDeviceType()};
191 };
192 
193 } // namespace caffe2
194 
195 #endif // CAFFE2_OPERATORS_TT_LINEAR_OP_H_
Blob is a general container that hosts a typed pointer.
Definition: blob.h:24
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if the Tensor already has correct size and data type.
Definition: tensor.cc:127
const Tensor & Input(int idx, DeviceType type=Context::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator.
Definition: operator.h:702
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13