Caffe2 - C++ API
A deep learning, cross-platform ML framework
tt_linear_op.h
#ifndef CAFFE2_OPERATORS_TT_LINEAR_OP_H_
#define CAFFE2_OPERATORS_TT_LINEAR_OP_H_

#ifdef CAFFE2_USE_MKL
#include <mkl.h>
#endif // CAFFE2_USE_MKL

#include "Eigen/Core"
#include "Eigen/Dense"
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <typename T, class Context, class Engine = DefaultEngine>
class TTLinearOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  TTLinearOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        inp_sizes_(OperatorBase::GetRepeatedArgument<int>("inp_sizes")),
        out_sizes_(OperatorBase::GetRepeatedArgument<int>("out_sizes")),
        tt_ranks_(OperatorBase::GetRepeatedArgument<int>("tt_ranks")),
        Y_temp_(unique_ptr<Blob>(new Blob())) {}
  ~TTLinearOp() {}

  bool RunOnDevice() override {
    const auto& X = Input(0); // Input array
    const auto& b = Input(1); // Bias array
    const auto& cores = Input(2); // 1D array containing the TT-cores
    auto* Y = Output(0);

    CAFFE_ENFORCE(X.ndim() > 1, "Number of dimensions in X: ", X.ndim());
    CAFFE_ENFORCE(b.ndim() == 1, "Number of dimensions in b: ", b.ndim());
    CAFFE_ENFORCE(
        inp_sizes_.size() == out_sizes_.size(),
        "inp_sizes has size: ",
        inp_sizes_.size(),
        ", out_sizes has size: ",
        out_sizes_.size());
    CAFFE_ENFORCE(
        cores.ndim() == 1, "Number of dimensions in cores: ", cores.ndim());
    // batch size
    const int batch_size = X.ndim() > 1 ? X.dim32(0) : 1;

    // dimension d of tensors
    const int d = inp_sizes_.size();

    // Keep track of index of current core in multiplication
    int cores_idx = 0;

    // Temporary buffer to facilitate multiplication of TT-cores with input
    auto Y_buf = Y_temp_->GetMutable<Tensor<Context>>();
    Y_buf->ResizeLike(X);
    Y_buf->CopyFrom(X);

    // The overall forward pass involves multiplication with each core, where
    // each core has sizes dictated by inp_sizes_ and out_sizes_. Each core thus
    // has size inp_sizes_[i] * tt_ranks_[i] * tt_ranks_[i + 1] * out_sizes_[i].
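    // Illustrative sizing example (values are assumptions, not taken from this
    // header): with inp_sizes_ = {4, 8}, out_sizes_ = {5, 10} and
    // tt_ranks_ = {1, 3, 1}, core 0 holds 4 * 1 * 3 * 5 = 60 entries and
    // core 1 holds 8 * 3 * 1 * 10 = 240 entries, so `cores` is a flat array of
    // 300 values standing in for a dense 32 x 50 weight matrix (1600 values).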
    for (int i = (d - 1); i >= 0; --i) {
      int curr_rows = inp_sizes_[i] * tt_ranks_[i + 1];
      int curr_cols = tt_ranks_[i] * out_sizes_[i];

      // TODO Replace by Reshape(), once wrappers are written
      Y_buf->Resize(Y_buf->size() / curr_rows, curr_rows);
      Y->Resize(Y_buf->size() / curr_rows, curr_cols);

      // Defensive checks
      CAFFE_ENFORCE(Y_buf->size() % curr_rows == 0, Y_buf->size(), curr_rows);
      CAFFE_ENFORCE(
          cores_idx + curr_rows * curr_cols <= cores.size(),
          cores_idx + curr_rows * curr_cols,
          cores.size());

      // Multiply ith core with the intermediate output
      math::Gemm<float, Context, Engine>(
          CblasNoTrans,
          CblasNoTrans,
          Y_buf->size() / curr_rows,
          curr_cols,
          curr_rows,
          1,
          Y_buf->template data<float>(),
          cores.template data<float>() + cores_idx,
          0,
          Y->template mutable_data<float>(),
          &context_);

      CAFFE_ENFORCE(Y->size() % out_sizes_[i] == 0, Y->size(), out_sizes_[i]);

      // TODO Add GPU support by writing a generic wrapper.
      auto Y_mat = EigenMatrixMap<float>(
          Y->template mutable_data<float>(),
          Y->size() / out_sizes_[i],
          out_sizes_[i]);
      Y_mat = ConstEigenMatrixMap<float>(
                  Y->template data<float>(),
                  out_sizes_[i],
                  Y->size() / out_sizes_[i])
                  .transpose()
                  .eval();

      // Resize operation
      Y_buf->Resize(Y->dim32(0), Y->dim32(1));
      context_.template Copy<float, CPUContext, CPUContext>(
          Y->size(),
          Y->template data<float>(),
          Y_buf->template mutable_data<float>());

      cores_idx += curr_rows * curr_cols;
    }

    // TODO Add GPU support by writing a generic wrapper.
    auto Y_mat = EigenMatrixMap<float>(
        Y->template mutable_data<float>(), batch_size, Y->size() / batch_size);
    Y_mat = ConstEigenMatrixMap<float>(
                Y->template data<float>(), Y->size() / batch_size, batch_size)
                .transpose()
                .eval();
    // TODO Replace by Reshape(), once wrappers are written
    Y->Resize(batch_size, Y->size() / batch_size);

    // Check that output size of Y is the element-wise product of out_sizes
    int prod_out_sizes = 1;
    for (int i = 0; i < out_sizes_.size(); i++) {
      prod_out_sizes *= out_sizes_[i];
    }
    CAFFE_ENFORCE(
        Y->dim32(1) == prod_out_sizes,
        "Output dimension of Y: ",
        Y->dim32(1),
        ", product of out_sizes: ",
        prod_out_sizes);

    // Add bias term
    if (bias_multiplier_.size() != batch_size) {
      // If the helper bias multiplier does not have batch_size elements,
      // reshape it and fill it with ones.
      bias_multiplier_.Resize(batch_size);
      math::Set<T, Context>(
          batch_size,
          static_cast<T>(1),
          bias_multiplier_.template mutable_data<T>(),
          &context_);
    }
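    // The Gemm below is a rank-1 update with alpha = 1 and beta = 1:
    // Y (batch_size x N) += bias_multiplier (batch_size x 1) * b (1 x N),
    // i.e. the bias vector b is added to every row of Y.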
    math::Gemm<T, Context, Engine>(
        CblasNoTrans,
        CblasNoTrans,
        Y->dim32(0),
        Y->dim32(1),
        1,
        1,
        bias_multiplier_.template data<T>(),
        b.template data<T>(),
        1,
        Y->template mutable_data<T>(),
        &context_);
    return true;
  }

 protected:
  Tensor<Context> bias_multiplier_;
  std::vector<int> inp_sizes_;
  std::vector<int> out_sizes_;
  std::vector<int> tt_ranks_;
  std::unique_ptr<Blob> Y_temp_;
};

// TODO: Complete after verifying utility of TT-layer's forward pass.
template <typename T, class Context, class Engine = DefaultEngine>
class TTLinearGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  TTLinearGradientOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}
  ~TTLinearGradientOp() {}

  bool RunOnDevice() override {
    return false;
  }

 protected:
  Tensor<Context> bias_multiplier_;
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_TT_LINEAR_OP_H_
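
A minimal usage sketch follows. It is illustrative only: it assumes the CPU specialization of TTLinearOp is registered under the operator name "TT" (as in tt_linear_op.cc), and the blob names ("X", "b", "cores", "Y"), fill values, and sizes are made-up example values, chosen so that prod(inp_sizes) = 32, prod(out_sizes) = 50, and the flattened cores hold 4*1*3*5 + 8*3*1*10 = 300 entries.

#include <algorithm>

#include "caffe2/core/workspace.h"
#include "caffe2/proto/caffe2.pb.h"

void RunTTLinearExample() {
  caffe2::Workspace ws;

  // Input X: batch_size x prod(inp_sizes) = 2 x 32, filled with ones.
  auto* X = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  X->Resize(2, 32);
  std::fill_n(X->mutable_data<float>(), X->size(), 1.f);

  // Bias b: prod(out_sizes) = 50 entries.
  auto* b = ws.CreateBlob("b")->GetMutable<caffe2::TensorCPU>();
  b->Resize(50);
  std::fill_n(b->mutable_data<float>(), b->size(), 0.f);

  // Flattened TT-cores: 60 + 240 = 300 entries.
  auto* cores = ws.CreateBlob("cores")->GetMutable<caffe2::TensorCPU>();
  cores->Resize(300);
  std::fill_n(cores->mutable_data<float>(), cores->size(), 0.01f);

  // Build the operator definition; "TT" is the assumed registration name.
  caffe2::OperatorDef def;
  def.set_type("TT");
  def.add_input("X");
  def.add_input("b");
  def.add_input("cores");
  def.add_output("Y");

  auto* inp_sizes = def.add_arg();
  inp_sizes->set_name("inp_sizes");
  inp_sizes->add_ints(4);
  inp_sizes->add_ints(8);

  auto* out_sizes = def.add_arg();
  out_sizes->set_name("out_sizes");
  out_sizes->add_ints(5);
  out_sizes->add_ints(10);

  auto* tt_ranks = def.add_arg();
  tt_ranks->set_name("tt_ranks");
  tt_ranks->add_ints(1);
  tt_ranks->add_ints(3);
  tt_ranks->add_ints(1);

  // Runs the forward pass; the output blob "Y" ends up with shape (2, 50).
  ws.RunOperatorOnce(def);
}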