Caffe2 - C++ API
A deep learning, cross-platform ML framework
tt_linear_op.h
#ifndef CAFFE2_OPERATORS_TT_LINEAR_OP_H_
#define CAFFE2_OPERATORS_TT_LINEAR_OP_H_

#ifdef CAFFE2_USE_MKL
#include <mkl.h>
#endif // CAFFE2_USE_MKL

#include "Eigen/Core"
#include "Eigen/Dense"
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <typename T, class Context, class Engine = DefaultEngine>
class TTLinearOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  TTLinearOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        inp_sizes_(OperatorBase::GetRepeatedArgument<int>("inp_sizes")),
        out_sizes_(OperatorBase::GetRepeatedArgument<int>("out_sizes")),
        tt_ranks_(OperatorBase::GetRepeatedArgument<int>("tt_ranks")),
        Y_temp_(unique_ptr<Blob>(new Blob())) {}
  ~TTLinearOp() {}

  bool RunOnDevice() override {
    const auto& X = Input(0); // Input array
    const auto& b = Input(1); // Bias array
    const auto& cores = Input(2); // 1D array containing the TT-cores
    auto* Y = Output(0);

    CAFFE_ENFORCE(X.ndim() > 1, "Number of dimensions in X: ", X.ndim());
    CAFFE_ENFORCE(b.ndim() == 1, "Number of dimensions in b: ", b.ndim());
    CAFFE_ENFORCE(
        inp_sizes_.size() == out_sizes_.size(),
        "inp_sizes has size: ",
        inp_sizes_.size(),
        ", out_sizes has size: ",
        out_sizes_.size());
    CAFFE_ENFORCE(
        cores.ndim() == 1, "Number of dimensions in cores: ", cores.ndim());
    // batch size
    const int batch_size = X.ndim() > 1 ? X.dim32(0) : 1;

    // dimension d of tensors
    const int d = inp_sizes_.size();

    // Keep track of index of current core in multiplication
    int cores_idx = 0;

    // Temporary buffer to facilitate multiplication of TT-cores with input
    auto Y_buf = Y_temp_->GetMutable<Tensor<Context>>();
    Y_buf->ResizeLike(X);
    Y_buf->CopyFrom(X);

    // The overall forward pass involves multiplication with each core, where
    // each core has sizes dictated by inp_sizes_ and out_sizes_. Each core thus
    // has size inp_sizes_[i] * tt_ranks_[i] * tt_ranks_[i + 1] * out_sizes_[i].
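    // Illustrative sizing example (values are assumptions, not taken from this
    // header): with inp_sizes_ = {4, 8}, out_sizes_ = {5, 10} and
    // tt_ranks_ = {1, 3, 1}, core 0 holds 4 * 1 * 3 * 5 = 60 entries and
    // core 1 holds 8 * 3 * 1 * 10 = 240 entries, so `cores` is a flat array of
    // 300 values standing in for a dense 32 x 50 weight matrix (1600 values).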
    for (int i = (d - 1); i >= 0; --i) {
      int curr_rows = inp_sizes_[i] * tt_ranks_[i + 1];
      int curr_cols = tt_ranks_[i] * out_sizes_[i];

      // TODO Replace by Reshape(), once wrappers are written
      Y_buf->Resize(Y_buf->size() / curr_rows, curr_rows);
      Y->Resize(Y_buf->size() / curr_rows, curr_cols);

      // Defensive checks
      CAFFE_ENFORCE(Y_buf->size() % curr_rows == 0, Y_buf->size(), curr_rows);
      CAFFE_ENFORCE(
          cores_idx + curr_rows * curr_cols <= cores.size(),
          cores_idx + curr_rows * curr_cols,
          cores.size());

      // Multiply ith core with the intermediate output
      math::Gemm<float, Context, Engine>(
          CblasNoTrans,
          CblasNoTrans,
          Y_buf->size() / curr_rows,
          curr_cols,
          curr_rows,
          1,
          Y_buf->template data<float>(),
          cores.template data<float>() + cores_idx,
          0,
          Y->template mutable_data<float>(),
          &context_);

      CAFFE_ENFORCE(Y->size() % out_sizes_[i] == 0, Y->size(), out_sizes_[i]);

      // TODO Add GPU support by writing a generic wrapper.
      auto Y_mat = EigenMatrixMap<float>(
          Y->template mutable_data<float>(),
          Y->size() / out_sizes_[i],
          out_sizes_[i]);
      Y_mat = ConstEigenMatrixMap<float>(
                  Y->template data<float>(),
                  out_sizes_[i],
                  Y->size() / out_sizes_[i])
                  .transpose()
                  .eval();

      // Resize operation
      Y_buf->Resize(Y->dim32(0), Y->dim32(1));
      context_.template Copy<float, CPUContext, CPUContext>(
          Y->size(),
          Y->template data<float>(),
          Y_buf->template mutable_data<float>());

      cores_idx += curr_rows * curr_cols;
    }

    // TODO Add GPU support by writing a generic wrapper.
    auto Y_mat = EigenMatrixMap<float>(
        Y->template mutable_data<float>(), batch_size, Y->size() / batch_size);
    Y_mat = ConstEigenMatrixMap<float>(
                Y->template data<float>(), Y->size() / batch_size, batch_size)
                .transpose()
                .eval();
    // TODO Replace by Reshape(), once wrappers are written
    Y->Resize(batch_size, Y->size() / batch_size);

    // Check that output size of Y is the element-wise product of out_sizes
    int prod_out_sizes = 1;
    for (int i = 0; i < out_sizes_.size(); i++) {
      prod_out_sizes *= out_sizes_[i];
    }
    CAFFE_ENFORCE(
        Y->dim32(1) == prod_out_sizes,
        "Output dimension of Y: ",
        Y->dim32(1),
        ", product of out_sizes: ",
        prod_out_sizes);

    // Add bias term
    if (bias_multiplier_.size() != batch_size) {
      // If the helper bias multiplier does not have batch_size elements,
      // reshape it and fill it with ones.
      bias_multiplier_.Resize(batch_size);
      math::Set<T, Context>(
          batch_size,
          static_cast<T>(1),
          bias_multiplier_.template mutable_data<T>(),
          &context_);
    }
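    // The Gemm below is a rank-1 update with alpha = 1 and beta = 1:
    // Y (batch_size x N) += bias_multiplier (batch_size x 1) * b (1 x N),
    // i.e. the bias vector b is added to every row of Y.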
    math::Gemm<T, Context, Engine>(
        CblasNoTrans,
        CblasNoTrans,
        Y->dim32(0),
        Y->dim32(1),
        1,
        1,
        bias_multiplier_.template data<T>(),
        b.template data<T>(),
        1,
        Y->template mutable_data<T>(),
        &context_);
    return true;
  }

 protected:
  Tensor<Context> bias_multiplier_;
  std::vector<int> inp_sizes_;
  std::vector<int> out_sizes_;
  std::vector<int> tt_ranks_;
  std::unique_ptr<Blob> Y_temp_;
};

// TODO: Complete after verifying utility of TT-layer's forward pass.
template <typename T, class Context, class Engine = DefaultEngine>
class TTLinearGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  TTLinearGradientOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}
  ~TTLinearGradientOp() {}

  bool RunOnDevice() override {
    return false;
  }

 protected:
  Tensor<Context> bias_multiplier_;
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_TT_LINEAR_OP_H_
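
A minimal usage sketch follows. It is illustrative only: it assumes the CPU specialization of TTLinearOp is registered under the operator name "TT" (as in tt_linear_op.cc), and the blob names ("X", "b", "cores", "Y"), fill values, and sizes are made-up example values, chosen so that prod(inp_sizes) = 32, prod(out_sizes) = 50, and the flattened cores hold 4*1*3*5 + 8*3*1*10 = 300 entries.

#include <algorithm>

#include "caffe2/core/workspace.h"
#include "caffe2/proto/caffe2.pb.h"

void RunTTLinearExample() {
  caffe2::Workspace ws;

  // Input X: batch_size x prod(inp_sizes) = 2 x 32, filled with ones.
  auto* X = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  X->Resize(2, 32);
  std::fill_n(X->mutable_data<float>(), X->size(), 1.f);

  // Bias b: prod(out_sizes) = 50 entries.
  auto* b = ws.CreateBlob("b")->GetMutable<caffe2::TensorCPU>();
  b->Resize(50);
  std::fill_n(b->mutable_data<float>(), b->size(), 0.f);

  // Flattened TT-cores: 60 + 240 = 300 entries.
  auto* cores = ws.CreateBlob("cores")->GetMutable<caffe2::TensorCPU>();
  cores->Resize(300);
  std::fill_n(cores->mutable_data<float>(), cores->size(), 0.01f);

  // Build the operator definition; "TT" is the assumed registration name.
  caffe2::OperatorDef def;
  def.set_type("TT");
  def.add_input("X");
  def.add_input("b");
  def.add_input("cores");
  def.add_output("Y");

  auto* inp_sizes = def.add_arg();
  inp_sizes->set_name("inp_sizes");
  inp_sizes->add_ints(4);
  inp_sizes->add_ints(8);

  auto* out_sizes = def.add_arg();
  out_sizes->set_name("out_sizes");
  out_sizes->add_ints(5);
  out_sizes->add_ints(10);

  auto* tt_ranks = def.add_arg();
  tt_ranks->set_name("tt_ranks");
  tt_ranks->add_ints(1);
  tt_ranks->add_ints(3);
  tt_ranks->add_ints(1);

  // Runs the forward pass; the output blob "Y" ends up with shape (2, 50).
  ws.RunOperatorOnce(def);
}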