doxygen-c/html/fully__connected__fake__lowp__op_8h_source.html

 #pragma once

 #include <immintrin.h>
 #include "caffe2/core/context.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/utils/conversions.h"
 #include "caffe2/utils/math.h"

 namespace caffe2 {

 // Fake low-precision conversion routines. All of them share one signature:
 // they read floats from `source` and write floats to `dest`.
 // NOTE(review): presumably `size` is the number of float elements in both
 // buffers - confirm against the definitions, which are not in this header.

 // Convert to a 16-bit float by reducing the mantissa while preserving the
 // exponent. The result is still stored as float in `dest`.
 void fp32_to_bfp16(const float* source, size_t size, float* dest);

 // Convert to a 24-bit float by reducing the mantissa while preserving the
 // exponent. The result is still stored as float in `dest`.
 void fp32_to_bfp24(const float* source, size_t size, float* dest);

 // Convert to a 14-bit float by reducing the mantissa while preserving the
 // exponent. The result is still stored as float in `dest`.
 void fp32_to_bfp14(const float* source, size_t size, float* dest);

 // NOTE(review): presumably a scalar (non-vectorized) counterpart of
 // fp32_to_bfp16 - confirm against the definition.
 void fp32_to_bfp16_scalar(const float* source, size_t size, float* dest);

 // Convert to IEEE float16. The result is still stored as float in `dest`.
 void fp32_to_fp16(const float* source, size_t size, float* dest);

 // Rounding variant of the bfp16 conversion. Pipeline as recorded by the
 // original author: fp32 -> int32 -> += 1 << 15 -> fp32 -> truncation.
 void fp32_to_bfp16_round(const float* source, size_t size, float* dest);

 // Fully connected operator (Caffe's InnerProductOp under a more descriptive
 // name) parameterized on a float-to-float conversion routine.
 //
 // Template parameters:
 //   Q               - function pointer with the (const float*, size_t, float*)
 //                     signature shared by the fp32_to_* converters declared in
 //                     this header. NOTE(review): presumably applied to the
 //                     operands inside DoRunWithType, which is defined
 //                     elsewhere - confirm there.
 //   Context         - context type forwarded to Operator<Context>.
 //   Engine          - engine tag, DefaultEngine unless overridden.
 //   TransposeWeight - true unless overridden. NOTE(review): presumably
 //                     selects the weight layout used by DoRunWithType -
 //                     confirm against its definition.
 template <
     void (*Q)(const float*, size_t, float*),
     class Context,
     class Engine = DefaultEngine,
     bool TransposeWeight = true>
 class FullyConnectedFakeLowpFPOp final : public Operator<Context> {
  public:
   USE_OPERATOR_CONTEXT_FUNCTIONS;

   // Reads the "axis" and "axis_w" arguments (both default to 1) and the
   // "float16_compute" flag (defaults to false) from the operator definition.
   // Parentheses are required for the axis members: the int32_t -> size_t
   // conversion would be rejected as narrowing under brace initialization.
   FullyConnectedFakeLowpFPOp(const OperatorDef& operator_def, Workspace* ws)
       : Operator<Context>(operator_def, ws),
         axis_(this->template GetSingleArgument<int32_t>("axis", 1)),
         axis_w_(this->template GetSingleArgument<int32_t>("axis_w", 1)),
         float16_compute_(
             this->template GetSingleArgument<bool>("float16_compute", false)) {}

   ~FullyConnectedFakeLowpFPOp() = default;

   // Typed implementation; declared here and defined outside this header.
   // The parameters name the element types of X, W, B, Y and the math type.
   template <class T_X, class T_W, class T_B, class T_Y, class MATH>
   bool DoRunWithType();

   // Entry point: runs the all-float instantiation of DoRunWithType.
   bool RunOnDevice() override {
     // Argument order: X, W, B, Y, Math.
     return DoRunWithType<float, float, float, float, float>();
   }

  protected:
   // Values of the "axis" / "axis_w" arguments captured at construction.
   size_t axis_{1};
   size_t axis_w_{1};

   // Cached output shape, kept as a member so that a fresh vector does not
   // have to be built on every run.
   vector<int64_t> Y_shape_cache_;
   Tensor bias_multiplier_;

   // Value of the "float16_compute" argument captured at construction.
   bool float16_compute_;
 };

 // Gradient operator paired with FullyConnectedFakeLowpFPOp, parameterized on
 // the same kind of float-to-float conversion routine.
 //
 // Template parameters:
 //   Q               - function pointer with the (const float*, size_t, float*)
 //                     signature shared by the fp32_to_* converters declared in
 //                     this header. NOTE(review): presumably applied to the
 //                     operands inside DoRunWithType, which is defined
 //                     elsewhere - confirm there.
 //   Context         - context type forwarded to Operator<Context>.
 //   Engine          - engine tag, DefaultEngine unless overridden.
 //   TransposeWeight - true unless overridden. NOTE(review): presumably
 //                     selects the weight layout used by DoRunWithType -
 //                     confirm against its definition.
 template <
     void (*Q)(const float*, size_t, float*),
     class Context,
     class Engine = DefaultEngine,
     bool TransposeWeight = true>
 class FullyConnectedGradientFakeLowpFPOp : public Operator<Context> {
  public:
   USE_OPERATOR_CONTEXT_FUNCTIONS;

   // Reads the "axis" and "axis_w" arguments (both default to 1) and the
   // "float16_compute" flag (defaults to false) from the operator definition.
   // Parentheses are required for the axis members: the int32_t -> size_t
   // conversion would be rejected as narrowing under brace initialization.
   FullyConnectedGradientFakeLowpFPOp(
       const OperatorDef& operator_def,
       Workspace* ws)
       : Operator<Context>(operator_def, ws),
         axis_(this->template GetSingleArgument<int32_t>("axis", 1)),
         axis_w_(this->template GetSingleArgument<int32_t>("axis_w", 1)),
         float16_compute_(
             this->template GetSingleArgument<bool>("float16_compute", false)) {}

   ~FullyConnectedGradientFakeLowpFPOp() = default;

   // Typed implementation; declared here and defined outside this header.
   // The parameters name the element types of X, W, dY, B, dX, dW, dB and the
   // math type.
   template <
       class T_X,
       class T_W,
       class T_DY,
       class T_B,
       class T_DX,
       class T_DW,
       class T_DB,
       class MATH>
   bool DoRunWithType();

   // Entry point: runs the all-float instantiation of DoRunWithType.
   bool RunOnDevice() override {
     // Argument order: X, W, dY, B, dX, dW, dB, Math.
     return DoRunWithType<
         float,
         float,
         float,
         float,
         float,
         float,
         float,
         float>();
   }

  protected:
   // Values of the "axis" / "axis_w" arguments captured at construction.
   size_t axis_{1};
   size_t axis_w_{1};
   Tensor bias_multiplier_;

   // Value of the "float16_compute" argument captured at construction.
   bool float16_compute_;
 };

 } // namespace caffe2
caffe2::FullyConnectedGradientFakeLowpFPOp
Definition: fully_connected_fake_lowp_op.h:94

caffe2::DefaultEngine
Definition: math.h:31

nom::repr::Tensor
Definition: NeuralNet.h:158

caffe2::Workspace
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13

caffe2::FullyConnectedFakeLowpFPOp
Definition: fully_connected_fake_lowp_op.h:50

caffe2::Operator
Definition: operator.h:677