Caffe2 - C++ API
A deep learning, cross platform ML framework
Public Types | Public Member Functions
caffe2::FullyConnectedDNNLowPAcc16Op Class Reference (final)

Quantized FC operator with 16-bit accumulation. More...

#include <fully_connected_dnnlowp_acc16_op.h>

Inheritance diagram for caffe2::FullyConnectedDNNLowPAcc16Op:
caffe2::FullyConnectedDNNLowPOp< std::uint8_t > caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > > caffe2::Operator< CPUContext > caffe2::OperatorBase caffe2::Observable< OperatorBase >

Public Types

using BaseType = FullyConnectedDNNLowPOp< std::uint8_t >
- Public Types inherited from caffe2::Observable< OperatorBase >
using Observer = ObserverBase< OperatorBase >

Public Member Functions

 FullyConnectedDNNLowPAcc16Op (const OperatorDef &operator_def, Workspace *ws)
bool RunOnDevice () override
- Public Member Functions inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
 FullyConnectedDNNLowPOp (const OperatorDef &operator_def, Workspace *ws)
bool RunOnDevice () override
 USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS (std::uint8_t, FullyConnectedOp< CPUContext >)
- Public Member Functions inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > >
 DNNLowPOp (const OperatorDef &operator_def, Workspace *ws)
- Public Member Functions inherited from caffe2::Operator< CPUContext >
 Operator (const OperatorDef &operator_def, Workspace *ws)
 Operator (const c10::FunctionSchema &fn_schema, std::vector< c10::IValue > inputs, std::vector< at::Tensor > outputs)
const Tensor & Input (int idx, DeviceType type=CPUContext::GetDeviceType())
 Retrieve a non-owning reference to the input at position 'idx' for this operator. More...
Tensor XOutput (int idx, at::IntArrayRef dims, at::TensorOptions options)
 XOutput is a modernized version of Output which returns a Tensor rather than a Tensor* (the raw pointer in the latter case is useless, as Tensor is a pointer type.)
- Public Member Functions inherited from caffe2::OperatorBase
 OperatorBase (const OperatorDef &operator_def, Workspace *ws)
 OperatorBase (const c10::FunctionSchema &schema, std::vector< c10::IValue > inputs, std::vector< at::Tensor > outputs)
bool isLegacyOperator () const
 Return true if the operator was instantiated with OperatorDef. New operators should be instantiated with FunctionSchema.
const c10::FunctionSchema & getFunctionSchema () const
bool HasArgument (const string &name) const
 Checks if the operator has an argument of the given name.
template<typename T >
T GetSingleArgument (const string &name, const T &default_value) const
template<typename T >
bool HasSingleArgumentOfType (const string &name) const
template<typename T >
vector< T > GetVectorFromIValueList (const c10::IValue &value) const
template<typename T >
vector< T > GetRepeatedArgument (const string &name, const vector< T > &default_value={}) const
template<typename T >
const T & Input (int idx)
template<typename T >
const T & Input (int idx, DeviceType type)
template<typename T >
T * Output (int idx)
template<typename T >
T * Output (int idx, DeviceType type)
Tensor XOutputTensor (int idx, at::IntArrayRef dims, at::TensorOptions options)
void SetOutputTensor (int idx, Tensor tensor)
Tensor OutputTensorOrUndefined (int idx)
Tensor * OutputTensor (int idx, at::IntArrayRef dims, at::TensorOptions options)
Tensor * OutputTensorCopyFrom (int idx, at::TensorOptions options, const Tensor &src, bool async=false)
Tensor * OutputTensorAlias (int idx, const Tensor &src)
template<typename T >
T * Output (int idx, T *allocated)
const Blob & InputBlob (int idx)
Blob * OutputBlob (int idx)
bool IsInputOutputAlias (int i, int j)
template<typename T >
bool InputIsType (int idx)
bool InputIsTensorType (int idx, DeviceType device_type)
template<typename T >
bool OutputIsType (int idx)
bool OutputIsTensorType (int idx, DeviceType type)
int InputSize () const
int OutputSize () const
const vector< const Blob * > & Inputs () const
const vector< Blob * > & Outputs ()
vector< TensorShape > InputTensorShapes () const
virtual void WaitEvent (const Event &ev, int=-1)
void Wait (const OperatorBase &other, int stream_id=-1)
virtual void WaitEvents (const std::vector< const Event * > &events, int=-1)
virtual void Finish ()
virtual bool Run (int=0)
virtual bool HasAsyncPart () const
virtual bool SupportsAsyncScheduling () const
virtual bool RunAsync (int stream_id=0)
virtual void AddRelatedBlobInfo (EnforceNotMet *err)
const OperatorDef & debug_def () const
void set_debug_def (const std::shared_ptr< const OperatorDef > &operator_def)
bool has_debug_def () const
void RecordLastFailedOpNetPosition ()
int net_position () const
void set_net_position (int idx)
const DeviceOption & device_option () const
const Event & event () const
Event & event ()
void ResetEvent ()
void DisableEvent ()
bool IsEventDisabled () const
virtual void SyncDeviceBarrierForObservers ()
virtual bool IsStreamFree (int) const
const std::string & type () const
void annotate_engine (const std::string &engine)
const std::string & engine () const
void SetExecutorHelper (ExecutorHelper *helper)
ExecutorHelper * GetExecutorHelper () const
std::vector< at::Tensor > move_newstyle_outputs () &&
NetDef GetSingleArgument (const std::string &name, const NetDef &default_value) const
vector< int > GetVectorFromIValueList (const c10::IValue &value) const
vector< float > GetVectorFromIValueList (const c10::IValue &value) const
vector< string > GetVectorFromIValueList (const c10::IValue &value) const
- Public Member Functions inherited from caffe2::Observable< OperatorBase >
 Observable (Observable &&)=default
Observable & operator= (Observable &&)=default
const Observer * AttachObserver (std::unique_ptr< Observer > observer)
std::unique_ptr< Observer > DetachObserver (const Observer *observer_ptr)
 Returns a unique_ptr to the removed observer. More...
virtual size_t NumObservers ()
void StartAllObservers ()
void StopAllObservers ()

Additional Inherited Members

- Static Public Attributes inherited from caffe2::OperatorBase
static const int kNoNetPositionSet = -1
- Protected Types inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
using T_signed = typename std::make_signed< std::uint8_t >::type
- Protected Member Functions inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
bool GetQuantizationParameters_ ()
- Protected Member Functions inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > >
const TensorCPU & InputTensorCPU_ (int idx)
TensorCPU * OutputTensorCPU_ (int idx)
Tensor * OutputTensorCPU_ (int idx, at::IntList dims, at::TensorOptions options)
std::uint8_t * GetQuantizedOutputData_ ()
void MeasureQuantizationError_ ()
void RunOnDeviceEpilogue_ ()
void ParseDNNLowPOperatorArguments_ ()
void GetOutputQuantizationParams_ ()
OpWrapper< FullyConnectedOp< CPUContext >, std::uint8_t > * Fp32Op_ ()
- Protected Member Functions inherited from caffe2::OperatorBase
virtual void RecordEvent (const char *=nullptr)
void SetEventFinished (const char *err_msg=nullptr)
void SetEventFinishedWithException (const char *err_msg=nullptr)
std::string getErrorMsg ()
- Protected Attributes inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
std::size_t axis_
std::size_t axis_w_
vector< std::int64_t > Y_shape_cache_
std::vector< dnnlowp::RequantizationParams > requantization_params_
bool requantization_param_selected_
std::shared_ptr< fbgemm::PackBMatrix< std::int8_t > > Wq_packed_
std::vector< std::uint8_t > X_pack_buf_
std::vector< std::int32_t > Y_int32_
std::vector< dnnlowp::TensorQuantizationParams > filter_qparams_
std::vector< float > filter_scales_
std::vector< std::int32_t > filter_zero_points_
std::vector< float > requantization_multipliers_
bool quantize_channelwise_
std::vector< T_signed > W_quantized_
std::shared_ptr< std::vector< std::int32_t > > b_quantized_
const std::int32_t * b_quantized_data_
std::vector< std::int32_t > row_offsets_
std::shared_ptr< std::vector< std::int32_t > > column_offsets_
std::vector< float > b_dequantized_
const float * b_dequantized_data_
bool is_weight_constant_
float in_qparams0_scale_old_
std::int32_t in_qparams0_zero_point_old_
- Protected Attributes inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > >
bool dequantize_output_
bool measure_quantization_error_
std::string followed_by_
std::vector< dnnlowp::TensorQuantizationParams > in_qparams_
dnnlowp::TensorQuantizationParams out_qparams_
std::unique_ptr< OpWrapper< FullyConnectedOp< CPUContext >, std::uint8_t > > fp32_op_
std::unique_ptr< dnnlowp::QuantizationFactory > qfactory_
std::vector< std::uint8_t > out_temp_
dnnlowp::QuantizationErrorStats quantization_error_stats_
bool arguments_parsed_
- Protected Attributes inherited from caffe2::OperatorBase
std::unique_ptr< Event > event_
- Protected Attributes inherited from caffe2::Observable< OperatorBase >
std::vector< std::unique_ptr< Observer > > observers_list_

Detailed Description

Quantized FC operator with 16-bit accumulation.

We'll encounter saturation, but this will be faster on Intel CPUs.

Definition at line 11 of file fully_connected_dnnlowp_acc16_op.h.

The documentation for this class was generated from the following files: