Quantized FC operator with 16-bit accumulation. More...
#include <fully_connected_dnnlowp_acc16_op.h>
Public Types | |
using | BaseType = FullyConnectedDNNLowPOp< std::uint8_t > |
Public Types inherited from caffe2::Observable< OperatorBase > | |
using | Observer = ObserverBase< OperatorBase > |
Public Member Functions | |
FullyConnectedDNNLowPAcc16Op (const OperatorDef &operator_def, Workspace *ws) | |
bool | RunOnDevice () override |
USE_OPERATOR_FUNCTIONS (CPUContext) | |
Public Member Functions inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t > | |
FullyConnectedDNNLowPOp (const OperatorDef &operator_def, Workspace *ws) | |
bool | RunOnDevice () override |
USE_OPERATOR_FUNCTIONS (CPUContext) | |
USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS (std::uint8_t, FullyConnectedOp< CPUContext >) | |
Public Member Functions inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > > | |
USE_OPERATOR_FUNCTIONS (CPUContext) | |
DNNLowPOp (const OperatorDef &operator_def, Workspace *ws) | |
Public Member Functions inherited from caffe2::Operator< CPUContext > | |
Operator (const OperatorDef &operator_def, Workspace *ws) | |
Operator (const c10::FunctionSchema &fn_schema, std::vector< c10::IValue > inputs, std::vector< at::Tensor > outputs) | |
const Tensor & | Input (int idx, DeviceType type=CPUContext::GetDeviceType()) |
Retrieve a non-owning reference to the input at position 'idx' for this operator. More... | |
Tensor | XOutput (int idx, at::IntArrayRef dims, at::TensorOptions options) |
XOutput is a modernized version of Output which returns a Tensor rather than a Tensor* (the raw pointer in the latter case is useless, as Tensor is a pointer type). | |
Public Member Functions inherited from caffe2::OperatorBase | |
OperatorBase (const OperatorDef &operator_def, Workspace *ws) | |
OperatorBase (const c10::FunctionSchema &schema, std::vector< c10::IValue > inputs, std::vector< at::Tensor > outputs) | |
bool | isLegacyOperator () const |
Return true if the operator was instantiated with OperatorDef. New operators should be instantiated with FunctionSchema. | |
const c10::FunctionSchema & | getFunctionSchema () const |
bool | HasArgument (const string &name) const |
Checks if the operator has an argument of the given name. | |
template<typename T > | |
T | GetSingleArgument (const string &name, const T &default_value) const |
template<typename T > | |
bool | HasSingleArgumentOfType (const string &name) const |
template<typename T > | |
vector< T > | GetVectorFromIValueList (const c10::IValue &value) const |
template<typename T > | |
vector< T > | GetRepeatedArgument (const string &name, const vector< T > &default_value={}) const |
template<typename T > | |
const T & | Input (int idx) |
template<typename T > | |
const T & | Input (int idx, DeviceType type) |
template<typename T > | |
T * | Output (int idx) |
template<typename T > | |
T * | Output (int idx, DeviceType type) |
Tensor | XOutputTensor (int idx, at::IntArrayRef dims, at::TensorOptions options) |
void | SetOutputTensor (int idx, Tensor tensor) |
Tensor | OutputTensorOrUndefined (int idx) |
Tensor * | OutputTensor (int idx, at::IntArrayRef dims, at::TensorOptions options) |
Tensor * | OutputTensorCopyFrom (int idx, at::TensorOptions options, const Tensor &src, bool async=false) |
Tensor * | OutputTensorAlias (int idx, const Tensor &src) |
template<typename T > | |
T * | Output (int idx, T *allocated) |
const Blob & | InputBlob (int idx) |
Blob * | OutputBlob (int idx) |
bool | IsInputOutputAlias (int i, int j) |
template<typename T > | |
bool | InputIsType (int idx) |
bool | InputIsTensorType (int idx, DeviceType device_type) |
template<typename T > | |
bool | OutputIsType (int idx) |
bool | OutputIsTensorType (int idx, DeviceType type) |
int | InputSize () const |
int | OutputSize () const |
const vector< const Blob * > & | Inputs () const |
const vector< Blob * > & | Outputs () |
vector< TensorShape > | InputTensorShapes () const |
virtual void | WaitEvent (const Event &ev, int=-1) |
void | Wait (const OperatorBase &other, int stream_id=-1) |
virtual void | WaitEvents (const std::vector< const Event * > &events, int=-1) |
virtual void | Finish () |
virtual bool | Run (int=0) |
virtual bool | HasAsyncPart () const |
virtual bool | SupportsAsyncScheduling () const |
virtual bool | RunAsync (int stream_id=0) |
virtual void | AddRelatedBlobInfo (EnforceNotMet *err) |
const OperatorDef & | debug_def () const |
void | set_debug_def (const std::shared_ptr< const OperatorDef > &operator_def) |
bool | has_debug_def () const |
void | RecordLastFailedOpNetPosition () |
int | net_position () const |
void | set_net_position (int idx) |
const DeviceOption & | device_option () const |
const Event & | event () const |
Event & | event () |
void | ResetEvent () |
void | DisableEvent () |
bool | IsEventDisabled () const |
virtual void | SyncDeviceBarrierForObservers () |
virtual bool | IsStreamFree (int) const |
const std::string & | type () const |
void | annotate_engine (const std::string &engine) |
const std::string & | engine () const |
void | SetExecutorHelper (ExecutorHelper *helper) |
ExecutorHelper * | GetExecutorHelper () const |
std::vector< at::Tensor > | move_newstyle_outputs ()&& |
template<> | |
NetDef | GetSingleArgument (const std::string &name, const NetDef &default_value) const |
template<> | |
vector< int > | GetVectorFromIValueList (const c10::IValue &value) const |
template<> | |
vector< float > | GetVectorFromIValueList (const c10::IValue &value) const |
template<> | |
vector< string > | GetVectorFromIValueList (const c10::IValue &value) const |
Public Member Functions inherited from caffe2::Observable< OperatorBase > | |
Observable (Observable &&)=default | |
Observable & | operator= (Observable &&)=default |
C10_DISABLE_COPY_AND_ASSIGN (Observable) | |
const Observer * | AttachObserver (std::unique_ptr< Observer > observer) |
std::unique_ptr< Observer > | DetachObserver (const Observer *observer_ptr) |
Returns a unique_ptr to the removed observer. More... | |
virtual size_t | NumObservers () |
void | StartAllObservers () |
void | StopAllObservers () |
Additional Inherited Members | |
Static Public Attributes inherited from caffe2::OperatorBase | |
static const int | kNoNetPositionSet = -1 |
Protected Types inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t > | |
using | T_signed = typename std::make_signed< std::uint8_t >::type |
Protected Member Functions inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t > | |
bool | GetQuantizationParameters_ () |
Protected Member Functions inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > > | |
const TensorCPU & | InputTensorCPU_ (int idx) |
TensorCPU * | OutputTensorCPU_ (int idx) |
Tensor * | OutputTensorCPU_ (int idx, at::IntList dims, at::TensorOptions options) |
std::uint8_t * | GetQuantizedOutputData_ () |
void | MeasureQuantizationError_ () |
void | RunOnDeviceEpilogue_ () |
void | ParseDNNLowPOperatorArguments_ () |
void | GetOutputQuantizationParams_ () |
OpWrapper< FullyConnectedOp< CPUContext >, std::uint8_t > * | Fp32Op_ () |
Protected Member Functions inherited from caffe2::OperatorBase | |
virtual void | RecordEvent (const char *=nullptr) |
void | SetEventFinished (const char *err_msg=nullptr) |
void | SetEventFinishedWithException (const char *err_msg=nullptr) |
std::string | getErrorMsg () |
C10_DISABLE_COPY_AND_ASSIGN (OperatorBase) | |
Protected Attributes inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t > | |
std::size_t | axis_ |
std::size_t | axis_w_ |
vector< std::int64_t > | Y_shape_cache_ |
std::vector< dnnlowp::RequantizationParams > | requantization_params_ |
bool | requantization_param_selected_ |
std::shared_ptr< fbgemm::PackBMatrix< std::int8_t > > | Wq_packed_ |
std::vector< std::uint8_t > | X_pack_buf_ |
std::vector< std::int32_t > | Y_int32_ |
std::vector< dnnlowp::TensorQuantizationParams > | filter_qparams_ |
std::vector< float > | filter_scales_ |
std::vector< std::int32_t > | filter_zero_points_ |
std::vector< float > | requantization_multipliers_ |
bool | quantize_channelwise_ |
std::vector< T_signed > | W_quantized_ |
std::shared_ptr< std::vector< std::int32_t > > | b_quantized_ |
const std::int32_t * | b_quantized_data_ |
std::vector< std::int32_t > | row_offsets_ |
std::shared_ptr< std::vector< std::int32_t > > | column_offsets_ |
std::vector< float > | b_dequantized_ |
const float * | b_dequantized_data_ |
bool | is_weight_constant_ |
float | in_qparams0_scale_old_ |
std::int32_t | in_qparams0_zero_point_old_ |
Protected Attributes inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > > | |
bool | dequantize_output_ |
bool | measure_quantization_error_ |
std::string | followed_by_ |
std::vector< dnnlowp::TensorQuantizationParams > | in_qparams_ |
dnnlowp::TensorQuantizationParams | out_qparams_ |
std::unique_ptr< OpWrapper< FullyConnectedOp< CPUContext >, std::uint8_t > > | fp32_op_ |
std::unique_ptr< dnnlowp::QuantizationFactory > | qfactory_ |
std::vector< std::uint8_t > | out_temp_ |
dnnlowp::QuantizationErrorStats | quantization_error_stats_ |
bool | arguments_parsed_ |
Protected Attributes inherited from caffe2::OperatorBase | |
std::unique_ptr< Event > | event_ |
Protected Attributes inherited from caffe2::Observable< OperatorBase > | |
std::vector< std::unique_ptr< Observer > > | observers_list_ |
Quantized FC operator with 16-bit accumulation.
We'll encounter saturation, but this will be faster on Intel CPUs.
Definition at line 11 of file fully_connected_dnnlowp_acc16_op.h.