Quantized FC operator with 16-bit accumulation. More...

#include <fully_connected_dnnlowp_acc16_op.h>

Inheritance diagram for caffe2::FullyConnectedDNNLowPAcc16Op:

Public Types
using	BaseType = FullyConnectedDNNLowPOp< std::uint8_t >

Public Types inherited from caffe2::Observable< OperatorBase >
using	Observer = ObserverBase< OperatorBase >

Public Member Functions
	FullyConnectedDNNLowPAcc16Op (const OperatorDef &operator_def, Workspace *ws)

bool	RunOnDevice () override

	USE_OPERATOR_FUNCTIONS (CPUContext)

Public Member Functions inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
	FullyConnectedDNNLowPOp (const OperatorDef &operator_def, Workspace *ws)

bool	RunOnDevice () override

	USE_OPERATOR_FUNCTIONS (CPUContext)

	USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS (std::uint8_t, FullyConnectedOp< CPUContext >)

Public Member Functions inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > >
	USE_OPERATOR_FUNCTIONS (CPUContext)

	DNNLowPOp (const OperatorDef &operator_def, Workspace *ws)

Public Member Functions inherited from caffe2::Operator< CPUContext >
	Operator (const OperatorDef &operator_def, Workspace *ws)

	Operator (const c10::FunctionSchema &fn_schema, std::vector< c10::IValue > inputs, std::vector< at::Tensor > outputs)

const Tensor &	Input (int idx, DeviceType type=CPUContext::GetDeviceType())
	Retrieve a non-owning reference to the input at position 'idx' for this operator. More...

Tensor	XOutput (int idx, at::IntArrayRef dims, at::TensorOptions options)
	XOutput is a modernized version of Output, which returns a Tensor rather than a Tensor* (the raw pointer in the latter case is useless, as Tensor is a pointer type).

Public Member Functions inherited from caffe2::OperatorBase
	OperatorBase (const OperatorDef &operator_def, Workspace *ws)

	OperatorBase (const c10::FunctionSchema &schema, std::vector< c10::IValue > inputs, std::vector< at::Tensor > outputs)

bool	isLegacyOperator () const
	Return true if the operator was instantiated with OperatorDef. New operators should be instantiated with FunctionSchema.

const c10::FunctionSchema &	getFunctionSchema () const

bool	HasArgument (const string &name) const
	Checks if the operator has an argument of the given name.

template<typename T >
T	GetSingleArgument (const string &name, const T &default_value) const

template<typename T >
bool	HasSingleArgumentOfType (const string &name) const

template<typename T >
vector< T >	GetVectorFromIValueList (const c10::IValue &value) const

template<typename T >
vector< T >	GetRepeatedArgument (const string &name, const vector< T > &default_value={}) const

template<typename T >
const T &	Input (int idx)

template<typename T >
const T &	Input (int idx, DeviceType type)

template<typename T >
T *	Output (int idx)

template<typename T >
T *	Output (int idx, DeviceType type)

Tensor	XOutputTensor (int idx, at::IntArrayRef dims, at::TensorOptions options)

void	SetOutputTensor (int idx, Tensor tensor)

Tensor	OutputTensorOrUndefined (int idx)

Tensor *	OutputTensor (int idx, at::IntArrayRef dims, at::TensorOptions options)

Tensor *	OutputTensorCopyFrom (int idx, at::TensorOptions options, const Tensor &src, bool async=false)

Tensor *	OutputTensorAlias (int idx, const Tensor &src)

template<typename T >
T *	Output (int idx, T *allocated)

const Blob &	InputBlob (int idx)

Blob *	OutputBlob (int idx)

bool	IsInputOutputAlias (int i, int j)

template<typename T >
bool	InputIsType (int idx)

bool	InputIsTensorType (int idx, DeviceType device_type)

template<typename T >
bool	OutputIsType (int idx)

bool	OutputIsTensorType (int idx, DeviceType type)

int	InputSize () const

int	OutputSize () const

const vector< const Blob * > &	Inputs () const

const vector< Blob * > &	Outputs ()

vector< TensorShape >	InputTensorShapes () const

virtual void	WaitEvent (const Event &ev, int=-1)

void	Wait (const OperatorBase &other, int stream_id=-1)

virtual void	WaitEvents (const std::vector< const Event * > &events, int=-1)

virtual void	Finish ()

virtual bool	Run (int=0)

virtual bool	HasAsyncPart () const

virtual bool	SupportsAsyncScheduling () const

virtual bool	RunAsync (int stream_id=0)

virtual void	AddRelatedBlobInfo (EnforceNotMet *err)

const OperatorDef &	debug_def () const

void	set_debug_def (const std::shared_ptr< const OperatorDef > &operator_def)

bool	has_debug_def () const

void	RecordLastFailedOpNetPosition ()

int	net_position () const

void	set_net_position (int idx)

const DeviceOption &	device_option () const

const Event &	event () const

Event &	event ()

void	ResetEvent ()

void	DisableEvent ()

bool	IsEventDisabled () const

virtual void	SyncDeviceBarrierForObservers ()

virtual bool	IsStreamFree (int) const

const std::string &	type () const

void	annotate_engine (const std::string &engine)

const std::string &	engine () const

void	SetExecutorHelper (ExecutorHelper *helper)

ExecutorHelper *	GetExecutorHelper () const

std::vector< at::Tensor >	move_newstyle_outputs ()&&

template<>
NetDef	GetSingleArgument (const std::string &name, const NetDef &default_value) const

template<>
vector< int >	GetVectorFromIValueList (const c10::IValue &value) const

template<>
vector< float >	GetVectorFromIValueList (const c10::IValue &value) const

template<>
vector< string >	GetVectorFromIValueList (const c10::IValue &value) const

Public Member Functions inherited from caffe2::Observable< OperatorBase >
	Observable (Observable &&)=default

Observable &	operator= (Observable &&)=default

	C10_DISABLE_COPY_AND_ASSIGN (Observable)

const Observer *	AttachObserver (std::unique_ptr< Observer > observer)

std::unique_ptr< Observer >	DetachObserver (const Observer *observer_ptr)
	Returns a unique_ptr to the removed observer. More...

virtual size_t	NumObservers ()

void	StartAllObservers ()

void	StopAllObservers ()

Additional Inherited Members
Static Public Attributes inherited from caffe2::OperatorBase
static const int	kNoNetPositionSet = -1

Protected Types inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
using	T_signed = typename std::make_signed< std::uint8_t >::type

Protected Member Functions inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
bool	GetQuantizationParameters_ ()

Protected Member Functions inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > >
const TensorCPU &	InputTensorCPU_ (int idx)

TensorCPU *	OutputTensorCPU_ (int idx)

Tensor *	OutputTensorCPU_ (int idx, at::IntList dims, at::TensorOptions options)

std::uint8_t *	GetQuantizedOutputData_ ()

void	MeasureQuantizationError_ ()

void	RunOnDeviceEpilogue_ ()

void	ParseDNNLowPOperatorArguments_ ()

void	GetOutputQuantizationParams_ ()

OpWrapper< FullyConnectedOp< CPUContext >, std::uint8_t > *	Fp32Op_ ()

Protected Member Functions inherited from caffe2::OperatorBase
virtual void	RecordEvent (const char *=nullptr)

void	SetEventFinished (const char *err_msg=nullptr)

void	SetEventFinishedWithException (const char *err_msg=nullptr)

std::string	getErrorMsg ()

	C10_DISABLE_COPY_AND_ASSIGN (OperatorBase)

Protected Attributes inherited from caffe2::FullyConnectedDNNLowPOp< std::uint8_t >
std::size_t	axis_

std::size_t	axis_w_

vector< std::int64_t >	Y_shape_cache_

std::vector< dnnlowp::RequantizationParams >	requantization_params_

bool	requantization_param_selected_

std::shared_ptr< fbgemm::PackBMatrix< std::int8_t > >	Wq_packed_

std::vector< std::uint8_t >	X_pack_buf_

std::vector< std::int32_t >	Y_int32_

std::vector< dnnlowp::TensorQuantizationParams >	filter_qparams_

std::vector< float >	filter_scales_

std::vector< std::int32_t >	filter_zero_points_

std::vector< float >	requantization_multipliers_

bool	quantize_channelwise_

std::vector< T_signed >	W_quantized_

std::shared_ptr< std::vector< std::int32_t > >	b_quantized_

const std::int32_t *	b_quantized_data_

std::vector< std::int32_t >	row_offsets_

std::shared_ptr< std::vector< std::int32_t > >	column_offsets_

std::vector< float >	b_dequantized_

const float *	b_dequantized_data_

bool	is_weight_constant_

float	in_qparams0_scale_old_

std::int32_t	in_qparams0_zero_point_old_

Protected Attributes inherited from caffe2::DNNLowPOp< std::uint8_t, FullyConnectedOp< CPUContext > >
bool	dequantize_output_

bool	measure_quantization_error_

std::string	followed_by_

std::vector< dnnlowp::TensorQuantizationParams >	in_qparams_

dnnlowp::TensorQuantizationParams	out_qparams_

std::unique_ptr< OpWrapper< FullyConnectedOp< CPUContext >, std::uint8_t > >	fp32_op_

std::unique_ptr< dnnlowp::QuantizationFactory >	qfactory_

std::vector< std::uint8_t >	out_temp_

dnnlowp::QuantizationErrorStats	quantization_error_stats_

bool	arguments_parsed_

Protected Attributes inherited from caffe2::OperatorBase
std::unique_ptr< Event >	event_

Protected Attributes inherited from caffe2::Observable< OperatorBase >
std::vector< std::unique_ptr< Observer > >	observers_list_

Detailed Description

Quantized FC operator with 16-bit accumulation.

We'll encounter saturation, but this will be faster on Intel CPUs.

Definition at line 11 of file fully_connected_dnnlowp_acc16_op.h.

The documentation for this class was generated from the following files:

caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.h
caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.cc

Public Types

Public Member Functions

Additional Inherited Members

Detailed Description

Facebook Open Source