Flush-To-Zero and Denormals-Are-Zero mode.
Typedefs
template<typename T , bool is_cuda> | |
using | acc_type = typename AccumulateType< T, is_cuda >::type |
using | DimVector = SmallVector< int64_t, 5 > |
A container for sizes or strides. | |
using | TensorList = ArrayRef< Tensor > |
using | DimMask = TensorIterator::DimMask |
using | PtrVector = TensorIterator::PtrVector |
using | loop_t = TensorIterator::loop_t |
using | loop2d_t = TensorIterator::loop2d_t |
using | CheckedFrom = const char * |
using | DataType = caffe2::TypeIdentifier |
Functions
Context & | globalContext () |
TypeExtendedInterface & | getType (TensorOptions options) |
TypeExtendedInterface & | getType (const TensorImpl *impl) |
TypeExtendedInterface & | getType (const Tensor &t) |
LegacyTHDispatcher & | getLegacyTHDispatcher (TensorOptions options) |
LegacyTHDispatcher & | getLegacyTHDispatcher (const TensorImpl *impl) |
Allocator * | getCPUAllocator () |
REGISTER_LEGACY_TYPE_INIT (LegacyDeviceTypeInit) | |
CAFFE2_API LegacyTHDispatcher & | getLegacyTHDispatcher (const Tensor &) |
C10_DEFINE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device) | |
C10_DECLARE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device) | |
std::unique_ptr< at::BaseContext > | CreateContext (const at::Device &device) |
std::ios_base & | defaultfloat (std::ios_base &__base) |
std::ostream & | operator<< (std::ostream &out, const Type &t) |
void | __printTensor (std::ostream &stream, Tensor &self, int64_t linesize) |
std::ostream & | print (std::ostream &stream, const Tensor &tensor_, int64_t linesize) |
C10_DEFINE_REGISTRY (LegacyDeviceTypeInitRegistry, LegacyDeviceTypeInitInterface, LegacyDeviceTypeInitArgs) const LegacyDeviceTypeInitInterface &getLegacyDeviceTypeInit() | |
C10_DECLARE_REGISTRY (LegacyDeviceTypeInitRegistry, LegacyDeviceTypeInitInterface, LegacyDeviceTypeInitArgs) | |
CAFFE2_API const LegacyDeviceTypeInitInterface & | getLegacyDeviceTypeInit () |
LegacyTypeDispatch & | globalLegacyTypeDispatch () |
Type & | legacyTensorType (const TensorImpl &tensor) |
Return the Type object corresponding to this Tensor, which we can use to perform dynamic dispatch to operators. | |
void | initializeLegacyTypeDispatchFor (const TensorImpl &tensor) |
std::ostream & | operator<< (std::ostream &out, const Range &range) |
int64_t | get_device (Tensor self) |
bool | is_cuda (Tensor self) |
bool | is_hip (Tensor self) |
bool | is_sparse (Tensor self) |
C10_DECLARE_REGISTRY (VariableHooksRegistry, VariableHooksInterface, VariableHooksArgs) | |
template<typename T > | |
std::pair< int64_t, int64_t > | collapse_dims (T *sizes, T *strides, int64_t dims, const int excludeDim=-1) |
Tensor | sort_strides (Tensor &tensor_) |
bool | _all_equal_numel (at::ArrayRef< Tensor > tensors) |
std::string | _all_equal_numel_error (at::ArrayRef< Tensor > tensors) |
bool | _apply_preamble (ArrayRef< Tensor > tensors) |
int64_t | _max_dim_tensors (ArrayRef< Tensor > tensors) |
void | iterate (int64_t size) |
template<typename Arg , typename... Args> | |
void | iterate (int64_t size, Arg &iter, Args &...iter_tail) |
bool | iterate_continue () |
template<typename Arg , typename... Args> | |
bool | iterate_continue (Arg &iter, Args &...iter_tail) |
int64_t | max_iterate_size () |
template<typename Arg , typename... Args> | |
int64_t | max_iterate_size (Arg &iter, Args &...iter_tail) |
void | iterate_overflow () |
template<typename Arg , typename... Args> | |
void | iterate_overflow (Arg &iter, Args &...iter_tail) |
void | forward (int64_t offset) |
template<typename Arg , typename... Args> | |
void | forward (int64_t offset, Arg &iter, Args &...iter_tail) |
int64_t | max_dim () |
template<typename Arg , typename... Args> | |
int64_t | max_dim (Arg &iter, Args &...iter_tail) |
void | apply_op () |
template<typename Op , typename... Args> | |
void | apply_op (int64_t numel, int64_t offset, const Op &op, Args...iters) |
void | apply_kernel () |
template<typename Op , typename... Args> | |
void | apply_kernel (int64_t numel, int64_t offset, const Op &op, Args...iters) |
template<typename scalar1 , typename scalar2 , typename Op > | |
void | CPU_tensor_parallel_kernel_apply2 (Tensor tensor1, Tensor tensor2, const Op op) |
template<typename scalar1 , typename Op > | |
void | CPU_tensor_apply1 (Tensor tensor1, const Op op) |
template<typename scalar1 , typename scalar2 , typename Op > | |
void | CPU_tensor_apply2 (Tensor tensor1, Tensor tensor2, const Op op) |
template<typename scalar1 , typename scalar2 , typename scalar3 , typename Op > | |
void | CPU_tensor_apply3 (Tensor tensor1, Tensor tensor2, Tensor tensor3, const Op op) |
template<typename scalar1 , typename scalar2 , typename scalar3 , typename scalar4 , typename Op > | |
void | CPU_tensor_apply4 (Tensor tensor1, Tensor tensor2, Tensor tensor3, Tensor tensor4, const Op op) |
template<typename scalar1 , typename Op > | |
void | CPU_tensor_parallel_apply1 (Tensor tensor1, const Op op, int64_t grain_size=internal::GRAIN_SIZE) |
template<typename scalar1 , typename scalar2 , typename Op > | |
void | CPU_tensor_parallel_apply2 (Tensor tensor1, Tensor tensor2, const Op op, int64_t grain_size=internal::GRAIN_SIZE) |
std::atomic< int > | num_threads (-1) |
void | set_num_threads (int num_threads_) |
int | get_num_threads () |
C10_DECLARE_REGISTRY (ComplexHooksRegistry, ComplexHooksInterface, ComplexHooksArgs) | |
C10_DECLARE_REGISTRY (CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs) | |
C10_DECLARE_REGISTRY (HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs) | |
optional< Device > | device_of (Tensor t) |
Return the Device of a Tensor, if the Tensor is defined. | |
optional< Device > | device_of (TensorList t) |
Return the Device of a TensorList, if the list is non-empty and the first Tensor is defined. | |
ScalarType | toScalarType (const DLDataType &dtype) |
void | deleter (DLManagedTensor *arg) |
DLManagedTensor * | toDLPack (const Tensor &src) |
Tensor | fromDLPack (const DLManagedTensor *src) |
std::vector< int64_t > | infer_size (IntArrayRef a, IntArrayRef b) |
std::tuple< std::vector< int64_t >, std::vector< int64_t > > | inferExpandGeometry (IntArrayRef tensor_sizes, IntArrayRef tensor_strides, IntArrayRef sizes) |
void | check_defined (std::initializer_list< std::reference_wrapper< const Tensor >> tensors, const char *api_name) |
std::tuple< Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand) |
std::tuple< Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand, const char *api_name) |
std::tuple< Tensor, Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2) |
std::tuple< Tensor, Tensor > | expand_inplace (const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) |
std::tuple< Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2) |
std::tuple< Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) |
std::tuple< Tensor, Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const Tensor &to_expand3) |
std::tuple< Tensor, Tensor, Tensor > | expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const Tensor &to_expand3, const char *api_name) |
std::tuple< Tensor > | expand_size (const Tensor &to_expand, IntArrayRef sizes) |
std::tuple< Tensor > | expand_size (const Tensor &to_expand, IntArrayRef sizes, const char *api_name) |
std::vector< Tensor > | expand_outplace (TensorList to_expand) |
TensorOptions | initialTensorOptions () |
LegacyTHDispatch & | globalLegacyTHDispatch () |
MemOverlap | has_internal_overlap (const Tensor &tensor) |
MemOverlap | has_internal_overlap (TensorImpl *t) |
void | assert_no_internal_overlap (const Tensor &t, std::string op) |
void | assert_no_internal_overlap (TensorImpl *t, std::string op) |
template<typename T , typename std::enable_if< std::is_integral< T >::value, int >::type = 0> | |
bool | _isnan (T val) |
int64_t | divup (int64_t x, int64_t y) |
int | get_max_threads () |
int | get_thread_num () |
bool | in_parallel_region () |
template<class F > | |
void | parallel_for (const int64_t begin, const int64_t end, const int64_t grain_size, const F &f) |
template<class scalar_t , class F , class SF > | |
scalar_t | parallel_reduce (const int64_t begin, const int64_t end, const int64_t grain_size, const scalar_t ident, const F f, const SF sf) |
struct { | |
} | Generator (Context *context) |
virtual CAFFE2_API uint64_t | seed () override |
virtual CAFFE2_API uint64_t | initialSeed () override |
virtual CAFFE2_API void * | unsafeGetTH () override |
struct { | |
} | Dispatcher () |
void | register_cpu_types (Context *context) |
void | register_cuda_types (Context *context) |
struct { | |
} | Type () |
virtual ScalarType | scalarType () const override |
virtual caffe2::TypeMeta | typeMeta () const override |
virtual Backend | backend () const override |
virtual const char * | toString () const override |
virtual TypeID | ID () const override |
struct at::CAFFE2_API | get_function (const std::string &schema) |
Allocator * | allocator () const override |
Device | getDeviceFromPtr (void *data) const override |
std::unique_ptr< Generator > | generator () const override |
std::ostream & | operator<< (std::ostream &out, TensorGeometryArg t) |
void | checkDim (CheckedFrom c, const TensorGeometryArg &t, int64_t dim) |
void | checkDimRange (CheckedFrom c, const TensorGeometryArg &t, int64_t dim_start, int64_t dim_end) |
void | checkContiguous (CheckedFrom c, const TensorGeometryArg &t) |
void | checkAllContiguous (CheckedFrom c, at::ArrayRef< TensorArg > ts) |
void | checkSize (CheckedFrom c, const TensorGeometryArg &t, IntArrayRef sizes) |
void | checkSize (CheckedFrom c, const TensorGeometryArg &t, int64_t dim, int64_t size) |
void | checkAllSame (CheckedFrom c, ArrayRef< TensorArg > tensors, void(*fn)(CheckedFrom, const TensorArg &, const TensorArg &)) |
void | checkSameSize (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkAllSameSize (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkNumel (CheckedFrom c, const TensorGeometryArg &t, int64_t numel) |
void | checkSameNumel (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkAllSameNumel (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkSameGPU (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkAllSameGPU (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkSameType (CheckedFrom c, const TensorArg &t1, const TensorArg &t2) |
void | checkScalarType (CheckedFrom c, const TensorArg &t, ScalarType ty) |
void | checkScalarTypes (CheckedFrom c, const TensorArg &t, at::ArrayRef< ScalarType > l) |
void | checkAllSameType (CheckedFrom c, ArrayRef< TensorArg > tensors) |
void | checkSameDim (CheckedFrom c, const TensorGeometryArg &t1, const TensorGeometryArg &t2) |
void | checkDefined (CheckedFrom c, const TensorArg &t) |
void | checkAllDefined (CheckedFrom c, ArrayRef< TensorArg > ts) |
void | checkBackend (CheckedFrom c, const Tensor &t, Backend backend) |
void | checkBackend (CheckedFrom c, ArrayRef< Tensor > tensors, at::Backend backend) |
void * | maybe_data_ptr (const Tensor &tensor) |
void * | maybe_data_ptr (const TensorArg &tensor) |
bool | geometry_is_contiguous (IntArrayRef sizes, IntArrayRef strides) |
CAFFE2_API void | checkSameNumel (CheckedFrom c, const TensorGeometryArg &t1, const TensorGeometryArg &t2) |
int | _crash_if_asan (int arg) |
template<size_t N> | |
std::array< int64_t, N > | check_intlist (ArrayRef< int64_t > list, const char *name, int pos, ArrayRef< int64_t > def={}) |
int64_t | sum_intlist (ArrayRef< int64_t > list) |
int64_t | prod_intlist (ArrayRef< int64_t > list) |
REGISTER_CONTEXT (DeviceType::CPU, caffe2::CPUContext) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::CPU, DeviceType::CPU, caffe2::CopyBytesWrapper) | |
REGISTER_CONTEXT (DeviceType::IDEEP, caffe2::IDEEPContext) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::IDEEP, DeviceType::CPU, CopyBytesWrapper) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::CPU, DeviceType::IDEEP, CopyBytesWrapper) | |
REGISTER_COPY_BYTES_FUNCTION (DeviceType::IDEEP, DeviceType::IDEEP, CopyBytesWrapper) | |
REGISTER_COMPLEX_HOOKS (ComplexHooks) | |
Variables
thread_local bool | NonVariableTypeMode_enabled = false |
NOTE [ Treating Variables as non-Variables in type dispatch ]. | |
constexpr const char * | CUDA_HELP |
virtual | CAFFE2_API {name}Generator() |
Context * | context |
struct CAFFE2_API at::LegacyTHDispatcher | Generator |
class CAFFE2_API at::Tensor | Dispatcher |
at::TypeExtendedInterface | Type |
constexpr size_t | dim_bitset_size = 64 |
Flush-To-Zero and Denormals-Are-Zero mode.
Contains the implementation of parallel reductions in TensorIterator.
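A minimal usage sketch of the public at::parallel_reduce entry point listed in the function table above, assuming the signature shown there and that ATen/Parallel.h declares it; treat the helper name and grain size as illustrative choices, not library conventions:

    #include <ATen/Parallel.h>
    #include <cstdint>
    #include <vector>

    // Sum a buffer with at::parallel_reduce, following the signature listed above:
    // f reduces one sub-range starting from ident, sf combines two partial results.
    int64_t parallel_sum(const std::vector<int64_t>& data) {
      return at::parallel_reduce(
          /*begin=*/int64_t(0),
          /*end=*/static_cast<int64_t>(data.size()),
          /*grain_size=*/int64_t(2048),
          /*ident=*/int64_t(0),
          [&](int64_t begin, int64_t end, int64_t partial) {
            for (int64_t i = begin; i < end; ++i) partial += data[i];
            return partial;
          },
          [](int64_t a, int64_t b) { return a + b; });
    }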
Flush-To-Zero (FTZ) and Denormals-Are-Zero (DAZ) are modes that bypass the IEEE 754 handling of denormal (subnormal) floating-point numbers on x86-64 and some x86 CPUs. They reduce precision for values near zero but improve performance.
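As an illustration of the CPU mechanism itself (not an ATen API): on x86 these modes live in the SSE MXCSR control register and can be toggled with the standard intrinsics.

    #include <pmmintrin.h>  // _MM_SET_DENORMALS_ZERO_MODE (SSE3)
    #include <xmmintrin.h>  // _MM_SET_FLUSH_ZERO_MODE

    // Enable FTZ (denormal results are flushed to zero) and DAZ (denormal
    // inputs are treated as zero) for the calling thread.
    void enable_ftz_daz() {
      _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
      _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    }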
Return the Device of a TensorList, if the list is non-empty and the first Tensor is defined.
(This function implicitly assumes that all tensors in the list have the same device.)
Definition at line 28 of file DeviceGuard.h.
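A minimal usage sketch of device_of(TensorList); the helper name and the CPU fallback are assumptions for illustration, not part of ATen:

    #include <ATen/ATen.h>

    // Return the device shared by the tensors in the list, or fall back to CPU
    // when the list is empty or its first tensor is undefined.
    at::Device device_or_cpu(at::TensorList tensors) {
      if (auto device = at::device_of(tensors)) {
        return *device;
      }
      return at::Device(at::kCPU);
    }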
Return the Type object corresponding to this Tensor, which we can use to perform dynamic dispatch to operators.
This method is NOT intended to be used by end-users; it is purely an implementation detail.
NOTE: We also check at::NonVariableTypeMode, and if it is enabled we always return the non-Variable type from this function. See NOTE [ Treating Variables as non-Variables in type dispatch ].
Definition at line 176 of file LegacyTypeDispatch.h.
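For illustration only, a sketch that inspects the dispatch Type selected for a tensor; this machinery is an implementation detail, and the accessors used below are simply the ones listed in the tables above:

    #include <ATen/ATen.h>
    #include <iostream>

    // Inspect the dispatch Type that getType() selects for a tensor. With
    // at::NonVariableTypeMode enabled, a Variable reports its non-Variable type.
    void describe_dispatch_type(const at::Tensor& t) {
      at::TypeExtendedInterface& type = at::getType(t);
      std::cout << type.toString()
                << " backend=" << static_cast<int>(type.backend())
                << " scalar_type=" << static_cast<int>(type.scalarType())
                << std::endl;
    }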
constexpr const char* at::CUDA_HELP
Definition at line 23 of file CUDAHooksInterface.h.
thread_local bool at::NonVariableTypeMode_enabled = false
NOTE [ Treating Variables as non-Variables in type dispatch ].
Previously, in VariableType_*.cpp (generated by gen_variable_type.py), when a function is using the 'use_derived' strategy, we call its implementation on the base non-Variable type (baseType), passing unwrapped tensors to the call so that any .type() calls in the implementation can treat the passed tensors as non-Variables and won't dispatch back to functions in VariableType.
However, after the Variable/Tensor merge, there is no concept of unwrapping a tensor anymore, and directly passing variables to the base type calls will cause the .type() dispatch in the implementation to treat the tensor as a variable, and any function dispatch based on .type() will dispatch back to VariableType, which is not what we want.
The solution to the above problem is to add at::NonVariableTypeMode, which when enabled will cause legacyTensorType() and getType() to always return non-Variable type, even if the tensor being called on is a variable.
TODO: Since torch::NoGradGuard serves the same purpose in libtorch, we should merge these two thread-local guards.
In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting, thread_local is not supported. In that case, we don't provide at::NonVariableTypeMode.
Definition at line 31 of file LegacyTypeDispatch.cpp.
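A minimal sketch of how such a guard might look, assuming the thread-local flag above is declared in a visible header (e.g. LegacyTypeDispatch.h); ATen may provide its own guard for this, so treat the snippet as illustrative rather than the library's API:

    #include <ATen/core/LegacyTypeDispatch.h>  // assumed to declare the thread-local flag

    // Hypothetical RAII guard: treat tensors as non-Variables within a scope by
    // toggling at::NonVariableTypeMode_enabled, then restore the previous value.
    struct NonVariableTypeModeGuard {
      bool prev;
      NonVariableTypeModeGuard() : prev(at::NonVariableTypeMode_enabled) {
        at::NonVariableTypeMode_enabled = true;
      }
      ~NonVariableTypeModeGuard() {
        at::NonVariableTypeMode_enabled = prev;
      }
    };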