Caffe2 - C++ API
A deep learning, cross-platform ML framework
at Namespace Reference

Flush-To-Zero and Denormals-Are-Zero mode. More...

Data Structures

struct  AccumulateType
 
struct  AccumulateType< char, false >
 
struct  AccumulateType< char, true >
 
struct  AccumulateType< double, false >
 
struct  AccumulateType< double, true >
 
struct  AccumulateType< float, false >
 
struct  AccumulateType< float, true >
 
struct  AccumulateType< Half, true >
 
struct  AccumulateType< int16_t, false >
 
struct  AccumulateType< int16_t, true >
 
struct  AccumulateType< int32_t, false >
 
struct  AccumulateType< int32_t, true >
 
struct  AccumulateType< int64_t, false >
 
struct  AccumulateType< int64_t, true >
 
struct  AccumulateType< int8_t, false >
 
struct  AccumulateType< int8_t, true >
 
struct  AccumulateType< uint8_t, false >
 
struct  AccumulateType< uint8_t, true >
 
struct  ATenDLMTensor
 
struct  AutoNonVariableTypeMode
 
class  BaseContext
 Virtual interface for the Context class in Caffe2. More...
 
struct  CAFFE2_API
 
struct  ComplexHooks
 
struct  ComplexHooksArgs
 
struct  ComplexHooksInterface
 
class  Context
 
struct  CPUComplexFloatType
 
struct  CPUTypeDefault
 
struct  CUDAHooksArgs
 
struct  CUDAHooksInterface
 
struct  CUDATypeDefault
 
struct  DefaultPtrTraits
 
struct  DimCounter
 
struct  FormatGuard
 
struct  Generator
 
struct  HIPHooksArgs
 
struct  HIPHooksInterface
 
struct  LegacyDeviceTypeInit
 
struct  LegacyDeviceTypeInitArgs
 
struct  LegacyDeviceTypeInitInterface
 
class  LegacyTHDispatch
 
struct  LegacyTHDispatcher
 
struct  LegacyTHDispatcherDeleter
 
struct  LegacyTypeDeleter
 
class  LegacyTypeDispatch
 
class  MatrixRef
 MatrixRef - Like an ArrayRef, but with an extra recorded stride so that we can easily view it as a multidimensional array. More...
 
struct  NonVariableTypeMode
 
struct  OperandInfo
 
class  PackedTensorAccessor
 
class  PackedTensorAccessor< T, 1, PtrTraits, index_t >
 
class  PackedTensorAccessorBase
 
struct  Range
 
struct  SparseTensorImpl
 
struct  SparseTensorRef
 
struct  SplitUntil32Bit
 A container-like struct that acts as if it contains splits of a TensorIterator that can use 32-bit indexing. More...
 
struct  strided_tensor_iter
 
struct  strided_tensor_iter_fixed
 
class  Tensor
 
class  TensorAccessor
 
class  TensorAccessor< T, 1, PtrTraits, index_t >
 
class  TensorAccessorBase
 
struct  TensorArg
 
struct  TensorGeometry
 
struct  TensorGeometryArg
 
struct  TensorIterator
 
struct  Type
 
struct  TypeDefault
 
struct  TypeExtendedInterface
 
struct  UndefinedType
 
struct  VariableHooksArgs
 
struct  VariableHooksInterface
 
struct  WeakTensor
 

Typedefs

template<typename T , bool is_cuda>
using acc_type = typename AccumulateType< T, is_cuda >::type
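
A minimal sketch (assuming the ATen header <ATen/AccumulateType.h>; stable_sum is a hypothetical helper): acc_type selects a wider type for accumulation, e.g. float accumulates as double on the CPU.

    #include <ATen/AccumulateType.h>
    #include <cstdint>

    template <typename scalar_t>
    scalar_t stable_sum(const scalar_t* data, int64_t n) {
      // acc_type<float, /*is_cuda=*/false> is double, so float inputs
      // accumulate at higher precision on the CPU.
      using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/false>;
      accscalar_t sum = 0;
      for (int64_t i = 0; i < n; ++i) {
        sum += static_cast<accscalar_t>(data[i]);
      }
      return static_cast<scalar_t>(sum);
    }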
 
using DimVector = SmallVector< int64_t, 5 >
 A container for sizes or strides.
 
using TensorList = ArrayRef< Tensor >
 
using DimMask = TensorIterator::DimMask
 
using PtrVector = TensorIterator::PtrVector
 
using loop_t = TensorIterator::loop_t
 
using loop2d_t = TensorIterator::loop2d_t
 
using CheckedFrom = const char *
 
using DataType = caffe2::TypeIdentifier
 

Enumerations

enum  TypeID {
  CPUBool, CPUByte, CPUChar, CPUDouble,
  CPUFloat, CPUInt, CPULong, CPUShort,
  CPUHalf, SparseCPUBool, SparseCPUByte, SparseCPUChar,
  SparseCPUDouble, SparseCPUFloat, SparseCPUInt, SparseCPULong,
  SparseCPUShort, CUDABool, CUDAByte, CUDAChar,
  CUDADouble, CUDAFloat, CUDAInt, CUDALong,
  CUDAShort, CUDAHalf, SparseCUDABool, SparseCUDAByte,
  SparseCUDAChar, SparseCUDADouble, SparseCUDAFloat, SparseCUDAInt,
  SparseCUDALong, SparseCUDAShort, MSNPUBool, MSNPUByte,
  MSNPUChar, MSNPUDouble, MSNPUFloat, MSNPUInt,
  MSNPULong, MSNPUShort, MSNPUHalf, XLABool,
  XLAByte, XLAChar, XLADouble, XLAFloat,
  XLAInt, XLALong, XLAShort, XLAHalf,
  CPUComplexFloat, CPUComplexDouble, CUDAComplexFloat, CUDAComplexDouble,
  Undefined, NumOptions
}
 
enum  MemOverlap { NO, YES, TOO_HARD }
 

Functions

Context & globalContext ()
 
TypeExtendedInterface & getType (TensorOptions options)
 
TypeExtendedInterface & getType (const TensorImpl *impl)
 
TypeExtendedInterface & getType (const Tensor &t)
 
LegacyTHDispatcher & getLegacyTHDispatcher (TensorOptions options)
 
LegacyTHDispatcher & getLegacyTHDispatcher (const TensorImpl *impl)
 
Allocator * getCPUAllocator ()
 
 REGISTER_LEGACY_TYPE_INIT (LegacyDeviceTypeInit)
 
CAFFE2_API LegacyTHDispatcher & getLegacyTHDispatcher (const Tensor &)
 
 C10_DEFINE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device)
 
 C10_DECLARE_TYPED_REGISTRY (ContextRegistry, at::DeviceType, at::BaseContext, std::unique_ptr, at::Device)
 
std::unique_ptr< at::BaseContext > CreateContext (const at::Device &device)
 
std::ios_base & defaultfloat (std::ios_base &__base)
 
std::ostream & operator<< (std::ostream &out, const Type &t)
 
void __printTensor (std::ostream &stream, Tensor &self, int64_t linesize)
 
std::ostream & print (std::ostream &stream, const Tensor &tensor_, int64_t linesize)
 
 C10_DEFINE_REGISTRY (LegacyDeviceTypeInitRegistry, LegacyDeviceTypeInitInterface, LegacyDeviceTypeInitArgs)
 
 C10_DECLARE_REGISTRY (LegacyDeviceTypeInitRegistry, LegacyDeviceTypeInitInterface, LegacyDeviceTypeInitArgs)
 
CAFFE2_API const LegacyDeviceTypeInitInterface & getLegacyDeviceTypeInit ()
 
LegacyTypeDispatch & globalLegacyTypeDispatch ()
 
Type & legacyTensorType (const TensorImpl &tensor)
 Return the Type object corresponding to this Tensor, which we can use to do dynamic dispatch to operators from. More...
 
void initializeLegacyTypeDispatchFor (const TensorImpl &tensor)
 
std::ostream & operator<< (std::ostream &out, const Range &range)
 
int64_t get_device (Tensor self)
 
bool is_cuda (Tensor self)
 
bool is_hip (Tensor self)
 
bool is_sparse (Tensor self)
 
 C10_DECLARE_REGISTRY (VariableHooksRegistry, VariableHooksInterface, VariableHooksArgs)
 
template<typename T >
std::pair< int64_t, int64_t > collapse_dims (T *sizes, T *strides, int64_t dims, const int excludeDim=-1)
 
Tensor sort_strides (Tensor &tensor_)
 
bool _all_equal_numel (at::ArrayRef< Tensor > tensors)
 
std::string _all_equal_numel_error (at::ArrayRef< Tensor > tensors)
 
bool _apply_preamble (ArrayRef< Tensor > tensors)
 
int64_t _max_dim_tensors (ArrayRef< Tensor > tensors)
 
void iterate (int64_t size)
 
template<typename Arg , typename... Args>
void iterate (int64_t size, Arg &iter, Args &...iter_tail)
 
bool iterate_continue ()
 
template<typename Arg , typename... Args>
bool iterate_continue (Arg &iter, Args &...iter_tail)
 
int64_t max_iterate_size ()
 
template<typename Arg , typename... Args>
int64_t max_iterate_size (Arg &iter, Args &...iter_tail)
 
void iterate_overflow ()
 
template<typename Arg , typename... Args>
void iterate_overflow (Arg &iter, Args &...iter_tail)
 
void forward (int64_t offset)
 
template<typename Arg , typename... Args>
void forward (int64_t offset, Arg &iter, Args &...iter_tail)
 
int64_t max_dim ()
 
template<typename Arg , typename... Args>
int64_t max_dim (Arg &iter, Args &...iter_tail)
 
void apply_op ()
 
template<typename Op , typename... Args>
void apply_op (int64_t numel, int64_t offset, const Op &op, Args...iters)
 
void apply_kernel ()
 
template<typename Op , typename... Args>
void apply_kernel (int64_t numel, int64_t offset, const Op &op, Args...iters)
 
template<typename scalar1 , typename scalar2 , typename Op >
void CPU_tensor_parallel_kernel_apply2 (Tensor tensor1, Tensor tensor2, const Op op)
 
template<typename scalar1 , typename Op >
void CPU_tensor_apply1 (Tensor tensor1, const Op op)
 
template<typename scalar1 , typename scalar2 , typename Op >
void CPU_tensor_apply2 (Tensor tensor1, Tensor tensor2, const Op op)
 
template<typename scalar1 , typename scalar2 , typename scalar3 , typename Op >
void CPU_tensor_apply3 (Tensor tensor1, Tensor tensor2, Tensor tensor3, const Op op)
 
template<typename scalar1 , typename scalar2 , typename scalar3 , typename scalar4 , typename Op >
void CPU_tensor_apply4 (Tensor tensor1, Tensor tensor2, Tensor tensor3, Tensor tensor4, const Op op)
 
template<typename scalar1 , typename Op >
void CPU_tensor_parallel_apply1 (Tensor tensor1, const Op op, int64_t grain_size=internal::GRAIN_SIZE)
 
template<typename scalar1 , typename scalar2 , typename Op >
void CPU_tensor_parallel_apply2 (Tensor tensor1, Tensor tensor2, const Op op, int64_t grain_size=internal::GRAIN_SIZE)
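
A hedged sketch of the apply family (assuming <ATen/CPUApplyUtils.h>; square_into is hypothetical): the op is invoked once per element, receiving a reference into each tensor. The tensors must have the same number of elements.

    #include <ATen/ATen.h>
    #include <ATen/CPUApplyUtils.h>

    void square_into(at::Tensor dst, at::Tensor src) {
      // d and s reference the same logical position in dst and src.
      at::CPU_tensor_apply2<float, float>(
          dst, src, [](float& d, float& s) { d = s * s; });
    }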
 
std::atomic< int > num_threads (-1)
 
void set_num_threads (int num_threads_)
 
int get_num_threads ()
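
Usage sketch (assuming <ATen/Parallel.h>): set_num_threads sizes the intra-op thread pool used by parallel_for and parallel_reduce below.

    #include <ATen/Parallel.h>

    void configure_threads() {
      at::set_num_threads(4);
      int n = at::get_num_threads();  // 4, subject to backend limits
      (void)n;
    }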
 
 C10_DECLARE_REGISTRY (ComplexHooksRegistry, ComplexHooksInterface, ComplexHooksArgs)
 
 C10_DECLARE_REGISTRY (CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs)
 
 C10_DECLARE_REGISTRY (HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs)
 
optional< Device > device_of (Tensor t)
 Return the Device of a Tensor, if the Tensor is defined.
 
optional< Device > device_of (TensorList t)
 Return the Device of a TensorList, if the list is non-empty and the first Tensor is defined. More...
 
ScalarType toScalarType (const DLDataType &dtype)
 
void deleter (DLManagedTensor *arg)
 
DLManagedTensor * toDLPack (const Tensor &src)
 
Tensor fromDLPack (const DLManagedTensor *src)
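
A round-trip sketch (assuming <ATen/DLConvertor.h>): toDLPack exports a tensor in the DLPack exchange format without copying, and fromDLPack wraps the handle back into a Tensor that shares the same storage.

    #include <ATen/ATen.h>
    #include <ATen/DLConvertor.h>

    at::Tensor roundtrip(const at::Tensor& t) {
      DLManagedTensor* managed = at::toDLPack(t);  // caller owns the handle
      at::Tensor back = at::fromDLPack(managed);   // shares storage; takes
                                                   // ownership of the handle
      return back;
    }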
 
std::vector< int64_t > infer_size (IntArrayRef a, IntArrayRef b)
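
A small example: infer_size applies NumPy-style broadcasting, aligning the two size lists from the right and broadcasting dimensions of size 1.

    #include <ATen/ExpandUtils.h>
    #include <vector>

    std::vector<int64_t> broadcast_shape_demo() {
      // {2, 1, 4} vs {3, 1}: align from the right, broadcast the 1s.
      return at::infer_size({2, 1, 4}, {3, 1});  // -> {2, 3, 4}
    }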
 
std::tuple< std::vector< int64_t >, std::vector< int64_t > > inferExpandGeometry (IntArrayRef tensor_sizes, IntArrayRef tensor_strides, IntArrayRef sizes)
 
void check_defined (std::initializer_list< std::reference_wrapper< const Tensor >> tensors, const char *api_name)
 
std::tuple< Tensor > expand_inplace (const Tensor &tensor, const Tensor &to_expand)
 
std::tuple< Tensor > expand_inplace (const Tensor &tensor, const Tensor &to_expand, const char *api_name)
 
std::tuple< Tensor, Tensor > expand_inplace (const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2)
 
std::tuple< Tensor, Tensor > expand_inplace (const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name)
 
std::tuple< Tensor, Tensor > expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2)
 
std::tuple< Tensor, Tensor > expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name)
 
std::tuple< Tensor, Tensor, Tensor > expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const Tensor &to_expand3)
 
std::tuple< Tensor, Tensor, Tensor > expand_outplace (const Tensor &to_expand1, const Tensor &to_expand2, const Tensor &to_expand3, const char *api_name)
 
std::tuple< Tensor > expand_size (const Tensor &to_expand, IntArrayRef sizes)
 
std::tuple< Tensor > expand_size (const Tensor &to_expand, IntArrayRef sizes, const char *api_name)
 
std::vector< Tensor > expand_outplace (TensorList to_expand)
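
A broadcasting sketch built on these helpers (assuming <ATen/ExpandUtils.h>; the api_name overloads tag error messages with the caller's name):

    #include <ATen/ATen.h>
    #include <ATen/ExpandUtils.h>
    #include <tuple>

    void broadcast_pair() {
      at::Tensor a = at::rand({2, 1, 4});
      at::Tensor b = at::rand({3, 1});
      at::Tensor ea, eb;
      // Both outputs have the common broadcast shape {2, 3, 4}.
      std::tie(ea, eb) = at::expand_outplace(a, b, "broadcast_pair");
    }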
 
TensorOptions initialTensorOptions ()
 
LegacyTHDispatch & globalLegacyTHDispatch ()
 
MemOverlap has_internal_overlap (const Tensor &tensor)
 
MemOverlap has_internal_overlap (TensorImpl *t)
 
void assert_no_internal_overlap (const Tensor &t, std::string op)
 
void assert_no_internal_overlap (TensorImpl *t, std::string op)
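
A sketch (assuming <ATen/MemoryOverlap.h>): tensors whose strides make distinct indices alias the same element (e.g. produced by expand) are unsafe targets for in-place writes, and these helpers guard against that.

    #include <ATen/ATen.h>
    #include <ATen/MemoryOverlap.h>

    void write_guard_demo() {
      at::Tensor ok = at::rand({3, 4});
      at::assert_no_internal_overlap(ok, "write_guard_demo");  // passes

      at::Tensor aliased = at::rand({1, 4}).expand({3, 4});    // stride-0 rows
      // at::assert_no_internal_overlap(aliased, "write_guard_demo");
      // would throw: distinct indices alias the same elements.
    }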
 
template<typename T , typename std::enable_if< std::is_integral< T >::value, int >::type = 0>
bool _isnan (T val)
 
int64_t divup (int64_t x, int64_t y)
 
int get_max_threads ()
 
int get_thread_num ()
 
bool in_parallel_region ()
 
template<class F >
void parallel_for (const int64_t begin, const int64_t end, const int64_t grain_size, const F &f)
 
template<class scalar_t , class F , class SF >
scalar_t parallel_reduce (const int64_t begin, const int64_t end, const int64_t grain_size, const scalar_t ident, const F f, const SF sf)
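
A reduction sketch (assuming <ATen/Parallel.h>; parallel_sum is a hypothetical helper): f folds a sub-range starting from ident, sf combines partial results across threads, and grain_size bounds how finely the range is split.

    #include <ATen/Parallel.h>
    #include <functional>
    #include <vector>

    double parallel_sum(const std::vector<double>& v) {
      return at::parallel_reduce(
          0, static_cast<int64_t>(v.size()), /*grain_size=*/2048,
          /*ident=*/0.0,
          [&](int64_t begin, int64_t end, double acc) {
            for (int64_t i = begin; i < end; ++i) acc += v[i];
            return acc;  // partial sum for this sub-range
          },
          std::plus<double>());  // combines partial sums
    }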
 
struct {
Generator (Context *context)
 
virtual CAFFE2_API uint64_t seed () override
 
virtual CAFFE2_API uint64_t initialSeed () override
 
virtual CAFFE2_API void * unsafeGetTH () override
 
struct {
Dispatcher ()
 
void register_cpu_types (Context *context)
 
void register_cuda_types (Context *context)
 
struct {
Type ()
 
virtual ScalarType scalarType () const override
 
virtual caffe2::TypeMeta typeMeta () const override
 
virtual Backend backend () const override
 
virtual const char * toString () const override
 
virtual TypeID ID () const override
 
struct at::CAFFE2_API get_function (const std::string &schema)
 
Allocator * allocator () const override
 
Device getDeviceFromPtr (void *data) const override
 
std::unique_ptr< Generator > generator () const override
 
std::ostream & operator<< (std::ostream &out, TensorGeometryArg t)
 
void checkDim (CheckedFrom c, const TensorGeometryArg &t, int64_t dim)
 
void checkDimRange (CheckedFrom c, const TensorGeometryArg &t, int64_t dim_start, int64_t dim_end)
 
void checkContiguous (CheckedFrom c, const TensorGeometryArg &t)
 
void checkAllContiguous (CheckedFrom c, at::ArrayRef< TensorArg > ts)
 
void checkSize (CheckedFrom c, const TensorGeometryArg &t, IntArrayRef sizes)
 
void checkSize (CheckedFrom c, const TensorGeometryArg &t, int64_t dim, int64_t size)
 
void checkAllSame (CheckedFrom c, ArrayRef< TensorArg > tensors, void(*fn)(CheckedFrom, const TensorArg &, const TensorArg &))
 
void checkSameSize (CheckedFrom c, const TensorArg &t1, const TensorArg &t2)
 
void checkAllSameSize (CheckedFrom c, ArrayRef< TensorArg > tensors)
 
void checkNumel (CheckedFrom c, const TensorGeometryArg &t, int64_t numel)
 
void checkSameNumel (CheckedFrom c, const TensorArg &t1, const TensorArg &t2)
 
void checkAllSameNumel (CheckedFrom c, ArrayRef< TensorArg > tensors)
 
void checkSameGPU (CheckedFrom c, const TensorArg &t1, const TensorArg &t2)
 
void checkAllSameGPU (CheckedFrom c, ArrayRef< TensorArg > tensors)
 
void checkSameType (CheckedFrom c, const TensorArg &t1, const TensorArg &t2)
 
void checkScalarType (CheckedFrom c, const TensorArg &t, ScalarType ty)
 
void checkScalarTypes (CheckedFrom c, const TensorArg &t, at::ArrayRef< ScalarType > l)
 
void checkAllSameType (CheckedFrom c, ArrayRef< TensorArg > tensors)
 
void checkSameDim (CheckedFrom c, const TensorGeometryArg &t1, const TensorGeometryArg &t2)
 
void checkDefined (CheckedFrom c, const TensorArg &t)
 
void checkAllDefined (CheckedFrom c, ArrayRef< TensorArg > ts)
 
void checkBackend (CheckedFrom c, const Tensor &t, Backend backend)
 
void checkBackend (CheckedFrom c, ArrayRef< Tensor > tensors, at::Backend backend)
 
void * maybe_data_ptr (const Tensor &tensor)
 
void * maybe_data_ptr (const TensorArg &tensor)
 
bool geometry_is_contiguous (IntArrayRef sizes, IntArrayRef strides)
 
CAFFE2_API void checkSameNumel (CheckedFrom c, const TensorGeometryArg &t1, const TensorGeometryArg &t2)
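
A hedged sketch of how these checks are typically composed at the top of a kernel (assuming <ATen/TensorUtils.h>; my_op_check and the expected shapes are hypothetical):

    #include <ATen/ATen.h>
    #include <ATen/TensorUtils.h>

    void my_op_check(const at::Tensor& input, const at::Tensor& weight) {
      at::CheckedFrom c = "my_op";  // name reported in error messages
      at::TensorArg input_arg{input, "input", 1};
      at::TensorArg weight_arg{weight, "weight", 2};
      at::checkAllDefined(c, {input_arg, weight_arg});
      at::checkDim(c, input_arg, 4);  // e.g. expect NCHW input
      at::checkSameType(c, input_arg, weight_arg);
    }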
 
int _crash_if_asan (int arg)
 
template<size_t N>
std::array< int64_t, N > check_intlist (ArrayRef< int64_t > list, const char *name, int pos, ArrayRef< int64_t > def={})
 
int64_t sum_intlist (ArrayRef< int64_t > list)
 
int64_t prod_intlist (ArrayRef< int64_t > list)
 
 REGISTER_CONTEXT (DeviceType::CPU, caffe2::CPUContext)
 
 REGISTER_COPY_BYTES_FUNCTION (DeviceType::CPU, DeviceType::CPU, caffe2::CopyBytesWrapper)
 
 REGISTER_CONTEXT (DeviceType::IDEEP, caffe2::IDEEPContext)
 
 REGISTER_COPY_BYTES_FUNCTION (DeviceType::IDEEP, DeviceType::CPU, CopyBytesWrapper)
 
 REGISTER_COPY_BYTES_FUNCTION (DeviceType::CPU, DeviceType::IDEEP, CopyBytesWrapper)
 
 REGISTER_COPY_BYTES_FUNCTION (DeviceType::IDEEP, DeviceType::IDEEP, CopyBytesWrapper)
 
 REGISTER_COMPLEX_HOOKS (ComplexHooks)
 

Variables

thread_local bool NonVariableTypeMode_enabled = false
 NOTE [ Treating Variables as non-Variables in type dispatch ]. More...
 
constexpr const char * CUDA_HELP
 
virtual CAFFE2_API {name}Generator()
 
Context * context
 
struct CAFFE2_API at::LegacyTHDispatcher Generator
 
class CAFFE2_API at::Tensor Dispatcher
 
at::TypeExtendedInterface Type
 
constexpr size_t dim_bitset_size = 64
 

Detailed Description

Flush-To-Zero and Denormals-Are-Zero mode.

Contains the implementation of parallel reductions in TensorIterator.

Flush-To-Zero (FTZ) and Denormals-Are-Zero (DAZ) are modes that bypass IEEE 754 handling of denormal floating-point numbers on x86-64 and some x86 CPUs. They reduce precision for values near zero in exchange for increased performance.

See https://software.intel.com/en-us/articles/x87-and-sse-floating-point-assists-in-ia-32-flush-to-zero-ftz-and-denormals-are-zero-daz
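
For reference, a minimal sketch of how FTZ and DAZ are enabled with SSE intrinsics (x86 only; this illustrates the mechanism rather than the exact code used here):

    #include <pmmintrin.h>  // _MM_SET_DENORMALS_ZERO_MODE (SSE3)
    #include <xmmintrin.h>  // _MM_SET_FLUSH_ZERO_MODE

    void enable_ftz_daz() {
      _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);          // denormal results -> 0
      _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);  // denormal inputs  -> 0
    }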

Function Documentation

optional< Device > at::device_of (TensorList t)
inline

Return the Device of a TensorList, if the list is non-empty and the first Tensor is defined.

(This function implicitly assumes that all tensors in the list have the same device.)

Definition at line 28 of file DeviceGuard.h.
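
Usage sketch (with_list_device is a hypothetical helper):

    #include <ATen/ATen.h>
    #include <ATen/DeviceGuard.h>

    void with_list_device(at::TensorList tensors) {
      if (auto dev = at::device_of(tensors)) {
        bool on_gpu = dev->is_cuda();  // branch on the shared device
        (void)on_gpu;
      }
    }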

Type & at::legacyTensorType (const TensorImpl &tensor)
inline

Return the Type object corresponding to this Tensor, which we can use to do dynamic dispatch to operators from.

This method is NOT intended to be used by end-users; it is purely an implementation detail.

NOTE: We also check at::NonVariableTypeMode, and if it's enabled we always return non-Variable type in this function. See NOTE [ Treating Variables as non-Variables in type dispatch ]

Definition at line 176 of file LegacyTypeDispatch.h.

Variable Documentation

constexpr const char* at::CUDA_HELP
Initial value:
=
"PyTorch splits its backend into two shared libraries: a CPU library "
"and a CUDA library; this error has occurred because you are trying "
"to use some CUDA functionality, but the CUDA library has not been "
"loaded by the dynamic linker for some reason. The CUDA library MUST "
"be loaded, EVEN IF you don't directly use any symbols from the CUDA library! "
"One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many "
"dynamic linkers will delete dynamic library dependencies if you don't "
"depend on any of their symbols. You can check if this has occurred by "
"using ldd on your binary to see if there is a dependency on *_cuda.so "
"library."

Definition at line 23 of file CUDAHooksInterface.h.

thread_local bool at::NonVariableTypeMode_enabled = false

NOTE [ Treating Variables as non-Variables in type dispatch ].

Previously, in VariableType_*.cpp (generated by gen_variable_type.py), when a function used the 'use_derived' strategy, we called its implementation on the base non-Variable type (baseType), passing unwrapped tensors to the call so that any .type() calls in the implementation would treat the passed tensors as non-Variables and not dispatch back to functions in VariableType.

However, after the Variable/Tensor merge, there is no concept of unwrapping a tensor anymore, and directly passing variables to the base type calls will cause the .type() dispatch in the implementation to treat the tensor as a variable, and any function dispatch based on .type() will dispatch back to VariableType, which is not what we want.

The solution to the above problem is to add at::NonVariableTypeMode, which when enabled will cause legacyTensorType() and getType() to always return non-Variable type, even if the tensor being called on is a variable.

TODO: Since torch::NoGradGuard serves the same purpose in libtorch, we should merge these two thread-local guards.

Note: in the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting, thread_local is not supported, so in that case we don't provide at::NonVariableTypeMode.

Definition at line 31 of file LegacyTypeDispatch.cpp.
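
A usage sketch with the RAII guard at::AutoNonVariableTypeMode listed above (header assumed to be ATen/core/LegacyTypeDispatch.h):

    #include <ATen/core/LegacyTypeDispatch.h>

    void run_as_non_variable() {
      at::AutoNonVariableTypeMode non_var_guard(/*enabled=*/true);
      // Within this scope legacyTensorType() and getType() return the
      // non-Variable Type, even when the tensor is a Variable.
    }  // previous mode restored when the guard is destroyed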