Data Structures | |
struct | Builder |
Public Types | |
using | DimMask = std::bitset< 64 > |
using | PtrVector = SmallVector< char *, 4 > |
using | loop_t = std::function< void(int ntensors, char **data, const int64_t *strides, int64_t size)> |
using | loop2d_t = std::function< void(int ntensors, char **data, const int64_t *strides, int64_t size0, int64_t size1)> |
using | loop_subiter_t = std::function< void(TensorIterator &subiter)> |
Public Member Functions | |
void | foreach_reduced_elt (const loop_subiter_t &loop, bool parallelize=true) |
int | ndim () const |
IntArrayRef | shape () const |
int64_t | numel () const |
int | ntensors () const |
int64_t | num_output_elements () const |
Number of elements in the output operand. More... | |
int | num_reduce_dims () const |
Number of reduced dimensions in a reduction operation. | |
bool | is_trivial_1d () const |
1-dimensional iteration and no buffering or type conversion | |
bool | is_dim_reduced (int dim) const |
IntArrayRef | strides (int arg) const |
Accessors for each operand. | |
void * | data_ptr (int arg) const |
const Type & | type (int arg=0) const |
ScalarType | dtype (int arg=0) const |
DeviceType | device_type (int arg=0) const |
int64_t | element_size (int arg) const |
bool | is_scalar (int arg) const |
bool | is_cpu_scalar (int arg) const |
const Tensor & | tensor (int arg) const |
Tensor & | tensor (int arg) |
Tensor | output (int arg=0) const |
void | remove_operand (int arg) |
Removes an operand from this iterator. | |
void | remove_dimension (int dim) |
Removes a dimension from this iterator. | |
void | narrow (int dim, int64_t start, int64_t size) |
Shrinks an iterated dimension. | |
void | select_all_keeping_dim (int start_dim, IntArrayRef starts) |
Narrows every dim after and including start_dim to size one. | |
void | replace_operand (int arg, void *data, IntArrayRef stride) |
Replaces the data pointer and strides for the operand at index arg | |
std::unique_ptr< TensorIterator > | split (int dim) |
Splits this TensorIterator into two iterators. More... | |
int | get_dim_to_split () const |
Returns the dimension with the largest extent: (size[dim]-1) * stride[dim]. | |
template<typename T > | |
T | scalar_value (int arg) |
void | for_each (const loop_t &loop) |
void | for_each (const loop2d_t &loop) |
void | parallel_reduce (const loop2d_t &loop) |
void | serial_for_each (const loop_t &loop, Range range) const |
void | serial_for_each (const loop2d_t &loop, Range range) const |
DimVector | compatible_stride (int element_size) const |
Create a strides array for a Tensor with shape of this iterator. More... | |
DimVector | invert_perm (IntArrayRef input) const |
Inverts the re-ordering done by reorder_dimensions. More... | |
DimVector | get_dim_strides (int dim) const |
Helper functions for CPU iteration. | |
DimVector | get_strides () const |
DimVector | get_inner_strides () const |
PtrVector | get_data_ptrs (ArrayRef< char * > base, IntArrayRef counter) const |
PtrVector | get_base_ptrs () const |
bool | can_use_32bit_indexing () const |
True if the stride computation can use 32-bit arithmetic. Used by GPU kernels. | |
SplitUntil32Bit | with_32bit_indexing () const |
An "iterable" object that recursively splits this iterator into sub-iterators that can use 32-bit indexing. More... | |
bool | should_accumulate () const |
If the kernel should accumulate into the output. More... | |
bool | is_final_output () const |
Whether this iterator produces the actual output, as opposed to something that will be accumulated further. More... | |
Static Public Member Functions | |
static std::unique_ptr< TensorIterator > | binary_op (Tensor &out, const Tensor &a, const Tensor &b) |
static std::unique_ptr< TensorIterator > | reduce_op (Tensor &out, const Tensor &a) |
Protected Member Functions | |
void | mark_outputs () |
void | compute_shape () |
void | compute_strides () |
void | reorder_dimensions () |
void | permute_dimensions (IntArrayRef perm) |
void | compute_types () |
Type & | compute_common_type () |
void | allocate_outputs () |
void | coalesce_dimensions () |
Protected Attributes | |
DimVector | shape_ |
DimVector | perm_ |
SmallVector< OperandInfo, 4 > | operands_ |
int | num_outputs_ = 0 |
bool | has_coalesced_dimensions_ = false |
bool | accumulate_ = false |
bool | resize_outputs_ = true |
bool | is_reduction_ = false |
bool | compute_common_dtype_ = true |
bool | allow_cpu_scalars_ = false |
bool | promote_gpu_output_dtypes_ = false |
bool | final_output_ = true |
Friends | |
struct | Builder |
Definition at line 101 of file TensorIterator.h.
DimVector at::TensorIterator::compatible_stride | ( | int | element_size | ) | const |
Create a strides array for a Tensor with shape of this iterator.
The parameter element_size specifies the size of the Tensor's data type in bytes (e.g. 4 for float).
Definition at line 160 of file TensorIterator.cpp.
DimVector at::TensorIterator::invert_perm | ( | IntArrayRef | input | ) | const |
Inverts the re-ordering done by reorder_dimensions.
This can only be called before coalesce_dimensions() is called.
Definition at line 170 of file TensorIterator.cpp.
bool at::TensorIterator::is_final_output | ( | ) | const |
inline |
Whether this iterator produces the actual output, as opposed to something that will be accumulated further.
Only relevant for CUDA reductions.
Definition at line 230 of file TensorIterator.h.
int64_t at::TensorIterator::num_output_elements | ( | ) | const |
Number of elements in the output operand.
This is the same as numel() for operations that are not reductions.
Definition at line 317 of file TensorIterator.cpp.
bool at::TensorIterator::should_accumulate | ( | ) | const |
inline |
If the kernel should accumulate into the output.
Only relevant for CUDA reductions.
Definition at line 225 of file TensorIterator.h.
std::unique_ptr< TensorIterator > at::TensorIterator::split | ( | int | dim | ) |
Splits this TensorIterator into two iterators.
Together they iterate over the entire operation. Used by with_32bit_indexing().
Definition at line 590 of file TensorIterator.cpp.
SplitUntil32Bit at::TensorIterator::with_32bit_indexing | ( | ) | const |
An "iterable" object that recursively splits this iterator into sub-iterators that can use 32-bit indexing.
Definition at line 623 of file TensorIterator.cpp.