3 #include <c10/macros/Macros.h> 17 #if defined(__CUDACC__) || defined(__HIPCC__) 19 struct RestrictPtrTraits {
// NOTE(review): this excerpt shows only selected lines; the leading numbers are
// original file line numbers. The class header that follows this template
// parameter list, and the bodies of most members, are not visible here.
//
// Template parameters:
//   T         - element type
//   N         - rank, as a size_t
//   PtrTraits - template mapping an element type to a pointer type
//               (defaults to DefaultPtrTraits)
//   index_t   - integer type used for sizes, strides and indices
//               (defaults to int64_t)
29 template<
typename T,
size_t N,
template <
typename U>
class PtrTraits =
DefaultPtrTraits,
typename index_t = int64_t>
// PtrType is whatever pointer type PtrTraits<T> exposes.
32 typedef typename PtrTraits<T>::PtrType PtrType;
// Constructor tail. The sizes_/strides_ parameters intentionally shadow the
// members of the same name; in a mem-initializer `x_(x_)` means
// member(argument). The pointers are stored as-is (no copy of the arrays).
// The data_ parameter line is not visible here - presumably it is the first
// constructor argument; confirm against the full file.
36 const index_t* sizes_,
37 const index_t* strides_)
38 : data_(data_), sizes_(sizes_), strides_(strides_) {}
// stride(i): const accessor taking an index and returning an index_t
// (body not visible in this excerpt).
45 C10_HOST_DEVICE index_t stride(index_t i)
const {
// size(i): const accessor taking an index and returning an index_t
// (body not visible in this excerpt).
48 C10_HOST_DEVICE index_t size(index_t i)
const {
// data(): non-const overload returning PtrType (body not visible).
51 C10_HOST_DEVICE PtrType data() {
// data() const: returns PtrType by value.
// NOTE(review): the `const` on this return type is top-level on a by-value
// return, so it qualifies the returned pointer object itself, not the pointee.
54 C10_HOST_DEVICE
const PtrType data()
const {
// Pointers to the size and stride values; the pointed-to values are const.
59 const index_t* sizes_;
60 const index_t* strides_;
// NOTE(review): line-sampled excerpt; the class header, the constructor's first
// lines and all closing braces are not visible here.
//
// Template parameters: element type T, rank N, pointer-traits template
// (defaults to DefaultPtrTraits) and index type (defaults to int64_t).
67 template<
typename T,
size_t N,
template <
typename U>
class PtrTraits =
DefaultPtrTraits,
typename index_t = int64_t>
// PtrType is whatever pointer type PtrTraits<T> exposes.
70 typedef typename PtrTraits<T>::PtrType PtrType;
// Constructor parameter tail: pointers to const size and stride values
// (the rest of the constructor is not visible).
74 const index_t* sizes_,
75 const index_t* strides_)
// operator[](i): returns, by value, an accessor of rank N-1. Its data pointer
// is this->data_ advanced by strides_[0] * i, and its size/stride pointers are
// this object's pointers advanced by one element, i.e. the leading dimension
// is dropped.
78 C10_HOST_DEVICE TensorAccessor<
T, N - 1, PtrTraits, index_t> operator[](index_t i) {
79 return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i,this->sizes_+1,this->strides_+1);
// Const overload: constructs the same rank N-1 accessor from the same
// arguments; only the const-qualification of the member function and of the
// by-value return type differs.
82 C10_HOST_DEVICE
const TensorAccessor<T, N-1, PtrTraits, index_t> operator[](index_t i)
const {
83 return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i,this->sizes_+1,this->strides_+1);
// NOTE(review): line-sampled excerpt; the class header is not visible. This
// parameter list has no rank parameter and no default arguments - presumably
// it introduces a partial specialization for a fixed rank; confirm against the
// full file.
87 template<
typename T,
template <
typename U>
class PtrTraits,
typename index_t>
// PtrType is whatever pointer type PtrTraits<T> exposes.
90 typedef typename PtrTraits<T>::PtrType PtrType;
// Constructor parameter tail: pointers to const size and stride values
// (the rest of the constructor is not visible).
94 const index_t* sizes_,
95 const index_t* strides_)
// operator[](i): returns a reference to the element at offset
// strides_[0] * i from data_. No bounds check is visible on this path.
97 C10_HOST_DEVICE
T & operator[](index_t i) {
98 return this->data_[this->strides_[0]*i];
// Const overload: same element, returned as a reference to const T.
100 C10_HOST_DEVICE
const T & operator[](index_t i)
const {
101 return this->data_[this->strides_[0]*i];
// NOTE(review): line-sampled excerpt; the class header, member declarations,
// most bodies and all closing braces are not visible here.
//
// Template parameters: element type T, rank N, pointer-traits template
// (defaults to DefaultPtrTraits) and index type (defaults to int64_t).
114 template<
typename T,
size_t N,
template <
typename U>
class PtrTraits =
DefaultPtrTraits,
typename index_t = int64_t>
// PtrType is whatever pointer type PtrTraits<T> exposes.
117 typedef typename PtrTraits<T>::PtrType PtrType;
// Constructor taking sizes/strides already expressed as index_t: copies the
// first N values from each argument into this object's own sizes_/strides_
// storage, so the source arrays are not referenced after construction by
// these two statements. (std::begin on the members suggests they are arrays -
// their declarations are not visible here.)
120 const index_t* sizes_,
121 const index_t* strides_)
123 std::copy(sizes_, sizes_ + N, std::begin(this->sizes_));
124 std::copy(strides_, strides_ + N, std::begin(this->strides_));
// Converting constructor: participates in overload resolution only when
// source_index_t is exactly int64_t (enable_if + is_same). Marked C10_HOST.
128 template <typename source_index_t, class = typename std::enable_if<std::is_same<source_index_t, int64_t>::value>::type>
129 C10_HOST PackedTensorAccessorBase(
131 const source_index_t* sizes_,
132 const source_index_t* strides_)
// Element-wise copy of N sizes and strides; each assignment converts the
// int64_t source value to the element type of the member storage.
// NOTE(review): `i` is int while N is size_t, so `i < N` is a signed/unsigned
// comparison. If index_t is narrower than int64_t the assignments narrow
// without any check visible here - confirm callers validate the range.
134 for (
int i = 0; i < N; i++) {
135 this->sizes_[i] = sizes_[i];
136 this->strides_[i] = strides_[i];
// stride(i): const accessor taking an index and returning an index_t
// (body not visible in this excerpt).
140 C10_HOST_DEVICE index_t stride(index_t i)
const {
// size(i): const accessor taking an index and returning an index_t
// (body not visible in this excerpt).
143 C10_HOST_DEVICE index_t size(index_t i)
const {
// data(): non-const overload returning PtrType (body not visible).
146 C10_HOST_DEVICE PtrType data() {
// data() const: returns PtrType by value.
// NOTE(review): the `const` on this return type is top-level on a by-value
// return, so it qualifies the returned pointer object itself, not the pointee.
149 C10_HOST_DEVICE
const PtrType data()
const {
// NOTE(review): line-sampled excerpt; the class header, constructor bodies and
// all closing braces are not visible here.
//
// Template parameters: element type T, rank N, pointer-traits template
// (defaults to DefaultPtrTraits) and index type (defaults to int64_t).
158 template<
typename T,
size_t N,
template <
typename U>
class PtrTraits =
DefaultPtrTraits,
typename index_t = int64_t>
// PtrType is whatever pointer type PtrTraits<T> exposes.
161 typedef typename PtrTraits<T>::PtrType PtrType;
// Constructor parameter tail for sizes/strides given as index_t
// (the rest of the constructor is not visible).
165 const index_t* sizes_,
166 const index_t* strides_)
// Converting constructor: enabled only when source_index_t is exactly int64_t
// (enable_if + is_same). Marked C10_HOST. Body not visible in this excerpt.
170 template <typename source_index_t, class = typename std::enable_if<std::is_same<source_index_t, int64_t>::value>::type>
171 C10_HOST PackedTensorAccessor(
173 const source_index_t* sizes_,
174 const source_index_t* strides_)
// operator[](i): marked C10_DEVICE (unlike the C10_HOST_DEVICE markers seen
// elsewhere in this excerpt). Returns, by value, a TensorAccessor of rank N-1
// whose data pointer is data_ advanced by strides_[0] * i and whose
// size/stride pointers point one element past the start of this object's
// sizes_/strides_, i.e. the leading dimension is dropped.
// The returned accessor points into this object's sizes_/strides_.
177 C10_DEVICE
TensorAccessor<
T, N - 1, PtrTraits, index_t> operator[](index_t i) {
178 index_t* new_sizes = this->sizes_ + 1;
179 index_t* new_strides = this->strides_ + 1;
180 return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, new_sizes, new_strides);
// Const overload: builds the same rank N-1 accessor; the intermediate pointers
// are pointers-to-const because `this` is const here.
183 C10_DEVICE
const TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i)
const {
184 const index_t* new_sizes = this->sizes_ + 1;
185 const index_t* new_strides = this->strides_ + 1;
186 return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, new_sizes, new_strides);
// NOTE(review): line-sampled excerpt; the class header is not visible. This
// parameter list has no rank parameter and no default arguments - presumably
// it introduces a partial specialization for a fixed rank; confirm against the
// full file.
190 template<
typename T,
template <
typename U>
class PtrTraits,
typename index_t>
// PtrType is whatever pointer type PtrTraits<T> exposes.
193 typedef typename PtrTraits<T>::PtrType PtrType;
// Constructor parameter tail for sizes/strides given as index_t
// (the rest of the constructor is not visible).
196 const index_t* sizes_,
197 const index_t* strides_)
// Converting constructor: enabled only when source_index_t is exactly int64_t
// (enable_if + is_same). Marked C10_HOST. Body not visible in this excerpt.
201 template <typename source_index_t, class = typename std::enable_if<std::is_same<source_index_t, int64_t>::value>::type>
202 C10_HOST PackedTensorAccessor(
204 const source_index_t* sizes_,
205 const source_index_t* strides_)
// operator[](i): marked C10_DEVICE. Returns a reference to the element at
// offset strides_[0] * i from data_. No bounds check is visible on this path.
208 C10_DEVICE
T & operator[](index_t i) {
209 return this->data_[this->strides_[0] * i];
// Const overload: same element, returned as a reference to const T.
211 C10_DEVICE
const T& operator[](index_t i)
const {
212 return this->data_[this->strides_[0]*i];
Flush-To-Zero and Denormals-Are-Zero mode.