Caffe2 - C++ API
A deep learning, cross-platform ML framework
TensorAccessor.h
1 #pragma once
2 
#include <c10/macros/Macros.h>
#include <stdint.h>

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <type_traits>
6 
7 namespace at {
8 
9 // The PtrTraits argument to the TensorAccessor/PackedTensorAccessor
10 // is used to enable the __restrict__ keyword/modifier for the data
11 // passed to cuda.
// Default pointer traits: the data pointer is a plain (non-__restrict__)
// pointer. Used when no aliasing guarantee is being made.
// NOTE(review): the `struct DefaultPtrTraits {` opening line was missing in
// the damaged source; restored here to match the closing `};` and the usages
// of `DefaultPtrTraits` as a default template-template argument below.
template <typename T>
struct DefaultPtrTraits {
  typedef T* PtrType;
};
16 
// Traits that mark the data pointer __restrict__, promising the compiler the
// pointer does not alias others — enables better device-code optimization.
// Only defined under nvcc/hipcc: `T* __restrict__` in this position is a
// CUDA/HIP extension, not standard C++.
#if defined(__CUDACC__) || defined(__HIPCC__)
template <typename T>
struct RestrictPtrTraits {
 typedef T* __restrict__ PtrType;
};
#endif
23 
24 // TensorAccessorBase and TensorAccessor are used for both CPU and CUDA tensors.
25 // For CUDA tensors it is used in device code (only). This means that we restrict ourselves
26 // to functions and types available there (e.g. IntArrayRef isn't).
27 
28 // The PtrTraits argument is only relevant to cuda to support `__restrict__` pointers.
29 template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
31 public:
32  typedef typename PtrTraits<T>::PtrType PtrType;
33 
34  C10_HOST_DEVICE TensorAccessorBase(
35  PtrType data_,
36  const index_t* sizes_,
37  const index_t* strides_)
38  : data_(data_), sizes_(sizes_), strides_(strides_) {}
39  C10_HOST IntArrayRef sizes() const {
40  return IntArrayRef(sizes_,N);
41  }
42  C10_HOST IntArrayRef strides() const {
43  return IntArrayRef(strides_,N);
44  }
45  C10_HOST_DEVICE index_t stride(index_t i) const {
46  return strides_[i];
47  }
48  C10_HOST_DEVICE index_t size(index_t i) const {
49  return sizes_[i];
50  }
51  C10_HOST_DEVICE PtrType data() {
52  return data_;
53  }
54  C10_HOST_DEVICE const PtrType data() const {
55  return data_;
56  }
57 protected:
58  PtrType data_;
59  const index_t* sizes_;
60  const index_t* strides_;
61 };
62 
63 // The `TensorAccessor` is typically instantiated for CPU `Tensor`s using
64 // `Tensor.accessor<T, N>()`.
65 // For CUDA `Tensor`s, `PackedTensorAccessor` is used on the host and only
66 // indexing on the device uses `TensorAccessor`s.
67 template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
68 class TensorAccessor : public TensorAccessorBase<T,N,PtrTraits,index_t> {
69 public:
70  typedef typename PtrTraits<T>::PtrType PtrType;
71 
72  C10_HOST_DEVICE TensorAccessor(
73  PtrType data_,
74  const index_t* sizes_,
75  const index_t* strides_)
76  : TensorAccessorBase<T, N, PtrTraits, index_t>(data_,sizes_,strides_) {}
77 
78  C10_HOST_DEVICE TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i) {
79  return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i,this->sizes_+1,this->strides_+1);
80  }
81 
82  C10_HOST_DEVICE const TensorAccessor<T, N-1, PtrTraits, index_t> operator[](index_t i) const {
83  return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i,this->sizes_+1,this->strides_+1);
84  }
85 };
86 
87 template<typename T, template <typename U> class PtrTraits, typename index_t>
88 class TensorAccessor<T,1,PtrTraits,index_t> : public TensorAccessorBase<T,1,PtrTraits,index_t> {
89 public:
90  typedef typename PtrTraits<T>::PtrType PtrType;
91 
92  C10_HOST_DEVICE TensorAccessor(
93  PtrType data_,
94  const index_t* sizes_,
95  const index_t* strides_)
96  : TensorAccessorBase<T, 1, PtrTraits, index_t>(data_,sizes_,strides_) {}
97  C10_HOST_DEVICE T & operator[](index_t i) {
98  return this->data_[this->strides_[0]*i];
99  }
100  C10_HOST_DEVICE const T & operator[](index_t i) const {
101  return this->data_[this->strides_[0]*i];
102  }
103 };
104 
105 
// PackedTensorAccessorBase and PackedTensorAccessor are used for CUDA `Tensor`s on the host
// and are passed by value when calling kernels.
108 // In contrast to `TensorAccessor`s, they copy the strides and sizes on instantiation (on the host)
109 // in order to transfer them on the device when calling kernels.
// On the device, indexing of multidimensional tensors yields `TensorAccessor`s.
111 // Use RestrictPtrTraits as PtrTraits if you want the tensor's data pointer to be marked as __restrict__.
112 // Instantiation from data, sizes, strides is only needed on the host and std::copy isn't available
113 // on the device, so those functions are host only.
114 template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
116 public:
117  typedef typename PtrTraits<T>::PtrType PtrType;
118  C10_HOST PackedTensorAccessorBase(
119  PtrType data_,
120  const index_t* sizes_,
121  const index_t* strides_)
122  : data_(data_) {
123  std::copy(sizes_, sizes_ + N, std::begin(this->sizes_));
124  std::copy(strides_, strides_ + N, std::begin(this->strides_));
125  }
126 
127  // if index_t is not int64_t, we want to have an int64_t constructor
128  template <typename source_index_t, class = typename std::enable_if<std::is_same<source_index_t, int64_t>::value>::type>
129  C10_HOST PackedTensorAccessorBase(
130  PtrType data_,
131  const source_index_t* sizes_,
132  const source_index_t* strides_)
133  : data_(data_) {
134  for (int i = 0; i < N; i++) {
135  this->sizes_[i] = sizes_[i];
136  this->strides_[i] = strides_[i];
137  }
138  }
139 
140  C10_HOST_DEVICE index_t stride(index_t i) const {
141  return strides_[i];
142  }
143  C10_HOST_DEVICE index_t size(index_t i) const {
144  return sizes_[i];
145  }
146  C10_HOST_DEVICE PtrType data() {
147  return data_;
148  }
149  C10_HOST_DEVICE const PtrType data() const {
150  return data_;
151  }
152 protected:
153  PtrType data_;
154  index_t sizes_[N];
155  index_t strides_[N];
156 };
157 
158 template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
159 class PackedTensorAccessor : public PackedTensorAccessorBase<T,N,PtrTraits,index_t> {
160 public:
161  typedef typename PtrTraits<T>::PtrType PtrType;
162 
163  C10_HOST PackedTensorAccessor(
164  PtrType data_,
165  const index_t* sizes_,
166  const index_t* strides_)
167  : PackedTensorAccessorBase<T, N, PtrTraits, index_t>(data_, sizes_, strides_) {}
168 
169  // if index_t is not int64_t, we want to have an int64_t constructor
170  template <typename source_index_t, class = typename std::enable_if<std::is_same<source_index_t, int64_t>::value>::type>
171  C10_HOST PackedTensorAccessor(
172  PtrType data_,
173  const source_index_t* sizes_,
174  const source_index_t* strides_)
175  : PackedTensorAccessorBase<T, N, PtrTraits, index_t>(data_, sizes_, strides_) {}
176 
177  C10_DEVICE TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i) {
178  index_t* new_sizes = this->sizes_ + 1;
179  index_t* new_strides = this->strides_ + 1;
180  return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, new_sizes, new_strides);
181  }
182 
183  C10_DEVICE const TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i) const {
184  const index_t* new_sizes = this->sizes_ + 1;
185  const index_t* new_strides = this->strides_ + 1;
186  return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, new_sizes, new_strides);
187  }
188 };
189 
190 template<typename T, template <typename U> class PtrTraits, typename index_t>
191 class PackedTensorAccessor<T,1,PtrTraits,index_t> : public PackedTensorAccessorBase<T,1,PtrTraits,index_t> {
192 public:
193  typedef typename PtrTraits<T>::PtrType PtrType;
194  C10_HOST PackedTensorAccessor(
195  PtrType data_,
196  const index_t* sizes_,
197  const index_t* strides_)
198  : PackedTensorAccessorBase<T, 1, PtrTraits, index_t>(data_, sizes_, strides_) {}
199 
200  // if index_t is not int64_t, we want to have an int64_t constructor
201  template <typename source_index_t, class = typename std::enable_if<std::is_same<source_index_t, int64_t>::value>::type>
202  C10_HOST PackedTensorAccessor(
203  PtrType data_,
204  const source_index_t* sizes_,
205  const source_index_t* strides_)
206  : PackedTensorAccessorBase<T, 1, PtrTraits, index_t>(data_, sizes_, strides_) {}
207 
208  C10_DEVICE T & operator[](index_t i) {
209  return this->data_[this->strides_[0] * i];
210  }
211  C10_DEVICE const T& operator[](index_t i) const {
212  return this->data_[this->strides_[0]*i];
213  }
214 };
215 
} // namespace at