Caffe2 - C++ API
A deep learning, cross-platform ML framework
argument_spec.h
1 #pragma once
2 
3 #include <torch/csrc/autograd/variable.h>
4 #include <torch/csrc/jit/ir.h>
5 #include <ATen/core/stack.h>
6 #include <ATen/core/jit_type.h>
7 #include <torch/csrc/jit/variable_tensor_list.h>
8 #include <torch/csrc/utils/hash.h>
9 #include <iostream>
10 #include <vector>
11 
12 namespace torch {
13 namespace jit {
14 
15 // GraphExecutor creates specializations of Graphs for different
16 // dimensionalities and types of inputs.
17 
18 inline static at::Device ConvertIntToCPUOrCUDA(int device) {
19  return device < 0 ? at::kCPU : at::Device(at::DeviceType::CUDA, device);
20 }
21 struct ArgumentInfo {
22  friend struct ArgumentSpec;
23  using plain_data_type = uint32_t;
24 
25  bool isTensor() const {
26  return is_tensor_;
27  }
28  bool defined() const {
29  return defined_;
30  }
31  int device() const {
32  return device_;
33  }
34  // XXX: It is guaranteed that this will return false when called on non-tensor
35  // arguments
36  bool requires_grad() const {
37  return requires_grad_;
38  }
39  int dim() const {
40  return dim_;
41  }
42  at::ScalarType type() const {
43  return at::ScalarType(type_);
44  }
45  operator TypePtr() const {
46  if (!defined())
47  return TensorType::get();
48  return DimensionedTensorType::create(type(), ConvertIntToCPUOrCUDA(device()), dim());
49  }
50 
51  private:
52  unsigned is_tensor_ : 1;
53  unsigned defined_ : 1;
54  unsigned requires_grad_ : 1;
55  unsigned : 5;
56  unsigned dim_ : 8;
57  int device_ : 8; // NOTE: this needs to be signed because we use -1 to
58  // represent CPU
59  unsigned type_ : 8;
60 };
61 
62 static_assert(
63  std::is_pod<ArgumentInfo>::value,
64  "ArgumentInfo is to be a POD struct");
65 static_assert(
66  sizeof(ArgumentInfo) == sizeof(ArgumentInfo::plain_data_type),
67  "ArgumentInfo is expected to be a 32-bit struct");
68 
69 struct ArgumentSpec {
70  ArgumentSpec(
71  bool with_grad,
72  at::ArrayRef<IValue> inputs,
73  size_t num_flat_inputs) {
74  hash_code = num_flat_inputs;
75  args.resize(num_flat_inputs);
76  size_t offset = 0;
77  for (const auto& i : inputs) {
78  addInput(i, offset, with_grad);
79  }
80  AT_ASSERT(offset <= num_flat_inputs);
81  }
82 
83  void addInput(const IValue& input, size_t& offset, bool with_grad) {
84  auto& arg = args.at(offset);
85  // Initialize all fields to 0. This is convenient, because e.g.
86  // requires_grad() can be checked even on non-tensor arguments AND
87  // will make padding bits all 0s.
88  std::memset(&arg, 0, sizeof(ArgumentInfo));
89 
90  if (input.isTensor()) {
91  at::Tensor t = input.toTensor();
92  if ((arg.defined_ = t.defined())) {
93  arg.requires_grad_ = with_grad && autograd::Variable(t).requires_grad();
94  arg.dim_ = t.dim();
95  arg.device_ = t.is_cuda() ? t.get_device() : -1;
96  arg.type_ = static_cast<unsigned>(t.scalar_type());
97  }
98 
99  arg.is_tensor_ = true;
100  combineHash(arg);
101  offset++;
102  } else if (input.isTuple()) {
103  for (const IValue& elem : input.toTuple()->elements()) {
104  addInput(elem, offset, with_grad);
105  }
106  } else {
107  // NB: no need to set is_tensor to false, because we memset the struct to
108  // 0 above
109  combineHash(arg);
110  offset++;
111  }
112  }
113 
114  void combineHash(const ArgumentInfo& arg) {
115  ArgumentInfo::plain_data_type arg_data;
116  std::memcpy(&arg_data, &arg, sizeof(ArgumentInfo));
117  hash_code = hash_combine(hash_code, arg_data);
118  }
119 
120  // equality is fast: check the number of args and then compare the raw
121  // array data; there are no size/stride indirections
122  bool operator==(const ArgumentSpec& spec) const {
123  if (args.size() != spec.args.size())
124  return false;
125  // NB: we need to break out early when there are no elements, because
126  // passing a nullptr to memcmp is UB.
127  if (args.size() == 0)
128  return true;
129  return std::memcmp(
130  args.data(),
131  spec.args.data(),
132  args.size() * sizeof(ArgumentInfo)) == 0;
133  }
134  bool operator!=(const ArgumentSpec& spec) const {
135  return !(*this == spec);
136  }
137  size_t size() const {
138  return args.size();
139  }
140  const ArgumentInfo& at(size_t i) const {
141  return args[i];
142  }
143  size_t hashCode() const {
144  return hash_code;
145  }
146  // For every input of a given graph, returns the most detailed type that can be
147  // inferred for it based on this ArgumentSpec.
148  std::vector<TypePtr> getTypes(Graph& graph) const {
149  size_t offset = 0;
150  return fmap(
151  graph.inputs(), [&](Value* v) { return fillType(v->type(), offset); });
152  }
153 
154  private:
155  TypePtr fillType(TypePtr original, size_t& offset) const {
156  if (original->isSubtypeOf(TensorType::get())) {
157  auto& arg = args.at(offset++);
158  if (!arg.defined())
159  return AutogradZeroTensorType::get();
160  return DimensionedTensorType::create(
161  arg.type(),
162  ConvertIntToCPUOrCUDA(arg.device()),
163  arg.dim(),
164  arg.requires_grad());
165  } else if (auto tuple_type = original->cast<TupleType>()) {
166  return TupleType::create(fmap(
167  tuple_type->elements(),
168  [&](const TypePtr& subtype) { return fillType(subtype, offset); }));
169  } else {
170  offset++;
171  return original;
172  }
173  }
174  size_t hash_code; // precomputed on construction
175  std::vector<ArgumentInfo> args;
176 };
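// --- Usage sketch (not part of the original header) -----------------------
// How a caller might use the ArgumentSpec API declared above to decide
// whether two calls can share one compiled specialization. Only names
// defined in this header and its includes are used; `same_specialization`
// itself is a hypothetical helper.
inline bool same_specialization(
    const std::vector<IValue>& a,
    const std::vector<IValue>& b,
    size_t num_flat_inputs) {
  ArgumentSpec spec_a(/*with_grad=*/true, a, num_flat_inputs);
  ArgumentSpec spec_b(/*with_grad=*/true, b, num_flat_inputs);
  // operator== is a memcmp over the packed ArgumentInfo words; the hash was
  // precomputed in the constructor.
  return spec_a.hashCode() == spec_b.hashCode() && spec_a == spec_b;
}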
177 
178 // CompleteArgumentSpec represents one particular specialization.
179 // It is designed so that it can be created, hashed, and compared quickly
180 // since it is used along the hot-path of the JIT to check if the code
181 // we have created is valid for the given inputs.
182 
183 // CompleteArgumentInfoPOD is only used internally in CompleteArgumentSpec;
184 // API users should use ArgumentInfo
185 struct CompleteArgumentInfoPOD {
186  // total size is 64-bit
187  unsigned is_tensor : 8; // all other fields are invalid if this is false
188  unsigned type : 8; // scalar type
189  unsigned defined : 1;
190  unsigned requires_grad : 1;
191  signed device : 14;
192  uint32_t total_dims; // all CompleteArgumentInfoPODs are in CompleteArgumentSpec's
193  // tensor_info() array. total_dims is the total number of
194  // dimensions seen so far in all previous members of
195  // tensor_info(), including this tensor. 2*total_dims
196  // becomes the offset into the sizes_strides list for the
197  // _next_ tensor in the tensor_info array; for tensor 0,
198  // the offset is always 0.
199 };
200 
201 static_assert(
202  sizeof(CompleteArgumentInfoPOD) == sizeof(int64_t),
203  "CompleteArgumentInfoPOD must be 64-bit struct for CompleteArgumentSpec encoding to work");
204 
205 struct CompleteArgumentInfo;
206 
207 struct CompleteArgumentSpec {
208  CompleteArgumentSpec(bool with_grad, at::ArrayRef<IValue> inputs)
209  : hash_code(0), ninputs(inputs.size()) {
210  int32_t all_dims = 0;
211  const int32_t num_inputs = inputs.size();
212  for (int32_t i = 0; i < num_inputs; i++) {
213  if (!inputs[i].isTensor())
214  continue;
215  auto tensor = inputs[i].toTensor();
216  all_dims += tensor.defined() ? tensor.ndimension() : 0;
217  }
218  // allocate enough room for all TensorPODs and dimensions
219  data.resize(ninputs + all_dims * 2);
220 
221  // and reinterpret our data array as these structs
222  auto* pods = reinterpret_cast<CompleteArgumentInfoPOD*>(data.data());
223  int64_t* next_dim = sizes_strides();
224  int32_t total_dims = 0;
225  for (int32_t i = 0; i < num_inputs; i++) {
226  auto& pod = pods[i];
227  pod.is_tensor = static_cast<uint32_t>(inputs[i].isTensor());
228  if (pod.is_tensor) {
229  at::Tensor t = inputs[i].toTensor();
230  pod.defined = t.defined();
231  if (pod.defined) {
232  pod.type = static_cast<int>(t.scalar_type());
233  pod.device = (!t.is_cuda()) ? -1 : t.get_device();
234  pod.requires_grad =
235  with_grad && autograd::as_variable_ref(t).requires_grad();
236  total_dims += t.ndimension();
237  auto sizes = t.sizes();
238  std::copy(sizes.begin(), sizes.end(), next_dim);
239  next_dim += sizes.size();
240  auto strides = t.strides();
241  std::copy(strides.begin(), strides.end(), next_dim);
242  next_dim += strides.size();
243  }
244  }
245  // each POD has a running tally of all dimensions including its own
246  pod.total_dims = total_dims;
247  }
248  // we precompute the hash_code to minimize the time inside of hash
249  // table operations where we may need to hold a compiler cache lock.
250  hash_code = hash_combine(0, ninputs);
251  for (auto d : data) {
252  hash_code = hash_combine(hash_code, d);
253  }
254  }
255 
256  // equality is fast: check ninputs and then compare the raw array data;
257  // there are no size/stride indirections
258  bool operator==(const CompleteArgumentSpec& spec) const {
259  return ninputs == spec.ninputs && data == spec.data;
260  }
261  bool operator!=(const CompleteArgumentSpec& spec) const {
262  return !(*this == spec);
263  }
264  friend struct CompleteArgumentInfo;
265  CompleteArgumentInfo at(size_t i) const;
266  size_t size() const {
267  return ninputs;
268  }
269  size_t hashCode() const {
270  return hash_code;
271  }
272 
273  private:
274  ArrayRef<CompleteArgumentInfoPOD> tensor_info() const {
275  return ArrayRef<CompleteArgumentInfoPOD>(
276  reinterpret_cast<const CompleteArgumentInfoPOD*>(data.data()), ninputs);
277  }
278  // the start of the sizes_strides information, which comes after the
279  // CompleteArgumentInfoPOD list.
280  const int64_t* sizes_strides() const {
281  return data.data() + ninputs;
282  }
283  int64_t* sizes_strides() {
284  return data.data() + ninputs;
285  }
286  size_t hash_code; // precomputed on construction
287  int32_t ninputs;
288  // layout is ninputs of CompleteArgumentInfoPOD (each 64-bit) followed by
289  // their size and stride info; e.g. for 3 tensors:
290  // [t0POD][t1POD][t2POD]...
291  // [t0 sizes][t0 strides][t1 sizes][t1 strides][t2 sizes][t2 strides]
292  std::vector<int64_t> data;
293 };
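// --- Usage sketch (not part of the original header) -----------------------
// CompleteArgumentSpec folds the concrete sizes and strides into data (and
// hence into hash_code), so two tensors of equal rank/type/device but
// different shapes still produce different specs, unlike ArgumentSpec above.
// `same_complete_specialization` is a hypothetical helper.
inline bool same_complete_specialization(
    const std::vector<IValue>& a,
    const std::vector<IValue>& b) {
  CompleteArgumentSpec spec_a(/*with_grad=*/false, a);
  CompleteArgumentSpec spec_b(/*with_grad=*/false, b);
  return spec_a == spec_b; // ninputs check plus raw data comparison
}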
294 
295 // public view of compressed CompleteArgumentInfo
296 struct CompleteArgumentInfo {
297  CompleteArgumentInfo(const CompleteArgumentSpec& spec, const int i)
298  : spec(spec), i(i) {}
299  bool isTensor() const {
300  return pod(i).is_tensor;
301  }
302  at::ScalarType type() const {
303  return at::ScalarType(pod(i).type);
304  }
305  bool defined() const {
306  return pod(i).defined;
307  }
308  bool requires_grad() const {
309  return pod(i).requires_grad;
310  }
311  int device() const {
312  return pod(i).device;
313  }
314  int ndimension() const {
315  // See [valid range]; it is always valid to ask for the offset of (i + 1)
316  return (sizes_strides_offset(i + 1) - sizes_strides_offset(i)) / 2;
317  }
318  at::IntArrayRef sizes() const {
319  return at::IntArrayRef(
320  spec.sizes_strides() + sizes_strides_offset(i), ndimension());
321  }
322  at::IntArrayRef strides() const {
323  int ndim = ndimension();
324  return at::IntArrayRef(
325  spec.sizes_strides() + sizes_strides_offset(i) + ndim, ndim);
326  }
327  operator TypePtr() const {
328  if (!defined())
329  return TensorType::get();
330  return CompleteTensorType::create(
331  type(), ConvertIntToCPUOrCUDA(device()), sizes(), strides());
332  }
333 
334  private:
335  // offset into sizes_strides() array where the sizes start for tensor j
336  // [valid range] valid range is [0, ninputs]
337  // (i.e. you can ask for the offset at ninputs, which would be the offset of
338  // the next tensor if it existed)
339  int sizes_strides_offset(int j) const {
340  if (j == 0)
341  return 0;
342  return 2 * pod(j - 1).total_dims;
343  }
344  const CompleteArgumentInfoPOD& pod(int j) const {
345  return spec.tensor_info().at(j);
346  }
347  const CompleteArgumentSpec& spec;
348  const int i;
349 };
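// --- Usage sketch (not part of the original header) -----------------------
// Reading shape information back through the CompleteArgumentInfo view.
// CompleteArgumentSpec::at(i) is declared above and defined near the end of
// this file; sizes()/strides() decode the packed sizes_strides area using
// the running total_dims tally. `print_complete_info` is a hypothetical
// helper.
inline void print_complete_info(const CompleteArgumentSpec& spec) {
  for (size_t i = 0; i < spec.size(); ++i) {
    CompleteArgumentInfo info = spec.at(i);
    if (!info.isTensor() || !info.defined())
      continue; // non-tensors and undefined tensors carry no sizes/strides
    std::cout << "arg " << i << ": dims=" << info.ndimension()
              << " sizes=" << info.sizes() << " strides=" << info.strides()
              << "\n";
  }
}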
350 
351 inline std::ostream& operator<<(std::ostream& out, const ArgumentInfo& info) {
352  if (!info.defined()) {
353  return out << "<undefined>";
354  }
355  out << "Tensor(device=" << info.device() << ", type=" << toString(info.type())
356  << ", requires_grad=" << info.requires_grad() << ", dims=" << info.dim()
357  << ")";
358  return out;
359 }
360 
361 inline std::ostream& operator<<(std::ostream& out, const ArgumentSpec& spec) {
362  out << "{";
363  for (size_t i = 0; i < spec.size(); ++i) {
364  if (i > 0)
365  out << ", ";
366  out << spec.at(i);
367  }
368  out << "}";
369  return out;
370 }
371 
372 inline std::ostream& operator<<(
373  std::ostream& out,
374  const CompleteArgumentInfo& info) {
375  if (!info.defined()) {
376  return out << "<undefined>";
377  }
378  out << "Tensor(device=" << info.device() << ", type=" << toString(info.type())
379  << ", requires_grad=" << info.requires_grad()
380  << ", sizes=" << info.sizes() << ", strides=" << info.strides() << ")";
381  return out;
382 }
383 
384 inline std::ostream& operator<<(
385  std::ostream& out,
386  const CompleteArgumentSpec& spec) {
387  out << "{";
388  for (size_t i = 0; i < spec.size(); ++i) {
389  if (i > 0)
390  out << ", ";
391  out << spec.at(i);
392  }
393  out << "}";
394  return out;
395 }
396 
397 inline CompleteArgumentInfo CompleteArgumentSpec::at(size_t i) const {
398  return CompleteArgumentInfo(*this, i);
399 }
400 
401 inline void setInputTypes(Graph& g, const ArgumentSpec& spec) {
402  auto input_types = spec.getTypes(g);
403  auto inputs = g.inputs();
404  for (size_t i = 0; i < inputs.size(); ++i) {
405  inputs[i]->setType(input_types[i]);
406  }
407 }
408 
409 } // namespace jit
410 } // namespace torch
411 
412 namespace std {
413 template <>
414 struct hash<torch::jit::ArgumentSpec> {
415  size_t operator()(const torch::jit::ArgumentSpec& spec) const {
416  return spec.hashCode();
417  }
418 };
419 template <>
420 struct hash<torch::jit::CompleteArgumentSpec> {
421  size_t operator()(const torch::jit::CompleteArgumentSpec& spec) const {
422  return spec.hashCode();
423  }
424 };
425 } // namespace std
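// --- Usage sketch (not part of the original header) -----------------------
// The std::hash specializations above exist so a compiled-plan cache can be
// keyed directly on the spec. `CachedPlan` and `lookup_or_compile` are
// hypothetical placeholders for whatever a GraphExecutor-style cache stores;
// <unordered_map> would need to be included.
struct CachedPlan {};

inline CachedPlan& lookup_or_compile(
    std::unordered_map<torch::jit::ArgumentSpec, CachedPlan>& cache,
    const torch::jit::ArgumentSpec& spec) {
  // hashCode() was precomputed when the spec was built, so the probe costs a
  // hash lookup plus a memcmp-based operator== on bucket collisions.
  return cache[spec]; // default-constructs a CachedPlan on a miss
}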