Caffe2 - C++ API
A deep learning, cross platform ML framework
resource_strings.h
1 #pragma once
2 
3 #include <torch/csrc/jit/code_template.h>
4 
5 namespace torch {
6 namespace jit {
7 namespace fuser {
8 namespace cpu {
9 
10 /* Because type_as does not check the type of its input, a fusion group can
11 have a non-fp32 tensor as input. Correct code is generated for this case;
12 however, nvrtc does not know how to handle the int*_t integer types, so
13 typedefs help it handle those cases. */
14 
// CodeTemplate holding the type declarations used by generated kernel source.
// Presumably this is the text spliced into cpu_compilation_unit_template via
// its ${type_declarations} placeholder -- confirm at the call site.
//
// The template text:
//   - defines POS_INFINITY / NEG_INFINITY in terms of INFINITY;
//   - typedefs IndexType to whatever is substituted for ${IndexType};
//   - declares TensorInfo<T, N>: a data pointer plus N-element `sizes` and
//     `strides` arrays of IndexType;
//   - specializes TensorInfo<T, 0> to hold only the data pointer (presumably
//     to avoid declaring zero-length arrays -- confirm).
//
// Everything between R"( and )" is part of the string handed to CodeTemplate,
// so any edit inside the literal (including adding comments) changes the
// template text itself.
//
// Declared `static` at namespace scope in a header, so each translation unit
// that includes this file gets its own instance.
15 static auto type_declarations_template = CodeTemplate(R"(
16 
17 #define POS_INFINITY INFINITY
18 #define NEG_INFINITY -INFINITY
19 
20 typedef ${IndexType} IndexType;
21 template<typename T, size_t N>
22 struct TensorInfo {
23  T* data;
24  IndexType sizes[N];
25  IndexType strides[N];
26 };
27 template<typename T>
28 struct TensorInfo<T, 0> {
29  T * data;
30 };
31 )");
32 
// CodeTemplate for a complete CPU kernel source file. Placeholders that must
// be supplied when the template is formatted: ${type_declarations},
// ${kernelName}, ${formals}, ${tensorOffsets}, ${kernelBody} and
// ${,argument_loads}.
//
// Layout of the template text:
//   1. Includes <math.h>, <cstddef> and <cstdint>.
//   2. A templated rsqrtf helper returning 1.0/sqrtf(x).
//      NOTE(review): the helper is templated on scalar_t but always calls
//      sqrtf and divides the double literal 1.0 -- confirm this is intended
//      for non-float scalar_t.
//   3. ${type_declarations}, which must provide IndexType since the code
//      below uses it.
//   4. `static void ${kernelName}_kernel(IndexType totalElements, ${formals})`:
//      a loop over linearIndex in [0, totalElements) whose body is
//      ${tensorOffsets} followed by ${kernelBody}. The loop carries
//      `#pragma omp parallel for` with an `if` clause that holds only when
//      totalElements > OMP_THRESHOLD (defined as 100000 just above it).
//   5. An extern "C" entry point `${kernelName}(IndexType totalElements,
//      void ** args)` that forwards to ${kernelName}_kernel, passing
//      totalElements followed by ${,argument_loads} (presumably a
//      comma-prefixed list of arguments unpacked from `args` -- confirm
//      against the code that fills in the template).
//
// Everything between R"( and )" is part of the string handed to CodeTemplate,
// so any edit inside the literal (including adding comments) changes the
// template text itself.
//
// Declared `static` at namespace scope in a header, so each translation unit
// that includes this file gets its own instance.
33 static auto cpu_compilation_unit_template = CodeTemplate(R"(
34 #include <math.h>
35 #include <cstddef>
36 #include <cstdint>
37 
38 template <typename scalar_t>
39 scalar_t rsqrtf(scalar_t x) {
40  return 1.0/sqrtf(x);
41 }
42 
43 ${type_declarations}
44 
45 #define OMP_THRESHOLD 100000
46 static void ${kernelName}_kernel(IndexType totalElements, ${formals}) {
47  #pragma omp parallel for if(totalElements > OMP_THRESHOLD)
48  for (IndexType linearIndex = 0;
49  linearIndex < totalElements;
50  linearIndex += 1) {
51  // Convert `linearIndex` into an offset of tensor:
52  ${tensorOffsets}
53  // calculate the results
54  ${kernelBody}
55  }
56 }
57 
58 extern "C"
59 void ${kernelName}(IndexType totalElements, void ** args) {
60  ${kernelName}_kernel(totalElements ${,argument_loads});
61 }
62 )");
63 
64 } // namespace cpu
65 } // namespace fuser
66 } // namespace jit
67 } // namespace torch
Definition: jit_type.h:17