Caffe2 - C++ API
A deep learning, cross platform ML framework
typed_axpy.cc
1 #include "caffe2/perfkernels/typed_axpy.h"
2 #include "caffe2/core/types.h"
3 #include "caffe2/perfkernels/common.h"
4 #include "caffe2/utils/cpuid.h"
5 #include "caffe2/utils/math.h"
6 
7 namespace caffe2 {
8 
9 template <>
10 void TypedAxpy<float, float>(int N, const float a, const float* x, float* y) {
11  // This uses a hack that axpy implementation actually does not use the
12  // CPUContext, so passing in a nullpointer works.
13  math::Axpy<float, CPUContext>(N, a, x, y, nullptr);
14 }
15 
16 void TypedAxpyHalffloat__base(
17  int N,
18  const float a,
19  const at::Half* x,
20  float* y) {
21  for (int i = 0; i < N; ++i) {
22  union {
23  uint32_t intval;
24  float floatval;
25  } t1;
26  uint32_t t2, t3;
27  t1.intval = x[i].x & 0x7fff; // Non-sign bits
28  t2 = x[i].x & 0x8000; // Sign bit
29  t3 = x[i].x & 0x7c00; // Exponent
30  t1.intval <<= 13; // Align mantissa on MSB
31  t2 <<= 16; // Shift sign bit into position
32  t1.intval += 0x38000000; // Adjust bias
33  t1.intval = (t3 == 0 ? 0 : t1.intval); // Denormals-as-zero
34  t1.intval |= t2; // Re-insert sign bit
35  y[i] += t1.floatval * a;
36  }
37 }
38 
39 decltype(TypedAxpyHalffloat__base) TypedAxpyHalffloat__avx2_fma;
40 decltype(TypedAxpyHalffloat__base) TypedAxpyHalffloat__avx_f16c;
41 template <>
42 void TypedAxpy<at::Half, float>(
43  int N,
44  const float a,
45  const at::Half* x,
46  float* y) {
47  AVX2_FMA_DO(TypedAxpyHalffloat, N, a, x, y);
48  AVX_F16C_DO(TypedAxpyHalffloat, N, a, x, y);
49  BASE_DO(TypedAxpyHalffloat, N, a, x, y);
50 }
51 
// Portable fallback kernel for 8-bit inputs: for every index in [0, N),
// widens x[index] to float, scales it by a, and accumulates into y[index].
// N <= 0 leaves y untouched.
void TypedAxpy_uint8_float__base(
    int N,
    const float a,
    const std::uint8_t* x,
    float* y) {
  int idx = 0;
  while (idx < N) {
    // Multiply and accumulate stay in one expression, as in the original.
    y[idx] += static_cast<float>(x[idx]) * a;
    ++idx;
  }
}
61 
62 decltype(TypedAxpy_uint8_float__base) TypedAxpy_uint8_float__avx2_fma;
63 decltype(TypedAxpy_uint8_float__base) TypedAxpy_uint8_float__avx_f16c;
64 template <>
65 void TypedAxpy<std::uint8_t, float>(
66  int N,
67  const float a,
68  const std::uint8_t* x,
69  float* y) {
70  AVX2_FMA_DO(TypedAxpy_uint8_float, N, a, x, y);
71  BASE_DO(TypedAxpy_uint8_float, N, a, x, y);
72 }
73 
74 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Flush-To-Zero and Denormals-Are-Zero mode.