Caffe2 - C++ API
A deep learning, cross platform ML framework
cpu_neon.h
1 
17 #ifndef CAFFE2_UTILS_CPU_NEON_H_
18 #define CAFFE2_UTILS_CPU_NEON_H_
19 
20 // Provides a variety of ARM NEON-specific utility functions
21 #ifdef __ARM_NEON__
22 #include <arm_neon.h>
23 
24 namespace caffe2 {
25 
// Reports whether the address held by `p` is a multiple of `align` bytes.
//
// p:     pointer whose numeric address is inspected; it is never
//        dereferenced, so a null pointer is acceptable.
// align: required alignment in bytes. Any non-zero value is accepted, not
//        only powers of two.
//
// Returns true when the address is evenly divisible by `align`. An `align`
// of zero is reported as "not aligned" instead of evaluating `% 0`, which
// would be undefined behavior.
template <typename T>
inline bool isPointerAligned(T* p, size_t align) {
  if (align == 0) {
    return false;
  }
  return reinterpret_cast<uintptr_t>(p) % align == 0;
}
30 
31 inline float32x4_t vert_sum_f32(float32x4_t v0,
32  float32x4_t v1,
33  float32x4_t v2,
34  float32x4_t v3) {
35  v0 = vaddq_f32(v0, v1);
36  v2 = vaddq_f32(v2, v3);
37  return vaddq_f32(v0, v2);
38 }
39 
40 inline float horizontal_sum_f32(float32x4_t v0,
41  float32x4_t v1,
42  float32x4_t v2,
43  float32x4_t v3) {
44  v0 = vert_sum_f32(v0, v1, v2, v3);
45  float32x2_t v = vadd_f32(vget_high_f32(v0), vget_low_f32(v0));
46  return vget_lane_f32(vpadd_f32(v, v), 0);
47 }
48 
49 // Load/store functions that assume alignment
50 
51 inline float32x4_t vld1q_f32_aligned(const float* p) {
52  return vld1q_f32((const float*)
53  __builtin_assume_aligned(p, sizeof(float32x4_t)));
54 }
55 
56 inline void vst1q_f32_aligned(float* p, float32x4_t v) {
57  vst1q_f32((float*) __builtin_assume_aligned(p, sizeof(float32x4_t)), v);
58 }
59 
60 inline void vst4_u8_aligned(uint8_t* p, uint8x8x4_t v) {
61  vst4_u8((uint8_t*)
62  __builtin_assume_aligned(p, sizeof(uint8x8x4_t)), v);
63 }
64 
65 } // namespace caffe2
66 
67 #endif // __ARM_NEON__
68 
69 #endif // CAFFE2_UTILS_CPU_NEON_H_
Copyright (c) 2016-present, Facebook, Inc.