Caffe2 - C++ API
A deep learning, cross platform ML framework
cpu_neon.h
1 #ifndef CAFFE2_UTILS_CPU_NEON_H_
2 #define CAFFE2_UTILS_CPU_NEON_H_
3 
4 // Provides a variety of ARM NEON-specific utility functions
5 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
6 #include <arm_neon.h>
7 
8 namespace caffe2 {
9 
// Reports whether the address held by `p` is a multiple of `align` bytes.
//
// `align` does not have to be a power of two; the check is a plain modulo on
// the pointer's integer value. An `align` of zero is not a valid alignment,
// so the function returns false for it rather than dividing by zero.
template <typename T>
inline bool isPointerAligned(T* p, size_t align) {
  if (align == 0) {
    // Guard: `x % 0` is undefined behaviour.
    return false;
  }
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  return address % align == 0;
}
14 
15 inline float32x4_t vert_sum_f32(float32x4_t v0,
16  float32x4_t v1,
17  float32x4_t v2,
18  float32x4_t v3) {
19  v0 = vaddq_f32(v0, v1);
20  v2 = vaddq_f32(v2, v3);
21  return vaddq_f32(v0, v2);
22 }
23 
24 inline float horizontal_sum_f32(float32x4_t v0,
25  float32x4_t v1,
26  float32x4_t v2,
27  float32x4_t v3) {
28  v0 = vert_sum_f32(v0, v1, v2, v3);
29  float32x2_t v = vadd_f32(vget_high_f32(v0), vget_low_f32(v0));
30  return vget_lane_f32(vpadd_f32(v, v), 0);
31 }
32 
33 // Load/store functions that assume alignment
34 
35 inline float32x4_t vld1q_f32_aligned(const float* p) {
36  return vld1q_f32((const float*)
37  __builtin_assume_aligned(p, sizeof(float32x4_t)));
38 }
39 
40 inline void vst1q_f32_aligned(float* p, float32x4_t v) {
41  vst1q_f32((float*) __builtin_assume_aligned(p, sizeof(float32x4_t)), v);
42 }
43 
44 inline void vst4_u8_aligned(uint8_t* p, uint8x8x4_t v) {
45  vst4_u8((uint8_t*)
46  __builtin_assume_aligned(p, sizeof(uint8x8x4_t)), v);
47 }
48 
49 } // namespace caffe2
50 
51 #endif // defined(__ARM_NEON__) || defined(__ARM_NEON)
52 
53 #endif // CAFFE2_UTILS_CPU_NEON_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13