// Declaration of ReluAVX2. It takes an integer N, an integer zero_point, a
// read-only pointer X and a writable pointer Y, both over element type T.
// The specializations in this file use N as the number of elements to process
// and zero_point as the lower clamp applied to each element.
// NOTE(review): the `template <typename T>` header that would introduce T is
// not visible in this excerpt -- confirm against the full file.
11 void ReluAVX2(
const int N,
const int zero_point,
const T* X,
T* Y);
// uint8_t specialization of ReluAVX2: stores max(X[i], zero_point) into Y[i],
// with zero_point narrowed to uint8_t, using a 256-bit vector loop followed by
// a scalar loop for the leftover elements.
// NOTE(review): the parameter list and the closing braces of this definition
// are missing from this excerpt; presumably the parameters mirror the
// declaration (N, zero_point, X, Y) -- confirm against the full file.
14 void ReluAVX2<uint8_t>(
// kVLen: number of 8-bit elements in one 256-bit vector.
19 constexpr
int kVLen = 32;
// n: N rounded down to a multiple of kVLen (for non-negative N); this many
// elements are handled by the vector loop.
20 const int n = N / kVLen * kVLen;
// r: number of leftover elements handled by the scalar loop.
21 const int r = N % kVLen;
// zero_point cast to uint8_t and broadcast into every 8-bit lane.
22 const __m256i zero_v = _mm256_set1_epi8(static_cast<uint8_t>(zero_point));
// Vector loop: unaligned load of kVLen bytes from X, unsigned per-byte max
// against zero_v, unaligned store to the same offset in Y.
23 for (
int i = 0; i < n; i += kVLen) {
24 __m256i cur_v = _mm256_max_epu8(
25 _mm256_loadu_si256(reinterpret_cast<const __m256i*>(X + i)), zero_v);
26 _mm256_storeu_si256(reinterpret_cast<__m256i*>(Y + i), cur_v);
// Scalar loop: applies the same max to the r elements starting at index n.
28 for (
int i = 0; i < r; ++i) {
29 Y[n + i] = std::max(X[n + i], static_cast<uint8_t>(zero_point));
// uint16_t specialization of ReluAVX2: stores max(X[i], zero_point) into Y[i],
// with zero_point narrowed to uint16_t, using a 256-bit vector loop followed by
// a scalar loop for the leftover elements.
// NOTE(review): the parameter list and the closing braces of this definition
// are missing from this excerpt; presumably the parameters mirror the
// declaration (N, zero_point, X, Y) -- confirm against the full file.
34 void ReluAVX2<uint16_t>(
// kVLen: number of 16-bit elements in one 256-bit vector.
39 constexpr
int kVLen = 16;
// n: N rounded down to a multiple of kVLen (for non-negative N); this many
// elements are handled by the vector loop.
40 const int n = N / kVLen * kVLen;
// r: number of leftover elements handled by the scalar loop.
41 const int r = N % kVLen;
// zero_point cast to uint16_t and broadcast into every 16-bit lane.
42 const __m256i zero_v = _mm256_set1_epi16(static_cast<uint16_t>(zero_point));
// Vector loop: unaligned load of kVLen elements from X, unsigned per-lane
// 16-bit max against zero_v, unaligned store to the same offset in Y.
43 for (
int i = 0; i < n; i += kVLen) {
44 __m256i cur_v = _mm256_max_epu16(
45 _mm256_loadu_si256(reinterpret_cast<const __m256i*>(X + i)), zero_v);
46 _mm256_storeu_si256(reinterpret_cast<__m256i*>(Y + i), cur_v);
// Scalar loop: applies the same max to the r elements starting at index n.
48 for (
int i = 0; i < r; ++i) {
49 Y[n + i] = std::max(X[n + i], static_cast<uint16_t>(zero_point));
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...