// Im2ColNCHW: unfolds the image data_im into the column buffer data_col.
// Positions that fall outside the image are written as zero_point.
// Three code paths are visible below: a fast path without padding or
// dilation, a path for symmetric padding, and a general path.
// NOTE(review): parts of the signature and of several statements are outside
// this view, so the comments below describe only the visible lines.
7 #include "caffe2/core/operator.h" 8 #include "caffe2/utils/math.h" 9 #include "caffe2/utils/math/utils.h" 16 static void Im2ColNCHW(
// zero_point is the fill value for padded positions and defaults to 0.
33 const T& zero_point = 0) {
// Output height expression: padded input height minus the dilated kernel
// extent, divided by stride_h (presumably assigned to output_h - confirm).
35 (height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) / stride_h +
// Same expression for the width (presumably assigned to output_w - confirm).
38 (width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) / stride_w +
// Fast path: taken only when there is no dilation and no padding on any side.
43 if (dilation_h == 1 && dilation_w == 1 && pad_l == 0 && pad_r == 0 &&
44 pad_t == 0 && pad_b == 0) {
// k enumerates every (channel, kernel row, kernel column) combination.
45 for (
auto k = 0; k < channels * kernel_h * kernel_w; k++) {
// Decompose k into the channel index nip and the kernel offsets kh and kw.
46 const auto nip = k / (kernel_h * kernel_w);
47 const auto rest = k % (kernel_h * kernel_w);
48 const auto kh = rest / kernel_w;
49 const auto kw = rest % kernel_w;
// dst is the output plane selected by (nip, kh, kw); every plane holds
// output_h * output_w elements.
50 auto* dst = data_col + nip * (kernel_h * kernel_w * output_h * output_w) +
51 kh * (kernel_w * output_h * output_w) + kw * (output_h * output_w);
// src is the input plane of channel nip (height * width elements).
52 const auto* src = data_im + nip * (height * width);
53 for (
auto y = 0; y < output_h; y++) {
// Input row that feeds output row y for kernel row kh.
54 const auto iy = y * stride_h + kh;
// Arguments of a bulk copy of output_w elements from the input row; the
// call itself and its guarding condition are outside this view
// (presumably the contiguous stride_w == 1 case - confirm).
59 src + (iy * width + ix),
60 sizeof(
T) * output_w);
// Element-wise variant: consecutive outputs read inputs stride_w apart.
62 for (
auto x = 0; x < output_w; x++) {
64 dst + (y * output_w + x),
65 src + (iy * width + ix + x * stride_w),
// Symmetric padding path: left equals right and top equals bottom.
75 if (pad_l == pad_r && pad_t == pad_b) {
77 const int pad_h = pad_t;
78 const int pad_w = pad_l;
79 const int channel_size = height * width;
// Visit each channel once, advancing data_im by one channel plane per step.
80 for (
int channel = channels; channel--; data_im += channel_size) {
81 for (
int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
82 for (
int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
// First input row touched by this kernel row; negative inside the top pad.
83 int input_row = -pad_h + kernel_row * dilation_h;
84 for (
int output_rows = output_h; output_rows; output_rows--) {
// Judging by its name, the helper tests 0 <= input_row < height - confirm.
// When the row is outside the image the whole output row is zero_point.
85 if (!utils::IsAGeZeroAndALtB(input_row, height)) {
86 for (
int output_cols = output_w; output_cols; output_cols--) {
// data_col is written sequentially through post-increment.
87 *(data_col++) = zero_point;
// First input column touched by this kernel column.
90 int input_col = -pad_w + kernel_col * dilation_w;
91 for (
int output_col = output_w; output_col; output_col--) {
// Copy the input element when the column is inside the image, else pad.
92 if (utils::IsAGeZeroAndALtB(input_col, width)) {
93 *(data_col++) = data_im[input_row * width + input_col];
95 *(data_col++) = zero_point;
97 input_col += stride_w;
100 input_row += stride_h;
// General path; it uses the four pad values independently.
// NOTE(review): the control flow that separates this path from the previous
// ones is outside this view.
// Extent of the kernel once dilation is applied.
109 const int dkernel_h = dilation_h * (kernel_h - 1) + 1;
110 const int dkernel_w = dilation_w * (kernel_w - 1) + 1;
// Output spatial size.
112 int height_col = (height + pad_t + pad_b - dkernel_h) / stride_h + 1;
113 int width_col = (width + pad_l + pad_r - dkernel_w) / stride_w + 1;
// One output plane per (channel, kernel row, kernel column) combination.
115 int channels_col = channels * kernel_h * kernel_w;
116 for (
int c = 0; c < channels_col; ++c) {
// Decompose c into kernel column, kernel row and input channel.
117 int w_offset = c % kernel_w;
118 int h_offset = (c / kernel_w) % kernel_h;
119 int c_im = c / kernel_h / kernel_w;
120 for (
int h = 0; h < height_col; ++h) {
121 for (
int w = 0; w < width_col; ++w) {
// Input coordinates read by output (h, w); they may lie in the padding.
122 int h_pad = h * stride_h - pad_t + h_offset * dilation_h;
123 int w_pad = w * stride_w - pad_l + w_offset * dilation_w;
// In bounds: copy the input element. Otherwise: store zero_point.
124 if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
125 data_col[(c * height_col + h) * width_col + w] =
126 data_im[(c_im * height + h_pad) * width + w_pad];
128 data_col[(c * height_col + h) * width_col + w] = zero_point;
// Im2ColNdNCHW: N-dimensional unfolding of X_data into Y_data.
// Shapes are passed as int arrays; index 0 of img_shape and col_shape is
// skipped when the spatial axes are addressed (see the + 1 offsets below).
// Out-of-bounds positions are written as zero_point, which defaults to 0.
// NOTE(review): part of the signature and the closing of the function are
// outside this view.
134 template <
typename T>
135 static void Im2ColNdNCHW(
139 const int* img_shape,
140 const int* col_shape,
141 const int* kernel_shape,
148 const T& zero_point = 0) {
// Rows of the column buffer, and the number of elements in each row.
149 const int outer_size = col_shape[0];
150 const int inner_size = col_size / outer_size;
// Product of the N kernel extents.
151 const int kernel_size = std::accumulate(
152 kernel_shape, kernel_shape + N, 1, std::multiplies<int>());
// d_offset: per-axis kernel offset of the current row.
// d_iter: per-axis output position of the current element.
153 std::vector<int> d_offset(N, 0);
154 std::vector<int> d_iter(N, 0);
155 for (
int i = 0; i < outer_size; ++i) {
// Split offset into one kernel offset per axis, last axis first.
// NOTE(review): the initialisation of offset is outside this view.
158 for (
int d_i = N - 1; d_i >= 0; --d_i) {
159 d_offset[d_i] = offset % kernel_shape[d_i];
160 offset /= kernel_shape[d_i];
162 for (
int j = 0; j < inner_size; ++j) {
// Flat output index of element j in row i.
165 const int col_index = i * inner_size + j;
// Start from the quotient i / kernel_size and fold in each axis coordinate.
166 int img_index = i / kernel_size;
167 bool is_padding =
false;
168 for (
int d_i = 0; d_i < N; ++d_i) {
// Input coordinate on axis d_i; only pad[d_i] is read for this axis.
169 const int d_img = d_iter[d_i] * stride[d_i] - pad[d_i] +
170 d_offset[d_i] * dilation[d_i];
// Any axis outside its extent marks the whole position as padding.
171 is_padding |= d_img < 0 || d_img >= img_shape[d_i + 1];
172 img_index = img_index * img_shape[d_i + 1] + d_img;
// img_index is only dereferenced when the position is inside the image.
174 Y_data[col_index] = is_padding ? zero_point : X_data[img_index];
// Presumably advances d_iter to the next output position within
// col_shape[1..N] - confirm against the helper definition.
175 utils::IncreaseIndexInDims(N, col_shape + 1, d_iter.data());
// Im2ColNHWC: unfolds a channels-last image into data_col, one output row h
// at a time. Channels are handled in groups of channels / groups elements.
// zero_point has no default value in this signature.
// NOTE(review): part of the signature, the declarations of r, s and w_pad,
// and the names of the copy calls are outside this view.
185 template <
typename T>
186 static void Im2ColNHWC(
192 const int dilation_h,
193 const int dilation_w,
204 const T& zero_point) {
// Extent of the kernel once dilation is applied.
205 const int dkernel_h = dilation_h * (kernel_h - 1) + 1;
206 const int dkernel_w = dilation_w * (kernel_w - 1) + 1;
// Output spatial size.
208 int height_col = (height + pad_t + pad_b - dkernel_h) / stride_h + 1;
209 int width_col = (width + pad_l + pad_r - dkernel_w) / stride_w + 1;
// Output rows are distributed over OpenMP threads unless the caller is
// already inside a parallel region.
212 #pragma omp parallel for if (!omp_in_parallel()) 214 for (
int h = 0; h < height_col; ++h) {
// Topmost input row of the window for output row h; may be negative.
215 int h_pad = -pad_t + h * stride_h;
// Start of the output region for row h: every output pixel occupies
// kernel_h * kernel_w * channels elements.
217 data_col + h * width_col * kernel_h * kernel_w * channels;
219 for (
int w = 0; w < width_col; ++w) {
// ih walks the input rows of the dilated window; r advances in step with it.
221 for (
int ih = h_pad; ih < h_pad + dkernel_h; ih += dilation_h, ++r) {
// iw walks the input columns of the dilated window; s advances in step.
223 for (
int iw = w_pad; iw < w_pad + dkernel_w; iw += dilation_w, ++s) {
// Inside the image: copy one block of channels / groups elements per group.
224 if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
225 for (
int g = 0; g < groups; ++g) {
// Destination offset: ordered by group, then kernel row, then kernel column.
228 ((g * kernel_h + r) * kernel_w + s) * (channels / groups),
// Source: pixel (ih, iw) of the input, at the first channel of group g.
229 data_im + (ih * width + iw) * channels +
230 g * (channels / groups),
// Byte count of one group block.
231 sizeof(
T) * (channels / groups));
// Element-wise writes using the same destination layout.
// NOTE(review): presumably the out-of-bounds branch that stores zero_point;
// the branch keyword and the assigned value are outside this view.
235 for (
int g = 0; g < groups; ++g) {
236 for (
int i = 0; i < channels / groups; ++i) {
238 [(((g * kernel_h + r) * kernel_w) + s) *
239 (channels / groups) +
// Move on to the region of the next output pixel.
246 data_col_temp += kernel_h * kernel_w * channels;
// Im2Col3DNHWC: variant of the channels-last unfolding with an extra frame
// axis (num_frames, kernel_t, stride_t, dilation_t, pad_p and pad_n).
// Channels are handled in groups of channels / groups elements.
// NOTE(review): part of the signature, the declarations of q, r and s, and
// the names of the copy calls are outside this view.
257 template <
typename T>
258 static void Im2Col3DNHWC(
260 const int num_frames,
266 const int dilation_t,
267 const int dilation_h,
268 const int dilation_w,
282 const T& zero_point) {
// Extent of the kernel on each axis once dilation is applied.
283 const int dkernel_t = dilation_t * (kernel_t - 1) + 1;
284 const int dkernel_h = dilation_h * (kernel_h - 1) + 1;
285 const int dkernel_w = dilation_w * (kernel_w - 1) + 1;
// Output size on the frame, height and width axes.
287 int frame_col = (num_frames + pad_p + pad_n - dkernel_t) / stride_t + 1;
288 int height_col = (height + pad_t + pad_b - dkernel_h) / stride_h + 1;
289 int width_col = (width + pad_l + pad_r - dkernel_w) / stride_w + 1;
// Output frames are distributed over OpenMP threads unless the caller is
// already inside a parallel region.
292 #pragma omp parallel for if (!omp_in_parallel()) 294 for (
int t = 0; t < frame_col; ++t) {
// First input frame of the window for output frame t; may be negative.
295 int t_pad = -pad_p + t * stride_t;
296 for (
int h = 0; h < height_col; ++h) {
// Topmost input row of the window for output row h; may be negative.
297 int h_pad = -pad_t + h * stride_h;
// Start of the output region for (t, h).
// NOTE(review): the last factor of this product is outside this view.
298 T* data_col_temp = data_col +
299 (t * height_col + h) * width_col * kernel_t * kernel_h * kernel_w *
301 for (
int w = 0; w < width_col; ++w) {
// Leftmost input column of the window for output column w.
302 int w_pad = -pad_l + w * stride_w;
// it, ih and iw walk the dilated window on the frame, row and column axes;
// q, r and s advance in step with them.
304 for (
int it = t_pad; it < t_pad + dkernel_t; it += dilation_t, ++q) {
306 for (
int ih = h_pad; ih < h_pad + dkernel_h; ih += dilation_h, ++r) {
308 for (
int iw = w_pad; iw < w_pad + dkernel_w;
309 iw += dilation_w, ++s) {
// Inside the volume: copy one block of channels / groups elements per group.
310 if (it >= 0 && it < num_frames && ih >= 0 && ih < height &&
311 iw >= 0 && iw < width) {
312 for (
int g = 0; g < groups; ++g) {
// Destination offset: ordered by group, kernel frame, kernel row and
// kernel column.
315 (((g * kernel_t + q) * kernel_h + r) * kernel_w + s) *
// Source: voxel (it, ih, iw) of the input, at the first channel of group g.
317 data_im + ((it * height + ih) * width + iw) * channels +
318 g * (channels / groups),
// Byte count of one group block.
319 sizeof(
T) * (channels / groups));
// Element-wise writes using the same destination layout.
// NOTE(review): presumably the out-of-bounds branch that stores zero_point;
// the branch keyword and the assigned value are outside this view.
323 for (
int g = 0; g < groups; ++g) {
324 for (
int i = 0; i < channels / groups; ++i) {
326 [((((g * kernel_t + q) * kernel_h + r) * kernel_w) +
328 (channels / groups) +
// Move on to the region of the next output position.
336 data_col_temp += kernel_t * kernel_h * kernel_w * channels;
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...