Caffe2 - C++ API
A deep learning, cross-platform ML framework
pool_gradient_op.cc
#include "caffe2/operators/pool_op.h"

#include <algorithm> // std::max, std::min
#include <cstring> // std::memset
#include <string>
#include <vector>

#include "caffe2/utils/eigen_utils.h"

namespace caffe2 {

namespace {
template <typename T, StorageOrder kOrder>
void ComputeAveragePoolGradient1D(
    int l,
    int r,
    int y,
    T scale,
    const ConstEigenArrayMap<T>& dY_arr,
    EigenArrayMap<T>* dX_arr);

template <>
void ComputeAveragePoolGradient1D<float, StorageOrder::NCHW>(
    const int l,
    const int r,
    const int y,
    const float scale,
    const ConstEigenArrayMap<float>& dY_arr,
    EigenArrayMap<float>* dX_arr) {
  dX_arr->col(0).segment(l, r - l) += dY_arr(y) * scale;
}

template <>
void ComputeAveragePoolGradient1D<float, StorageOrder::NHWC>(
    const int l,
    const int r,
    const int y,
    const float scale,
    const ConstEigenArrayMap<float>& dY_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = l; i < r; ++i) {
    dX_arr->col(i) += dY_arr.col(y) * scale;
  }
}

template <typename T, StorageOrder kOrder>
void ComputeAveragePoolGradient2D(
    int W,
    int t,
    int b,
    int l,
    int r,
    int y,
    T scale,
    const ConstEigenArrayMap<T>& dY_arr,
    EigenArrayMap<T>* dX_arr);

template <>
void ComputeAveragePoolGradient2D<float, StorageOrder::NCHW>(
    const int /* W */,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const float scale,
    const ConstEigenArrayMap<float>& dY_arr,
    EigenArrayMap<float>* dX_arr) {
  dX_arr->block(l, t, r - l, b - t) += dY_arr(y) * scale;
}

template <>
void ComputeAveragePoolGradient2D<float, StorageOrder::NHWC>(
    const int W,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const float scale,
    const ConstEigenArrayMap<float>& dY_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = t; i < b; ++i) {
    for (int j = l; j < r; ++j) {
      dX_arr->col(i * W + j) += dY_arr.col(y) * scale;
    }
  }
}

template <typename T, StorageOrder kOrder>
void ComputeAveragePoolGradient3D(
    int H,
    int W,
    int p,
    int a,
    int t,
    int b,
    int l,
    int r,
    int y,
    T scale,
    const ConstEigenArrayMap<T>& dY_arr,
    EigenArrayMap<T>* dX_arr);

template <>
void ComputeAveragePoolGradient3D<float, StorageOrder::NCHW>(
    const int H,
    const int /* W */,
    const int p,
    const int a,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const float scale,
    const ConstEigenArrayMap<float>& dY_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = p; i < a; ++i) {
    dX_arr->block(l, i * H + t, r - l, b - t) += dY_arr(y) * scale;
  }
}

template <>
void ComputeAveragePoolGradient3D<float, StorageOrder::NHWC>(
    const int H,
    const int W,
    const int p,
    const int a,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const float scale,
    const ConstEigenArrayMap<float>& dY_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = p; i < a; ++i) {
    for (int j = t; j < b; ++j) {
      for (int k = l; k < r; ++k) {
        dX_arr->col(i * H * W + j * W + k) += dY_arr.col(y) * scale;
      }
    }
  }
}

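// The RunAveragePoolGradient{1,2,3}D drivers below zero-initialize dX and,
// for every output element y, scatter dY(y) * scale back over the input
// window [l, r) (x [t, b) x [p, a)) that produced it. The scale is
// 1 / (kernel volume) when count_include_pad is set, and 1 / (actual overlap
// of the window with the input) otherwise, matching the forward averaging.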
template <typename T, StorageOrder kOrder>
void RunAveragePoolGradient1D(
    const int N,
    const int C,
    const int X_size,
    const int Y_size,
    const int kernel,
    const int stride,
    const int pad,
    const bool count_include_pad,
    const T* dY,
    T* dX) {
  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
  const int X_stride = kOrder == StorageOrder::NCHW ? X_size : X_size * C;
  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_size : Y_size * C;
  std::memset(dX, 0, sizeof(T) * N * C * X_size);
  const T* dY_ptr = dY;
  T* dX_ptr = dX;
  for (int i = 0; i < batch_size; ++i) {
    ConstEigenArrayMap<T> dY_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(dY_ptr, Y_size, 1)
        : ConstEigenArrayMap<T>(dY_ptr, C, Y_size);
    EigenArrayMap<T> dX_arr = kOrder == StorageOrder::NCHW
        ? EigenArrayMap<T>(dX_ptr, X_size, 1)
        : EigenArrayMap<T>(dX_ptr, C, X_size);
    for (int y = 0; y < Y_size; ++y) {
      const int l = std::max(y * stride - pad, 0);
      const int r = std::min(y * stride - pad + kernel, X_size);
      const T scale = T(1) / static_cast<T>(count_include_pad ? kernel : r - l);
      ComputeAveragePoolGradient1D<T, kOrder>(l, r, y, scale, dY_arr, &dX_arr);
    }
    dY_ptr += Y_stride;
    dX_ptr += X_stride;
  }
}

template <typename T, StorageOrder kOrder>
void RunAveragePoolGradient2D(
    const int N,
    const int C,
    const int X_H,
    const int X_W,
    const int Y_H,
    const int Y_W,
    const int kernel_h,
    const int kernel_w,
    const int stride_h,
    const int stride_w,
    const int pad_t,
    const int pad_l,
    const bool count_include_pad,
    const T* dY,
    T* dX) {
  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
  const int X_HxW = X_H * X_W;
  const int Y_HxW = Y_H * Y_W;
  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
  std::memset(dX, 0, sizeof(T) * N * C * X_HxW);
  const T* dY_ptr = dY;
  T* dX_ptr = dX;
  for (int i = 0; i < batch_size; ++i) {
    ConstEigenArrayMap<T> dY_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(dY_ptr, Y_W, Y_H)
        : ConstEigenArrayMap<T>(dY_ptr, C, Y_HxW);
    EigenArrayMap<T> dX_arr = kOrder == StorageOrder::NCHW
        ? EigenArrayMap<T>(dX_ptr, X_W, X_H)
        : EigenArrayMap<T>(dX_ptr, C, X_HxW);
    for (int h = 0; h < Y_H; ++h) {
      const int t = std::max(h * stride_h - pad_t, 0);
      const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
      for (int w = 0; w < Y_W; ++w) {
        const int l = std::max(w * stride_w - pad_l, 0);
        const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
        const int y = h * Y_W + w;
        const T scale = T(1) /
            static_cast<T>(count_include_pad ? kernel_h * kernel_w
                                             : (b - t) * (r - l));
        ComputeAveragePoolGradient2D<T, kOrder>(
            X_W, t, b, l, r, y, scale, dY_arr, &dX_arr);
      }
    }
    dY_ptr += Y_stride;
    dX_ptr += X_stride;
  }
}

template <typename T, StorageOrder kOrder>
void RunAveragePoolGradient3D(
    const int N,
    const int C,
    const int X_D,
    const int X_H,
    const int X_W,
    const int Y_D,
    const int Y_H,
    const int Y_W,
    const int kernel_d,
    const int kernel_h,
    const int kernel_w,
    const int stride_d,
    const int stride_h,
    const int stride_w,
    const int pad_p,
    const int pad_t,
    const int pad_l,
    const bool count_include_pad,
    const T* dY,
    T* dX) {
  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
  const int X_HxW = X_D * X_H * X_W;
  const int Y_HxW = Y_D * Y_H * Y_W;
  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
  std::memset(dX, 0, sizeof(T) * N * C * X_HxW);
  const T* dY_ptr = dY;
  T* dX_ptr = dX;
  for (int i = 0; i < batch_size; ++i) {
    ConstEigenArrayMap<T> dY_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(dY_ptr, Y_W, Y_D * Y_H)
        : ConstEigenArrayMap<T>(dY_ptr, C, Y_HxW);
    EigenArrayMap<T> dX_arr = kOrder == StorageOrder::NCHW
        ? EigenArrayMap<T>(dX_ptr, X_W, X_D * X_H)
        : EigenArrayMap<T>(dX_ptr, C, X_HxW);
    for (int d = 0; d < Y_D; ++d) {
      const int p = std::max(d * stride_d - pad_p, 0);
      const int a = std::min(d * stride_d - pad_p + kernel_d, X_D);
      for (int h = 0; h < Y_H; ++h) {
        const int t = std::max(h * stride_h - pad_t, 0);
        const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
        for (int w = 0; w < Y_W; ++w) {
          const int l = std::max(w * stride_w - pad_l, 0);
          const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
          const int y = d * Y_H * Y_W + h * Y_W + w;
          const T scale = T(1) /
              static_cast<T>(count_include_pad ? kernel_d * kernel_h * kernel_w
                                               : (a - p) * (b - t) * (r - l));
          ComputeAveragePoolGradient3D<T, kOrder>(
              X_H, X_W, p, a, t, b, l, r, y, scale, dY_arr, &dX_arr);
        }
      }
    }
    dY_ptr += Y_stride;
    dX_ptr += X_stride;
  }
}

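// The ComputeMaxPoolGradient{1,2,3}D helpers route the gradient to the argmax
// positions: each input element inside the window receives dY(y) if and only
// if its value equals the pooled output Y(y), so tied maxima each receive the
// full gradient.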
template <typename T, StorageOrder kOrder>
void ComputeMaxPoolGradient1D(
    int l,
    int r,
    int y,
    const ConstEigenArrayMap<T>& dY_arr,
    const ConstEigenArrayMap<T>& X_arr,
    const ConstEigenArrayMap<T>& Y_arr,
    EigenArrayMap<T>* dX_arr);

template <>
void ComputeMaxPoolGradient1D<float, StorageOrder::NCHW>(
    const int l,
    const int r,
    const int y,
    const ConstEigenArrayMap<float>& dY_arr,
    const ConstEigenArrayMap<float>& X_arr,
    const ConstEigenArrayMap<float>& Y_arr,
    EigenArrayMap<float>* dX_arr) {
  dX_arr->col(0).segment(l, r - l) +=
      (X_arr.col(0).segment(l, r - l) == Y_arr(y)).cast<float>() * dY_arr(y);
}

template <>
void ComputeMaxPoolGradient1D<float, StorageOrder::NHWC>(
    const int l,
    const int r,
    const int y,
    const ConstEigenArrayMap<float>& dY_arr,
    const ConstEigenArrayMap<float>& X_arr,
    const ConstEigenArrayMap<float>& Y_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = l; i < r; ++i) {
    dX_arr->col(i) +=
        (X_arr.col(i) == Y_arr.col(y)).cast<float>() * dY_arr.col(y);
  }
}

template <typename T, StorageOrder kOrder>
void ComputeMaxPoolGradient2D(
    int W,
    int t,
    int b,
    int l,
    int r,
    int y,
    const ConstEigenArrayMap<T>& dY_arr,
    const ConstEigenArrayMap<T>& X_arr,
    const ConstEigenArrayMap<T>& Y_arr,
    EigenArrayMap<T>* dX_arr);

template <>
void ComputeMaxPoolGradient2D<float, StorageOrder::NCHW>(
    const int /* W */,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const ConstEigenArrayMap<float>& dY_arr,
    const ConstEigenArrayMap<float>& X_arr,
    const ConstEigenArrayMap<float>& Y_arr,
    EigenArrayMap<float>* dX_arr) {
  dX_arr->block(l, t, r - l, b - t) +=
      (X_arr.block(l, t, r - l, b - t) == Y_arr(y)).cast<float>() * dY_arr(y);
}

template <>
void ComputeMaxPoolGradient2D<float, StorageOrder::NHWC>(
    const int W,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const ConstEigenArrayMap<float>& dY_arr,
    const ConstEigenArrayMap<float>& X_arr,
    const ConstEigenArrayMap<float>& Y_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = t; i < b; ++i) {
    for (int j = l; j < r; ++j) {
      const int x = i * W + j;
      dX_arr->col(x) +=
          (X_arr.col(x) == Y_arr.col(y)).cast<float>() * dY_arr.col(y);
    }
  }
}

template <typename T, StorageOrder kOrder>
void ComputeMaxPoolGradient3D(
    int H,
    int W,
    int p,
    int a,
    int t,
    int b,
    int l,
    int r,
    int y,
    const ConstEigenArrayMap<T>& dY_arr,
    const ConstEigenArrayMap<T>& X_arr,
    const ConstEigenArrayMap<T>& Y_arr,
    EigenArrayMap<T>* dX_arr);

template <>
void ComputeMaxPoolGradient3D<float, StorageOrder::NCHW>(
    const int H,
    const int /* W */,
    const int p,
    const int a,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const ConstEigenArrayMap<float>& dY_arr,
    const ConstEigenArrayMap<float>& X_arr,
    const ConstEigenArrayMap<float>& Y_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = p; i < a; ++i) {
    dX_arr->block(l, i * H + t, r - l, b - t) +=
        (X_arr.block(l, i * H + t, r - l, b - t) == Y_arr(y)).cast<float>() *
        dY_arr(y);
  }
}

template <>
void ComputeMaxPoolGradient3D<float, StorageOrder::NHWC>(
    const int H,
    const int W,
    const int p,
    const int a,
    const int t,
    const int b,
    const int l,
    const int r,
    const int y,
    const ConstEigenArrayMap<float>& dY_arr,
    const ConstEigenArrayMap<float>& X_arr,
    const ConstEigenArrayMap<float>& Y_arr,
    EigenArrayMap<float>* dX_arr) {
  for (int i = p; i < a; ++i) {
    for (int j = t; j < b; ++j) {
      for (int k = l; k < r; ++k) {
        const int x = i * H * W + j * W + k;
        dX_arr->col(x) +=
            (X_arr.col(x) == Y_arr.col(y)).cast<float>() * dY_arr.col(y);
      }
    }
  }
}

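// The RunMaxPoolGradient{1,2,3}D drivers mirror the average-pool drivers, but
// additionally walk the forward input X and output Y so the per-window
// helpers above can identify which input positions held the maximum.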
template <typename T, StorageOrder kOrder>
void RunMaxPoolGradient1D(
    const int N,
    const int C,
    const int X_size,
    const int Y_size,
    const int kernel,
    const int stride,
    const int pad,
    const T* dY,
    const T* X,
    const T* Y,
    T* dX) {
  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
  const int X_stride = kOrder == StorageOrder::NCHW ? X_size : X_size * C;
  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_size : Y_size * C;
  std::memset(dX, 0, sizeof(T) * N * C * X_size);
  const T* dY_ptr = dY;
  const T* X_ptr = X;
  const T* Y_ptr = Y;
  T* dX_ptr = dX;
  for (int i = 0; i < batch_size; ++i) {
    ConstEigenArrayMap<T> dY_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(dY_ptr, Y_size, 1)
        : ConstEigenArrayMap<T>(dY_ptr, C, Y_size);
    ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(X_ptr, X_size, 1)
        : ConstEigenArrayMap<T>(X_ptr, C, X_size);
    ConstEigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(Y_ptr, Y_size, 1)
        : ConstEigenArrayMap<T>(Y_ptr, C, Y_size);
    EigenArrayMap<T> dX_arr = kOrder == StorageOrder::NCHW
        ? EigenArrayMap<T>(dX_ptr, X_size, 1)
        : EigenArrayMap<T>(dX_ptr, C, X_size);
    for (int y = 0; y < Y_size; ++y) {
      const int l = std::max(y * stride - pad, 0);
      const int r = std::min(y * stride - pad + kernel, X_size);
      ComputeMaxPoolGradient1D<T, kOrder>(
          l, r, y, dY_arr, X_arr, Y_arr, &dX_arr);
    }
    dY_ptr += Y_stride;
    X_ptr += X_stride;
    Y_ptr += Y_stride;
    dX_ptr += X_stride;
  }
}

template <typename T, StorageOrder kOrder>
void RunMaxPoolGradient2D(
    const int N,
    const int C,
    const int X_H,
    const int X_W,
    const int Y_H,
    const int Y_W,
    const int kernel_h,
    const int kernel_w,
    const int stride_h,
    const int stride_w,
    const int pad_t,
    const int pad_l,
    const T* dY,
    const T* X,
    const T* Y,
    T* dX) {
  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
  const int X_HxW = X_H * X_W;
  const int Y_HxW = Y_H * Y_W;
  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
  std::memset(dX, 0, sizeof(T) * N * C * X_HxW);
  const T* dY_ptr = dY;
  const T* X_ptr = X;
  const T* Y_ptr = Y;
  T* dX_ptr = dX;
  for (int i = 0; i < batch_size; ++i) {
    ConstEigenArrayMap<T> dY_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(dY_ptr, Y_W, Y_H)
        : ConstEigenArrayMap<T>(dY_ptr, C, Y_HxW);
    ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(X_ptr, X_W, X_H)
        : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
    ConstEigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(Y_ptr, Y_W, Y_H)
        : ConstEigenArrayMap<T>(Y_ptr, C, Y_HxW);
    EigenArrayMap<T> dX_arr = kOrder == StorageOrder::NCHW
        ? EigenArrayMap<T>(dX_ptr, X_W, X_H)
        : EigenArrayMap<T>(dX_ptr, C, X_HxW);
    for (int h = 0; h < Y_H; ++h) {
      const int t = std::max(h * stride_h - pad_t, 0);
      const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
      for (int w = 0; w < Y_W; ++w) {
        const int l = std::max(w * stride_w - pad_l, 0);
        const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
        const int y = h * Y_W + w;
        ComputeMaxPoolGradient2D<T, kOrder>(
            X_W, t, b, l, r, y, dY_arr, X_arr, Y_arr, &dX_arr);
      }
    }
    dY_ptr += Y_stride;
    X_ptr += X_stride;
    Y_ptr += Y_stride;
    dX_ptr += X_stride;
  }
}

template <typename T, StorageOrder kOrder>
void RunMaxPoolGradient3D(
    const int N,
    const int C,
    const int X_D,
    const int X_H,
    const int X_W,
    const int Y_D,
    const int Y_H,
    const int Y_W,
    const int kernel_d,
    const int kernel_h,
    const int kernel_w,
    const int stride_d,
    const int stride_h,
    const int stride_w,
    const int pad_p,
    const int pad_t,
    const int pad_l,
    const T* dY,
    const T* X,
    const T* Y,
    T* dX) {
  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
  const int X_HxW = X_D * X_H * X_W;
  const int Y_HxW = Y_D * Y_H * Y_W;
  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
  std::memset(dX, 0, sizeof(T) * N * C * X_HxW);
  const T* dY_ptr = dY;
  const T* X_ptr = X;
  const T* Y_ptr = Y;
  T* dX_ptr = dX;
  for (int i = 0; i < batch_size; ++i) {
    ConstEigenArrayMap<T> dY_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(dY_ptr, Y_W, Y_D * Y_H)
        : ConstEigenArrayMap<T>(dY_ptr, C, Y_HxW);
    ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(X_ptr, X_W, X_D * X_H)
        : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
    ConstEigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
        ? ConstEigenArrayMap<T>(Y_ptr, Y_W, Y_D * Y_H)
        : ConstEigenArrayMap<T>(Y_ptr, C, Y_HxW);
    EigenArrayMap<T> dX_arr = kOrder == StorageOrder::NCHW
        ? EigenArrayMap<T>(dX_ptr, X_W, X_D * X_H)
        : EigenArrayMap<T>(dX_ptr, C, X_HxW);
    for (int d = 0; d < Y_D; ++d) {
      const int p = std::max(d * stride_d - pad_p, 0);
      const int a = std::min(d * stride_d - pad_p + kernel_d, X_D);
      for (int h = 0; h < Y_H; ++h) {
        const int t = std::max(h * stride_h - pad_t, 0);
        const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
        for (int w = 0; w < Y_W; ++w) {
          const int l = std::max(w * stride_w - pad_l, 0);
          const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
          const int y = d * Y_H * Y_W + h * Y_W + w;
          ComputeMaxPoolGradient3D<T, kOrder>(
              X_H, X_W, p, a, t, b, l, r, y, dY_arr, X_arr, Y_arr, &dX_arr);
        }
      }
    }
    dY_ptr += Y_stride;
    X_ptr += X_stride;
    Y_ptr += Y_stride;
    dX_ptr += X_stride;
  }
}

} // namespace

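// Global average pooling backward: every input position in a channel
// contributed 1 / HxW to the single pooled output, so dX is simply dY / HxW
// broadcast over the spatial extent.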
template <>
template <>
bool AveragePoolFunctor<CPUContext>::
    GlobalPoolingBackward<float, StorageOrder::NCHW>(
        const int N,
        const int C,
        const int HxW,
        const float* dY,
        const float* /* X */,
        const float* /* Y */,
        float* dX,
        CPUContext* /* context */) const {
  const int NxC = N * C;
  EigenArrayMap<float> dX_arr(dX, HxW, NxC);
  const float scale = 1.0f / static_cast<float>(HxW);
  for (int i = 0; i < NxC; ++i) {
    dX_arr.col(i).setConstant(dY[i] * scale);
  }
  return true;
}

template <>
template <>
bool AveragePoolFunctor<CPUContext>::
    GlobalPoolingBackward<float, StorageOrder::NHWC>(
        const int N,
        const int C,
        const int HxW,
        const float* dY,
        const float* /* X */,
        const float* /* Y */,
        float* dX,
        CPUContext* /* context */) const {
  ConstEigenArrayMap<float> dY_arr(dY, C, N);
  const float scale = 1.0f / static_cast<float>(HxW);
  for (int i = 0; i < N; ++i) {
    EigenArrayMap<float>(dX + i * HxW * C, C, HxW).colwise() =
        dY_arr.col(i) * scale;
  }
  return true;
}

template <>
template <typename T, StorageOrder kOrder>
bool AveragePoolFunctor<CPUContext>::Backward(
    const int N,
    const int C,
    const std::vector<int>& X_dims,
    const std::vector<int>& Y_dims,
    const std::vector<int>& kernel,
    const std::vector<int>& /* dilation */,
    const std::vector<int>& stride,
    const std::vector<int>& pads,
    const T* dY,
    const T* /* X */,
    const T* /* Y */,
    T* dX,
    CPUContext* /* context */) const {
  const int ndim = X_dims.size();
  switch (ndim) {
    case 1: {
      RunAveragePoolGradient1D<T, kOrder>(
          N,
          C,
          X_dims[0],
          Y_dims[0],
          kernel[0],
          stride[0],
          pads[0],
          count_include_pad,
          dY,
          dX);
      return true;
    }
    case 2: {
      RunAveragePoolGradient2D<T, kOrder>(
          N,
          C,
          X_dims[0],
          X_dims[1],
          Y_dims[0],
          Y_dims[1],
          kernel[0],
          kernel[1],
          stride[0],
          stride[1],
          pads[0],
          pads[1],
          count_include_pad,
          dY,
          dX);
      return true;
    }
    case 3: {
      RunAveragePoolGradient3D<T, kOrder>(
          N,
          C,
          X_dims[0],
          X_dims[1],
          X_dims[2],
          Y_dims[0],
          Y_dims[1],
          Y_dims[2],
          kernel[0],
          kernel[1],
          kernel[2],
          stride[0],
          stride[1],
          stride[2],
          pads[0],
          pads[1],
          pads[2],
          count_include_pad,
          dY,
          dX);
      return true;
    }
    default: {
      CAFFE_THROW("Unsupported pooling dim: ", ndim);
      return false;
    }
  }
}

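// Global max pooling backward: gradient flows only to the positions whose
// value equals the per-channel maximum Y; tied maxima each receive the full
// dY.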
template <>
template <>
bool MaxPoolFunctor<CPUContext>::
    GlobalPoolingBackward<float, StorageOrder::NCHW>(
        const int N,
        const int C,
        const int HxW,
        const float* dY,
        const float* X,
        const float* Y,
        float* dX,
        CPUContext* /* context */) const {
  const int NxC = N * C;
  ConstEigenArrayMap<float> X_arr(X, HxW, NxC);
  EigenArrayMap<float> dX_arr(dX, HxW, NxC);
  for (int i = 0; i < NxC; ++i) {
    dX_arr.col(i) = (X_arr.col(i) == Y[i]).template cast<float>() * dY[i];
  }
  return true;
}

template <>
template <>
bool MaxPoolFunctor<CPUContext>::
    GlobalPoolingBackward<float, StorageOrder::NHWC>(
        const int N,
        const int C,
        const int HxW,
        const float* dY,
        const float* X,
        const float* Y,
        float* dX,
        CPUContext* /* context */) const {
  ConstEigenArrayMap<float> Y_arr(Y, C, N);
  ConstEigenArrayMap<float> dY_arr(dY, C, N);
  for (int i = 0; i < N; ++i) {
    ConstEigenArrayMap<float> X_arr(X + i * HxW * C, C, HxW);
    EigenArrayMap<float> dX_arr(dX + i * HxW * C, C, HxW);
    for (int j = 0; j < HxW; ++j) {
      dX_arr.col(j) =
          (X_arr.col(j) == Y_arr.col(i)).template cast<float>() * dY_arr.col(i);
    }
  }
  return true;
}

template <>
template <typename T, StorageOrder kOrder>
bool MaxPoolFunctor<CPUContext>::Backward(
    const int N,
    const int C,
    const std::vector<int>& X_dims,
    const std::vector<int>& Y_dims,
    const std::vector<int>& kernel,
    const std::vector<int>& /* dilation */,
    const std::vector<int>& stride,
    const std::vector<int>& pads,
    const T* dY,
    const T* X,
    const T* Y,
    T* dX,
    CPUContext* /* context */) const {
  const int ndim = X_dims.size();
  switch (ndim) {
    case 1: {
      RunMaxPoolGradient1D<T, kOrder>(
          N,
          C,
          X_dims[0],
          Y_dims[0],
          kernel[0],
          stride[0],
          pads[0],
          dY,
          X,
          Y,
          dX);
      return true;
    }
    case 2: {
      RunMaxPoolGradient2D<T, kOrder>(
          N,
          C,
          X_dims[0],
          X_dims[1],
          Y_dims[0],
          Y_dims[1],
          kernel[0],
          kernel[1],
          stride[0],
          stride[1],
          pads[0],
          pads[1],
          dY,
          X,
          Y,
          dX);
      return true;
    }
    case 3: {
      RunMaxPoolGradient3D<T, kOrder>(
          N,
          C,
          X_dims[0],
          X_dims[1],
          X_dims[2],
          Y_dims[0],
          Y_dims[1],
          Y_dims[2],
          kernel[0],
          kernel[1],
          kernel[2],
          stride[0],
          stride[1],
          stride[2],
          pads[0],
          pads[1],
          pads[2],
          dY,
          X,
          Y,
          dX);
      return true;
    }
    default: {
      CAFFE_THROW("Unsupported pooling dim: ", ndim);
      return false;
    }
  }
}

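// The dimension-specific gradient operators (AveragePool1DGradient, etc.) all
// share the same PoolGradientOp instantiation; the functor dispatches on the
// number of spatial dimensions at run time.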
REGISTER_CPU_OPERATOR(
    AveragePoolGradient,
    PoolGradientOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(AveragePoolGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    AveragePool1DGradient,
    PoolGradientOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(AveragePool1DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    AveragePool2DGradient,
    PoolGradientOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(AveragePool2DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    AveragePool3DGradient,
    PoolGradientOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(AveragePool3DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPoolGradient,
    PoolGradientOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(MaxPoolGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPool1DGradient,
    PoolGradientOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(MaxPool1DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPool2DGradient,
    PoolGradientOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(MaxPool2DGradient).NumInputs(3).NumOutputs(1);

REGISTER_CPU_OPERATOR(
    MaxPool3DGradient,
    PoolGradientOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);
OPERATOR_SCHEMA(MaxPool3DGradient).NumInputs(3).NumOutputs(1);

namespace {

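// Gradient maker: for each forward pooling op, emit a "<OpType>Gradient" op
// that consumes the forward input X (I(0)), the forward output Y (O(0)), and
// the output gradient dY (GO(0)), and produces the input gradient dX (GI(0)).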
class GetPoolGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

  std::vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        std::vector<std::string>{I(0), O(0), GO(0)},
        std::vector<std::string>{GI(0)});
  }
};

} // namespace

REGISTER_GRADIENT(AveragePool, GetPoolGradient);
REGISTER_GRADIENT(AveragePool1D, GetPoolGradient);
REGISTER_GRADIENT(AveragePool2D, GetPoolGradient);
REGISTER_GRADIENT(AveragePool3D, GetPoolGradient);

REGISTER_GRADIENT(MaxPool, GetPoolGradient);
REGISTER_GRADIENT(MaxPool1D, GetPoolGradient);
REGISTER_GRADIENT(MaxPool2D, GetPoolGradient);
REGISTER_GRADIENT(MaxPool3D, GetPoolGradient);

} // namespace caffe2