// NOTE(review): this chunk is a line-sampled fragment — original source line
// numbers are embedded at the start of each line; braces and some statements
// are elided from this view.
//
// 2D "valid" cross-correlation at the raw-pointer level.
// r_: output buffer (or_ x oc); t_: input (ir x ic); k_: kernel (kr x kc);
// sr/sc: row/column strides.  Valid output extent: o = (i - k)/s + 1.
1 #ifndef TH_GENERIC_FILE 2 #define TH_GENERIC_FILE "TH/generic/THTensorConv.cpp" 8 void THTensor_(validXCorr2Dptr)(scalar_t *r_,
10 scalar_t *t_, int64_t ir, int64_t ic,
11 scalar_t *k_, int64_t kr, int64_t kc,
12 int64_t sr, int64_t sc)
14 int64_t or_ = (ir - kr) / sr + 1;
15 int64_t oc = (ic - kc) / sc + 1;
17 int64_t xx, yy, kx, ky;
// Scalar path: taken for non-unit column stride or narrow output rows.
19 if ((sc != 1) || (oc < 4)) {
21 for(yy = 0; yy < or_; yy++) {
22 for(xx = 0; xx < oc; xx++) {
// pi_ points at the top-left input element under the current output pixel.
24 scalar_t *pi_ = t_ + yy*sr*ic + xx*sc;
27 for(ky = 0; ky < kr; ky++) {
// Kernel is applied un-flipped (pw_[kx]) — cross-correlation, not convolution.
28 for(kx = 0; kx < kc; kx++) {
29 sum += pi_[kx]*pw_[kx];
// Vectorized path (sc == 1, oc >= 4): accumulate a whole output row per call
// via THVector_(cadd), scaling each shifted input row by alpha*weight.
41 for(yy = 0; yy < or_; yy++) {
42 scalar_t *pi_ = t_ + yy*sr*ic;
44 for (ky = 0; ky < kr; ky++) {
46 for (kx = 0; kx < kc; kx++) {
47 THVector_(cadd)(r_, r_, pis_, alpha*pw_[kx], oc);
// 2D "valid" true convolution at the raw-pointer level.
// Same geometry as validXCorr2Dptr, but the kernel is traversed in reverse:
// pw_ starts at the last kernel element and is indexed with negative offsets
// (pw_[-kx]), i.e. the kernel is flipped — convolution semantics.
// NOTE(review): fragmentary view — braces and some statements elided.
61 void THTensor_(validConv2Dptr)(scalar_t *r_,
63 scalar_t *t_, int64_t ir, int64_t ic,
64 scalar_t *k_, int64_t kr, int64_t kc,
65 int64_t sr, int64_t sc)
67 int64_t or_ = (ir - kr) / sr + 1;
68 int64_t oc = (ic - kc) / sc + 1;
70 int64_t xx, yy, kx, ky;
// Scalar path for strided / narrow cases.
72 if ((sc != 1) || (oc < 4)) {
74 for(yy = 0; yy < or_; yy++) {
75 for(xx = 0; xx < oc; xx++) {
77 scalar_t *pi_ = t_ + yy*sr*ic + xx*sc;
// pw_ = last kernel element; negative indexing flips the kernel.
78 scalar_t *pw_ = k_ + kr*kc - 1;
80 for(ky = 0; ky < kr; ky++) {
81 for(kx = 0; kx < kc; kx++) {
82 sum += pi_[kx]*pw_[-kx];
// Vectorized path: whole-row accumulation with flipped weights.
94 for(yy = 0; yy < or_; yy++) {
95 scalar_t *pw_ = k_ + kr*kc - 1;
96 scalar_t *pi_ = t_ + yy*sr*ic;
97 for (ky = 0; ky < kr; ky++) {
99 for (kx = 0; kx < kc; kx++) {
100 THVector_(cadd)(r_, r_, pis_, alpha*pw_[-kx], oc);
// 2D "full" convolution at the raw-pointer level (scatter form).
// Full output extent: o = (i - 1)*s + k.  Each input element t_[yy][xx] is
// scaled by alpha and scattered into a kr x kc window of the output
// (po_[kx] += z * pw_[kx]) — the transpose of the valid gather above.
// NOTE(review): fragmentary view — braces and some statements elided.
114 void THTensor_(fullConv2Dptr)(scalar_t *r_,
116 scalar_t *t_, int64_t ir, int64_t ic,
117 scalar_t *k_, int64_t kr, int64_t kc,
118 int64_t sr, int64_t sc)
120 int64_t oc = (ic - 1) * sc + kc;
122 int64_t xx, yy, kx, ky;
// Scalar path for strided / narrow inputs.
124 if ((sc != 1) || (ic < 4)) {
126 for(yy = 0; yy < ir; yy++) {
127 for(xx = 0; xx < ic; xx++) {
// po_ = top-left of the output window receiving this input element.
129 scalar_t *po_ = r_ + yy*sr*oc + xx*sc;
131 for(ky = 0; ky < kr; ky++)
133 scalar_t z = *t_ * alpha;
134 for(kx = 0; kx < kc; kx++) {
135 po_[kx] += z * pw_[kx];
// Vectorized path: scatter a whole input row at once per kernel tap.
146 for(yy = 0; yy < ir; yy++) {
147 scalar_t *po_ = r_ + yy*sr*oc;
149 for (ky = 0; ky < kr; ky++) {
150 scalar_t *pos_ = po_;
151 for (kx = 0; kx < kc; kx++) {
152 THVector_(cadd)(pos_, pos_, t_, alpha*pw_[kx], ic);
// 2D "full" cross-correlation at the raw-pointer level (scatter form).
// Same geometry as fullConv2Dptr, but the kernel is walked backwards via
// pw_[-kx] from the last element — the flip relative to fullConv2Dptr
// mirrors the valid-mode pair above.
// NOTE(review): fragmentary view — braces and some statements elided.
166 void THTensor_(fullXCorr2Dptr)(scalar_t *r_,
168 scalar_t *t_, int64_t ir, int64_t ic,
169 scalar_t *k_, int64_t kr, int64_t kc,
170 int64_t sr, int64_t sc)
172 int64_t oc = (ic - 1) * sc + kc;
174 int64_t xx, yy, kx, ky;
// Scalar path for strided / narrow inputs.
176 if ((sc != 1) || (ic < 4)) {
178 for(yy = 0; yy < ir; yy++) {
179 for(xx = 0; xx < ic; xx++) {
181 scalar_t *po_ = r_ + yy*sr*oc + xx*sc;
// pw_ = last kernel element; negative indexing reverses the kernel.
182 scalar_t *pw_ = k_ + kr*kc -1;
184 for(ky = 0; ky < kr; ky++)
186 scalar_t z = *t_ * alpha;
187 for(kx = 0; kx < kc; kx++) {
188 po_[kx] += z * pw_[-kx];
// Vectorized path: row-at-a-time scatter with reversed weights.
199 for(yy = 0; yy < ir; yy++) {
200 scalar_t *po_ = r_ + yy*sr*oc;
201 scalar_t *pw_ = k_ + kr*kc -1;
202 for (ky = 0; ky < kr; ky++) {
203 scalar_t *pos_ = po_;
204 for (kx = 0; kx < kc; kx++) {
205 THVector_(cadd)(pos_, pos_, t_, pw_[-kx]*alpha, ic);
// 2D valid cross-correlation with "reversed" roles: the outer loops walk the
// kernel (kr x kc) and the inner loops walk the output (or_ x oc).
// Output extent: o = i - (k - 1)*s.  Each kernel tap (*k_++ * alpha) scales a
// strided window of the input accumulated into the output — this is the
// weight-gradient style kernel used by the conv2DRevger wrappers below.
// NOTE(review): fragmentary view — braces and some statements elided.
221 void THTensor_(validXCorr2DRevptr)(scalar_t *r_,
223 scalar_t *t_, int64_t ir, int64_t ic,
224 scalar_t *k_, int64_t kr, int64_t kc,
225 int64_t sr, int64_t sc)
227 int64_t or_ = ir - (kr - 1) * sr;
228 int64_t oc = ic - (kc - 1) * sc;
230 int64_t xx, yy, kx, ky;
// Scalar path: non-unit column stride or small kernels.
232 if ((sc != 1) || (kc < 4)) {
234 for(yy = 0; yy < kr; yy++) {
235 for(xx = 0; xx < kc; xx++) {
237 scalar_t *pi_ = t_ + yy*sr*ic + xx*sc;
// k_ is consumed sequentially: one scalar weight per (yy, xx) tap.
238 scalar_t z = *k_++ * alpha;
240 for(ky = 0; ky < or_; ky++) {
241 for(kx = 0; kx < oc; kx++)
242 po_[kx] += z * pi_[kx];
// Vectorized path: add whole input rows scaled by the tap weight.
251 for(yy = 0; yy < kr; yy++) {
252 for(xx = 0; xx < kc; xx++) {
254 scalar_t *pi_ = t_ + yy*sr*ic + xx*sc;
255 scalar_t z = *k_++ * alpha;
257 for(ky = 0; ky < or_; ky++) {
258 THVector_(cadd)(po_, po_, pi_, z, oc);
// 3D "valid" cross-correlation (depth/time t, rows r, cols c).
// Output extent per axis: o = (i - k)/s + 1.  Straightforward 6-deep gather
// loop; kernel applied un-flipped (pw_[kx]).  No vectorized fast path in the
// 3D variants.
// NOTE(review): fragmentary view — braces and some statements elided.
269 void THTensor_(validXCorr3Dptr)(scalar_t *r_,
271 scalar_t *t_, int64_t it, int64_t ir, int64_t ic,
272 scalar_t *k_, int64_t kt, int64_t kr, int64_t kc,
273 int64_t st, int64_t sr, int64_t sc)
275 int64_t ot = (it - kt) / st + 1;
276 int64_t or_ = (ir - kr) / sr + 1;
277 int64_t oc = (ic - kc) / sc + 1;
281 for (zz = 0; zz < ot; zz++)
283 for(yy = 0; yy < or_; yy++)
285 for(xx = 0; xx < oc; xx++)
// pi_ = front-top-left input corner under the current output voxel.
288 scalar_t *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
292 for(kz = 0; kz < kt; kz++)
294 for(ky = 0; ky < kr; ky++)
296 for(kx = 0; kx < kc; kx++) {
297 sum += pi_[kx]*pw_[kx];
// 3D "valid" true convolution: identical geometry to validXCorr3Dptr but the
// kernel is flipped — pw_ starts at the last element (k_ + kt*kr*kc - 1) and
// is read with negative offsets (pw_[-kx]).
// NOTE(review): fragmentary view — braces and some statements elided.
314 void THTensor_(validConv3Dptr)(scalar_t *r_,
316 scalar_t *t_, int64_t it, int64_t ir, int64_t ic,
317 scalar_t *k_, int64_t kt, int64_t kr, int64_t kc,
318 int64_t st, int64_t sr, int64_t sc)
320 int64_t ot = (it - kt) / st + 1;
321 int64_t or_ = (ir - kr) / sr + 1;
322 int64_t oc = (ic - kc) / sc + 1;
326 for(zz = 0; zz < ot; zz++)
328 for(yy = 0; yy < or_; yy++)
330 for(xx = 0; xx < oc; xx++)
333 scalar_t *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
// Last kernel element; negative indexing implements the flip.
334 scalar_t *pw_ = k_ + kt*kr*kc - 1;
337 for(kz = 0; kz < kt; kz++)
339 for(ky = 0; ky < kr; ky++)
341 for(kx = 0; kx < kc; kx++) {
342 sum += pi_[kx]*pw_[-kx];
// 3D "full" convolution (scatter form): output extent o = (i - 1)*s + k per
// axis.  Each input voxel (*t_ * alpha) is scattered into a kt x kr x kc
// output window with un-flipped weights (pw_[kx]).
// NOTE(review): fragmentary view — braces and some statements elided.
360 void THTensor_(fullConv3Dptr)(scalar_t *r_,
362 scalar_t *t_, int64_t it, int64_t ir, int64_t ic,
363 scalar_t *k_, int64_t kt, int64_t kr, int64_t kc,
364 int64_t st, int64_t sr, int64_t sc)
366 int64_t or_ = (ir - 1) * sr + kr;
367 int64_t oc = (ic - 1) * sc + kc;
371 for(zz = 0; zz < it; zz++)
373 for(yy = 0; yy < ir; yy++)
375 for(xx = 0; xx < ic; xx++)
// po_ = front-top-left of the output window receiving this input voxel.
378 scalar_t *po_ = r_ + zz*st*or_*oc + yy*sr*oc + xx*sc;
382 for(kz = 0; kz < kt; kz++)
384 for(ky = 0; ky < kr; ky++)
386 scalar_t z = *t_ * alpha;
387 for(kx = 0; kx < kc; kx++) {
389 po_[kx] += z * pw_[kx];
// 3D "full" cross-correlation (scatter form): like fullConv3Dptr but with the
// kernel reversed via pw_[-kx] from the last element (k_ + kt*kr*kc - 1).
// NOTE(review): fragmentary view — braces and some statements elided.
408 void THTensor_(fullXCorr3Dptr)(scalar_t *r_,
410 scalar_t *t_, int64_t it, int64_t ir, int64_t ic,
411 scalar_t *k_, int64_t kt, int64_t kr, int64_t kc,
412 int64_t st, int64_t sr, int64_t sc)
414 int64_t or_ = (ir - 1) * sr + kr;
415 int64_t oc = (ic - 1) * sc + kc;
419 for(zz = 0; zz < it; zz++)
421 for(yy = 0; yy < ir; yy++)
423 for(xx = 0; xx < ic; xx++)
426 scalar_t *po_ = r_ + zz*st*or_*oc + yy*sr*oc + xx*sc;
// Last kernel element; negative indexing reverses the kernel.
427 scalar_t *pw_ = k_ + kt*kr*kc -1;
429 for(kz = 0; kz < kt; kz++)
431 for(ky = 0; ky < kr; ky++)
433 scalar_t z = *t_ * alpha;
434 for(kx = 0; kx < kc; kx++) {
435 po_[kx] += z * pw_[-kx];
// 3D valid cross-correlation with reversed roles (3D analogue of
// validXCorr2DRevptr): outer loops walk the kernel, inner loops walk the
// output (o = i - (k - 1)*s per axis).  Each kernel tap (*k_++ * alpha)
// scales a strided input window accumulated into the output.
// NOTE(review): fragmentary view — braces and some statements elided.
453 void THTensor_(validXCorr3DRevptr)(scalar_t *r_,
455 scalar_t *t_, int64_t it, int64_t ir, int64_t ic,
456 scalar_t *k_, int64_t kt, int64_t kr, int64_t kc,
457 int64_t st, int64_t sr, int64_t sc)
459 int64_t ot = it - (kt - 1) * st;
460 int64_t or_ = ir - (kr - 1) * sr;
461 int64_t oc = ic - (kc - 1) * sc;
464 for(zz = 0; zz < kt; zz++)
466 for(yy = 0; yy < kr; yy++)
468 for(xx = 0; xx < kc; xx++)
471 scalar_t *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
// Kernel consumed sequentially: one weight per (zz, yy, xx) tap.
472 scalar_t z = *k_++ * alpha;
474 for(kz = 0; kz < ot; kz++)
476 for(ky = 0; ky < or_; ky++)
478 for(kx = 0; kx < oc; kx++)
479 po_[kx] += z * pi_[kx];
// Dispatcher for the four 2D pointer kernels above.
// vf: 'V' (valid) or 'F' (full); xc: 'X' (cross-correlation) or 'C'
// (convolution).  Validates the mode characters, then forwards the raw
// pointers/sizes/strides to the matching {full,valid}{XCorr,Conv}2Dptr.
// NOTE(review): fragmentary view — the if/else mode-selection lines are
// elided between the four call sites.
490 void THTensor_(conv2d)(scalar_t* output_data,
492 scalar_t* ptr_input, int64_t nInputRows, int64_t nInputCols,
493 scalar_t* ptr_weight, int64_t nKernelRows, int64_t nKernelCols,
494 int64_t srow, int64_t scol,
495 const char *vf,
const char *xc)
497 THArgCheck(*vf ==
'V' || *vf ==
'F', 7,
"type of convolution can be 'V' or 'F'");
498 THArgCheck(*xc ==
'C' || *xc ==
'X', 7,
"type of convolution can be 'X' or 'C'");
// Full + cross-correlation.
501 THTensor_(fullXCorr2Dptr)(output_data,
503 ptr_input, nInputRows, nInputCols,
504 ptr_weight, nKernelRows, nKernelCols,
// Full + convolution.
507 THTensor_(fullConv2Dptr)(output_data,
509 ptr_input, nInputRows, nInputCols,
510 ptr_weight, nKernelRows, nKernelCols,
// Valid + cross-correlation.
514 THTensor_(validXCorr2Dptr)(output_data,
516 ptr_input, nInputRows, nInputCols,
517 ptr_weight, nKernelRows, nKernelCols,
// Valid + convolution.
520 THTensor_(validConv2Dptr)(output_data,
522 ptr_input, nInputRows, nInputCols,
523 ptr_weight, nKernelRows, nKernelCols,
// 3D analogue of conv2d: dispatches to the four 3D pointer kernels based on
// vf ('V'/'F') and xc ('X'/'C') after validating both mode characters.
// NOTE(review): fragmentary view — mode-selection lines elided between calls.
527 void THTensor_(conv3d)(scalar_t* output_data,
529 scalar_t* ptr_input, int64_t nInputDepth, int64_t nInputRows, int64_t nInputCols,
530 scalar_t* ptr_weight, int64_t nKernelDepth, int64_t nKernelRows, int64_t nKernelCols,
531 int64_t sdepth, int64_t srow, int64_t scol,
532 const char *vf,
const char *xc)
534 THArgCheck(*vf ==
'V' || *vf ==
'F', 7,
"type of convolution can be 'V' or 'F'");
535 THArgCheck(*xc ==
'C' || *xc ==
'X', 7,
"type of convolution can be 'X' or 'C'");
// Full + cross-correlation.
538 THTensor_(fullXCorr3Dptr)(output_data,
540 ptr_input, nInputDepth, nInputRows, nInputCols,
541 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
// Full + convolution.
544 THTensor_(fullConv3Dptr)(output_data,
546 ptr_input, nInputDepth, nInputRows, nInputCols,
547 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
// Valid + cross-correlation.
551 THTensor_(validXCorr3Dptr)(output_data,
553 ptr_input, nInputDepth, nInputRows, nInputCols,
554 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
// Valid + convolution.
557 THTensor_(validConv3Dptr)(output_data,
559 ptr_input, nInputDepth, nInputRows, nInputCols,
560 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
// Computes the output extent along one axis for input size x, kernel size k,
// stride s, and mode *vf ('V' valid / 'F' full).  Body (the size formulas)
// is elided from this view; the visible part validates the mode character.
564 int64_t THTensor_(convsize)(int64_t x, int64_t k, int64_t s,
const char* vf)
566 THArgCheck(*vf ==
'V' || *vf ==
'F', 1,
"type of convolution can be 'V' or 'F'");
// r_ = beta*r_ + alpha * revXCorr2D(t_, k_): outer-product-style reverse
// cross-correlation over every (kernel plane, input plane) pair, producing a
// 4D result [nKernelPlane][nInputPlane][nOutputRows][nOutputCols].
// t_: 3D input, k_: 3D kernel; srow/scol: strides.
// The result buffer is zeroed when freshly allocated or beta == 0, scaled by
// beta otherwise, then accumulated into by validXCorr2DRevptr.
// FIX(review): error message typo "covn2DRevger" corrected to "conv2DRevger".
// NOTE(review): fragmentary view — braces and some statements elided.
581 void THTensor_(conv2DRevger)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol)
583 int64_t nInputPlane, nInputRows, nInputCols;
584 int64_t nKernelPlane, nKernelRows, nKernelCols;
585 int64_t nOutputRows, nOutputCols;
586 int64_t istride0, kstride0;
589 scalar_t *input_data;
590 scalar_t *weight_data;
591 scalar_t *output_data;
// Argument validation: non-empty 3D tensors, positive strides.
595 AT_CHECK(!t_->is_empty() && t_->dim() == 3,
"input: non-empty 3D Tensor expected, got size: ", t_->sizes());
596 AT_CHECK(!k_->is_empty() && k_->dim() == 3,
"kernel: non-empty 3D Tensor expected, got size: ", k_->sizes());
597 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
598 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
// Work on contiguous copies; released via decref at the end.
600 input = THTensor_(newContiguous)(t_);
601 kernel = THTensor_(newContiguous)(k_);
603 nInputPlane = input->size(0);
604 istride0 = input->stride(0);
605 nInputRows = input->size(1);
606 nInputCols = input->size(2);
608 kstride0 = kernel->stride(0);
609 nKernelPlane = kernel->size(0);
610 nKernelRows = kernel->size(1);
611 nKernelCols = kernel->size(2);
613 THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2,
"conv2DRevger : Input image is smaller than kernel");
// Reverse-mode output extent: o = i - (k - 1)*s.
615 nOutputRows = nInputRows - (nKernelRows - 1) * srow;
616 nOutputCols = nInputCols - (nKernelCols - 1) * scol;
// nelem taken BEFORE resize so we can tell whether r_ changed shape.
618 nelem = THTensor_(nElement)(r_);
619 THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
621 input_data = input->data<scalar_t>();
622 weight_data = kernel->data<scalar_t>();
623 output_data = r_->data<scalar_t>();
// Fresh/reshaped output or beta == 0: zero-fill in parallel ...
625 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
629 #pragma omp parallel for private(k) 630 for (k = 0; k < r_->size(0)*r_->size(1); k++)
632 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
634 for (l = 0; l < nOutputRows*nOutputCols; l++)
// ... otherwise scale existing contents by beta.
641 #pragma omp parallel for private(k) 642 for (k = 0; k < r_->size(0)*r_->size(1); k++)
644 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
646 for (l = 0; l < nOutputRows*nOutputCols; l++)
647 ptr_output[l] *= beta;
// Accumulate one reverse correlation per (kernel plane, input plane) pair.
651 #pragma omp parallel for private(k) 652 for(k = 0; k < nKernelPlane; k++)
656 scalar_t *ptr_weight = weight_data+k*kstride0;
658 for(i = 0; i < nInputPlane; i++)
661 scalar_t *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;
663 scalar_t *ptr_input = input_data+i*istride0;
666 THTensor_(validXCorr2DRevptr)(ptr_output,
668 ptr_input, nInputRows, nInputCols,
669 ptr_weight, nKernelRows, nKernelCols,
// Release the contiguous copies.
675 c10::raw::intrusive_ptr::decref(input);
676 c10::raw::intrusive_ptr::decref(kernel);
687 void THTensor_(conv2DRevgerm)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol)
689 int64_t nbatch, nInputPlane, nInputRows, nInputCols;
690 int64_t nKernelPlane, nKernelRows, nKernelCols;
691 int64_t nOutputRows, nOutputCols;
692 int64_t istride0, kstride0, istride1, kstride1;
695 scalar_t *input_data;
696 scalar_t *weight_data;
697 scalar_t *output_data;
701 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
702 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
703 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
704 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
706 input = THTensor_(newContiguous)(t_);
707 kernel = THTensor_(newContiguous)(k_);
709 istride0 = input->stride(0);
710 istride1 = input->stride(1);
711 nbatch = input->size(0);
712 nInputPlane = input->size(1);
713 nInputRows = input->size(2);
714 nInputCols = input->size(3);
716 kstride0 = kernel->stride(0);
717 kstride1 = kernel->stride(1);
718 nKernelPlane = kernel->size(1);
719 nKernelRows = kernel->size(2);
720 nKernelCols = kernel->size(3);
722 THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2,
"conv2DRevger : Input image is smaller than kernel");
723 THArgCheck(kernel->size(0) == input->size(0) , 2,
"conv2DRevger : Input batch and kernel batch is not same size");
725 nOutputRows = nInputRows - (nKernelRows - 1) * srow;
726 nOutputCols = nInputCols - (nKernelCols - 1) * scol;
728 nelem = THTensor_(nElement)(r_);
729 THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
731 input_data = input->data<scalar_t>();
732 weight_data = kernel->data<scalar_t>();
733 output_data = r_->data<scalar_t>();
735 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
739 #pragma omp parallel for private(k) 740 for (k = 0; k < r_->size(0)*r_->size(1); k++)
742 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
744 for (l = 0; l < nOutputRows*nOutputCols; l++)
751 #pragma omp parallel for private(k) 752 for (k = 0; k < r_->size(0)*r_->size(1); k++)
754 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
756 for (l = 0; l < nOutputRows*nOutputCols; l++)
757 ptr_output[l] *= beta;
761 #pragma omp parallel for private(k) 762 for(k = 0; k < nKernelPlane; k++)
765 for(i = 0; i < nInputPlane; i++)
768 for(p = 0; p < nbatch; p++)
771 scalar_t *ptr_weight = weight_data + p*kstride0 + k*kstride1;
773 scalar_t *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;
775 scalar_t *ptr_input = input_data + p*istride0 + i*istride1;
778 THTensor_(validXCorr2DRevptr)(ptr_output,
780 ptr_input, nInputRows, nInputCols,
781 ptr_weight, nKernelRows, nKernelCols,
788 c10::raw::intrusive_ptr::decref(input);
789 c10::raw::intrusive_ptr::decref(kernel);
// r_ = beta*r_ + alpha * conv2D(t_, k_) as an outer product over planes:
// every kernel plane is applied to every input plane, yielding a 4D output
// [nKernelPlane][nInputPlane][nOutputRows][nOutputCols].
// vf: 'V'/'F' mode; xc: 'X'/'C' correlation vs. convolution.
// NOTE(review): fragmentary view — braces, else-branches and the vf/xc
// selection lines between the four kernel calls are elided.
798 void THTensor_(conv2Dger)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
800 int64_t nInputPlane, nInputRows, nInputCols;
801 int64_t nKernelPlane, nKernelRows, nKernelCols;
802 int64_t nOutputRows, nOutputCols;
803 int64_t istride0, kstride0;
807 scalar_t *input_data;
808 scalar_t *weight_data;
809 scalar_t *output_data;
// Argument validation: non-empty 3D tensors, positive strides, valid modes.
813 AT_CHECK(!t_->is_empty() && t_->dim() == 3,
"input: non-empty 3D Tensor expected, got size: ", t_->sizes());
814 AT_CHECK(!k_->is_empty() && k_->dim() == 3,
"kernel: non-empty 3D Tensor expected, got size: ", k_->sizes());
815 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
816 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
817 THArgCheck(*vf ==
'V' || *vf ==
'F', 7,
"type of convolution can 'V' or 'F'");
818 THArgCheck(*xc ==
'C' || *xc ==
'X', 7,
"type of convolution can 'X' or 'C'");
820 input = THTensor_(newContiguous)(t_);
821 kernel = THTensor_(newContiguous)(k_);
823 nInputPlane = input->size(0);
824 istride0 = input->stride(0);
825 nInputRows = input->size(1);
826 nInputCols = input->size(2);
828 kstride0 = kernel->stride(0);
829 nKernelPlane = kernel->size(0);
830 nKernelRows = kernel->size(1);
831 nKernelCols = kernel->size(2);
// Size check only applies in 'V' mode; 'F' pads implicitly.
833 THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv2Dger : Input image is smaller than kernel");
// 'F' (full) output extent ...
836 nOutputRows = (nInputRows - 1) * srow + nKernelRows;
837 nOutputCols = (nInputCols - 1) * scol + nKernelCols;
// ... vs 'V' (valid) output extent.
839 nOutputRows = (nInputRows - nKernelRows) / srow + 1;
840 nOutputCols = (nInputCols - nKernelCols) / scol + 1;
843 nelem = THTensor_(nElement)(r_);
844 THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
846 input_data = input->data<scalar_t>();
847 weight_data = kernel->data<scalar_t>();
848 output_data = r_->data<scalar_t>();
// Fresh/reshaped output or beta == 0: zero-fill; otherwise scale by beta.
850 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
853 #pragma omp parallel for private(k) 854 for (k = 0; k < r_->size(0)*r_->size(1); k++)
856 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
858 for (l = 0; l < nOutputRows*nOutputCols; l++)
865 #pragma omp parallel for private(k) 866 for (k = 0; k < r_->size(0)*r_->size(1); k++)
868 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
870 for (l = 0; l < nOutputRows*nOutputCols; l++)
871 ptr_output[l] *= beta;
// One 2D kernel call per (kernel plane, input plane) pair, selected by vf/xc.
875 #pragma omp parallel for private(k) 876 for(k = 0; k < nKernelPlane; k++)
880 scalar_t *ptr_weight = weight_data+k*kstride0;
882 for(i = 0; i < nInputPlane; i++)
885 scalar_t *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;
887 scalar_t *ptr_input = input_data+i*istride0;
892 THTensor_(fullXCorr2Dptr)(ptr_output,
894 ptr_input, nInputRows, nInputCols,
895 ptr_weight, nKernelRows, nKernelCols,
898 THTensor_(fullConv2Dptr)(ptr_output,
900 ptr_input, nInputRows, nInputCols,
901 ptr_weight, nKernelRows, nKernelCols,
905 THTensor_(validXCorr2Dptr)(ptr_output,
907 ptr_input, nInputRows, nInputCols,
908 ptr_weight, nKernelRows, nKernelCols,
911 THTensor_(validConv2Dptr)(ptr_output,
913 ptr_input, nInputRows, nInputCols,
914 ptr_weight, nKernelRows, nKernelCols,
// Release the contiguous copies.
920 c10::raw::intrusive_ptr::decref(input);
921 c10::raw::intrusive_ptr::decref(kernel);
// Matrix-vector style convolution: 4D kernel [outPlane][inPlane][kr][kc]
// applied to 3D input [inPlane][ir][ic]; contributions over input planes are
// summed per output plane, producing a 3D result [outPlane][or][oc].
// r_ = beta*r_ + alpha * conv.
// NOTE(review): fragmentary view — braces, else-branches and the vf/xc
// selection lines between the four kernel calls are elided.
930 void THTensor_(conv2Dmv)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
932 int64_t nInputPlane, nInputRows, nInputCols;
933 int64_t nKernelRows, nKernelCols;
934 int64_t nOutputPlane, nOutputRows, nOutputCols;
935 int64_t istride0, kstride0, kstride1;
938 scalar_t *input_data;
939 scalar_t *weight_data;
940 scalar_t *output_data;
// Argument validation.
944 AT_CHECK(!t_->is_empty() && t_->dim() == 3,
"input: non-empty 3D Tensor expected, got size: ", t_->sizes());
945 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
946 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
947 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
948 THArgCheck(*vf ==
'V' || *vf ==
'F', 7,
"type of convolution can 'V' or 'F'");
949 THArgCheck(*xc ==
'C' || *xc ==
'X', 7,
"type of convolution can 'X' or 'C'");
951 input = THTensor_(newContiguous)(t_);
// Kernel is only copied if its innermost 2 dims are not already contiguous;
// otherwise the original is retained (refcount bump) and used in place.
952 if (!(k_->stride(3) == 1) || !(k_->stride(2) == k_->size(3))) {
953 kernel = THTensor_(newContiguous)(k_);
955 THTensor_(retain)(k_);
959 nInputPlane = input->size(0);
960 istride0 = input->stride(0);
961 nInputRows = input->size(1);
962 nInputCols = input->size(2);
964 kstride0 = kernel->stride(0);
965 kstride1 = kernel->stride(1);
966 nKernelRows = kernel->size(2);
967 nKernelCols = kernel->size(3);
968 nOutputPlane = kernel->size(0);
969 THArgCheck(kernel->size(1) == nInputPlane, 2,
"invalid number of input planes");
971 THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv2Dmv : Input image is smaller than kernel");
// 'F' (full) output extent ...
974 nOutputRows = (nInputRows - 1) * srow + nKernelRows;
975 nOutputCols = (nInputCols - 1) * scol + nKernelCols;
// ... vs 'V' (valid) output extent.
977 nOutputRows = (nInputRows - nKernelRows) / srow + 1;
978 nOutputCols = (nInputCols - nKernelCols) / scol + 1;
981 nelem = THTensor_(nElement)(r_);
982 THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);
984 input_data = input->data<scalar_t>();
985 weight_data = kernel->data<scalar_t>();
986 output_data = r_->data<scalar_t>();
// Fresh/reshaped output or beta == 0: zero-fill; otherwise scale by beta.
988 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
991 #pragma omp parallel for private(k) 992 for (k = 0; k < r_->size(0); k++)
994 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
996 for (l = 0; l < nOutputRows*nOutputCols; l++)
1003 #pragma omp parallel for private(k) 1004 for (k = 0; k < r_->size(0); k++)
1006 scalar_t* ptr_output = output_data + k*nOutputCols*nOutputRows;
1008 for (l = 0; l < nOutputRows*nOutputCols; l++)
1009 ptr_output[l] *= beta;
// Per output plane, accumulate contributions from every input plane.
1013 #pragma omp parallel for private(k) 1014 for(k = 0; k < nOutputPlane; k++)
1018 scalar_t *ptr_output = output_data + k*nOutputCols*nOutputRows;
1019 for(i = 0; i < nInputPlane; i++)
1022 scalar_t *ptr_weight = weight_data + k*kstride0 + i*kstride1;
1024 scalar_t *ptr_input = input_data + i*istride0;
1029 THTensor_(fullXCorr2Dptr)(ptr_output,
1031 ptr_input, nInputRows, nInputCols,
1032 ptr_weight, nKernelRows, nKernelCols,
1035 THTensor_(fullConv2Dptr)(ptr_output,
1037 ptr_input, nInputRows, nInputCols,
1038 ptr_weight, nKernelRows, nKernelCols,
1042 THTensor_(validXCorr2Dptr)(ptr_output,
1044 ptr_input, nInputRows, nInputCols,
1045 ptr_weight, nKernelRows, nKernelCols,
1048 THTensor_(validConv2Dptr)(ptr_output,
1050 ptr_input, nInputRows, nInputCols,
1051 ptr_weight, nKernelRows, nKernelCols,
// Release input copy and the kernel (copied or retained above).
1057 c10::raw::intrusive_ptr::decref(input);
1058 c10::raw::intrusive_ptr::decref(kernel);
// Batched matrix-matrix style convolution: 4D input [batch][inPlane][ir][ic]
// against 4D kernel [outPlane][inPlane][kr][kc], producing a 4D output
// [batch][outPlane][or][oc].  r_ = beta*r_ + alpha * conv.
// FIX(review): the too-small-input error message said "conv2Dmv" (copy-paste
// from the function above); corrected to "conv2Dmm".
// NOTE(review): fragmentary view — braces, else-branches and the vf/xc
// selection lines between the four kernel calls are elided.
1067 void THTensor_(conv2Dmm)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1069 int64_t nInputPlane, nInputRows, nInputCols;
1070 int64_t nKernelRows, nKernelCols;
1071 int64_t nOutputPlane, nOutputRows, nOutputCols;
1072 int64_t kstride0, kstride1;
1077 scalar_t *input_data;
1078 scalar_t *weight_data;
1079 scalar_t *output_data;
// Argument validation.
1082 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
1083 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
1084 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
1085 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
1086 THArgCheck(*vf ==
'V' || *vf ==
'F', 7,
"type of convolution can 'V' or 'F'");
1087 THArgCheck(*xc ==
'C' || *xc ==
'X', 7,
"type of convolution can 'X' or 'C'");
1089 input = THTensor_(newContiguous)(t_);
// Copy the kernel only if its innermost 2 dims are non-contiguous;
// otherwise retain and use it in place.
1090 if (!(k_->stride(3) == 1) || !(k_->stride(2) == k_->size(3))) {
1091 kernel = THTensor_(newContiguous)(k_);
1093 THTensor_(retain)(k_);
1097 nbatch = input->size(0);
1098 nInputPlane = input->size(1);
1099 nInputRows = input->size(2);
1100 nInputCols = input->size(3);
1102 kstride0 = kernel->stride(0);
1103 kstride1 = kernel->stride(1);
1104 nKernelRows = kernel->size(2);
1105 nKernelCols = kernel->size(3);
1106 nOutputPlane = kernel->size(0);
1107 THArgCheck(kernel->size(1) == nInputPlane, 2,
"invalid number of input planes");
1109 THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv2Dmm : Input image is smaller than kernel");
// 'F' (full) output extent ...
1112 nOutputRows = (nInputRows - 1) * srow + nKernelRows;
1113 nOutputCols = (nInputCols - 1) * scol + nKernelCols;
// ... vs 'V' (valid) output extent.
1115 nOutputRows = (nInputRows - nKernelRows) / srow + 1;
1116 nOutputCols = (nInputCols - nKernelCols) / scol + 1;
1119 nelem = THTensor_(nElement)(r_);
1120 THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols);
1122 input_data = input->data<scalar_t>();
1123 weight_data = kernel->data<scalar_t>();
1124 output_data = r_->data<scalar_t>();
// Fresh/reshaped output or beta == 0: zero-fill; otherwise scale by beta.
1126 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1129 #pragma omp parallel for private(p) 1130 for (p=0; p < r_->size(0); p++)
1133 for (k = 0; k < r_->size(1); k++)
1135 scalar_t* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
1137 for (l = 0; l < nOutputRows*nOutputCols; l++)
1138 ptr_output[l] = 0.0;
1145 #pragma omp parallel for private(p) 1146 for(p=0; p < r_->size(0); p++)
1149 for (k = 0; k < r_->size(1); k++)
1151 scalar_t* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
1153 for (l = 0; l < nOutputRows*nOutputCols; l++)
1154 ptr_output[l] *= beta;
// Per batch element and output plane, accumulate over input planes.
1159 #pragma omp parallel for private(p) 1160 for(p=0; p < nbatch; p++)
1163 for(k = 0; k < nOutputPlane; k++)
1167 scalar_t *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows;
1168 for(i = 0; i < nInputPlane; i++)
1171 scalar_t *ptr_weight = weight_data + k*kstride0 + i*kstride1;
1173 scalar_t *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols;
1178 THTensor_(fullXCorr2Dptr)(ptr_output,
1180 ptr_input, nInputRows, nInputCols,
1181 ptr_weight, nKernelRows, nKernelCols,
1184 THTensor_(fullConv2Dptr)(ptr_output,
1186 ptr_input, nInputRows, nInputCols,
1187 ptr_weight, nKernelRows, nKernelCols,
1191 THTensor_(validXCorr2Dptr)(ptr_output,
1193 ptr_input, nInputRows, nInputCols,
1194 ptr_weight, nKernelRows, nKernelCols,
1197 THTensor_(validConv2Dptr)(ptr_output,
1199 ptr_input, nInputRows, nInputCols,
1200 ptr_weight, nKernelRows, nKernelCols,
// Release input copy and the kernel (copied or retained above).
1207 c10::raw::intrusive_ptr::decref(input);
1208 c10::raw::intrusive_ptr::decref(kernel);
// Single-plane 2D convolution: 2D input against 2D kernel → 2D output.
// r_ = beta*r_ + alpha * conv2D(t_, k_); mode selected by vf/xc and
// forwarded to the conv2d pointer-level dispatcher.
// NOTE(review): fragmentary view — braces and some statements elided.
1217 void THTensor_(conv2Dmul)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1223 int64_t nKernelRows;
1224 int64_t nKernelCols;
1225 int64_t nOutputRows, nOutputCols;
1226 scalar_t *ptr_input;
1227 scalar_t *ptr_weight;
1228 scalar_t *output_data;
// Argument validation: non-empty 2D tensors, positive strides.
1231 AT_CHECK(!t_->is_empty() && t_->dim() == 2,
"input: non-empty 2D Tensor expected, got size: ", t_->sizes());
1232 AT_CHECK(!k_->is_empty() && k_->dim() == 2,
"kernel: non-empty 2D Tensor expected, got size: ", k_->sizes());
1233 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
1234 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
1236 input = THTensor_(newContiguous)(t_);
1237 kernel = THTensor_(newContiguous)(k_);
1239 nInputRows = input->size(0);
1240 nInputCols = input->size(1);
1241 nKernelRows = kernel->size(0);
1242 nKernelCols = kernel->size(1);
// Size check only applies in 'V' mode.
1244 THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv2Dmul : Input image is smaller than kernel");
// Output extent delegated to convsize (handles 'V' vs 'F').
1246 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1247 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1249 nelem = THTensor_(nElement)(r_);
1250 THTensor_(resize2d)(r_, nOutputRows, nOutputCols);
// Fresh/reshaped output or beta == 0: zero; otherwise pre-scale by beta.
1251 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1252 THTensor_(zero)(r_);
1254 THTensor_(mul)(r_, r_, beta);
1256 ptr_input = input->data<scalar_t>();
1257 ptr_weight = kernel->data<scalar_t>();
1258 output_data = r_->data<scalar_t>();
// Single dispatcher call does the actual accumulation.
1262 THTensor_(conv2d)(output_data,
1264 ptr_input, nInputRows, nInputCols,
1265 ptr_weight, nKernelRows, nKernelCols,
1266 srow, scol, vf, xc);
// Release the contiguous copies.
1267 c10::raw::intrusive_ptr::decref(input);
1268 c10::raw::intrusive_ptr::decref(kernel);
// Component-wise (per-plane) 2D convolution: plane k of the 3D input is
// convolved with plane k of the 3D kernel into plane k of the 3D output —
// no cross-plane mixing (requires nOutputPlane == nInputPlane).
// r_ = beta*r_ + alpha * conv, delegated per plane to conv2d.
// NOTE(review): fragmentary view — braces and some statements elided.
1276 void THTensor_(conv2Dcmul)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1278 int64_t nInputPlane, nInputRows, nInputCols;
1279 int64_t nKernelRows, nKernelCols;
1280 int64_t nOutputPlane, nOutputRows, nOutputCols;
1281 int64_t istride0, kstride0;
1284 scalar_t *input_data;
1285 scalar_t *weight_data;
1286 scalar_t *output_data;
// Argument validation.
1290 AT_CHECK(!t_->is_empty() && t_->dim() == 3,
"input: non-empty 3D Tensor expected, got size: ", t_->sizes());
1291 AT_CHECK(!k_->is_empty() && k_->dim() == 3,
"kernel: non-empty 3D Tensor expected, got size: ", k_->sizes());
1292 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
1293 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
1295 input = THTensor_(newContiguous)(t_);
1296 kernel = THTensor_(newContiguous)(k_);
1298 istride0 = input->stride(0);
1299 nInputPlane = input->size(0);
1300 nInputRows = input->size(1);
1301 nInputCols = input->size(2);
1303 kstride0 = kernel->stride(0);
1304 nOutputPlane = kernel->size(0);
1305 nKernelRows = kernel->size(1);
1306 nKernelCols = kernel->size(2);
// One kernel plane per input plane — strictly element-wise over planes.
1308 THArgCheck(nOutputPlane == nInputPlane, 2,
"invalid number of input/kernel planes");
1309 THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv2Dcmul : Input image is smaller than kernel");
1311 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1312 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1314 nelem = THTensor_(nElement)(r_);
1315 THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);
// Fresh/reshaped output or beta == 0: zero; otherwise pre-scale by beta.
1317 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1319 THTensor_(zero)(r_);
1322 THTensor_(mul)(r_, r_, beta);
1324 input_data = input->data<scalar_t>();
1325 weight_data = kernel->data<scalar_t>();
1326 output_data = r_->data<scalar_t>();
// Sequential per-plane dispatch; output pointer advanced plane by plane.
1328 for(k = 0; k < nOutputPlane; k++)
1331 scalar_t *ptr_weight = weight_data + k*kstride0;
1333 scalar_t *ptr_input = input_data + k*istride0;
1336 THTensor_(conv2d)(output_data,
1338 ptr_input, nInputRows, nInputCols,
1339 ptr_weight, nKernelRows, nKernelCols,
1340 srow, scol, vf, xc);
1342 output_data += nOutputCols*nOutputRows;
// Release the contiguous copies.
1344 c10::raw::intrusive_ptr::decref(input);
1345 c10::raw::intrusive_ptr::decref(kernel);
// Connection-table 2D convolution: `map` is a 2D tensor of (from, to) plane
// pairs (stored 1-based; converted to 0-based here).  For each mapping row k,
// kernel plane k is applied to input plane `from` and accumulated into
// output plane `to`.  r_ = beta*r_ + alpha * conv.
// NOTE(review): fragmentary view — braces and some statements elided.
1353 void THTensor_(conv2Dmap)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, THTensor *map, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1355 int64_t nInputPlane, nInputRows, nInputCols;
1356 int64_t nKernelRows, nKernelCols;
1357 int64_t nOutputPlane, nOutputRows, nOutputCols;
1358 int64_t istride0, kstride0;
1361 scalar_t *input_data;
1362 scalar_t *weight_data;
1363 scalar_t *output_data;
// Argument validation, including the 2D connection map.
1368 AT_CHECK(!t_->is_empty() && t_->dim() == 3,
"input: non-empty 3D Tensor expected, got size: ", t_->sizes());
1369 AT_CHECK(!k_->is_empty() && k_->dim() == 3,
"kernel: non-empty 3D Tensor expected, got size: ", k_->sizes());
1370 THArgCheck(THTensor_nDimensionLegacyAll(map) == 2 , 4,
"map: 2D Tensor expected");
1371 THArgCheck(srow >= 1, 6,
"Stride should be a positive integer");
1372 THArgCheck(scol >= 1, 7,
"Stride should be a positive integer");
1374 input = THTensor_(newContiguous)(t_);
1375 kernel = THTensor_(newContiguous)(k_);
1377 istride0 = input->stride(0);
1378 nInputPlane = input->size(0);
1379 nInputRows = input->size(1);
1380 nInputCols = input->size(2);
1382 kstride0 = kernel->stride(0);
1383 nOutputPlane = kernel->size(0);
1384 nKernelRows = kernel->size(1);
1385 nKernelCols = kernel->size(2);
1387 THArgCheck(nOutputPlane == nInputPlane, 2,
"invalid number of input/kernel planes");
1388 THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols)
1389 || *vf ==
'F', 2,
"conv2Dmap : Input image is smaller than kernel");
1391 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1392 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1394 nelem = THTensor_(nElement)(r_);
1395 THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);
// Fresh/reshaped output or beta == 0: zero; otherwise pre-scale by beta.
1397 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1399 THTensor_(zero)(r_);
1402 THTensor_(mul)(r_, r_, beta);
1404 input_data = input->data<scalar_t>();
1405 weight_data = kernel->data<scalar_t>();
1406 output_data = r_->data<scalar_t>();
1408 nmaps = map->size(0);
1410 for(k = 0; k < nmaps; k++)
// Map entries are 1-based plane indices; -1 converts to 0-based.
1413 int64_t from = (int64_t)THTensor_(get2d)(map,k,0)-1;
1414 int64_t to = (int64_t)THTensor_(get2d)(map,k,1)-1;
1417 scalar_t *ptr_weight = weight_data + k*kstride0;
1419 scalar_t *ptr_input = input_data + from*istride0;
1421 scalar_t *ptr_output = output_data + to*nOutputRows*nOutputCols;
1424 THTensor_(conv2d)(ptr_output,
1426 ptr_input, nInputRows, nInputCols,
1427 ptr_weight, nKernelRows, nKernelCols,
1428 srow, scol, vf, xc);
// Release the contiguous copies.
1430 c10::raw::intrusive_ptr::decref(input);
1431 c10::raw::intrusive_ptr::decref(kernel);
// 3D analogue of conv2DRevger: reverse cross-correlation over every
// (kernel plane, input plane) pair of 4D tensors, producing a 5D output
// [nKernelPlane][nInputPlane][nOutputDepth][nOutputRows][nOutputCols].
// r_ = beta*r_ + alpha * revXCorr3D(t_, k_).  Serial (no OpenMP here).
// NOTE(review): fragmentary view — braces and some statements elided.
1441 void THTensor_(conv3DRevger)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_,
1442 int64_t sdepth, int64_t srow, int64_t scol)
1444 int64_t nInputPlane, nInputDepth, nInputRows, nInputCols;
1445 int64_t nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;
1446 int64_t nOutputDepth, nOutputRows, nOutputCols;
1447 int64_t istride0, kstride0;
1450 scalar_t *input_data;
1451 scalar_t *weight_data;
1452 scalar_t *output_data;
// Argument validation: non-empty 4D tensors, positive strides.
1456 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
1457 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
1458 THArgCheck(sdepth >= 1, 5,
"Stride should be a positive integer");
1459 THArgCheck(srow >= 1, 6,
"Stride should be a positive integer");
1460 THArgCheck(scol >= 1, 7,
"Stride should be a positive integer");
1462 input = THTensor_(newContiguous)(t_);
1463 kernel = THTensor_(newContiguous)(k_);
1465 nInputPlane = input->size(0);
1466 istride0 = input->stride(0);
1467 nInputDepth = input->size(1);
1468 nInputRows = input->size(2);
1469 nInputCols = input->size(3);
1471 kstride0 = kernel->stride(0);
1472 nKernelPlane = kernel->size(0);
1473 nKernelDepth= kernel->size(1);
1474 nKernelRows = kernel->size(2);
1475 nKernelCols = kernel->size(3);
1477 THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2,
"conv3DRevger : Input image is smaller than kernel");
// Reverse-mode output extent: o = i - (k - 1)*s per axis.
1479 nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth;
1480 nOutputRows = nInputRows - (nKernelRows - 1) * srow;
1481 nOutputCols = nInputCols - (nKernelCols - 1) * scol;
1483 nelem = THTensor_(nElement)(r_);
1484 THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);
// Fresh/reshaped output or beta == 0: zero; otherwise pre-scale by beta.
1486 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1488 THTensor_(zero)(r_);
1491 THTensor_(mul)(r_, r_, beta);
1493 input_data = input->data<scalar_t>();
1494 weight_data = kernel->data<scalar_t>();
1495 output_data = r_->data<scalar_t>();
// One reverse correlation per (kernel plane, input plane) pair; the output
// pointer advances one 3D volume per pair.
1497 for(k = 0; k < nKernelPlane; k++)
1500 scalar_t *ptr_weight = weight_data+k*kstride0;
1502 for(i = 0; i < nInputPlane; i++)
1505 scalar_t *ptr_input = input_data+i*istride0;
1508 THTensor_(validXCorr3DRevptr)(output_data,
1510 ptr_input, nInputDepth, nInputRows, nInputCols,
1511 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
1512 sdepth, srow, scol);
1514 output_data += nOutputDepth*nOutputCols*nOutputRows;
// Release the contiguous copies.
1517 c10::raw::intrusive_ptr::decref(input);
1518 c10::raw::intrusive_ptr::decref(kernel);
1527 void THTensor_(conv3Dger)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_,
1528 int64_t sdepth, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1530 int64_t nInputPlane, nInputDepth, nInputRows, nInputCols;
1531 int64_t nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;
1532 int64_t nOutputDepth, nOutputRows, nOutputCols;
1533 int64_t istride0, kstride0;
1536 scalar_t *input_data;
1537 scalar_t *weight_data;
1538 scalar_t *output_data;
1542 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
1543 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
1544 THArgCheck(sdepth >= 1, 5,
"Stride should be a positive integer");
1545 THArgCheck(srow >= 1, 6,
"Stride should be a positive integer");
1546 THArgCheck(scol >= 1, 7,
"Stride should be a positive integer");
1547 THArgCheck(*vf ==
'V' || *vf ==
'F', 8,
"type of convolution can 'V' or 'F'");
1548 THArgCheck(*xc ==
'C' || *xc ==
'X', 8,
"type of convolution can 'X' or 'C'");
1550 input = THTensor_(newContiguous)(t_);
1551 kernel = THTensor_(newContiguous)(k_);
1553 nInputPlane = input->size(0);
1554 istride0 = input->stride(0);
1555 nInputDepth = input->size(1);
1556 nInputRows = input->size(2);
1557 nInputCols = input->size(3);
1559 kstride0 = kernel->stride(0);
1560 nKernelPlane = kernel->size(0);
1561 nKernelDepth = kernel->size(1);
1562 nKernelRows = kernel->size(2);
1563 nKernelCols = kernel->size(3);
1565 THArgCheck((nInputDepth >= nKernelDepth
1566 && nInputRows >= nKernelRows
1567 && nInputCols >= nKernelCols)
1568 || *vf ==
'F', 2,
"conv3Dger : Input image is smaller than kernel");
1570 nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
1571 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1572 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1574 nelem = THTensor_(nElement)(r_);
1575 THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);
1577 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1579 THTensor_(zero)(r_);
1582 THTensor_(mul)(r_, r_, beta);
1584 input_data = input->data<scalar_t>();
1585 weight_data = kernel->data<scalar_t>();
1586 output_data = r_->data<scalar_t>();
1588 for(k = 0; k < nKernelPlane; k++)
1591 scalar_t *ptr_weight = weight_data+k*kstride0;
1593 for(i = 0; i < nInputPlane; i++)
1596 scalar_t *ptr_input = input_data+i*istride0;
1599 THTensor_(conv3d)(output_data,
1601 ptr_input, nInputDepth, nInputRows, nInputCols,
1602 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
1603 sdepth, srow, scol, vf, xc);
1606 output_data += nOutputDepth*nOutputCols*nOutputRows;
1609 c10::raw::intrusive_ptr::decref(input);
1610 c10::raw::intrusive_ptr::decref(kernel);
1618 void THTensor_(conv3Dmv)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_,
1619 int64_t sdepth, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1621 int64_t nInputPlane, nInputDepth, nInputRows, nInputCols;
1622 int64_t nKernelDepth, nKernelRows, nKernelCols;
1623 int64_t nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
1624 int64_t istride0, kstride0, kstride1;
1627 scalar_t *input_data;
1628 scalar_t *weight_data;
1629 scalar_t *output_data;
1633 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
1634 AT_CHECK(!k_->is_empty() && k_->dim() == 5,
"kernel: non-empty 5D Tensor expected, got size: ", k_->sizes());
1635 THArgCheck(sdepth >= 1, 5,
"Stride should be a positive integer");
1636 THArgCheck(srow >= 1, 6,
"Stride should be a positive integer");
1637 THArgCheck(scol >= 1, 7,
"Stride should be a positive integer");
1638 THArgCheck(*vf ==
'V' || *vf ==
'F', 8,
"type of convolution can 'V' or 'F'");
1639 THArgCheck(*xc ==
'C' || *xc ==
'X', 8,
"type of convolution can 'X' or 'C'");
1641 input = THTensor_(newContiguous)(t_);
1642 if (!(k_->stride(4) == 1) || !(k_->stride(3) == k_->size(4))) {
1643 kernel = THTensor_(newContiguous)(k_);
1645 THTensor_(retain)(k_);
1649 nInputPlane = input->size(0);
1650 istride0 = input->stride(0);
1651 nInputDepth = input->size(1);
1652 nInputRows = input->size(2);
1653 nInputCols = input->size(3);
1655 kstride0 = kernel->stride(0);
1656 kstride1 = kernel->stride(1);
1657 nKernelDepth = kernel->size(2);
1658 nKernelRows = kernel->size(3);
1659 nKernelCols = kernel->size(4);
1660 nOutputPlane = kernel->size(0);
1661 THArgCheck(kernel->size(1) == nInputPlane, 2,
"invalid number of input planes");
1663 THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv3Dmv : Input image is smaller than kernel");
1665 nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
1666 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1667 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1669 nelem = THTensor_(nElement)(r_);
1670 THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);
1672 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1674 THTensor_(zero)(r_);
1677 THTensor_(mul)(r_, r_, beta);
1679 input_data = input->data<scalar_t>();
1680 weight_data = kernel->data<scalar_t>();
1681 output_data = r_->data<scalar_t>();
1683 for(k = 0; k < nOutputPlane; k++)
1685 for(i = 0; i < nInputPlane; i++)
1688 scalar_t *ptr_weight = weight_data + k*kstride0 + i*kstride1;
1690 scalar_t *ptr_input = input_data + i*istride0;
1693 THTensor_(conv3d)(output_data,
1695 ptr_input, nInputDepth, nInputRows, nInputCols,
1696 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
1697 sdepth, srow, scol, vf, xc);
1700 output_data += nOutputDepth*nOutputCols*nOutputRows;
1702 c10::raw::intrusive_ptr::decref(input);
1703 c10::raw::intrusive_ptr::decref(kernel);
1711 void THTensor_(conv3Dmul)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_,
1712 int64_t sdepth, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1716 int64_t nInputDepth;
1719 int64_t nKernelDepth;
1720 int64_t nKernelRows;
1721 int64_t nKernelCols;
1722 int64_t nOutputDepth, nOutputRows, nOutputCols;
1723 scalar_t *ptr_input;
1724 scalar_t *ptr_weight;
1725 scalar_t *output_data;
1728 AT_CHECK(!t_->is_empty() && t_->dim() == 3,
"input: non-empty 3D Tensor expected, got size: ", t_->sizes());
1729 AT_CHECK(!k_->is_empty() && k_->dim() == 3,
"kernel: non-empty 3D Tensor expected, got size: ", k_->sizes());
1730 THArgCheck(sdepth >= 1, 5,
"Stride should be a positive integer");
1731 THArgCheck(srow >= 1, 6,
"Stride should be a positive integer");
1732 THArgCheck(scol >= 1, 7,
"Stride should be a positive integer");
1733 THArgCheck(*vf ==
'V' || *vf ==
'F', 8,
"type of convolution can 'V' or 'F'");
1734 THArgCheck(*xc ==
'C' || *xc ==
'X', 8,
"type of convolution can 'X' or 'C'");
1736 input = THTensor_(newContiguous)(t_);
1737 kernel = THTensor_(newContiguous)(k_);
1739 nInputDepth = input->size(0);
1740 nInputRows = input->size(1);
1741 nInputCols = input->size(2);
1742 nKernelDepth = kernel->size(0);
1743 nKernelRows = kernel->size(1);
1744 nKernelCols = kernel->size(2);
1746 THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv3Dmul : Input image is smaller than kernel");
1748 nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
1749 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1750 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1752 nelem = THTensor_(nElement)(r_);
1753 THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols);
1754 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1755 THTensor_(zero)(r_);
1757 THTensor_(mul)(r_, r_, beta);
1759 ptr_input = input->data<scalar_t>();
1760 ptr_weight = kernel->data<scalar_t>();
1761 output_data = r_->data<scalar_t>();
1765 THTensor_(conv3d)(output_data,
1767 ptr_input, nInputDepth, nInputRows, nInputCols,
1768 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
1769 sdepth, srow, scol, vf, xc);
1770 c10::raw::intrusive_ptr::decref(input);
1771 c10::raw::intrusive_ptr::decref(kernel);
1779 void THTensor_(conv3Dcmul)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_,
1780 int64_t sdepth, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1782 int64_t nInputPlane, nInputDepth, nInputRows, nInputCols;
1783 int64_t nKernelDepth, nKernelRows, nKernelCols;
1784 int64_t nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
1785 int64_t istride0, kstride0;
1789 scalar_t *input_data;
1790 scalar_t *weight_data;
1791 scalar_t *output_data;
1795 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
1796 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
1797 THArgCheck(srow >= 1, 5,
"Stride should be a positive integer");
1798 THArgCheck(scol >= 1, 6,
"Stride should be a positive integer");
1799 THArgCheck(*vf ==
'V' || *vf ==
'F', 7,
"type of convolution can 'V' or 'F'");
1800 THArgCheck(*xc ==
'C' || *xc ==
'X', 7,
"type of convolution can 'X' or 'C'");
1802 input = THTensor_(newContiguous)(t_);
1803 kernel = THTensor_(newContiguous)(k_);
1805 istride0 = input->stride(0);
1806 nInputPlane = input->size(0);
1807 nInputDepth = input->size(1);
1808 nInputRows = input->size(2);
1809 nInputCols = input->size(3);
1811 kstride0 = kernel->stride(0);
1812 nOutputPlane = kernel->size(0);
1813 nKernelDepth = kernel->size(1);
1814 nKernelRows = kernel->size(2);
1815 nKernelCols = kernel->size(3);
1817 THArgCheck(nOutputPlane == nInputPlane, 2,
"invalid number of input/kernel planes");
1818 THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf ==
'F', 2,
"conv3Dcmul : Input image is smaller than kernel");
1820 nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
1821 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1822 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1824 nelem = THTensor_(nElement)(r_);
1825 THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);
1827 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1829 THTensor_(zero)(r_);
1832 THTensor_(mul)(r_, r_, beta);
1834 input_data = input->data<scalar_t>();
1835 weight_data = kernel->data<scalar_t>();
1836 output_data = r_->data<scalar_t>();
1838 for(k = 0; k < nOutputPlane; k++)
1841 scalar_t *ptr_weight = weight_data + k*kstride0;
1843 scalar_t *ptr_input = input_data + k*istride0;
1846 THTensor_(conv3d)(output_data,
1848 ptr_input, nInputDepth, nInputRows, nInputCols,
1849 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
1850 sdepth, srow, scol, vf, xc);
1853 output_data += nOutputDepth*nOutputCols*nOutputRows;
1855 c10::raw::intrusive_ptr::decref(input);
1856 c10::raw::intrusive_ptr::decref(kernel);
1864 void THTensor_(conv3Dmap)(THTensor *r_, scalar_t beta, scalar_t alpha, THTensor *t_, THTensor *k_, THTensor *map,
1865 int64_t sdepth, int64_t srow, int64_t scol,
const char *vf,
const char *xc)
1867 int64_t nInputPlane, nInputDepth, nInputRows, nInputCols;
1868 int64_t nKernelDepth, nKernelRows, nKernelCols;
1869 int64_t nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
1870 int64_t istride0, kstride0;
1875 scalar_t *input_data;
1876 scalar_t *weight_data;
1877 scalar_t *output_data;
1881 AT_CHECK(!t_->is_empty() && t_->dim() == 4,
"input: non-empty 4D Tensor expected, got size: ", t_->sizes());
1882 AT_CHECK(!k_->is_empty() && k_->dim() == 4,
"kernel: non-empty 4D Tensor expected, got size: ", k_->sizes());
1883 THArgCheck(THTensor_nDimensionLegacyAll(map) == 2 , 4,
"map: 2D Tensor expected");
1884 THArgCheck(srow >= 1, 6,
"Stride should be a positive integer");
1885 THArgCheck(scol >= 1, 7,
"Stride should be a positive integer");
1886 THArgCheck(*vf ==
'V' || *vf ==
'F', 8,
"type of convolution can 'V' or 'F'");
1887 THArgCheck(*xc ==
'C' || *xc ==
'X', 8,
"type of convolution can 'X' or 'C'");
1889 input = THTensor_(newContiguous)(t_);
1890 kernel = THTensor_(newContiguous)(k_);
1892 istride0 = input->stride(0);
1893 nInputPlane = input->size(0);
1894 nInputDepth = input->size(1);
1895 nInputRows = input->size(2);
1896 nInputCols = input->size(3);
1898 kstride0 = kernel->stride(0);
1899 nOutputPlane = kernel->size(0);
1900 nKernelDepth = kernel->size(1);
1901 nKernelRows = kernel->size(2);
1902 nKernelCols = kernel->size(3);
1904 THArgCheck(nOutputPlane == nInputPlane, 2,
"invalid number of input/kernel planes");
1905 THArgCheck((nInputDepth >= nKernelDepth
1906 && nInputRows >= nKernelRows
1907 && nInputCols >= nKernelCols) || *vf ==
'F',
1908 2,
"conv3Dmap : Input image is smaller than kernel");
1910 nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
1911 nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
1912 nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
1914 nelem = THTensor_(nElement)(r_);
1915 THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);
1917 if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
1919 THTensor_(zero)(r_);
1922 THTensor_(mul)(r_, r_, beta);
1924 input_data = input->data<scalar_t>();
1925 weight_data = kernel->data<scalar_t>();
1926 output_data = r_->data<scalar_t>();
1928 nmaps = map->size(0);
1930 for(k = 0; k < nmaps; k++)
1933 int64_t from = (int64_t)THTensor_(get2d)(map,k,0)-1;
1934 int64_t to = (int64_t)THTensor_(get2d)(map,k,1)-1;
1937 scalar_t *ptr_weight = weight_data + k*kstride0;
1939 scalar_t *ptr_input = input_data + from*istride0;
1941 scalar_t *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols;
1944 THTensor_(conv3d)(ptr_output,
1946 ptr_input, nInputDepth, nInputRows, nInputCols,
1947 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
1948 sdepth, srow, scol, vf, xc);
1950 c10::raw::intrusive_ptr::decref(input);
1951 c10::raw::intrusive_ptr::decref(kernel);