/*
 * unfolded_acc: for every input plane `nip` and kernel offset (kh, kw),
 * accumulates one outputHeight x outputWidth slab of finput's data into
 * plane `nip` of input's data through THVector_(cadd).
 *
 * NOTE(review): this excerpt omits the parameter list, local declarations,
 * braces, `else` keywords and the conditions that choose between the
 * row-wise and element-wise paths; the comments below describe only what
 * the visible lines do.
 * NOTE(review): THVector_(cadd)(z, x, y, c, n) is presumably z = x + c*y
 * over n elements -- confirm against the THVector definition.
 */
1 #ifndef TH_GENERIC_FILE 2 #define TH_GENERIC_FILE "THNN/generic/unfold.c" 6 void THNN_(unfolded_acc)(
/* Raw element pointers into the two tensors. */
27 scalar_t *input_data = input->data<scalar_t>();
28 scalar_t *finput_data = finput->data<scalar_t>();
/* OpenMP-parallel loop over input planes. The destination base pointer
 * computed below depends on nip only, so each iteration starts from a
 * different plane of input_data. */
30 #pragma omp parallel for private(nip) 31 for(nip = 0; nip < nInputPlane; nip++)
35 for(kh = 0; kh < kH; kh++)
37 for(kw = 0; kw < kW; kw++)
/* src: start of the slab for (nip, kh, kw); this arithmetic indexes
 * finput_data as [nip][kh][kw][y][x]. The size_t casts make the offset
 * products evaluate in size_t. */
39 scalar_t *src = finput_data + nip*((size_t)kH*kW*outputHeight*outputWidth) + kh*((size_t)kW*outputHeight*outputWidth) + kw*((size_t)outputHeight*outputWidth);
/* dst: start of plane nip in input_data (inputHeight*inputWidth elements per plane). */
40 scalar_t *dst = input_data + nip*((size_t)inputHeight*inputWidth);
/* Padded case: source coordinates may fall outside the input and are range-checked. */
41 if (padW > 0 || padH > 0) {
43 for(y = 0; y < outputHeight; y++) {
/* Input row for output row y and kernel row kh; y is widened to int64_t before the multiply. */
44 iy = (int64_t)y*dH - padH + kh;
/* Row outside the input (what this branch does is not visible in this excerpt). */
45 if (iy < 0 || iy >= inputHeight) {
/* Row-wise path: lpad / rpad are the counts of leading / trailing output
 * columns trimmed from the row, each clamped at zero.
 * NOTE(review): fmaxf works in float, so the integer operands round-trip
 * through float; an integer max would avoid that.
 * NOTE(review): ix is assigned on a line not shown here. */
49 lpad = fmaxf(0,padW-kw);
50 rpad = fmaxf(0,padW-(kW-kw-1));
51 scalar_t *dst_slice = dst+(size_t)iy*inputWidth+ix+lpad;
/* Accumulates the outputWidth - lpad - rpad remaining elements of row y in one call.
 * NOTE(review): no visible check that this count is positive. */
52 THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+lpad, 1, outputWidth - lpad - rpad);
/* Element-wise path: compute and range-check the input column for each x. */
55 for (x=0; x<outputWidth; x++){
56 ix = (int64_t)x*dW - padW + kw;
/* Column outside the input (what this branch does is not visible in this excerpt). */
57 if (ix < 0 || ix >= inputWidth){
/* Accumulate the single element (y, x) of the slab into input position (iy, ix). */
59 scalar_t *dst_slice = dst+(size_t)iy*inputWidth+ix;
60 THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+x, 1, 1);
/* Unpadded case: no range checks are applied to iy / ix on the visible lines. */
67 for(y = 0; y < outputHeight; y++) {
68 iy = (int64_t)y*dH + kh;
/* Row-wise path: one call accumulates the full row of outputWidth elements.
 * NOTE(review): ix is assigned on a line not shown here. */
71 scalar_t *dst_slice = dst+(size_t)iy*inputWidth+ix;
72 THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth, 1, outputWidth);
/* Element-wise path: destination advances by dW per output column.
 * NOTE(review): x*dW is not widened to int64_t here, unlike
 * (int64_t)x*dW in the padded branch above -- if x and dW are int this
 * product can overflow; confirm the declared types. */
74 for(x = 0; x < outputWidth; x++) {
75 scalar_t *dst_slice = dst+(size_t)iy*inputWidth+ix+x*dW;
76 THVector_(cadd)(dst_slice, dst_slice, src+(size_t)y*outputWidth+x, 1, 1);
/*
 * unfolded_copy: for every input plane `nip` and kernel offset (kh, kw),
 * fills one outputHeight x outputWidth slab of finput's data from plane
 * `nip` of input's data, writing zeros wherever the source coordinate
 * falls outside the input.
 *
 * NOTE(review): this excerpt omits the parameter list, local declarations,
 * braces, `else` keywords and the conditions that choose between the
 * row-wise and element-wise paths; the comments below describe only what
 * the visible lines do.
 */
86 void THNN_(unfolded_copy)(
/* Raw element pointers into the two tensors. */
108 scalar_t *input_data = input->data<scalar_t>();
109 scalar_t *finput_data = finput->data<scalar_t>();
/* One flattened, OpenMP-parallel loop over all (nip, kh, kw) triples; the
 * bound is computed in int64_t. k is decomposed below with kw varying
 * fastest, then kh, then nip. */
111 #pragma omp parallel for private(k) 112 for(k = 0; k < (int64_t)nInputPlane*kH*kW; k++) {
113 int64_t nip = k / (kH*kW);
114 int64_t rest = k % (kH*kW);
115 int64_t kh = rest / kW;
116 int64_t kw = rest % kW;
/* dst: start of the slab for (nip, kh, kw); this arithmetic indexes
 * finput_data as [nip][kh][kw][y][x]. The size_t casts make the offset
 * products evaluate in size_t. */
119 scalar_t *dst = finput_data + nip*((size_t)kH*kW*outputHeight*outputWidth) + kh*((size_t)kW*outputHeight*outputWidth) + kw*((size_t)outputHeight*outputWidth);
/* src: start of plane nip in input_data (inputHeight*inputWidth elements per plane). */
120 scalar_t *src = input_data + nip*((size_t)inputHeight*inputWidth);
/* Padded case: source coordinates may fall outside the input and are range-checked. */
121 if (padW > 0 || padH > 0) {
123 for(y = 0; y < outputHeight; y++) {
/* Input row for output row y and kernel row kh; y is widened to int64_t before the multiply. */
124 iy = (int64_t)y*dH - padH + kh;
/* Source row outside the input: zero the whole output row. */
125 if (iy < 0 || iy >= inputHeight) {
126 memset(dst+(
size_t)y*outputWidth, 0,
sizeof(scalar_t)*outputWidth);
/* Row-wise path: lpad / rpad are the counts of leading / trailing output
 * columns to zero-fill, each clamped at zero.
 * NOTE(review): fmaxf works in float, so the integer operands round-trip
 * through float; an integer max would avoid that.
 * NOTE(review): ix is assigned on a line not shown here. */
130 lpad = fmaxf(0,padW-kw);
131 rpad = fmaxf(0,padW-(kW-kw-1));
/* Nothing left to copy once both pads are removed: zero the entire row. */
132 if (outputWidth-rpad-lpad <= 0) {
133 memset(dst+(
size_t)y*outputWidth, 0,
sizeof(scalar_t)*outputWidth);
/* Zero the first lpad columns, copy the middle outputWidth-rpad-lpad
 * elements from the input row, then zero the last rpad columns. */
135 if (lpad > 0) memset(dst+(
size_t)y*outputWidth, 0,
sizeof(scalar_t)*lpad);
136 memcpy(dst+(
size_t)y*outputWidth+lpad, src+(
size_t)iy*inputWidth+ix+lpad,
sizeof(scalar_t)*(outputWidth-rpad-lpad));
137 if (rpad > 0) memset(dst+(
size_t)y*outputWidth + outputWidth - rpad, 0,
sizeof(scalar_t)*rpad);
/* Element-wise path: compute and range-check the input column for each x. */
141 for (x=0; x<outputWidth; x++){
142 ix = (int64_t)x*dW - padW + kw;
/* Column outside the input: write a single zero element. */
143 if (ix < 0 || ix >= inputWidth)
144 memset(dst+(
size_t)y*outputWidth+x, 0,
sizeof(scalar_t)*1);
/* Column in range: copy the single element at input position (iy, ix)
 * (the `else` joining this to the test above is not visible here). */
146 memcpy(dst+(
size_t)y*outputWidth+x, src+(
size_t)iy*inputWidth+ix,
sizeof(scalar_t)*(1));
/* Unpadded case: no range checks are applied to iy / ix on the visible lines. */
152 for(y = 0; y < outputHeight; y++) {
153 iy = (int64_t)y*dH + kh;
/* Row-wise path: copy the full row of outputWidth elements in one memcpy.
 * NOTE(review): ix is assigned on a line not shown here. */
156 memcpy(dst+(
size_t)y*outputWidth, src+(
size_t)iy*inputWidth+ix,
sizeof(scalar_t)*outputWidth);
/* Element-wise path: copy one element per output column; the source
 * advances by dW per column, with the product widened to int64_t. */
158 for (x=0; x<outputWidth; x++)
159 memcpy(dst+(
size_t)y*outputWidth+x, src+(
size_t)iy*inputWidth+ix+(int64_t)x*dW,
sizeof(scalar_t)*(1));