#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorMoreMath.cpp"
#else

#include <TH/generic/THTensorApply.hpp>
#include <TH/THGenerator.hpp>

void THTensor_(baddbmm)(THTensor *result, scalar_t beta, THTensor *t, scalar_t alpha, THTensor *batch1, THTensor *batch2)
{
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(batch1) == 3, 1, "expected 3D tensor, got %dD",
             THTensor_(nDimensionLegacyNoScalars)(batch1));
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(batch2) == 3, 2, "expected 3D tensor, got %dD",
             THTensor_(nDimensionLegacyNoScalars)(batch2));
  THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2,
             "equal number of batches expected, got %d, %d",
             THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0));
  THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2,
             "wrong matrix size, batch1: %dx%d, batch2: %dx%d",
             THTensor_(size)(batch1, 1), THTensor_(size)(batch1, 2),
             THTensor_(size)(batch2, 1), THTensor_(size)(batch2, 2));
  int64_t bs = THTensor_(size)(batch1, 0);
  int64_t dim1 = THTensor_(size)(batch1, 1);
  int64_t dim2 = THTensor_(size)(batch2, 2);
  THArgCheck(THTensor_(size)(t, 0) == bs, 1, "output tensor of incorrect size");
  THArgCheck(THTensor_(size)(t, 1) == dim1, 1, "output tensor of incorrect size");
  THArgCheck(THTensor_(size)(t, 2) == dim2, 1, "output tensor of incorrect size");
  THTensor_(resizeAs)(result, t);
  at::Tensor result_wrap = THTensor_wrap(result);
  at::Tensor t_wrap = THTensor_wrap(t);
  at::_copy_same_type_(result_wrap, t_wrap);
  THTensor *matrix1 = THTensor_(new)();
  THTensor *matrix2 = THTensor_(new)();
  THTensor *result_matrix = THTensor_(new)();
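  /* Compute result[b] = beta * t[b] + alpha * (batch1[b] @ batch2[b]) one
     batch at a time: select() shares storage with the parent tensors, so
     each addmm call below updates the matching slice of result in place. */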
  for (int64_t batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) {
    THTensor_(select)(matrix1, batch1, 0, batch);
    THTensor_(select)(matrix2, batch2, 0, batch);
    THTensor_(select)(result_matrix, result, 0, batch);

    THTensor_(addmm)(result_matrix, beta, result_matrix, alpha, matrix1, matrix2);
  }
  c10::raw::intrusive_ptr::decref(matrix1);
  c10::raw::intrusive_ptr::decref(matrix2);
  c10::raw::intrusive_ptr::decref(result_matrix);
}
ptrdiff_t THTensor_(numel)(THTensor *t)
{
  return THTensor_(nElement)(t);
}
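/* The reductions in this file compute with the reduced dimension kept as
   size 1 and squeeze it afterwards when keepdim is false. If the caller
   passed an output tensor that already has the squeezed (in_dims - 1)
   shape, preserveReduceDimSemantics re-inserts the size-1 dimension so the
   subsequent resize is consistent with the kept-dim shape. */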
void THTensor_(preserveReduceDimSemantics)(
    THTensor *r_, int in_dims, int reduce_dimension, int keepdim) {
  if (r_ && !keepdim &&
      THTensor_(nDimensionLegacyAll)(r_) == in_dims - 1 &&
      THTensor_(nDimensionLegacyAll)(r_) != 0) {
    THTensor_(unsqueeze1d)(r_, r_, reduce_dimension);
  }
}
void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 2, "dimension %d out of range",
      dimension);
  int in_dims = THTensor_(nDimensionLegacyAll)(t);
  THTensor_(preserveReduceDimSemantics)(values_, in_dims, dimension, keepdim);
  THLongTensor_preserveReduceDimSemantics(indices_, in_dims, dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(values_, dim, {});
  THLongTensor_resize(indices_, dim, {});
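  /* Two implementations, chosen for data locality: when the reduced
     dimension has stride 1, each slice is scanned directly; otherwise the
     running max/argmax are kept in stride-0 views expanded to t's shape so
     the input is traversed in memory order. */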
  if (THTensor_strideLegacyNoScalars(t, dimension) == 1) {
    scalar_t theMax, value;
    int64_t theIndex, i;
    TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, values_, int64_t, indices_, dimension,
                         TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                         theMax = t_data[0]; theIndex = 0;
                         for(i = 0; i < t_size; i++)
                         {
                           value = t_data[i*t_stride];
                           /* not the same as value > theMax when value is NaN */
                           if(!(value <= theMax))
                           {
                             theIndex = i; theMax = value;
                             th_isnan_break(value)
                           }
                         }
                         *indices__data = theIndex;
                         *values__data = theMax;);
  } else {
    if (THTensor_(nDimensionLegacyAll)(t) > 1) {
      THTensor *t0 = THTensor_(newSelect)(t, dimension, 0);
      at::Tensor values__wrap = THTensor_wrap(values_);
      at::Tensor t0_wrap = THTensor_wrap(t0);
      at::_copy_same_type_(values__wrap, t0_wrap);
      c10::raw::intrusive_ptr::decref(t0);
    } else {
      THTensor_(fill)(values_, THTensor_(get1d)(t, 0));
    }
    THLongTensor_zero(indices_);

    if(THTensor_sizeLegacyNoScalars(t, dimension) == 1) {
      if (!keepdim) {
        THTensor_(squeeze1d)(values_, values_, dimension);
        THLongTensor_squeeze1d(indices_, indices_, dimension);
      }
      return;
    }
    THTensor *tempValues_ = THTensor_(newWithTensor)(values_);
    /* expand tempValues_ to the shape of t via a stride-0 view */
    tempValues_->set_size(dimension, THTensor_sizeLegacyNoScalars(t, dimension));
    tempValues_->set_stride(dimension, 0);

    THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_);
    /* expand tempIndices_ the same way */
    tempIndices_->set_size(dimension, THTensor_sizeLegacyNoScalars(t, dimension));
    tempIndices_->set_stride(dimension, 0);
    TH_TENSOR_APPLY3_D(scalar_t, t, scalar_t, tempValues_, int64_t, tempIndices_, dimension,
                       if(!(*t_data <= *tempValues__data) && !th_isnan(*tempValues__data)) {
                         *tempValues__data = *t_data;
                         *tempIndices__data = *tempIndices__dimOffset;
                       });

    c10::raw::intrusive_ptr::decref(tempValues_);
    THLongTensor_free(tempIndices_);
  }

  if (!keepdim) {
    THTensor_(squeeze1d)(values_, values_, dimension);
    THLongTensor_squeeze1d(indices_, indices_, dimension);
  }
}
void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 2, "dimension %d out of range",
      dimension);
  int in_dims = THTensor_(nDimensionLegacyAll)(t);
  THTensor_(preserveReduceDimSemantics)(values_, in_dims, dimension, keepdim);
  THLongTensor_preserveReduceDimSemantics(indices_, in_dims, dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(values_, dim, {});
  THLongTensor_resize(indices_, dim, {});
  if (THTensor_strideLegacyNoScalars(t, dimension) == 1) {
    scalar_t theMin, value;
    int64_t theIndex, i;
    TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, values_, int64_t, indices_, dimension,
                         TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                         theMin = t_data[0]; theIndex = 0;
                         for(i = 0; i < t_size; i++)
                         {
                           value = t_data[i*t_stride];
                           /* not the same as value < theMin when value is NaN */
                           if(!(value >= theMin))
                           {
                             theIndex = i; theMin = value;
                             th_isnan_break(value)
                           }
                         }
                         *indices__data = theIndex;
                         *values__data = theMin;);
  } else {
    if (THTensor_(nDimensionLegacyAll)(t) > 1) {
      THTensor *t0 = THTensor_(newSelect)(t, dimension, 0);
      at::Tensor values__wrap = THTensor_wrap(values_);
      at::Tensor t0_wrap = THTensor_wrap(t0);
      at::_copy_same_type_(values__wrap, t0_wrap);
      c10::raw::intrusive_ptr::decref(t0);
    } else {
      THTensor_(fill)(values_, THTensor_(get1d)(t, 0));
    }
    THLongTensor_zero(indices_);

    if(THTensor_sizeLegacyNoScalars(t, dimension) == 1) {
      if (!keepdim) {
        THTensor_(squeeze1d)(values_, values_, dimension);
        THLongTensor_squeeze1d(indices_, indices_, dimension);
      }
      return;
    }
    THTensor *tempValues_ = THTensor_(newWithTensor)(values_);
    /* expand tempValues_ to the shape of t via a stride-0 view */
    tempValues_->set_size(dimension, THTensor_sizeLegacyNoScalars(t, dimension));
    tempValues_->set_stride(dimension, 0);

    THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_);
    /* expand tempIndices_ the same way */
    tempIndices_->set_size(dimension, THTensor_sizeLegacyNoScalars(t, dimension));
    tempIndices_->set_stride(dimension, 0);
    TH_TENSOR_APPLY3_D(scalar_t, t, scalar_t, tempValues_, int64_t, tempIndices_, dimension,
                       if(!(*t_data >= *tempValues__data) && !th_isnan(*tempValues__data)) {
                         *tempValues__data = *t_data;
                         *tempIndices__data = *tempIndices__dimOffset;
                       });

    c10::raw::intrusive_ptr::decref(tempValues_);
    THLongTensor_free(tempIndices_);
  }

  if (!keepdim) {
    THTensor_(squeeze1d)(values_, values_, dimension);
    THLongTensor_squeeze1d(indices_, indices_, dimension);
  }
}
void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 2, "dimension %d out of range",
      dimension);

  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimensionLegacyAll)(t), dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(r_, dim, {});

  int serial_path = 0;
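  /* Parallel path: when OpenMP is available and we are not already inside a
     parallel region, each output element's product is computed
     independently by decoding the linear output index into a base offset
     in t, then walking the reduced dimension with its own stride. */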
#ifdef _OPENMP
  int inOMP = omp_in_parallel();
  if (inOMP) {
    serial_path = 1;
  } else {
    int r_Contig = THTensor_(isContiguous)(r_);
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    if (r_Contig && (tp != rp)) {
      ptrdiff_t iter;
      ptrdiff_t r_Size = THTensor_(nElement)(r_);
      int r_Dim = THTensor_nDimensionLegacyAll(r_);
      #pragma omp parallel for if (r_Size > HYPER_TH_OMP_OVERHEAD_THRESHOLD)
      for (iter = 0; iter < r_Size; iter++) {
        int j; int64_t quot; int64_t rem = iter; ptrdiff_t tBasicIndex = 0;
        /* map the linear output index to the matching base offset in t */
        for(j = 0; j < r_Dim; ++j) {
          if(j != dimension) {
            quot = rem/r_->stride(j);
            rem = rem%r_->stride(j);
            tBasicIndex += quot*t->stride(j);
          }
        }
        scalar_t *t_data = tp+tBasicIndex;
        scalar_t *r__data = rp+iter;
        *r__data = 1;
        for(j=0; j < THTensor_sizeLegacyNoScalars(t, dimension); ++j) {
          *r__data *= *(t_data + j*THTensor_strideLegacyNoScalars(t, dimension));
        }
      }
    } else {
      serial_path = 1;
    }
  }
#else
  serial_path = 1;
#endif
  if (serial_path) {
    if (THTensor_strideLegacyNoScalars(t, dimension) == 1) {
      TH_TENSOR_DIM_APPLY2(scalar_t, t, scalar_t, r_, dimension,
                           accreal prod = 1;
                           int64_t i;
                           for(i = 0; i < t_size; i++)
                             prod *= t_data[i*t_stride];
                           *r__data = (scalar_t)prod;);
    } else {
      THTensor_(fill)(r_, 1);
      THTensor *temp_ = THTensor_(newWithTensor)(r_);
      /* expand r_ to the shape of t via a stride-0 view */
      temp_->set_size(dimension, THTensor_sizeLegacyNoScalars(t, dimension));
      temp_->set_stride(dimension, 0);

      TH_TENSOR_APPLY2(scalar_t, temp_, scalar_t, t, *temp__data = *temp__data * *t_data;);
      c10::raw::intrusive_ptr::decref(temp_);
    }
  }
  if (!keepdim) {
    THTensor_(squeeze1d)(r_, r_, dimension);
  }
}
void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyNoScalars)(t), 2, "dimension %d out of range",
      dimension);

  THTensor_(resizeAs)(r_, t);

  TH_TENSOR_DIM_APPLY2(scalar_t, t, scalar_t, r_, dimension,
                       accreal cumsum = 0;
                       int64_t i;
                       for(i = 0; i < t_size; i++)
                       {
                         cumsum += t_data[i*t_stride];
                         r__data[i*r__stride] = (scalar_t)cumsum;
                       });
}
void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyNoScalars)(t), 2, "dimension %d out of range",
      dimension);

  THTensor_(resizeAs)(r_, t);

  TH_TENSOR_DIM_APPLY2(scalar_t, t, scalar_t, r_, dimension,
                       accreal cumprod = 1;
                       int64_t i;
                       for(i = 0; i < t_size; i++)
                       {
                         cumprod *= t_data[i*t_stride];
                         r__data[i*r__stride] = (scalar_t)cumprod;
                       });
}
void THTensor_(sign)(THTensor *r_, THTensor *t)
{
  THTensor_(resizeAs)(r_, t);

#if defined (TH_REAL_IS_BYTE)
  /* byte is unsigned, so sign is 0 or 1 */
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t,
    if (*t_data > 0) *r__data = 1;
    else *r__data = 0;);
#else
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t,
    if (*t_data > 0) *r__data = 1;
    else if (*t_data < 0) *r__data = -1;
    else *r__data = 0;);
#endif
}
accreal THTensor_(trace)(THTensor *t)
{
  scalar_t *t_data = t->data<scalar_t>();
  accreal sum = 0;
  int64_t i = 0;
  int64_t t_stride_0, t_stride_1, t_diag_size;

  THArgCheck(THTensor_(nDimensionLegacyAll)(t) == 2, 1, "expected a matrix");

  t_stride_0 = THTensor_(stride)(t, 0);
  t_stride_1 = THTensor_(stride)(t, 1);
  t_diag_size = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1));
  while(i < t_diag_size)
  {
    sum += t_data[i*(t_stride_0+t_stride_1)];
    i++;
  }

  return sum;
}
void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension)
{
  int i;

  if(THTensor_(nDimensionLegacyNoScalars)(a) != THTensor_(nDimensionLegacyNoScalars)(b))
    THError("inconsistent tensor dimension %dD, %dD",
            THTensor_(nDimensionLegacyNoScalars)(a), THTensor_(nDimensionLegacyNoScalars)(b));

  for(i = 0; i < a->dim(); i++)
  {
    if(THTensor_(size)(a, i) != THTensor_(size)(b, i)) {
      THDescBuff ba = THTensor_(sizeDesc)(a);
      THDescBuff bb = THTensor_(sizeDesc)(b);
      THError("inconsistent tensor sizes %s, %s", ba.str, bb.str);
    }
  }
  /* if no dimension was given, pick the first dimension of size 3 */
  if(dimension < 0) {
    for(i = 0; i < THTensor_(nDimensionLegacyNoScalars)(a); i++) {
      if(THTensor_sizeLegacyNoScalars(a, i) == 3) {
        dimension = i;
        break;
      }
    }
    if(dimension < 0) {
      THDescBuff ba = THTensor_(sizeDesc)(a);
      THError("no dimension of size 3 in a: %s", ba.str);
    }
  }

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyNoScalars)(a), 3, "dimension %d out of range",
      dimension);
  THArgCheck(THTensor_sizeLegacyNoScalars(a, dimension) == 3, 3, "dimension %d does not have size 3",
      dimension);

  THTensor_(resizeAs)(r_, a);
  TH_TENSOR_DIM_APPLY3(scalar_t, a, scalar_t, b, scalar_t, r_, dimension,
                       TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                       r__data[0*r__stride] = a_data[1*a_stride]*b_data[2*b_stride] - a_data[2*a_stride]*b_data[1*b_stride];
                       r__data[1*r__stride] = a_data[2*a_stride]*b_data[0*b_stride] - a_data[0*a_stride]*b_data[2*b_stride];
                       r__data[2*r__stride] = a_data[0*a_stride]*b_data[1*b_stride] - a_data[1*a_stride]*b_data[0*b_stride];);
}
void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src) {
  THTensor_(resizeAs)(r, t);
  TH_TENSOR_APPLY3(scalar_t, r, scalar_t, t, scalar_t, src,
                   *r_data = *t_data > *src_data ? *t_data : *src_data;);
}

void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src) {
  THTensor_(resizeAs)(r, t);
  TH_TENSOR_APPLY3(scalar_t, r, scalar_t, t, scalar_t, src,
                   *r_data = *t_data < *src_data ? *t_data : *src_data;);
}

void THTensor_(cmaxValue)(THTensor *r, THTensor *t, scalar_t value) {
  THTensor_(resizeAs)(r, t);
  TH_TENSOR_APPLY2(scalar_t, r, scalar_t, t,
                   *r_data = *t_data < value ? value : *t_data;);
}

void THTensor_(cminValue)(THTensor *r, THTensor *t, scalar_t value) {
  THTensor_(resizeAs)(r, t);
  TH_TENSOR_APPLY2(scalar_t, r, scalar_t, t,
                   *r_data = *t_data > value ? value : *t_data;);
}
void THTensor_(diag)(THTensor *r_, THTensor *t, int k)
{
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(t) == 1 || THTensor_(nDimensionLegacyNoScalars)(t) == 2, 1,
      "matrix or a vector expected");

  if(THTensor_(nDimensionLegacyNoScalars)(t) == 1)
  {
    /* build an (sz x sz) matrix with t on the k-th diagonal */
    scalar_t *t_data = t->data<scalar_t>();
    int64_t t_stride_0 = THTensor_strideLegacyNoScalars(t, 0);
    int64_t t_size = THTensor_sizeLegacyNoScalars(t, 0);
    int64_t sz = t_size + (k >= 0 ? k : -k);
    scalar_t *r__data;
    int64_t r__stride_0, r__stride_1, i;

    THTensor_(resize2d)(r_, sz, sz);
    THTensor_(zero)(r_);
    r__data = r_->data<scalar_t>();
    r__stride_0 = THTensor_(stride)(r_, 0);
    r__stride_1 = THTensor_(stride)(r_, 1);
    r__data += (k >= 0 ? k*r__stride_1 : -k*r__stride_0);

    for(i = 0; i < t_size; i++)
      r__data[i*(r__stride_0+r__stride_1)] = t_data[i*t_stride_0];
  }
  else
  {
    /* extract the k-th diagonal of a matrix into a vector */
    scalar_t *t_data = t->data<scalar_t>();
    int64_t t_stride_0 = THTensor_(stride)(t, 0);
    int64_t t_stride_1 = THTensor_(stride)(t, 1);
    scalar_t *r__data;
    int64_t r__stride_0, sz, i;

    if(k >= 0)
      sz = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)-k);
    else
      sz = THMin(THTensor_(size)(t, 0)+k, THTensor_(size)(t, 1));
    THTensor_(resize1d)(r_, sz);
    r__data = r_->data<scalar_t>();
    r__stride_0 = THTensor_(stride)(r_, 0);

    t_data += (k >= 0 ? k*t_stride_1 : -k*t_stride_0);
    for(i = 0; i < sz; i++)
      r__data[i*r__stride_0] = t_data[i*(t_stride_0+t_stride_1)];
  }
}
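/* The sorters below operate in place on strided data, adapted from
   Sedgewick's "Implementing Quicksort Programs": an explicit stack instead
   of recursion, median-of-three pivoting, subfiles of at most M_SMALL
   elements left for a final insertion-sort pass, and GT_OR_NAN ordering
   NaNs after every ordinary value. */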
#define MAX_LEVELS 300
#define M_SMALL 10 /* limit for small subfiles */

#define ARR(III) arr[(III)*stride]
#define IDX(III) idx[(III)*stride]

#define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap
#define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap

#define ARR_SWAP(III, JJJ) \
  REAL_SWAP(ARR(III), ARR(JJJ));

#define BOTH_SWAP(III, JJJ) \
  REAL_SWAP(ARR(III), ARR(JJJ)); \
  LONG_SWAP(IDX(III), IDX(JJJ))

/* x != x is true only for NaN, so this orders NaNs after all ordinary values */
#define GT_OR_NAN(x, y) \
  ((x != x && y == y) || (x > y))

static void THTensor_(quicksortascend)(scalar_t *arr, int64_t *idx, int64_t elements, int64_t stride)
{
  int64_t beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left;
  scalar_t rswap, piv;
  unsigned char done = 0;

  stack = 0;
  L = 0; R = elements-1;
  done = elements-1 <= M_SMALL;

  while(!done) {
      /* use median of three for pivot choice */
      P=(L+R)>>1;
      BOTH_SWAP(P, L+1);
      if (GT_OR_NAN(ARR(L+1), ARR(R))) { BOTH_SWAP(L+1, R); }
      if (GT_OR_NAN(ARR(L), ARR(R))) { BOTH_SWAP(L, R); }
      if (GT_OR_NAN(ARR(L+1), ARR(L))) { BOTH_SWAP(L+1, L); }

      i = L+1; j = R; piv = ARR(L); pid = IDX(L);

      do {
          do { i = i+1; } while(GT_OR_NAN(piv, ARR(i)));
          do { j = j-1; } while(GT_OR_NAN(ARR(j), piv));
          if (j < i) break;
          BOTH_SWAP(i, j);
      } while(1);
      BOTH_SWAP(L, j);

      /* left subfile is (L, j-1), right subfile is (i, R) */
      sz_left = j-L; sz_right = R-i+1;
      if (sz_left <= M_SMALL && sz_right <= M_SMALL) {
          /* both subfiles are small: pop a pending subfile, or finish */
          if (stack == 0) { done = 1; }
          else { stack--; L = beg[stack]; R = end[stack]; }
      } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) {
          /* exactly one subfile is small: continue with the large one */
          if (sz_left > sz_right) { R = j-1; }
          else { L = i; }
      } else {
          /* neither is small: push the large subfile, continue with the small one */
          if (sz_left > sz_right) { beg[stack] = L; end[stack] = j-1; stack++; L = i; }
          else { beg[stack] = i; end[stack] = R; stack++; R = j-1; }
      }
  }

  /* insertion sort over the nearly-sorted result */
  for(i=elements-2; i>=0; i--) {
    if (GT_OR_NAN(ARR(i),ARR(i+1))) {
      piv = ARR(i); pid = IDX(i); j = i+1;
      do {
        ARR(j-1) = ARR(j); IDX(j-1) = IDX(j);
        j = j+1;
      } while(j < elements && GT_OR_NAN(piv, ARR(j)));
      ARR(j-1) = piv; IDX(j-1) = pid;
    }
  }
}
static void THTensor_(quicksortdescend)(scalar_t *arr, int64_t *idx, int64_t elements, int64_t stride)
{
  int64_t beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left;
  scalar_t rswap, piv;
  unsigned char done = 0;

  stack = 0;
  L = 0; R = elements-1;
  done = elements-1 <= M_SMALL;

  while(!done) {
      /* use median of three for pivot choice */
      P=(L+R)>>1;
      BOTH_SWAP(P, L+1);
      if (GT_OR_NAN(ARR(R), ARR(L+1))) { BOTH_SWAP(L+1, R); }
      if (GT_OR_NAN(ARR(R), ARR(L))) { BOTH_SWAP(L, R); }
      if (GT_OR_NAN(ARR(L), ARR(L+1))) { BOTH_SWAP(L+1, L); }

      i = L+1; j = R; piv = ARR(L); pid = IDX(L);

      do {
          do { i = i+1; } while(GT_OR_NAN(ARR(i), piv));
          do { j = j-1; } while(GT_OR_NAN(piv, ARR(j)));
          if (j < i) break;
          BOTH_SWAP(i, j);
      } while(1);
      BOTH_SWAP(L, j);

      /* left subfile is (L, j-1), right subfile is (i, R) */
      sz_left = j-L; sz_right = R-i+1;
      if (sz_left <= M_SMALL && sz_right <= M_SMALL) {
          /* both subfiles are small: pop a pending subfile, or finish */
          if (stack == 0) { done = 1; }
          else { stack--; L = beg[stack]; R = end[stack]; }
      } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) {
          /* exactly one subfile is small: continue with the large one */
          if (sz_left > sz_right) { R = j-1; }
          else { L = i; }
      } else {
          /* neither is small: push the large subfile, continue with the small one */
          if (sz_left > sz_right) { beg[stack] = L; end[stack] = j-1; stack++; L = i; }
          else { beg[stack] = i; end[stack] = R; stack++; R = j-1; }
      }
  }

  /* insertion sort over the nearly-sorted result */
  for(i=elements-2; i>=0; i--) {
    if (GT_OR_NAN(ARR(i+1), ARR(i))) {
      piv = ARR(i); pid = IDX(i); j = i+1;
      do {
        ARR(j-1) = ARR(j); IDX(j-1) = IDX(j);
        j = j+1;
      } while(j < elements && GT_OR_NAN(ARR(j), piv));
      ARR(j-1) = piv; IDX(j-1) = pid;
    }
  }
}
void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyNoScalars)(t), 2, "invalid dimension %d",
      dimension);

  THTensor_(resizeAs)(rt_, t);
  at::Tensor rt__wrap = THTensor_wrap(rt_);
  at::Tensor t_wrap = THTensor_wrap(t);
  at::_copy_same_type_(rt__wrap, t_wrap);
  THLongTensor_resize(ri_, t->sizes(), {});

  if(descendingOrder)
  {
    TH_TENSOR_DIM_APPLY2(scalar_t, rt_, int64_t, ri_, dimension,
                         int64_t i;
                         for(i = 0; i < ri__size; i++)
                           ri__data[i*ri__stride] = i;
                         THTensor_(quicksortdescend)(rt__data, ri__data, rt__size, rt__stride);)
  }
  else
  {
    TH_TENSOR_DIM_APPLY2(scalar_t, rt_, int64_t, ri_, dimension,
                         int64_t i;
                         for(i = 0; i < ri__size; i++)
                           ri__data[i*ri__stride] = i;
                         THTensor_(quicksortascend)(rt__data, ri__data, rt__size, rt__stride);)
  }
}
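/* Quickselect (Hoare's selection), based on Nicolas Devillard's public
   domain median implementation and adapted like the quicksort above:
   repeatedly partition around a median-of-three pivot and recurse only into
   the side containing index k, so the k-th smallest element lands at
   position k in expected linear time. */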
static void THTensor_(quickselectnoidx)(scalar_t *arr, int64_t k, int64_t elements, int64_t stride)
{
  int64_t P, L, R, i, j;
  scalar_t rswap, piv;
  L = 0; R = elements-1;

  do {
    if (R <= L) /* one element only */
      return;

    if (R == L+1) { /* two elements only */
      if (ARR(L) > ARR(R)) {
        ARR_SWAP(L, R);
      }
      return;
    }

    /* use median of three for pivot choice */
    P=(L+R)>>1;
    ARR_SWAP(P, L+1);
    if (ARR(L+1) > ARR(R)) { ARR_SWAP(L+1, R); }
    if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); }
    if (ARR(L+1) > ARR(L)) { ARR_SWAP(L+1, L); }

    i = L+1; j = R; piv = ARR(L);
    do {
      do i++; while(ARR(i) < piv);
      do j--; while(ARR(j) > piv);
      if (j < i) break;
      ARR_SWAP(i, j);
    } while(1);
    ARR_SWAP(L, j);

    /* re-set active partition */
    if (j <= k) L=i;
    if (j >= k) R=j-1;
  } while(1);
}
static void THTensor_(quickselect)(scalar_t *arr, int64_t *idx, int64_t k, int64_t elements, int64_t stride)
{
  int64_t P, L, R, i, j, swap;
  scalar_t rswap, piv;
  L = 0; R = elements-1;

  do {
    if (R <= L) /* one element only */
      return;

    if (R == L+1) { /* two elements only */
      if (ARR(L) > ARR(R)) {
        BOTH_SWAP(L, R);
      }
      return;
    }

    /* use median of three for pivot choice */
    P=(L+R)>>1;
    BOTH_SWAP(P, L+1);
    if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }
    if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }
    if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }

    i = L+1; j = R; piv = ARR(L);
    do {
      do i++; while(ARR(i) < piv);
      do j--; while(ARR(j) > piv);
      if (j < i) break;
      BOTH_SWAP(i, j);
    } while(1);
    BOTH_SWAP(L, j);

    /* re-set active partition */
    if (j <= k) L=i;
    if (j >= k) R=j-1;
  } while(1);
}
scalar_t THTensor_(medianall)(THTensor *tensor)
{
  THArgCheck(THTensor_nDimensionLegacyAll(tensor) > 0, 1, "tensor must have at least one dimension");

  scalar_t theMedian;
  ptrdiff_t numel;
  int64_t k;
  THTensor *temp_;
  scalar_t *temp__data;

  numel = THTensor_(nElement)(tensor);
  k = (numel-1) >> 1;

  temp_ = THTensor_(newClone)(tensor);
  temp__data = temp_->data<scalar_t>();

  THTensor_(quickselectnoidx)(temp__data, k, numel, 1);

  theMedian = temp__data[k];

  c10::raw::intrusive_ptr::decref(temp_);

  return theMedian;
}
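/* mode: copy each slice into a scratch buffer, sort it ascending while
   carrying the original indices along, then scan once for the longest run
   of equal values; the strict > comparison means the smallest value wins
   ties between equally frequent runs. */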
void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
{
  THTensor *temp_;
  THLongTensor *tempi_;
  scalar_t *temp__data;
  int64_t *tempi__data;
  int64_t t_size_dim;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "dimension out of range");

  int in_dims = THTensor_(nDimensionLegacyAll)(t);
  THTensor_(preserveReduceDimSemantics)(values_, in_dims, dimension, keepdim);
  THLongTensor_preserveReduceDimSemantics(indices_, in_dims, dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(values_, dim, {});
  THLongTensor_resize(indices_, dim, {});
  t_size_dim = THTensor_sizeLegacyNoScalars(t, dimension);

  temp_ = THTensor_(new)();
  THTensor_(resize1d)(temp_, t_size_dim);
  temp__data = temp_->data<scalar_t>();

  tempi_ = THLongTensor_new();
  THLongTensor_resize1d(tempi_, t_size_dim);
  tempi__data = THLongTensor_data(tempi_);
  TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, values_, int64_t, indices_, dimension,
                       TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                       int64_t i;
                       scalar_t mode = 0;
                       int64_t modei = 0;
                       int64_t temp_freq = 0;
                       int64_t max_freq = 0;
                       for(i = 0; i < t_size_dim; i++)
                          temp__data[i] = t_data[i*t_stride];
                       for(i = 0; i < t_size_dim; i++)
                          tempi__data[i] = i;
                       THTensor_(quicksortascend)(temp__data, tempi__data, t_size_dim, 1);

                       for(i = 0; i < t_size_dim; i++)
                       {
                          temp_freq++;
                          if ((i == t_size_dim - 1) || (temp__data[i] != temp__data[i+1]))
                          {
                             if (temp_freq > max_freq)
                             {
                                mode = temp__data[i];
                                modei = tempi__data[i];
                                max_freq = temp_freq;
                             }
                             temp_freq = 0;
                          }
                       }
                       *values__data = mode;
                       *indices__data = modei;);
  c10::raw::intrusive_ptr::decref(temp_);
  THLongTensor_free(tempi_);
  if (!keepdim) {
    THTensor_(squeeze1d)(values_, values_, dimension);
    THLongTensor_squeeze1d(indices_, indices_, dimension);
  }
}
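/* kthvalue: quickselect partially orders each scratch slice so that the
   (k-1)-th element is in its sorted position, which yields the k-th
   smallest value and its original index without a full sort. */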
void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, int64_t k, int dimension, int keepdim)
{
  THTensor *temp_;
  THLongTensor *tempi_;
  scalar_t *temp__data;
  int64_t *tempi__data;
  int64_t t_size_dim;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "dimension out of range");
  THArgCheck(k > 0 && k <= THTensor_sizeLegacyNoScalars(t, dimension), 2, "selected index out of range");

  int in_dims = THTensor_(nDimensionLegacyAll)(t);
  THTensor_(preserveReduceDimSemantics)(values_, in_dims, dimension, keepdim);
  THLongTensor_preserveReduceDimSemantics(indices_, in_dims, dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(values_, dim, {});
  THLongTensor_resize(indices_, dim, {});
  t_size_dim = THTensor_sizeLegacyNoScalars(t, dimension);

  temp_ = THTensor_(new)();
  THTensor_(resize1d)(temp_, t_size_dim);
  temp__data = temp_->data<scalar_t>();

  tempi_ = THLongTensor_new();
  THLongTensor_resize1d(tempi_, t_size_dim);
  tempi__data = THLongTensor_data(tempi_);
  TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, values_, int64_t, indices_, dimension,
                       TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                       int64_t i;
                       for(i = 0; i < t_size_dim; i++)
                          temp__data[i] = t_data[i*t_stride];
                       for(i = 0; i < t_size_dim; i++)
                          tempi__data[i] = i;
                       THTensor_(quickselect)(temp__data, tempi__data, k - 1, t_size_dim, 1);
                       *values__data = temp__data[k-1];
                       *indices__data = tempi__data[k-1];);

  c10::raw::intrusive_ptr::decref(temp_);
  THLongTensor_free(tempi_);
  if (!keepdim) {
    THTensor_(squeeze1d)(values_, values_, dimension);
    THLongTensor_squeeze1d(indices_, indices_, dimension);
  }
}
void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
{
  int64_t t_size_dim, k;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "dimension out of range");

  t_size_dim = THTensor_sizeLegacyNoScalars(t, dimension);
  k = (t_size_dim-1) >> 1; /* the middle element, or the one just before it for even sizes */

  THTensor_(kthvalue)(values_, indices_, t, k+1, dimension, keepdim);
}
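/* topk: for the k largest (dir != 0), quickselect pivots the scratch slice
   so its last k entries are the winners, then optionally sorts just those k
   descending; for the k smallest it selects the first k and optionally
   sorts them ascending. */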
void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int64_t k, int dim, int dir, int sorted)
{
  int numDims = THTensor_(nDimensionLegacyNoScalars)(t);
  THArgCheck(dim >= 0 && dim < numDims, 3, "dim not in range");

  int64_t sliceSize = THTensor_sizeLegacyNoScalars(t, dim);
  THArgCheck(k >= 0 && k <= sliceSize, 2, "k not in range for dimension");

  THTensor *tmpResults = THTensor_(new)();
  THTensor_(resize1d)(tmpResults, sliceSize);
  scalar_t *tmp__data = tmpResults->data<scalar_t>();

  THLongTensor *tmpIndices = THLongTensor_new();
  THLongTensor_resize1d(tmpIndices, sliceSize);
  int64_t *tmpi__data = THLongTensor_data(tmpIndices);

  std::vector<int64_t> topKSize = t->sizes().vec();
  if (topKSize.size() > 0) { /* handle 0-dim vs 1-dim differences */
    topKSize[dim] = k;
  }
  THTensor_(resize)(rt_, topKSize, {});
  THLongTensor_resize(ri_, topKSize, {});

  if (dir) {
    /* k largest elements, descending order (optional, see `sorted`) */
    int64_t K = sliceSize - k;
    TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, rt_, int64_t, ri_, dim,
                         TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                         int64_t i;
                         for(i = 0; i < sliceSize; i++)
                         {
                           tmp__data[i] = t_data[i*t_stride];
                           tmpi__data[i] = i;
                         }
                         if (K > 0)
                           THTensor_(quickselect)(tmp__data, tmpi__data, K - 1, sliceSize, 1);
                         if (sorted)
                           THTensor_(quicksortdescend)(tmp__data + K, tmpi__data + K, k, 1);
                         for(i = 0; i < k; i++)
                         {
                           rt__data[i*rt__stride] = tmp__data[i + K];
                           ri__data[i*ri__stride] = tmpi__data[i + K];
                         })
  }
  else {
    /* k smallest elements, ascending order (optional, see `sorted`) */
    TH_TENSOR_DIM_APPLY3(scalar_t, t, scalar_t, rt_, int64_t, ri_, dim,
                         TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                         int64_t i;
                         for(i = 0; i < sliceSize; i++)
                         {
                           tmp__data[i] = t_data[i*t_stride];
                           tmpi__data[i] = i;
                         }
                         THTensor_(quickselect)(tmp__data, tmpi__data, k - 1, sliceSize, 1);
                         if (sorted)
                           THTensor_(quicksortascend)(tmp__data, tmpi__data, k - 1, 1);
                         for(i = 0; i < k; i++)
                         {
                           rt__data[i*rt__stride] = tmp__data[i];
                           ri__data[i*ri__stride] = tmpi__data[i];
                         })
  }

  c10::raw::intrusive_ptr::decref(tmpResults);
  THLongTensor_free(tmpIndices);
}
void THTensor_(triu)(THTensor *r_, THTensor *t, int64_t k)
{
  int64_t t_size_0, t_size_1;
  int64_t t_stride_0, t_stride_1;
  int64_t r__stride_0, r__stride_1;
  scalar_t *t_data, *r__data;
  int64_t r, c;

  THArgCheck(THTensor_(nDimensionLegacyAll)(t) == 2, 1, "expected a matrix");

  THTensor_(resizeAs)(r_, t);

  t_size_0 = THTensor_(size)(t, 0);
  t_size_1 = THTensor_(size)(t, 1);
  t_stride_0 = THTensor_(stride)(t, 0);
  t_stride_1 = THTensor_(stride)(t, 1);
  r__stride_0 = THTensor_(stride)(r_, 0);
  r__stride_1 = THTensor_(stride)(r_, 1);
  r__data = r_->data<scalar_t>();
  t_data = t->data<scalar_t>();

  for(r = 0; r < t_size_0; r++)
  {
    int64_t sz = THMin(r+k, t_size_1);
    for(c = THMax(0, r+k); c < t_size_1; c++)
      r__data[r*r__stride_0+c*r__stride_1] = t_data[r*t_stride_0+c*t_stride_1];
    for(c = 0; c < sz; c++)
      r__data[r*r__stride_0+c*r__stride_1] = 0;
  }
}
int THTensor_(equal)(THTensor *ta, THTensor* tb)
{
  int equal = 1;
  if(!THTensor_(isSameSizeAs)(ta, tb))
    return 0;

  if (THTensor_(isContiguous)(ta) && THTensor_(isContiguous)(tb)) {
    scalar_t *tap = ta->data<scalar_t>();
    scalar_t *tbp = tb->data<scalar_t>();
    ptrdiff_t sz = THTensor_(nElement)(ta);
    ptrdiff_t i;
    for (i=0; i<sz; ++i){
      if(tap[i] != tbp[i]) return 0;
    }
  } else {
    /* short-circuit the apply on the first mismatch */
    TH_TENSOR_APPLY2(scalar_t, ta, scalar_t, tb,
                     if (equal && *ta_data != *tb_data) {
                       equal = 0;
                       TH_TENSOR_APPLY_hasFinished = 1;
                       break;
                     })
  }
  return equal;
}
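/* TENSOR_IMPLEMENT_LOGICAL stamps out four comparison kernels per operator:
   tensor-vs-scalar and tensor-vs-tensor, each writing either a byte mask
   (the Value/Tensor variants) or a same-typed 0/1 tensor (the ValueT/TensorT
   variants). */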
#define TENSOR_IMPLEMENT_LOGICAL(NAME,OP) \
  void THTensor_(NAME##Value)(THByteTensor *r_, THTensor* t, scalar_t value) \
  { \
    THByteTensor_resizeNd(r_, t->dim(), THTensor_getSizePtr(t), NULL); \
    TH_TENSOR_APPLY2(unsigned char, r_, scalar_t, t, \
                     *r__data = (*t_data OP value) ? 1 : 0;); \
  } \
  void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, scalar_t value) \
  { \
    THTensor_(resizeNd)(r_, t->dim(), THTensor_getSizePtr(t), NULL); \
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, \
                     *r__data = (*t_data OP value) ? 1 : 0;); \
  } \
  void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \
  { \
    THByteTensor_resizeNd(r_, ta->dim(), THTensor_getSizePtr(ta), NULL); \
    TH_TENSOR_APPLY3(unsigned char, r_, scalar_t, ta, scalar_t, tb, \
                     *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \
  } \
  void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \
  { \
    THTensor_(resizeNd)(r_, ta->dim(), THTensor_getSizePtr(ta), NULL); \
    TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, ta, scalar_t, tb, \
                     *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \
  }

TENSOR_IMPLEMENT_LOGICAL(lt,<)
TENSOR_IMPLEMENT_LOGICAL(gt,>)
TENSOR_IMPLEMENT_LOGICAL(le,<=)
TENSOR_IMPLEMENT_LOGICAL(ge,>=)
TENSOR_IMPLEMENT_LOGICAL(eq,==)
TENSOR_IMPLEMENT_LOGICAL(ne,!=)
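/* The LAB_IMPLEMENT_* machinery defines elementwise unary math kernels.
   GET_4TH_ARG picks the 3-arg variant (explicit OpenMP threshold) or the
   2-arg variant (default threshold) based on how many arguments the call
   site passes, so LAB_IMPLEMENT_BASIC_FUNCTION(name, cfunc[, threshold])
   works either way. */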
#ifdef _OPENMP

#define LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, OMP_THRESHOLD) \
  void THTensor_(NAME)(THTensor *r_, THTensor *t) \
  { \
    THTensor_(resizeAs)(r_, t); \
    ptrdiff_t r_Size = THTensor_(nElement)(r_); \
    int r_Contig = THTensor_(isContiguous)(r_); \
    int tContig = THTensor_(isContiguous)(t); \
    int inOMP = omp_in_parallel(); \
    if( !inOMP ){ \
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);, OMP_THRESHOLD); \
    } else { \
      TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);); \
    } \
  }

#define LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS(NAME, CFUNC) \
  LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, OMP_THRESHOLD) \
  void THTensor_(NAME)(THTensor *r_, THTensor *t) \
  { \
    THTensor_(resizeAs)(r_, t); \
    ptrdiff_t r_Size = THTensor_(nElement)(r_); \
    int r_Contig = THTensor_(isContiguous)(r_); \
    int tContig = THTensor_(isContiguous)(t); \
    if (r_Contig && tContig) { \
      TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(NAME)(r__data, t_data, r__len);); \
    } else { \
      int inOMP = omp_in_parallel(); \
      if( !inOMP ){ \
        TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);, OMP_THRESHOLD); \
      } else { \
        TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = CFUNC(*t_data);); \
      } \
    } \
  }

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS(NAME, CFUNC) \
  LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD)

#else

#define LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS(NAME, CFUNC) \
  void THTensor_(NAME)(THTensor *r_, THTensor *t) \
  { \
    THTensor_(resizeAs)(r_, t); \
    TH_TENSOR_APPLY2(scalar_t, t, scalar_t, r_, *r__data = CFUNC(*t_data);); \
  }

#define LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS(NAME, CFUNC, PSEUDO_OMP_THRESHOLD) \
  LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS(NAME, CFUNC)

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS(NAME, CFUNC) \
  void THTensor_(NAME)(THTensor *r_, THTensor *t) \
  { \
    THTensor_(resizeAs)(r_, t); \
    int r_Contig = THTensor_(isContiguous)(r_); \
    int tContig = THTensor_(isContiguous)(t); \
    if (r_Contig && tContig) { \
      TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(NAME)(r__data, t_data, r__len);); \
    } else { \
      TH_TENSOR_APPLY2(scalar_t, t, scalar_t, r_, *r__data = CFUNC(*t_data);); \
    } \
  }

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS(NAME, CFUNC, PSEUDO_OMP_THRESHOLD) \
  LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS(NAME, CFUNC)

#endif /* _OPENMP */

#define EXPAND(...) __VA_ARGS__

#define GET_4TH_ARG(ARG0, ARG1, ARG2, ARG3, ...) ARG3

#define LAB_IMPLEMENT_BASIC_FUNCTION_CHOOSE(...) \
  EXPAND(GET_4TH_ARG(__VA_ARGS__, LAB_IMPLEMENT_BASIC_FUNCTION_3_ARGS, LAB_IMPLEMENT_BASIC_FUNCTION_2_ARGS, ))

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION_CHOOSE(...) \
  EXPAND(GET_4TH_ARG(__VA_ARGS__, LAB_IMPLEMENT_VECTORIZED_FUNCTION_3_ARGS, LAB_IMPLEMENT_VECTORIZED_FUNCTION_2_ARGS, ))

#define LAB_IMPLEMENT_BASIC_FUNCTION(...) EXPAND(LAB_IMPLEMENT_BASIC_FUNCTION_CHOOSE(__VA_ARGS__)(__VA_ARGS__))

#define LAB_IMPLEMENT_VECTORIZED_FUNCTION(...) EXPAND(LAB_IMPLEMENT_VECTORIZED_FUNCTION_CHOOSE(__VA_ARGS__)(__VA_ARGS__))

LAB_IMPLEMENT_BASIC_FUNCTION(neg,-)
#if defined(TH_REAL_IS_LONG)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,labs)
#endif /* int64_t only part */

#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_CHAR)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)
#endif /* int only part */

#if defined(TH_REAL_IS_BYTE)
/* byte is unsigned, so abs is the identity */
LAB_IMPLEMENT_BASIC_FUNCTION(abs,)
#endif
/* floating point only part */
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)

#if defined (TH_REAL_IS_FLOAT)
#define TH_MATH_NAME(fn) fn##f
#else
#define TH_MATH_NAME(fn) fn
#endif

LAB_IMPLEMENT_BASIC_FUNCTION(lgamma,TH_MATH_NAME(lgamma))
LAB_IMPLEMENT_BASIC_FUNCTION(digamma,TH_MATH_NAME(TH_digamma))
LAB_IMPLEMENT_BASIC_FUNCTION(trigamma,TH_MATH_NAME(TH_trigamma))
LAB_IMPLEMENT_BASIC_FUNCTION(erfinv,TH_erfinv)
LAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs))
LAB_IMPLEMENT_BASIC_FUNCTION(frac,TH_MATH_NAME(TH_frac))
LAB_IMPLEMENT_BASIC_FUNCTION(cinv, TH_MATH_NAME(1.0) / )

LAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
LAB_IMPLEMENT_BASIC_FUNCTION(sinh,TH_MATH_NAME(sinh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
LAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
LAB_IMPLEMENT_BASIC_FUNCTION(sqrt,TH_MATH_NAME(sqrt),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
LAB_IMPLEMENT_BASIC_FUNCTION(rsqrt,TH_MATH_NAME(TH_rsqrt),HYPER_TH_OMP_OVERHEAD_THRESHOLD)

LAB_IMPLEMENT_VECTORIZED_FUNCTION(sigmoid,TH_MATH_NAME(TH_sigmoid),HYPER_TH_OMP_OVERHEAD_THRESHOLD)
void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty)
{
  THTensor_(resizeAs)(r_, tx);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, tx, scalar_t, ty, *r__data = TH_MATH_NAME(atan2)(*tx_data, *ty_data););
}
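/* The std/var reductions further below use Welford's online algorithm:
   mean_k = mean_{k-1} + (x_k - mean_{k-1}) / k and
   M2_k = M2_{k-1} + (x_k - mean_{k-1}) * (x_k - mean_k),
   so the variance is M2_n / n (biased) or M2_n / (n - 1) (unbiased) without
   the catastrophic cancellation of the naive sum-of-squares formula. */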
void THTensor_(polygamma)(THTensor *r_, int64_t n, THTensor *t) {
  switch (n) {
    case 0: THTensor_(digamma)(r_, t); return;
    case 1: THTensor_(trigamma)(r_, t); return;
    default: THError("polygamma(n,x) is not implemented for n>=2");
  }
}
void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "invalid dimension %d",
      dimension);

  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimensionLegacyAll)(t), dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(r_, dim, {});

  TH_TENSOR_DIM_APPLY2(scalar_t, t, scalar_t, r_, dimension,
                       /* Welford's algorithm for numerical stability */
                       accreal mean = 0;
                       accreal M2 = 0;
                       int64_t i;
                       for (i = 0; i < t_size; i++)
                       {
                         scalar_t z = t_data[i*t_stride];
                         scalar_t delta = z - mean;
                         mean += delta / (i + 1);
                         scalar_t delta2 = z - mean;
                         M2 += delta * delta2;
                       }

                       if (biased && t_size >= 2)
                       {
                         *r__data = TH_MATH_NAME(sqrt)(M2 / t_size);
                       } else if (!biased && t_size >= 2) {
                         *r__data = TH_MATH_NAME(sqrt)(M2 / (t_size - 1));
                       } else if (biased && t_size == 1) {
                         *r__data = 0;
                       } else {
                         *r__data = NAN;
                       });

  if (!keepdim) {
    THTensor_(squeeze1d)(r_, r_, dimension);
  }
}
void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "invalid dimension %d",
      dimension);

  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimensionLegacyAll)(t), dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(r_, dim, {});

  TH_TENSOR_DIM_APPLY2(scalar_t, t, scalar_t, r_, dimension,
                       /* Welford's algorithm for numerical stability */
                       accreal mean = 0;
                       accreal M2 = 0;
                       int64_t i;
                       for (i = 0; i < t_size; i++)
                       {
                         scalar_t z = t_data[i*t_stride];
                         scalar_t delta = z - mean;
                         mean += delta / (i + 1);
                         scalar_t delta2 = z - mean;
                         M2 += delta * delta2;
                       }

                       if (biased && t_size >= 2)
                       {
                         *r__data = M2 / t_size;
                       } else if (!biased && t_size >= 2) {
                         *r__data = M2 / (t_size - 1);
                       } else if (biased && t_size == 1) {
                         *r__data = 0;
                       } else {
                         *r__data = NAN;
                       });

  if (!keepdim) {
    THTensor_(squeeze1d)(r_, r_, dimension);
  }
}
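/* norm special-cases p = 0, 1, 2, 3 and +/-infinity to avoid the generic
   pow() path: p=0 counts nonzeros, p=1 sums absolute values, p=2 takes the
   sqrt of the sum of squares, and the infinity norms track a running
   max/min of |x|. */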
void THTensor_(norm)(THTensor *r_, THTensor *t, scalar_t value, int dimension, int keepdim)
{
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(t), 3, "invalid dimension %d",
      dimension);

  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimensionLegacyAll)(t), dimension, keepdim);
  std::vector<int64_t> dim = THTensor_sizesLegacyNoScalars(t);
  dim[dimension] = 1;
  THTensor_(resize)(r_, dim, {});
  #define DIM_REDUCE(reduce, transform, init) \
    TH_TENSOR_DIM_APPLY2(scalar_t, t, scalar_t, r_, dimension, \
                         accreal sum = init; \
                         int64_t i; \
                         for(i = 0; i < t_size; i++) { \
                           (reduce); \
                         } \
                         (transform);)

  if(value == 0) {
    DIM_REDUCE(sum += t_data[i*t_stride] != 0.0,
               *r__data = sum, 0);
  } else if (value == 1) {
    DIM_REDUCE(sum += TH_MATH_NAME(fabs)(t_data[i*t_stride]),
               *r__data = sum, 0);
  } else if (value == 2) {
    DIM_REDUCE(sum += t_data[i*t_stride] * t_data[i*t_stride],
               *r__data = TH_MATH_NAME(sqrt)(sum), 0);
  } else if (value == 3) {
    DIM_REDUCE(sum += TH_MATH_NAME(fabs)(t_data[i*t_stride] * t_data[i*t_stride] * t_data[i*t_stride]),
               *r__data = TH_MATH_NAME(pow)(sum, 1.0/3), 0);
  } else if (value == INFINITY) {
    DIM_REDUCE(sum = THMax(sum, TH_MATH_NAME(fabs)(t_data[i*t_stride])),
               *r__data = sum, 0);
  } else if (value == -INFINITY) {
    DIM_REDUCE(sum = THMin(sum, TH_MATH_NAME(fabs)(t_data[i*t_stride])),
               *r__data = sum, INFINITY);
  } else {
    DIM_REDUCE(sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(t_data[i*t_stride]), value),
               *r__data = TH_MATH_NAME(pow)(sum, 1.0/value), 0);
  }

  if (!keepdim) {
    THTensor_(squeeze1d)(r_, r_, dimension);
  }
  #undef DIM_REDUCE
}
accreal THTensor_(normall)(THTensor *tensor, scalar_t value)
{
  accreal sum = 0;
  if(value == 0) {
    TH_TENSOR_APPLY(scalar_t, tensor, sum += *tensor_data != 0.0;);
    return sum;
  } else if(value == 1) {
    TH_TENSOR_APPLY(scalar_t, tensor, sum += TH_MATH_NAME(fabs)(*tensor_data););
    return sum;
  } else if(value == 2) {
    TH_TENSOR_APPLY(scalar_t, tensor, accreal z = *tensor_data; sum += z*z;);
    return sqrt(sum);
  } else if(value == 3) {
    TH_TENSOR_APPLY(scalar_t, tensor, accreal z = *tensor_data; sum += std::abs(z*z*z););
    return TH_MATH_NAME(pow)(sum, 1.0/3);
  } else if(value == INFINITY) {
    TH_TENSOR_APPLY(scalar_t, tensor, sum = THMax(sum, TH_MATH_NAME(fabs)(*tensor_data)););
    return sum;
  } else if(value == -INFINITY) {
    sum = INFINITY;
    TH_TENSOR_APPLY(scalar_t, tensor, sum = THMin(sum, TH_MATH_NAME(fabs)(*tensor_data)););
    return sum;
  } else {
    TH_TENSOR_APPLY(scalar_t, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value););
    return TH_MATH_NAME(pow)(sum, 1.0/value);
  }
}
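/* renorm: for every slice along `dimension`, compute its p-norm and, if it
   exceeds maxnorm, scale the whole slice by maxnorm / (norm + 1e-7); slices
   already within the budget are copied through unchanged. */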
void THTensor_(renorm)(THTensor *res, THTensor *src, scalar_t value, int dimension, scalar_t maxnorm)
{
  THTensor *rowR, *rowS;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyNoScalars)(src), 3, "invalid dimension %d",
      dimension);
  THArgCheck(value > 0, 2, "non-positive norm not supported");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) > 1, 1, "need at least 2 dimensions, got %d dimensions",
      THTensor_(nDimensionLegacyNoScalars)(src));

  rowR = THTensor_(new)();
  rowS = THTensor_(new)();
  THTensor_(resizeAs)(res, src);

  for (int64_t i = 0; i < THTensor_sizeLegacyNoScalars(src, dimension); i++)
  {
    scalar_t norm = 0;
    scalar_t new_norm;

    THTensor_(select)(rowS, src, dimension, i);
    THTensor_(select)(rowR, res, dimension, i);
    if (value == 1) {
      TH_TENSOR_APPLY(scalar_t, rowS, norm += fabs(*rowS_data););
    } else if (value == 2) {
      TH_TENSOR_APPLY(scalar_t, rowS, accreal z = *rowS_data; norm += z*z;);
    } else if (value == INFINITY) {
      TH_TENSOR_APPLY(scalar_t, rowS, norm = THMax(norm, TH_MATH_NAME(fabs)(*rowS_data)););
    } else {
      TH_TENSOR_APPLY(scalar_t, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value););
    }

    if (value != INFINITY) {
      norm = pow(norm, 1/value);
    }

    if (norm > maxnorm)
    {
      new_norm = maxnorm / (norm + 1e-7);
      TH_TENSOR_APPLY2(
        scalar_t, rowR, scalar_t, rowS,
        *rowR_data = (*rowS_data) * new_norm;);
    }
    else
    {
      at::Tensor rowR_wrap = THTensor_wrap(rowR);
      at::Tensor rowS_wrap = THTensor_wrap(rowS);
      at::_copy_same_type_(rowR_wrap, rowS_wrap);
    }
  }

  c10::raw::intrusive_ptr::decref(rowR);
  c10::raw::intrusive_ptr::decref(rowS);
}
accreal THTensor_(dist)(THTensor *tensor, THTensor *src, scalar_t value)
{
  scalar_t sum;
  if (value == INFINITY) {
    sum = -1.0;
    TH_TENSOR_APPLY2(scalar_t, tensor, scalar_t, src,
                     sum = THMax(sum, TH_MATH_NAME(fabs)(*tensor_data - *src_data)););
    return sum;
  } else if (value == -INFINITY) {
    sum = INFINITY;
    TH_TENSOR_APPLY2(scalar_t, tensor, scalar_t, src,
                     sum = THMin(sum, TH_MATH_NAME(fabs)(*tensor_data - *src_data)););
    return sum;
  } else if (value == 0.0) {
    sum = 0.0;
    TH_TENSOR_APPLY2(scalar_t, tensor, scalar_t, src,
                     sum += (*tensor_data - *src_data != 0.0););
    return sum;
  } else {
    sum = 0.0;
    TH_TENSOR_APPLY2(scalar_t, tensor, scalar_t, src,
                     sum += TH_MATH_NAME(pow)(
                       TH_MATH_NAME(fabs)(*tensor_data - *src_data), value););
    return TH_MATH_NAME(pow)(sum, 1.0/value);
  }
}
accreal THTensor_(meanall)(THTensor *tensor)
{
  return THTensor_(sumall)(tensor)/THTensor_(nElement)(tensor);
}

accreal THTensor_(varall)(THTensor *tensor, int biased)
{
  accreal mean = THTensor_(meanall)(tensor);
  accreal sum = 0;
  TH_TENSOR_APPLY(scalar_t, tensor, sum += (*tensor_data - mean)*(*tensor_data - mean););
  sum /= std::max<int64_t>(0, THTensor_(nElement)(tensor) - (biased ? 0 : 1));
  return sum;
}

accreal THTensor_(stdall)(THTensor *tensor, int biased)
{
  return sqrt(THTensor_(varall)(tensor, biased));
}
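/* histc bins values into `nbins` equal-width bins on [minval, maxval]; when
   the bounds coincide they are derived from the data (and widened by 1 if
   the data are constant). The bin index is
   (int)((x - minval) / (maxval - minval) * nbins), clamped to nbins-1;
   e.g. x = 5 with minval = 0, maxval = 8, nbins = 4 lands in bin
   (int)(5.0/8*4) = 2. */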
void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue)
{
  if (nbins <= 0) {
    THError("bins must be > 0");
  }
  scalar_t minval;
  scalar_t maxval;
  scalar_t *h_data;

  THTensor_(resize1d)(hist, nbins);
  THTensor_(zero)(hist);
  minval = minvalue;
  maxval = maxvalue;
  if (minval == maxval)
  {
    minval = THTensor_(minall)(tensor);
    maxval = THTensor_(maxall)(tensor);
  }
  if (minval == maxval)
  {
    minval = minval - 1;
    maxval = maxval + 1;
  }

  h_data = hist->data<scalar_t>();

  TH_TENSOR_APPLY(scalar_t, tensor,
    if (*tensor_data >= minval && *tensor_data <= maxval) {
      const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins);
      h_data[THMin(bin, nbins-1)] += 1;
    });
}
void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue)
{
  THArgCheck(THTensor_(nDimensionLegacyAll)(tensor) < 3, 2,
      "invalid dimension %d, the input must be a 2d tensor", THTensor_(nDimensionLegacyAll)(tensor));

  int dimension = 1;
  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimensionLegacyAll)(tensor), 2, "invalid dimension %d",
      dimension);

  scalar_t minval;
  scalar_t maxval;

  THTensor_(resize2d)(hist, THTensor_sizeLegacyNoScalars(tensor, 0), nbins);
  THTensor_(zero)(hist);

  minval = minvalue;
  maxval = maxvalue;
  if (minval == maxval)
  {
    minval = THTensor_(minall)(tensor);
    maxval = THTensor_(maxall)(tensor);
  }
  if (minval == maxval)
  {
    minval = minval - 1;
    maxval = maxval + 1;
  }

  TH_TENSOR_DIM_APPLY2(scalar_t, tensor, scalar_t, hist, dimension, int64_t i;
                       for(i = 0; i < tensor_size; i++)
                       {
                         if(tensor_data[i*tensor_stride] >= minval && tensor_data[i*tensor_stride] <= maxval) {
                           const int bin = (int)((tensor_data[i*tensor_stride]-minval) / (maxval-minval) * nbins);
                           hist_data[THMin(bin, nbins-1)] += 1;
                         }
                       });
}
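/* The helpers below approximate the reparameterized gradient of the Beta
   distribution (and, via dirichlet_grad_one, of the Dirichlet) with respect
   to its concentration parameters: Taylor-series expansions near x = 0 and
   x = 1, a saddle-point expansion when both parameters are large, and a
   fitted rational correction to an analytic approximation elsewhere. */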
/* Approximate reparameterized gradient of Beta(x, alpha, beta) wrt alpha.
   Assumes x is close to zero and uses a Taylor expansion. */
static inline scalar_t THTensor_(beta_grad_alpha_small)(scalar_t x, scalar_t alpha, scalar_t beta) {
  const scalar_t factor = TH_MATH_NAME(TH_digamma)(alpha) - TH_MATH_NAME(TH_digamma)(alpha + beta) - TH_MATH_NAME(log)(x);
  scalar_t numer = 1;
  scalar_t series = numer / alpha * (factor + 1 / alpha);
  for (int i = 1; i <= 10; ++i) {
    numer *= (i - beta) * x / i;
    const scalar_t denom = alpha + i;
    series += numer / denom * (factor + 1 / denom);
  }
  const scalar_t result = x * TH_MATH_NAME(pow)(1 - x, -beta) * series;
  return th_isnan(result) ? 0.0 : result;
}
/* Approximate reparameterized gradient of Beta(x, alpha, beta) wrt beta.
   Assumes x is close to zero and uses a Taylor expansion. */
static inline scalar_t THTensor_(beta_grad_beta_small)(scalar_t x, scalar_t alpha, scalar_t beta) {
  const scalar_t factor = TH_MATH_NAME(TH_digamma)(alpha+beta) - TH_MATH_NAME(TH_digamma)(beta);
  scalar_t numer = 1;
  scalar_t betas = 1;
  scalar_t dbetas = 0;
  scalar_t series = factor / alpha;
  for (int i = 1; i <= 8; ++i) {
    numer *= -x / i;
    dbetas = dbetas * (beta - i) + betas;
    betas = betas * (beta - i);
    series += numer / (alpha + i) * (dbetas + factor * betas);
  }
  const scalar_t result = -TH_MATH_NAME(pow)(1 - x, 1 - beta) * series;
  return th_isnan(result) ? 0.0 : result;
}
/* Approximate reparameterized gradient of Beta(x, alpha, beta) wrt alpha.
   Assumes alpha and beta are both large and uses a saddle-point expansion;
   computed in double precision for numerical stability. */
static inline scalar_t THTensor_(beta_grad_alpha_mid)(double x, double alpha, double beta) {
  const double total = alpha + beta;
  const double mean = alpha / total;
  const double std = sqrt(alpha * beta / (total + 1)) / total;
  if (mean - 0.1 * std <= x && x <= mean + 0.1 * std) {
    /* avoid the singularity at x = mean */
    const double poly = 47 * x * (beta*beta)*(beta*beta) + alpha * (
                        (43 + 20 * (16 + 27 * beta) * x) * (beta*beta)*beta + alpha * (
                        3 * (59 + 180 * beta - 90 * x) * (beta*beta) + alpha * (
                        (453 + 1620 * beta * (1 - x) - 455 * x) * beta + alpha * (
                        8 * (1 - x) * (135 * beta - 11)))));
    const double prefactor_num = (1 + 12 * alpha) * (1 + 12 * beta) / (total * total);
    const double prefactor_den = 12960 * alpha * alpha * alpha * beta * beta * (1 + 12 * total);
    return prefactor_num / (1 - x) * poly / prefactor_den;
  }
  const double prefactor = -x / sqrt(2 * alpha * beta / total);
  const double stirling = (1 + 1 / (12 * alpha) + 1 / (288 * alpha*alpha))
                        * (1 + 1 / (12 * beta) + 1 / (288 * beta*beta))
                        / (1 + 1 / (12 * total) + 1 / (288 * total*total));
  const double term1_num = 2 * (alpha*alpha) * (x - 1) + alpha * beta * (x - 1) - x * (beta*beta);
  const double axbx = alpha * (x-1) + beta * x;
  const double term1_den = sqrt(2 * alpha / beta) * pow(total, 1.5f) * axbx*axbx;
  const double term1 = term1_num / term1_den;
  const double term2 = 0.5f * log(alpha / (total * x));
  const double term3_num = sqrt(8 * alpha * beta / total);
  const double term3_den = beta * x + alpha * (x - 1);
  const double term3 = term3_num / term3_den;
  const double term4_base = beta * log(beta / (total * (1 - x))) +
                            alpha * log(alpha / (total * x));
  const double term4 = pow(term4_base, -1.5f);
  const double term1234 = term1 + term2 * (term3 + (x < mean ? term4 : -term4));
  return stirling * prefactor * term1234;
}
/* Compute one element of the Dirichlet reparameterized gradient given the
   sample x, its concentration alpha, and the total concentration,
   dispatching to the Beta approximations above by regime. */
static inline scalar_t THTensor_(dirichlet_grad_one)(scalar_t x, scalar_t alpha, scalar_t total) {
  const scalar_t beta = total - alpha;
  const scalar_t boundary = total * x * (1 - x);

  /* use an asymptotic approximation for x close to 0 */
  if (x <= 0.5f && boundary < 2.5f) {
    return THTensor_(beta_grad_alpha_small)(x, alpha, beta);
  }

  /* use an asymptotic approximation for x close to 1 */
  if (x >= 0.5f && boundary < 0.75f) {
    return -THTensor_(beta_grad_beta_small)(1 - x, beta, alpha);
  }

  /* use an asymptotic approximation when alpha and (total - alpha) are both large */
  if (alpha > 6 && beta > 6) {
    return THTensor_(beta_grad_alpha_mid)(x, alpha, beta);
  }
  /* fitted rational correction to an analytic approximation */
  static const scalar_t c[2][3][3][4] = {
    {{{1.003668233, -0.01061107488, -0.0657888334, 0.01201642863},
      {0.6336835991, -0.3557432599, 0.05486251648, -0.001465281033},
      {-0.03276231906, 0.004474107445, 0.002429354597, -0.0001557569013}},
     {{0.221950385, -0.3187676331, 0.01799915743, 0.01074823814},
      {-0.2951249643, 0.06219954479, 0.01535556598, 0.001550077057},
      {0.02155310298, 0.004170831599, 0.001292462449, 6.976601077e-05}},
     {{-0.05980841433, 0.008441916499, 0.01085618172, 0.002319392565},
      {0.02911413504, 0.01400243777, -0.002721828457, 0.000751041181},
      {0.005900514878, -0.001936558688, -9.495446725e-06, 5.385558597e-05}}},
    {{{1, -0.02924021934, -0.04438342661, 0.007285809825},
      {0.6357567472, -0.3473456711, 0.05454656494, -0.002407477521},
      {-0.03301322327, 0.004845219414, 0.00231480583, -0.0002307248149}},
     {{0.5925320577, -0.1757678135, 0.01505928619, 0.000564515273},
      {0.1014815858, -0.06589186703, 0.01272886114, -0.0007316646956},
      {-0.007258481865, 0.001096195486, 0.0003934994223, -4.12701925e-05}},
     {{0.06469649321, -0.0236701437, 0.002902096474, -5.896963079e-05},
      {0.001925008108, -0.002869809258, 0.0008000589141, -6.063713228e-05},
      {-0.0003477407336, 6.959756487e-05, 1.097287507e-05, -1.650964693e-06}}},
  };
  const scalar_t u = TH_MATH_NAME(log)(x);
  const scalar_t a = TH_MATH_NAME(log)(alpha) - u;
  const scalar_t b = TH_MATH_NAME(log)(total) - a;
  const scalar_t pow_u[3] = {1, u, u * u};
  const scalar_t pow_a[3] = {1, a, a * a};
  scalar_t p = 0.0;
  scalar_t q = 0.0;
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 3; ++j) {
      const scalar_t ua = pow_u[i] * pow_a[j];
      p += ua * (c[0][i][j][0] + b * (c[0][i][j][1] + b * (c[0][i][j][2] + b * c[0][i][j][3])));
      q += ua * (c[1][i][j][0] + b * (c[1][i][j][1] + b * (c[1][i][j][2] + b * c[1][i][j][3])));
    }
  }
  const scalar_t approx = x * (TH_MATH_NAME(TH_digamma)(total) - TH_MATH_NAME(TH_digamma)(alpha)) / beta;
  return p / q * approx;
}
void THTensor_(dirichlet_grad)(THTensor *self, THTensor *x, THTensor *alpha, THTensor *total)
{
  x = THTensor_(newContiguous)(x);
  alpha = THTensor_(newContiguous)(alpha);
  total = THTensor_(newContiguous)(total);
  TH_CHECK_SAME_SIZE(alpha, x);
  TH_CHECK_SAME_SIZE(total, x);
  THTensor_(resizeAs)(self, x);
  THTensor* grad = THTensor_(newContiguous)(self);

  scalar_t*const grad_data = grad->data<scalar_t>();
  scalar_t*const x_data = x->data<scalar_t>();
  scalar_t*const alpha_data = alpha->data<scalar_t>();
  scalar_t*const total_data = total->data<scalar_t>();
  const int64_t numel = THTensor_(nElement)(x);
  int64_t i;
  #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i)
  for(i = 0; i < numel; ++i) {
    grad_data[i] = THTensor_(dirichlet_grad_one)(x_data[i], alpha_data[i], total_data[i]);
  }

  THTensor_(freeCopyTo)(grad, self);
  c10::raw::intrusive_ptr::decref(x);
  c10::raw::intrusive_ptr::decref(alpha);
  c10::raw::intrusive_ptr::decref(total);
}