#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorMath.cpp"

#include <TH/generic/THTensorApply.hpp>

// True when an integer remainder needs to be wrapped so the result takes the
// sign of the divisor (used by the remainder-style operations below).
static inline bool modulo_wrap(scalar_t a, scalar_t b) {
  return (a != 0) && (a < 0) != (b < 0);
}
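// r_ = t | value: element-wise bitwise OR with a scalar (integer tensor types only).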
void THTensor_(bitor)(THTensor *r_, THTensor *t, scalar_t value)
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  return THError("bitor is only supported for integer type tensors");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i=0; i<r_Size; i++) {
      rp[i] = tp[i] | value;
    int inOMP = omp_in_parallel();
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data | value;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data | value;);
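// r_ = t ^ value: element-wise bitwise XOR with a scalar (integer tensor types only).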
void THTensor_(bitxor)(THTensor *r_, THTensor *t, scalar_t value)
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  return THError("bitxor is only supported for integer type tensors");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i=0; i<r_Size; i++) {
      rp[i] = tp[i] ^ value;
    int inOMP = omp_in_parallel();
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data ^ value;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data ^ value;);
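// r_ = clamp(t, min_value, max_value): element-wise clamp into [min_value, max_value].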
void THTensor_(clamp)(THTensor *r_, THTensor *t, scalar_t min_value, scalar_t max_value)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
    for (i=0; i<r_Size; i++)
      rp[i] = (tp[i] < min_value) ? min_value : (tp[i] > max_value ? max_value : tp[i]);
    int inOMP = omp_in_parallel();
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data););
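// r_ = t + value * src: scaled element-wise addition; the fully contiguous path uses BLAS axpy or the vectorized THVector_(cadd).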
void THTensor_(cadd)(THTensor *r_, THTensor *t, scalar_t value, THTensor *src)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      THBlas_(axpy)(THTensor_(nElement)(t), value, src->data<scalar_t>(), 1, r_->data<scalar_t>(), 1);
      TH_TENSOR_APPLY3_CONTIG(scalar_t, r_, scalar_t, t, scalar_t, src, THVector_(cadd)(r__data, t_data, src_data, value, r__len););
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data + value * *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data + value * *src_data;);
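// r_ = t - value * src, implemented as cadd with the negated scale.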
void THTensor_(csub)(THTensor *r_, THTensor *t, scalar_t value, THTensor *src)
{
  THTensor_(cadd)(r_, t, -value, src);
}
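// r_ = t * src: element-wise multiplication.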
void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      TH_TENSOR_APPLY3_CONTIG(scalar_t, r_, scalar_t, t, scalar_t, src, THVector_(cmul)(r__data, t_data, src_data, r__len););
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data * *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data * *src_data;);
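// r_ = t ^ value: element-wise power with special-cased exponents (copy, square, cube,
// sqrt, rsqrt, reciprocal, inverse square) before falling back to pow/powOne.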
void THTensor_(pow)(THTensor *r_, THTensor *t, scalar_t value)
  THTensor_(resizeAs)(r_, t);
    at::_copy_same_type_(r__wrap, t_wrap);
    THTensor_(cmul)(r_, t, t);
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data * *t_data * *t_data;);
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#if defined (TH_REAL_IS_FLOAT)
#define TH_MATH_NAME(fn) fn##f
#define TH_MATH_NAME(fn) fn
  else if(value == 0.5){
    THTensor_(sqrt)(r_, t);
  else if(value == -0.5){
    THTensor_(rsqrt)(r_, t);
  else if(value == -1){
    THTensor_(cinv)(r_, t);
  else if(value == -2){
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = TH_MATH_NAME(1.0) / (*t_data * *t_data););
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = TH_MATH_NAME(pow)(*t_data, value););
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = THTensor_(powOne)(*t_data, value););
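// Scalar power helper: integer types reject negative exponents; floating types presumably defer to powf/pow.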
scalar_t THTensor_(powOne)(scalar_t x, scalar_t y) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_HALF)
#elif defined(TH_REAL_IS_DOUBLE)
  THArgCheck(y >= 0, 1,
             "Integers to negative integer powers are not allowed");
void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++)
        rp[i] = THTensor_(powOne)(tp[i], sp[i]);
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = THTensor_(powOne)(*t_data, *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = THTensor_(powOne)(*t_data, *src_data););
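// r_ = t / src: element-wise division.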
void THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      TH_TENSOR_APPLY3_CONTIG(scalar_t, r_, scalar_t, t, scalar_t, src, THVector_(cdiv)(r__data, t_data, src_data, r__len););
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data / *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data / *src_data;);
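// r_ = t << src: element-wise left shift; floating types multiply by 2^src instead of shifting bits.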
void THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src)
#if defined(TH_REAL_IS_HALF)
  return THError("clshift is not supported for torch.HalfTensor");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT)
        rp[i] = tp[i] * powf(2, sp[i]);
#elif defined(TH_REAL_IS_DOUBLE)
        rp[i] = tp[i] * pow(2, sp[i]);
#elif defined(TH_REAL_IS_BYTE)
        rp[i] = ((scalar_t) tp[i]) << sp[i];
        rp[i] = ((ureal) tp[i]) << sp[i];
      int inOMP = omp_in_parallel();
#if defined(TH_REAL_IS_FLOAT)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data * powf(2, *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#elif defined(TH_REAL_IS_DOUBLE)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data * pow(2, *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#elif defined(TH_REAL_IS_BYTE)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((scalar_t)*t_data) << *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((ureal)*t_data) << *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#if defined(TH_REAL_IS_FLOAT)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data * powf(2, *src_data););
#elif defined(TH_REAL_IS_DOUBLE)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data * pow(2, *src_data););
#elif defined(TH_REAL_IS_BYTE)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((scalar_t)*t_data) << *src_data;);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((ureal)*t_data) << *src_data;);
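// r_ = t >> src: element-wise right shift; floating types divide by 2^src instead of shifting bits.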
void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src)
#if defined(TH_REAL_IS_HALF)
  return THError("crshift is not supported for torch.HalfTensor");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT)
        rp[i] = tp[i] / powf(2, sp[i]);
#elif defined(TH_REAL_IS_DOUBLE)
        rp[i] = tp[i] / pow(2, sp[i]);
#elif defined(TH_REAL_IS_BYTE)
        rp[i] = ((scalar_t) tp[i]) >> sp[i];
        rp[i] = ((ureal) tp[i]) >> sp[i];
      int inOMP = omp_in_parallel();
#if defined(TH_REAL_IS_FLOAT)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data / powf(2, *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#elif defined(TH_REAL_IS_DOUBLE)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data / pow(2, *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#elif defined(TH_REAL_IS_BYTE)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((scalar_t)*t_data) >> *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((ureal)*t_data) >> *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#if defined(TH_REAL_IS_FLOAT)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data / powf(2, *src_data););
#elif defined(TH_REAL_IS_DOUBLE)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data / pow(2, *src_data););
#elif defined(TH_REAL_IS_BYTE)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((scalar_t)*t_data) >> *src_data;);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = ((ureal)*t_data) >> *src_data;);
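// r_ = fmod(t, src): element-wise remainder with C fmod/% semantics (result keeps the sign of the dividend).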
void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
        rp[i] = fmod(tp[i], sp[i]);
        rp[i] = tp[i] % sp[i];
      int inOMP = omp_in_parallel();
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = fmod(*t_data, *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = (*t_data % *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = fmod(*t_data, *src_data););
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = (*t_data % *src_data););
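// r_ = remainder(t, src): element-wise modulo whose result takes the sign of the divisor (Python-style), via modulo_wrap.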
void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
        rp[i] = (sp[i] == 0) ? NAN : tp[i] - sp[i] * floor(tp[i] / sp[i]);
        rp[i] = tp[i] % sp[i];
        if (modulo_wrap(rp[i], sp[i]))
          rp[i] += sp[i];
      int inOMP = omp_in_parallel();
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = (*src_data == 0) ? NAN : *t_data - *src_data * floor(*t_data / *src_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data % *src_data;
                           if (modulo_wrap(*r__data, *src_data)) *r__data += *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = (*src_data == 0) ? NAN : *t_data - *src_data * floor(*t_data / *src_data););
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data % *src_data;
                   if (modulo_wrap(*r__data, *src_data)) *r__data += *src_data;);
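// r_ = t & src: element-wise bitwise AND (integer tensor types only).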
void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src)
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  return THError("cbitand is only supported for integer type tensors");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
        rp[i] = tp[i] & sp[i];
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data & *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data & *src_data;);
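// r_ = t | src: element-wise bitwise OR (integer tensor types only).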
void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src)
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  return THError("cbitor is only supported for integer type tensors");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
        rp[i] = tp[i] | sp[i];
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data | *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data | *src_data;);
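// r_ = t ^ src: element-wise bitwise XOR (integer tensor types only).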
void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src)
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  return THError("cbitxor is only supported for integer type tensors");
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t srcSize = THTensor_(nElement)(src);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int srcContig = THTensor_(isContiguous)(src);
  if (srcSize == r_Size){
    if (r_Contig && tContig && srcContig) {
      scalar_t *tp = t->data<scalar_t>();
      scalar_t *sp = src->data<scalar_t>();
      scalar_t *rp = r_->data<scalar_t>();
      #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i=0; i<r_Size; i++) {
        rp[i] = tp[i] ^ sp[i];
      int inOMP = omp_in_parallel();
      TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, tContig, srcContig, scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data ^ *src_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, t, scalar_t, src, *r__data = *t_data ^ *src_data;);
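// r_ = value ^ t: element-wise power with a scalar base and tensor exponent.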
void THTensor_(tpow)(THTensor *r_, scalar_t value, THTensor *t)
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
    for (i=0; i<r_Size; i++)
      rp[i] = THTensor_(powOne)(value, tp[i]);
    int inOMP = omp_in_parallel();
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = THTensor_(powOne)(value, *t_data);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = THTensor_(powOne)(value, *t_data););
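// r_ = t + value * src1 * src2: element-wise multiply-accumulate.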
void THTensor_(addcmul)(THTensor *r_, THTensor *t, scalar_t value, THTensor *src1, THTensor *src2)
    THTensor_(resizeAs)(r_, t);
    at::_copy_same_type_(r__wrap, t_wrap);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t src1Size = THTensor_(nElement)(src1);
  int64_t src2Size = THTensor_(nElement)(src2);
  int r_Contig = THTensor_(isContiguous)(r_);
  int src1Contig = THTensor_(isContiguous)(src1);
  int src2Contig = THTensor_(isContiguous)(src2);
  if( (src1Size == src2Size) && (src1Size == r_Size) ){
    int inOMP = omp_in_parallel();
    TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, src1Contig, src2Contig, scalar_t, r_, scalar_t, src1, scalar_t, src2, *r__data += value * *src1_data * *src2_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, src1, scalar_t, src2, *r__data += value * *src1_data * *src2_data;);
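// r_ = t + value * src1 / src2: element-wise divide-accumulate.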
void THTensor_(addcdiv)(THTensor *r_, THTensor *t, scalar_t value, THTensor *src1, THTensor *src2)
    THTensor_(resizeAs)(r_, t);
    at::_copy_same_type_(r__wrap, t_wrap);
  int64_t r_Size = THTensor_(nElement)(r_);
  int64_t src1Size = THTensor_(nElement)(src1);
  int64_t src2Size = THTensor_(nElement)(src2);
  int r_Contig = THTensor_(isContiguous)(r_);
  int src1Contig = THTensor_(isContiguous)(src1);
  int src2Contig = THTensor_(isContiguous)(src2);
  if( (src1Size == src2Size) && (src1Size == r_Size) ){
    int inOMP = omp_in_parallel();
    TH_TENSOR_APPLY3_OMP(r_Size, r_Contig, src1Contig, src2Contig, scalar_t, r_, scalar_t, src1, scalar_t, src2, *r__data += value * *src1_data / *src2_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  TH_TENSOR_APPLY3(scalar_t, r_, scalar_t, src1, scalar_t, src2, *r__data += value * *src1_data / *src2_data;);
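// r_ = beta * t + alpha * mat * vec: matrix-vector product via BLAS gemv, picking the 'n'/'t'
// layout that matches mat's strides and falling back to a contiguous copy of mat otherwise.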
void THTensor_(addmv)(THTensor *r_, scalar_t beta, THTensor *t, scalar_t alpha, THTensor *mat, THTensor *vec)
  if( (mat->dim() != 2) || (THTensor_nDimensionLegacyNoScalars(vec) != 1) )
    THError("matrix and vector expected, got %dD, %dD",
            mat->dim(), THTensor_nDimensionLegacyNoScalars(vec));

  if( mat->size(1) != THTensor_sizeLegacyNoScalars(vec, 0) ) {
    THDescBuff bm = THTensor_(sizeDesc)(mat);
    THDescBuff bv = THTensor_(sizeDesc)(vec);
    THError("size mismatch, %s, %s", bm.str, bv.str);

  if(THTensor_nDimensionLegacyNoScalars(t) != 1)
    THError("vector expected, got t: %dD", t->dim());

  if(THTensor_sizeLegacyNoScalars(t, 0) != mat->size(0)) {
    THDescBuff bt = THTensor_(sizeDesc)(t);
    THDescBuff bm = THTensor_(sizeDesc)(mat);
    THError("size mismatch, t: %s, mat: %s", bt.str, bm.str);

  THTensor_(resizeAs)(r_, t);
  at::_copy_same_type_(r__wrap, t_wrap);

  auto r_stride = THTensor_strideLegacyNoScalars(r_, 0);

#define LDA_COND(M, N, LDA) ((N) == 1 || (LDA) >= THMax(1, (M)))

  if(mat->stride(0) == 1 && LDA_COND(mat->size(0), mat->size(1), mat->stride(1)))
    THBlas_(gemv)('n', mat->size(0), mat->size(1),
                  alpha, mat->data<scalar_t>(), mat->stride(1),
                  vec->data<scalar_t>(), THTensor_strideLegacyNoScalars(vec, 0),
                  beta, r_->data<scalar_t>(), r_stride);
  else if(mat->stride(1) == 1 && LDA_COND(mat->size(1), mat->size(0), mat->stride(0)))
    THBlas_(gemv)('t', mat->size(1), mat->size(0),
                  alpha, mat->data<scalar_t>(), mat->stride(0),
                  vec->data<scalar_t>(), THTensor_strideLegacyNoScalars(vec, 0),
                  beta, r_->data<scalar_t>(), r_stride);
  THTensor *cmat = THTensor_(newContiguous)(mat);
  THBlas_(gemv)('t', mat->size(1), mat->size(0),
                alpha, cmat->data<scalar_t>(), cmat->stride(0),
                vec->data<scalar_t>(), THTensor_strideLegacyNoScalars(vec, 0),
                beta, r_->data<scalar_t>(), r_stride);
  c10::raw::intrusive_ptr::decref(cmat);

  if (THTensor_sizeLegacyNoScalars(vec, 0) == 0 && mat->size(0) != 0) {
  } else if (beta != 1) {
    THTensor_(mul)(r_, r_, beta);
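// r_[i][j] = gain-scaled pairwise score between row i of m1 and row j of m2 (the elided inner
// loop accumulates per-dimension differences); both inputs are flattened to 2D first.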
void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, scalar_t gain)
  int64_t N1 = m1->size(0);
  int64_t N2 = m2->size(0);
  THTensor_(resize2d)(r_, N1, N2);

  m1 = THTensor_(newContiguous)(m1);
  m2 = THTensor_(newContiguous)(m2);
  THTensor_(resize2d)(m1, N1, THTensor_(nElement)(m1) / N1);
  THTensor_(resize2d)(m2, N2, THTensor_(nElement)(m2) / N2);

  THArgCheck(m1->size(1) == m2->size(1), 3, "m1 and m2 must have the same inner vector dim");

  m1_p = m1->data<scalar_t>();
  m2_p = m2->data<scalar_t>();
  r_p = r_->data<scalar_t>();

  #pragma omp parallel for private(i)
  for (i=0; i<N1; i++) {
    for (j=0; j<N2; j++) {
      for (k=0; k<dim; k++) {
        scalar_t term = m1_p[ i*dim + k ] - m2_p[ j*dim + k ];
      r_p[ i*N2 + j ] = gain * sum;

  c10::raw::intrusive_ptr::decref(m1);
  c10::raw::intrusive_ptr::decref(m2);
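// r_ = beta * t + alpha * m1 * m2: matrix-matrix product via BLAS gemm, choosing transpose flags
// so that each operand is handed to BLAS in a compatible column-major layout (copying when needed).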
void THTensor_(addmm)(THTensor *r_, scalar_t beta, THTensor *t, scalar_t alpha, THTensor *m1, THTensor *m2)
  char transpose_r, transpose_m1, transpose_m2;
  THTensor *r__, *m1_, *m2_;

  if( (m1->dim() != 2) || (m2->dim() != 2))
    THError("matrices expected, got %dD, %dD tensors", m1->dim(), m2->dim());

  if(m1->size(1) != m2->size(0)) {
    THDescBuff bm1 = THTensor_(sizeDesc)(m1);
    THDescBuff bm2 = THTensor_(sizeDesc)(m2);
    THError("size mismatch, m1: %s, m2: %s", bm1.str, bm2.str);

    THError("matrix expected, got %dD tensor for t", t->dim());

  if( (t->size(0) != m1->size(0)) || (t->size(1) != m2->size(1)) ) {
    THDescBuff bt = THTensor_(sizeDesc)(t);
    THDescBuff bm1 = THTensor_(sizeDesc)(m1);
    THDescBuff bm2 = THTensor_(sizeDesc)(m2);
    THError("size mismatch, t: %s, m1: %s, m2: %s", bt.str, bm1.str, bm2.str);

  THTensor_(resizeAs)(r_, t);
  at::_copy_same_type_(r__wrap, t_wrap);

#define LDC_COND(M, N, LDC) ((N) == 1 || (LDC) >= THMax(1, M))

  if(r_->stride(0) == 1 &&
     LDC_COND(r_->size(0), r_->size(1), r_->stride(1)))
  else if(r_->stride(1) == 1 &&
          LDC_COND(r_->size(1), r_->size(0), r_->stride(0)))
    THTensor *transp_r_ = THTensor_(newTranspose)(r_, 0, 1);
    r__ = THTensor_(newClone)(transp_r_);
    c10::raw::intrusive_ptr::decref(transp_r_);
    THTensor_(transpose)(r__, NULL, 0, 1);

  int64_t m = r__->size((transpose_r == 'n' ? 0 : 1));
  int64_t n = r__->size((transpose_r == 'n' ? 1 : 0));
  int64_t k = m1->size((transpose_r == 'n' ? 1 : 0));
  int64_t ldr__ = r__->stride((transpose_r == 'n' ? 1 : 0));

  if(m1->stride((transpose_r == 'n' ? 0 : 1)) == 1 &&
     m1->stride((transpose_r == 'n' ? 1 : 0)) >= THMax(1, m))
  else if(m1->stride((transpose_r == 'n' ? 1 : 0)) == 1 &&
          m1->stride((transpose_r == 'n' ? 0 : 1)) >= THMax(1, k))
    transpose_m1 = (transpose_r == 'n' ? 't' : 'n');
    m1_ = THTensor_(newContiguous)(m1);

  if(m2->stride((transpose_r == 'n' ? 0 : 1)) == 1 &&
     m2->stride((transpose_r == 'n' ? 1 : 0)) >= THMax(1, k))
  else if(m2->stride((transpose_r == 'n' ? 1 : 0)) == 1 &&
          m2->stride((transpose_r == 'n' ? 0 : 1)) >= THMax(1, n))
    transpose_m2 = (transpose_r == 'n' ? 't' : 'n');
    m2_ = THTensor_(newContiguous)(m2);

  int64_t ldm1_ = (transpose_m1 == 'n' ? m1_->stride((transpose_r == 'n' ? 1 : 0)) : m1_->stride((transpose_r == 'n' ? 0 : 1)));
  int64_t ldm2_ = (transpose_m2 == 'n' ? m2_->stride((transpose_r == 'n' ? 1 : 0)) : m2_->stride((transpose_r == 'n' ? 0 : 1)));

  THBlas_(gemm)(transpose_m1,
                m1_->data<scalar_t>(),
                m2_->data<scalar_t>(),
                r__->data<scalar_t>(),

  c10::raw::intrusive_ptr::decref(m1_);
  c10::raw::intrusive_ptr::decref(m2_);
  THTensor_(freeCopyTo)(r__, r_);
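// r_ = beta * t + alpha * outer(vec1, vec2): rank-1 update via BLAS ger, falling back to a clone of r_ when its strides are not BLAS-compatible.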
void THTensor_(addr)(THTensor *r_, scalar_t beta, THTensor *t, scalar_t alpha, THTensor *vec1, THTensor *vec2)
  if( (THTensor_nDimensionLegacyNoScalars(vec1) != 1) || (THTensor_nDimensionLegacyNoScalars(vec2) != 1) )
    THError("vector and vector expected, got %dD, %dD tensors",
            THTensor_nDimensionLegacyNoScalars(vec1), THTensor_nDimensionLegacyNoScalars(vec2));

    THError("expected matrix, got %dD tensor for t", t->dim());

  auto vec1_size = THTensor_sizeLegacyNoScalars(vec1, 0);
  auto vec2_size = THTensor_sizeLegacyNoScalars(vec2, 0);
  auto vec1_stride = THTensor_strideLegacyNoScalars(vec1, 0);
  auto vec2_stride = THTensor_strideLegacyNoScalars(vec2, 0);

  if( (t->size(0) != vec1_size) || (t->size(1) != vec2_size) ) {
    THDescBuff bt = THTensor_(sizeDesc)(t);
    THDescBuff bv1 = THTensor_(sizeDesc)(vec1);
    THDescBuff bv2 = THTensor_(sizeDesc)(vec2);
    THError("size mismatch, t: %s, vec1: %s, vec2: %s", bt.str, bv1.str, bv2.str);

  THTensor_(resizeAs)(r_, t);
  at::_copy_same_type_(r__wrap, t_wrap);

    THTensor_(zero)(r_);
    THTensor_(mul)(r_, r_, beta);

#define LDA_COND(M, N, LDA) ((N) == 1 || (LDA) >= THMax(1, (M)))

  if(r_->stride(0) == 1 && LDA_COND(vec1_size, vec2_size, r_->stride(1)))
    THBlas_(ger)(vec1_size, vec2_size,
                 alpha, vec1->data<scalar_t>(), vec1_stride,
                 vec2->data<scalar_t>(), vec2_stride,
                 r_->data<scalar_t>(), r_->stride(1));
  else if(r_->stride(1) == 1 && LDA_COND(vec2_size, vec1_size, r_->stride(0)))
    THBlas_(ger)(vec2_size, vec1_size,
                 alpha, vec2->data<scalar_t>(), vec2_stride,
                 vec1->data<scalar_t>(), vec1_stride,
                 r_->data<scalar_t>(), r_->stride(0));
  THTensor *cr = THTensor_(newClone)(r_);
  THBlas_(ger)(vec2_size, vec1_size,
               alpha, vec2->data<scalar_t>(), vec2_stride,
               vec1->data<scalar_t>(), vec1_stride,
               cr->data<scalar_t>(), cr->stride(0));
  THTensor_(freeCopyTo)(cr, r_);
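// result = beta * t + alpha * sum over batches of batch1[b] * batch2[b]: batched matrix
// products accumulated into a single matrix by repeatedly calling addmm on each batch slice.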
void THTensor_(addbmm)(THTensor *result, scalar_t beta, THTensor *t, scalar_t alpha, THTensor *batch1, THTensor *batch2)
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(batch1) == 3, 1, "expected 3D tensor");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(batch2) == 3, 2, "expected 3D tensor");
  THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2,
             "equal number of batches expected, got %d, %d",
             THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0));
  THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2,
             "wrong matrix size, batch1: %dx%d, batch2: %dx%d",
             THTensor_(size)(batch1, 1), THTensor_(size)(batch1, 2),
             THTensor_(size)(batch2, 1), THTensor_(size)(batch2, 2));

  int64_t dim1 = THTensor_(size)(batch1, 1);
  int64_t dim2 = THTensor_(size)(batch2, 2);
  THArgCheck(THTensor_(size)(t, 0) == dim1, 1, "output tensor of incorrect size");
  THArgCheck(THTensor_(size)(t, 1) == dim2, 1, "output tensor of incorrect size");

  THTensor_(resizeAs)(result, t);
  at::Tensor result_wrap = THTensor_wrap(result);
  at::_copy_same_type_(result_wrap, t_wrap);

  THTensor *matrix1 = THTensor_(new)();
  THTensor *matrix2 = THTensor_(new)();

  for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) {
    THTensor_(select)(matrix1, batch1, 0, batch);
    THTensor_(select)(matrix2, batch2, 0, batch);
    THTensor_(addmm)(result, beta, result, alpha, matrix1, matrix2);

  c10::raw::intrusive_ptr::decref(matrix1);
  c10::raw::intrusive_ptr::decref(matrix2);