#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorEvenMoreMath.cpp"
#else

#include <TH/generic/THTensorApply.hpp>

void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, scalar_t value)
{
#ifdef _OPENMP
  int64_t tensor_size = THTensor_(nElement)(tensor);
  int tensor_contig = THTensor_(isContiguous)(tensor);
  int mask_contig = THTensor_(isContiguous)(mask);
  if (!omp_in_parallel() && tensor_contig && mask_contig) {
    /* Contiguous case: parallelize over the flat buffers. */
    TH_TENSOR_APPLY2_OMP(tensor_size, tensor_contig, mask_contig,
      scalar_t, tensor, unsigned char, mask,
      if (*mask_data > 1) {
        THError("Mask tensor can take 0 and 1 values only");
      } else if (*mask_data == 1) {
        *tensor_data = value;
      },
      TH_OMP_OVERHEAD_THRESHOLD);
    return;
  }
#endif
  /* Serial fallback for non-contiguous tensors or nested parallel regions. */
  TH_TENSOR_APPLY2(scalar_t, tensor, unsigned char, mask,
    if (*mask_data > 1) {
      THFree(mask_counter);
      THFree(tensor_counter);
      THError("Mask tensor can take 0 and 1 values only");
    } else if (*mask_data == 1) {
      *tensor_data = value;
    });
}
void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src)
{
  THTensor *srct = THTensor_(newContiguous)(src);
  scalar_t *src_data = srct->data<scalar_t>();
  ptrdiff_t cntr = 0;
  ptrdiff_t nelem = THTensor_(nElement)(srct);
  if (THTensor_(nElement)(tensor) != THByteTensor_nElement(mask))
  {
    c10::raw::intrusive_ptr::decref(srct);
    THError("Number of elements of destination tensor != Number of elements in mask");
  }
  TH_TENSOR_APPLY2(scalar_t, tensor, unsigned char, mask,
    if (*mask_data > 1)
    {
      c10::raw::intrusive_ptr::decref(srct);
      THFree(mask_counter);
      THFree(tensor_counter);
      THError("Mask tensor can take 0 and 1 values only");
    }
    else if (*mask_data == 1)
    {
      /* Every 1 in the mask consumes one element of src, in element order. */
      if (cntr == nelem)
      {
        c10::raw::intrusive_ptr::decref(srct);
        THFree(mask_counter);
        THFree(tensor_counter);
        THError("Number of elements of src < number of ones in mask");
      }
      *tensor_data = *src_data;
      src_data++;
      cntr++;
    });
  c10::raw::intrusive_ptr::decref(srct);
}
void THTensor_(maskedSelect)(THTensor *tensor, THTensor *src, THByteTensor *mask)
{
  ptrdiff_t numel = THByteTensor_sumall(mask);
  scalar_t *tensor_data;

  THAssert(numel <= LONG_MAX);
  /* The result is a 1-d tensor with one slot per set mask element. */
  THTensor_(resize1d)(tensor, numel);
  tensor_data = tensor->data<scalar_t>();
  TH_TENSOR_APPLY2(scalar_t, src, unsigned char, mask,
    if (*mask_data > 1)
    {
      THFree(mask_counter);
      THFree(src_counter);
      THError("Mask tensor can take 0 and 1 values only");
    }
    else if (*mask_data == 1)
    {
      *tensor_data = *src_data;
      tensor_data++;
    });
}
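/*
 * Illustrative sketch (assumed example values, not taken from this file): the
 * three masked ops above share one convention -- `mask` holds only 0s and 1s,
 * and the 1s select positions in element order. For a 1-d tensor, roughly:
 *
 *   src  = [10, 20, 30, 40]
 *   mask = [ 0,  1,  0,  1]
 *
 *   maskedFill(src, mask, 7)      -> src becomes [10, 7, 30, 7]
 *   maskedSelect(out, src, mask)  -> out becomes [20, 40]   (length = sum(mask))
 *   maskedCopy(src, mask, vals)   -> positions 1 and 3 take vals[0], vals[1]
 */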
/* Finds non-zero elements of a tensor and returns their subscripts. */
void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor)
{
  ptrdiff_t numel = 0;
  int64_t *subscript_data;
  int64_t i = 0;
#ifdef TH_REAL_IS_HALF
#define IS_NONZERO(val) ((val.x & 0x7fff) != 0)
#else
#define IS_NONZERO(val) ((val) != 0)
#endif
  /* First pass: count the non-zero elements to size the result. */
  TH_TENSOR_APPLY(scalar_t, tensor,
                  if IS_NONZERO(*tensor_data) {
                    ++numel;
                  });
  THAssert(numel <= LONG_MAX);
  THLongTensor_resize2d(subscript, numel, tensor->dim());
  if (numel <= 0) {
    return;
  }
  int64_t dimensions = tensor->dim();
  /* The extra slot lets the carry loop below terminate without a bounds check. */
  int64_t *sizes = new int64_t[dimensions+1];
  int64_t *idx = new int64_t[dimensions+1];
  int64_t *ii;
  int64_t *ss;
  std::fill(idx, idx+dimensions+1, 0);
  for (i = 0; i < dimensions; ++i) {
    sizes[dimensions - i - 1] = THTensor_(size)(tensor, i); /* reverse order */
  }
  sizes[dimensions] = 0;
  /* Second pass: write the subscript of every non-zero element. */
  subscript_data = THLongTensor_data(subscript);
  auto subscript_strides = THTensor_stridesLegacyNoScalars(subscript);
  subscript_strides[0] -= subscript_strides[1] * tensor->dim();
  TH_TENSOR_APPLY(scalar_t, tensor,
                  if IS_NONZERO(*tensor_data) {
                    ii = idx + dimensions;
                    for (int64_t dim = dimensions - 1; dim >= 0; dim--) {
                      --ii;
                      *subscript_data = *ii;
                      subscript_data += subscript_strides[1];
                    }
                    subscript_data += subscript_strides[0];
                  }
                  /* Advance the mixed-radix counter idx by one element. */
                  ii = idx;
                  ss = sizes;
                  ++(*ii);
                  while (*ii == *ss) {
                    *ii = 0;
                    ++ii;
                    ++ss;
                    ++(*ii);
                  });
  delete [] sizes;
  delete [] idx;
}
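/*
 * Sketch of the expected output (illustrative values, assuming the shape
 * convention above): for the 2x3 tensor
 *
 *   [[0, 3, 0],
 *    [7, 0, 5]]
 *
 * nonzero() produces a (numel x dim) = (3 x 2) LongTensor of subscripts:
 *
 *   [[0, 1],
 *    [1, 0],
 *    [1, 2]]
 */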
void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)
{
  ptrdiff_t i, numel;
  THTensor *tSlice, *sSlice;
  int64_t *index_data;
  scalar_t *tensor_data, *src_data;

  THArgCheck(THTensor_nDimensionLegacyNoScalars(index) == 1, 3, "Index is supposed to be 1-dimensional");
  THArgCheck(dim < THTensor_nDimensionLegacyNoScalars(src), 4, "Indexing dim %d is out of bounds of tensor", dim);

  numel = THLongTensor_nElement(index);

  std::vector<int64_t> newSize = THTensor_sizesLegacyNoScalars(src);
  THAssert(numel <= LONG_MAX);
  newSize[dim] = numel;
  THTensor_(resize)(tensor, newSize, {});

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (dim == 0 && THTensor_(isContiguous)(src) && THTensor_(isContiguous)(tensor))
  {
    /* Fast path: rows along dim 0 of a contiguous tensor can be memcpy'd. */
    tensor_data = tensor->data<scalar_t>();
    src_data = src->data<scalar_t>();
    auto src_size0 = THTensor_sizeLegacyNoScalars(src, 0);
    ptrdiff_t rowsize = src_size0 == 0 ? 1 : THTensor_(nElement)(src) / src_size0;

    /* Check that all indices are within range before touching any data. */
    int64_t max = src_size0 - 1;
    for (i = 0; i < numel; i++) {
      if (index_data[i] < 0 || index_data[i] > max) {
        THLongTensor_free(index);
        THError("index out of range");
      }
    }

    if (src->dim() <= 1) {
      #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i = 0; i < numel; i++)
        tensor_data[i] = src_data[index_data[i]];
    } else {
      #pragma omp parallel for if(numel*rowsize > TH_OMP_OVERHEAD_THRESHOLD) private(i)
      for (i = 0; i < numel; i++)
        memcpy(tensor_data + i*rowsize, src_data + index_data[i]*rowsize, rowsize*sizeof(scalar_t));
    }
  }
  else if (src->dim() <= 1)
  {
    for (i = 0; i < numel; i++)
      THTensor_(set1d)(tensor, i, THTensor_(get1d)(src, index_data[i]));
  }
  else
  {
    /* General path: copy one selected slice at a time. */
    for (i = 0; i < numel; i++)
    {
      tSlice = THTensor_(new)();
      sSlice = THTensor_(new)();
      THTensor_(select)(tSlice, tensor, dim, i);
      THTensor_(select)(sSlice, src, dim, index_data[i]);
      at::Tensor tSlice_wrap = THTensor_wrap(tSlice);
      at::Tensor sSlice_wrap = THTensor_wrap(sSlice);
      at::_copy_same_type_(tSlice_wrap, sSlice_wrap);
      c10::raw::intrusive_ptr::decref(tSlice);
      c10::raw::intrusive_ptr::decref(sSlice);
    }
  }

  THLongTensor_free(index);
}
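/*
 * Rough semantics (illustrative, assuming a 1-d index with valid entries):
 * indexSelect picks whole slices of `src` along `dim`, so with
 *   src = [[1, 2], [3, 4], [5, 6]],  dim = 0,  index = [2, 0]
 * the result is [[5, 6], [1, 2]].  indexCopy/indexAdd/indexFill below are the
 * write-side counterparts: they address slices of the destination by index.
 */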
void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  ptrdiff_t i, numel;
  THTensor *tSlice, *sSlice;
  int64_t *index_data;

  numel = THLongTensor_nElement(index);

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (tensor->dim() > 1)
  {
    tSlice = THTensor_(new)();
    sSlice = THTensor_(new)();

    for (i = 0; i < numel; i++)
    {
      /* Copy slice i of src into slice index_data[i] of tensor. */
      THTensor_(select)(tSlice, tensor, dim, index_data[i]);
      THTensor_(select)(sSlice, src, dim, i);
      at::Tensor tSlice_wrap = THTensor_wrap(tSlice);
      at::Tensor sSlice_wrap = THTensor_wrap(sSlice);
      at::_copy_same_type_(tSlice_wrap, sSlice_wrap);
    }

    c10::raw::intrusive_ptr::decref(tSlice);
    c10::raw::intrusive_ptr::decref(sSlice);
  }
  else
  {
    for (i = 0; i < numel; i++)
      THTensor_(set1d)(tensor, index_data[i], THTensor_(get1d)(src, i));
  }

  THLongTensor_free(index);
}
static ptrdiff_t THTensor_(dataOffset)(THTensor* tensor, ptrdiff_t linearIndex) {
  auto size = THTensor_sizesLegacyNoScalars(tensor);
  auto stride = THTensor_stridesLegacyNoScalars(tensor);
  int nDim = THTensor_nDimensionLegacyAll(tensor);
  ptrdiff_t dataOffset = 0;
  /* Peel digits off the linear index from the innermost dimension outward. */
  for (int i = nDim - 1; i >= 0; i--) {
    dataOffset += (linearIndex % size[i]) * stride[i];
    linearIndex /= size[i];
  }
  return dataOffset;
}
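/*
 * Worked example (illustrative numbers): for a 2x3 tensor with strides (3, 1),
 * i.e. contiguous row-major storage, linearIndex 4 decomposes as
 *   i = 1: 4 % 3 = 1, offset += 1*1, linearIndex = 4 / 3 = 1
 *   i = 0: 1 % 2 = 1, offset += 1*3
 * giving offset 4, element (1, 1).  With non-trivial strides (e.g. a
 * transposed view) the same digits are scaled by the view's strides instead.
 */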
static inline void THTensor_(checkLinearIndex)(int64_t linearIndex, int64_t numel) {
  THArgCheck(linearIndex < numel && linearIndex >= -numel, 2,
             "out of range: %d out of %d", (int)linearIndex, (int)numel);
}
static inline int64_t THTensor_(wrapLinearIndex)(int64_t linearIndex, int64_t numel) {
  return linearIndex < 0 ? linearIndex + numel : linearIndex;
}
void THTensor_(take)(THTensor *r_, THTensor *src, THLongTensor *index)
{
  THTensor_(resizeNd)(r_, index->dim(), THTensor_getSizePtr(index), NULL);
  THTensor* dst = THTensor_(newContiguous)(r_);

  index = THLongTensor_newContiguous(index);
  int64_t* index_data = THLongTensor_data(index);
  ptrdiff_t srcElements = THTensor_(nElement)(src);
  scalar_t* src_data = src->data<scalar_t>();
  scalar_t* dst_data = dst->data<scalar_t>();
  ptrdiff_t nIndices = THLongTensor_nElement(index);
  int isContiguous = THTensor_(isContiguous)(src);

  /* Exceptions must not be thrown across an OpenMP parallel section, so record
     the position of the first invalid index and raise the error afterwards. */
  std::atomic<int64_t> invalidIdxPos(-1);

  ptrdiff_t i;
  #pragma omp parallel for if(nIndices > TH_OMP_OVERHEAD_THRESHOLD) private(i)
  for (i = 0; i < nIndices; i++) {
    int64_t idx = index_data[i];
    if (idx < srcElements && idx >= -srcElements) {
      idx = THTensor_(wrapLinearIndex)(idx, srcElements);
      if (isContiguous) {
        dst_data[i] = src_data[idx];
      } else {
        dst_data[i] = src_data[THTensor_(dataOffset)(src, idx)];
      }
    } else {
      int64_t tmp = -1;
      invalidIdxPos.compare_exchange_strong(tmp, i);
    }
  }

  if (invalidIdxPos >= 0) {
    THTensor_(checkLinearIndex)(index_data[invalidIdxPos], srcElements);
  }

  THLongTensor_free(index);
  THTensor_(freeCopyTo)(dst, r_);
}
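/*
 * Sketch (assumed example values): take/put treat the tensor as if it were
 * flattened, so for src = [[1, 2], [3, 4]] and index = [3, 0, -1],
 * take() returns [4, 1, 4] -- negative indices wrap via wrapLinearIndex, and
 * non-contiguous sources go through dataOffset instead of the flat pointer.
 */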
void THTensor_(put)(THTensor *tensor, THLongTensor *index, THTensor *src, int accumulate)
{
  THArgCheck(THLongTensor_nElement(index) == THTensor_(nElement)(src), 3,
             "src should have the same number of elements as index");

  index = THLongTensor_newContiguous(index);
  src = THTensor_(newContiguous)(src);
  scalar_t* data = tensor->data<scalar_t>();
  ptrdiff_t numel = THTensor_(nElement)(tensor);
  int is_contiguous = THTensor_(isContiguous)(tensor);

  TH_TENSOR_APPLY2(int64_t, index, scalar_t, src,
    THTensor_(checkLinearIndex)(*index_data, numel);
    int64_t linearIndex = THTensor_(wrapLinearIndex)(*index_data, numel);
    int64_t dataOffset = is_contiguous ? linearIndex : THTensor_(dataOffset)(tensor, linearIndex);
    if (accumulate) {
      data[dataOffset] += *src_data;
    } else {
      data[dataOffset] = *src_data;
    });

  c10::raw::intrusive_ptr::decref(src);
  THLongTensor_free(index);
}
void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  ptrdiff_t i, numel;
  THTensor *tSlice, *sSlice;
  int64_t *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(THTensor_nDimensionLegacyNoScalars(index) == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < THTensor_nDimensionLegacyNoScalars(src), 4, "Indexing dim %d is out of bounds of tensor", dim);
  THArgCheck(numel == THTensor_sizeLegacyNoScalars(src, dim), 4, "Number of indices should be equal to source:size(dim)");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (tensor->dim() > 1)
  {
    tSlice = THTensor_(new)();
    sSlice = THTensor_(new)();

    for (i = 0; i < numel; i++)
    {
      /* tensor[index[i]] += src[i] along `dim`, one slice at a time. */
      THTensor_(select)(tSlice, tensor, dim, index_data[i]);
      THTensor_(select)(sSlice, src, dim, i);
      THTensor_(cadd)(tSlice, tSlice, 1.0, sSlice);
    }

    c10::raw::intrusive_ptr::decref(tSlice);
    c10::raw::intrusive_ptr::decref(sSlice);
  }
  else
  {
    for (i = 0; i < numel; i++)
    {
      THTensor_(set1d)(tensor,
                       index_data[i],
                       THTensor_(get1d)(src, i) + THTensor_(get1d)(tensor, index_data[i]));
    }
  }

  THLongTensor_free(index);
}
void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, scalar_t val)
{
  ptrdiff_t i, numel;
  THTensor *tSlice;
  int64_t *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(THTensor_nDimensionLegacyNoScalars(index) == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < THTensor_nDimensionLegacyNoScalars(tensor), 4, "Indexing dim %d is out of bounds of tensor", dim);

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i = 0; i < numel; i++)
  {
    if (tensor->dim() > 1)
    {
      /* Fill the whole selected slice with val. */
      tSlice = THTensor_(new)();
      THTensor_(select)(tSlice, tensor, dim, index_data[i]);
      THTensor_(fill)(tSlice, val);
      c10::raw::intrusive_ptr::decref(tSlice);
    }
    else
    {
      THTensor_(set1d)(tensor, index_data[i], val);
    }
  }
  THLongTensor_free(index);
}
void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)
{
  int64_t elems_per_row, i, idx;

  THArgCheck(THLongTensor_nDimensionLegacyNoScalars(index) == THTensor_(nDimensionLegacyNoScalars)(src), 4,
             "Index tensor must have same dimensions as input tensor");
  THArgCheck(dim >= 0 && dim < THTensor_(nDimensionLegacyNoScalars)(tensor), 3,
             "Index dimension is out of bounds");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) == THTensor_(nDimensionLegacyNoScalars)(tensor), 2,
             "Input tensor must have same dimensions as output tensor");

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY3(scalar_t, tensor, scalar_t, src, int64_t, index, dim,
                       TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= src_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in gather");
                         }
                         *(tensor_data + i*tensor_stride) = src_data[idx * src_stride];
                       })
}
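/*
 * Gather/scatter relationship (illustrative; indices assumed in range), for
 * dim = 0:
 *   gather  reads   tensor[i][j] = src[index[i][j]][j]
 *   scatter writes  tensor[index[i][j]][j] = src[i][j]
 * e.g. src = [[1, 2], [3, 4]], index = [[1, 0]], dim = 0
 *   -> gather output [[3, 2]].
 */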
void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  int64_t elems_per_row, i, idx;
  int index_ndim_legacy_all = THTensor_nDimensionLegacyAll(index);

  THArgCheck(dim < THTensor_(nDimensionLegacyNoScalars)(tensor), 2, "Index dimension is out of bounds");
  THArgCheck(index_ndim_legacy_all == 0
             || THLongTensor_nDimensionLegacyNoScalars(index) == THTensor_(nDimensionLegacyNoScalars)(tensor), 3,
             "Index tensor must be either empty or have same dimensions as output tensor");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) == THTensor_(nDimensionLegacyNoScalars)(tensor), 4,
             "Input tensor must have same dimensions as output tensor");

  /* An empty index tensor makes this a no-op. */
  if (index_ndim_legacy_all == 0)
    return;

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY3(scalar_t, tensor, scalar_t, src, int64_t, index, dim,
                       TH_TENSOR_DIM_APPLY3_SIZE_SCATTER,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= tensor_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in scatter");
                         }
                         tensor_data[idx * tensor_stride] = *(src_data + i*src_stride);
                       })
}
void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  int64_t elems_per_row, i, idx;
  int index_ndim_legacy_all = THTensor_nDimensionLegacyAll(index);

  THArgCheck(dim < THTensor_(nDimensionLegacyNoScalars)(tensor), 2, "Index dimension is out of bounds");
  THArgCheck(index_ndim_legacy_all == 0
             || THLongTensor_nDimensionLegacyNoScalars(index) == THTensor_(nDimensionLegacyNoScalars)(tensor), 3,
             "Index tensor must have same dimensions as output tensor");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) == THTensor_(nDimensionLegacyNoScalars)(tensor), 4,
             "Input tensor must have same dimensions as output tensor");

  /* An empty index tensor makes this a no-op. */
  if (index_ndim_legacy_all == 0)
    return;

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY3(scalar_t, tensor, scalar_t, src, int64_t, index, dim,
                       TH_TENSOR_DIM_APPLY3_SIZE_SCATTER,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= tensor_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in scatterAdd");
                         }
                         tensor_data[idx * tensor_stride] += *(src_data + i*src_stride);
                       })
}
void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, scalar_t val)
{
  int64_t elems_per_row, i, idx;
  int index_ndim_legacy_all = THLongTensor_nDimensionLegacyAll(index);

  THArgCheck(dim < THTensor_(nDimensionLegacyAll)(tensor), 2, "Index dimension is out of bounds");
  THArgCheck(index_ndim_legacy_all == 0 || index_ndim_legacy_all == THLongTensor_nDimensionLegacyAll(tensor), 3,
             "Index tensor must either be empty or have same dimensions as output tensor");

  /* An empty index tensor makes this a no-op. */
  if (index_ndim_legacy_all == 0)
    return;

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY2(scalar_t, tensor, int64_t, index, dim,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= tensor_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in scatter");
                         }
                         tensor_data[idx * tensor_stride] = val;
                       })
}
accreal THTensor_(dot)(THTensor *tensor, THTensor *src)
{
  accreal sum = 0;
  /* Hand each contiguous run of both tensors to BLAS, then skip past it. */
  TH_TENSOR_APPLY2(scalar_t, tensor, scalar_t, src,
                   int64_t sz = (tensor_size-tensor_i < src_size-src_i ? tensor_size-tensor_i : src_size-src_i);
                   sum += THBlas_(dot)(sz, src_data, src_stride, tensor_data, tensor_stride);
                   tensor_i += sz;
                   src_i += sz;
                   tensor_data += sz*tensor_stride;
                   src_data += sz*src_stride;
                   break;);
  return sum;
}
scalar_t THTensor_(minall)(THTensor *tensor)
{
  scalar_t theMin, value;

  THArgCheck(THTensor_nDimensionLegacyAll(tensor) > 0, 1, "tensor must have one dimension");
  theMin = tensor->data<scalar_t>()[0];
  TH_TENSOR_APPLY(scalar_t, tensor,
                  value = *tensor_data;
                  /* Not the same as value < theMin when NaNs are present. */
                  if(!(value >= theMin))
                  {
                    theMin = value;
                    th_isnan_break(value)
                  });
  return theMin;
}
scalar_t THTensor_(maxall)(THTensor *tensor)
{
  scalar_t theMax, value;

  THArgCheck(THTensor_nDimensionLegacyAll(tensor) > 0, 1, "tensor must have one dimension");
  theMax = tensor->data<scalar_t>()[0];
  TH_TENSOR_APPLY(scalar_t, tensor,
                  value = *tensor_data;
                  /* Not the same as value > theMax when NaNs are present. */
                  if(!(value <= theMax))
                  {
                    theMax = value;
                    th_isnan_break(value)
                  });
  return theMax;
}
accreal THTensor_(sumall)(THTensor *tensor)
{
  accreal sum = 0;
#ifdef _OPENMP
  if (!omp_in_parallel()) {
    TH_TENSOR_APPLY_REDUCTION_OMP(scalar_t, tensor, +:sum, sum += *tensor_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
    return sum;
  }
#endif
  TH_TENSOR_APPLY(scalar_t, tensor, sum += *tensor_data;);
  return sum;
}
void THTensor_(add)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(adds)(r__data, t_data, value, r__len););
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data + value;, ORDIN_TH_OMP_OVERHEAD_THRESHOLD)
    return;
  }
#endif
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data + value;);
}
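/*
 * Note on the pattern shared by add/mul/div and the element-wise ops below:
 * when both tensors are contiguous, add/mul/div use the vectorized
 * THVector_(adds/muls/divs) path, while lshift/rshift/fmod/remainder/bitand
 * use a raw OpenMP loop over the flat buffers.  In the non-contiguous case the
 * strided OMP apply macro is used, unless we are already inside a parallel
 * region, in which case the plain serial TH_TENSOR_APPLY2 runs.
 */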
void THTensor_(sub)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(add)(r_, t, -value);
}

void THTensor_(add_scaled)(THTensor *r_, THTensor *t, scalar_t value, scalar_t alpha)
{
  THTensor_(add)(r_, t, value * alpha);
}

void THTensor_(sub_scaled)(THTensor *r_, THTensor *t, scalar_t value, scalar_t alpha)
{
  THTensor_(add)(r_, t, -value * alpha);
}
void THTensor_(mul)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(muls)(r__data, t_data, value, r__len););
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data * value;, ORDIN_TH_OMP_OVERHEAD_THRESHOLD)
    return;
  }
#endif
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data * value;);
}
void THTensor_(div)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(divs)(r__data, t_data, value, r__len););
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data / value;, ORDIN_TH_OMP_OVERHEAD_THRESHOLD)
    return;
  }
#endif
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data / value;);
}
void THTensor_(lshift)(THTensor *r_, THTensor *t, scalar_t value)
{
#if defined(TH_REAL_IS_FLOAT)
  return THTensor_(mul)(r_, t, powf(2, value));
#elif defined(TH_REAL_IS_DOUBLE)
  return THTensor_(mul)(r_, t, pow(2, value));
#elif defined(TH_REAL_IS_HALF)
  return THError("lshift is not supported for torch.HalfTensor");
#else
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i = 0; i < r_Size; i++) {
#if defined(TH_REAL_IS_BYTE)
      rp[i] = ((scalar_t) tp[i]) << value;
#else
      rp[i] = ((ureal) tp[i]) << value;
#endif
    }
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
#if defined(TH_REAL_IS_BYTE)
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) << value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) << value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    return;
  }
#endif
#if defined(TH_REAL_IS_BYTE)
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) << value););
#else
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) << value););
#endif
#endif
}
void THTensor_(rshift)(THTensor *r_, THTensor *t, scalar_t value)
{
#if defined(TH_REAL_IS_FLOAT)
  return THTensor_(div)(r_, t, powf(2, value));
#elif defined(TH_REAL_IS_DOUBLE)
  return THTensor_(div)(r_, t, pow(2, value));
#elif defined(TH_REAL_IS_HALF)
  return THError("rshift is not supported for torch.HalfTensor");
#else
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i = 0; i < r_Size; i++) {
#if defined(TH_REAL_IS_BYTE)
      rp[i] = ((scalar_t) tp[i]) >> value;
#else
      rp[i] = ((ureal) tp[i]) >> value;
#endif
    }
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
#if defined(TH_REAL_IS_BYTE)
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) >> value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) >> value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    return;
  }
#endif
#if defined(TH_REAL_IS_BYTE)
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) >> value););
#else
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) >> value););
#endif
#endif
}
void THTensor_(fmod)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
    for (i = 0; i < r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      rp[i] = fmod(tp[i], value);
#else
      rp[i] = tp[i] % value;
#endif
    }
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = fmod(*t_data, value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (*t_data % value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    return;
  }
#endif
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = fmod(*t_data, value););
#else
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (*t_data % value););
#endif
}
static inline bool modulo_wrap(scalar_t a, scalar_t b) {
  return (a != 0) && (a < 0) != (b < 0);
}
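/*
 * Sketch of why modulo_wrap exists (assumed example values): C's % and fmod
 * take the sign of the dividend, while remainder() below matches the sign of
 * the divisor.  For value = 3:
 *   fmod path:      -1 % 3 == -1
 *   remainder path: -1 % 3 == -1, modulo_wrap(-1, 3) is true, so -1 + 3 == 2.
 */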
void THTensor_(remainder)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
    for (i = 0; i < r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      rp[i] = (value == 0) ? NAN : tp[i] - value * floor(tp[i] / value);
#else
      /* There is no NAN for integers. */
      rp[i] = tp[i] % value;
      if (modulo_wrap(rp[i], value))
        rp[i] += value;
#endif
    }
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data % value;
                         if (modulo_wrap(*r__data, value)) *r__data += value;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    return;
  }
#endif
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value););
#else
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data % value;
                   if (modulo_wrap(*r__data, value)) *r__data += value;);
#endif
}
void THTensor_(bitand)(THTensor *r_, THTensor *t, scalar_t value)
{
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  (void)r_;
  (void)t;
  (void)value;
  return THError("bitand is only supported for integer type tensors");
#else
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i = 0; i < r_Size; i++) {
      rp[i] = tp[i] & value;
    }
    return;
  }
#ifdef _OPENMP
  if (!omp_in_parallel()) {
    TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data & value;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
    return;
  }
#endif
  TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data & value;);
#endif
}