Caffe2 - C++ API
A deep learning, cross-platform ML framework
THTensorEvenMoreMath.cpp
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorEvenMoreMath.cpp"
#else

#include <TH/generic/THTensorApply.hpp>

void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, scalar_t value)
{
#ifdef _OPENMP
  int64_t tensor_size = THTensor_(nElement)(tensor);
  int tensor_contig = THTensor_(isContiguous)(tensor);
  int mask_contig = THTensor_(isContiguous)(mask);
  if (!omp_in_parallel() && tensor_contig && mask_contig) {
    TH_TENSOR_APPLY2_OMP(tensor_size, tensor_contig, mask_contig,
      scalar_t, tensor, unsigned char, mask,
      if (*mask_data > 1) {
        THError("Mask tensor can take 0 and 1 values only");
      } else if (*mask_data == 1) {
        *tensor_data = value;
      },
      TH_OMP_OVERHEAD_THRESHOLD);
    return;
  }
#endif
  TH_TENSOR_APPLY2(scalar_t, tensor, unsigned char, mask,
    if (*mask_data > 1) {
      THFree(mask_counter);
      THFree(tensor_counter);
      THError("Mask tensor can take 0 and 1 values only");
    } else if (*mask_data == 1) {
      *tensor_data = value;
    });
}

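/* Illustrative usage sketch: this generic file is instantiated once per scalar
 * type, so THTensor_(maskedFill) expands to e.g. THFloatTensor_maskedFill for
 * float. Assuming the float specialization and the usual TH C constructors:
 *
 *   THFloatTensor *t = THFloatTensor_newWithSize1d(4);
 *   THByteTensor  *m = THByteTensor_newWithSize1d(4);
 *   THFloatTensor_fill(t, 0.0f);
 *   THByteTensor_fill(m, 1);                // mask of all ones
 *   THFloatTensor_maskedFill(t, m, 3.0f);   // t is now [3, 3, 3, 3]
 *   THFloatTensor_free(t);
 *   THByteTensor_free(m);
 */
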
void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor *src)
{
  THTensor *srct = THTensor_(newContiguous)(src);
  scalar_t *src_data = srct->data<scalar_t>();
  ptrdiff_t cntr = 0;
  ptrdiff_t nelem = THTensor_(nElement)(srct);
  if (THTensor_(nElement)(tensor) != THByteTensor_nElement(mask))
  {
    c10::raw::intrusive_ptr::decref(srct);
    THError("Number of elements of destination tensor != Number of elements in mask");
  }
  TH_TENSOR_APPLY2(scalar_t, tensor, unsigned char, mask,
    if (*mask_data > 1)
    {
      c10::raw::intrusive_ptr::decref(srct);
      THFree(mask_counter);
      THFree(tensor_counter);
      THError("Mask tensor can take 0 and 1 values only");
    }
    else if (*mask_data == 1)
    {
      if (cntr == nelem)
      {
        c10::raw::intrusive_ptr::decref(srct);
        THFree(mask_counter);
        THFree(tensor_counter);
        THError("Number of elements of src < number of ones in mask");
      }
      *tensor_data = *src_data;
      src_data++;
      cntr++;
    });
  c10::raw::intrusive_ptr::decref(srct);
}

void THTensor_(maskedSelect)(THTensor *tensor, THTensor *src, THByteTensor *mask)
{
  ptrdiff_t numel = THByteTensor_sumall(mask);
  scalar_t *tensor_data;

#ifdef DEBUG
  THAssert(numel <= LONG_MAX);
#endif
  THTensor_(resize1d)(tensor, numel);
  tensor_data = tensor->data<scalar_t>();
  TH_TENSOR_APPLY2(scalar_t, src, unsigned char, mask,
    if (*mask_data > 1)
    {
      THFree(mask_counter);
      THFree(src_counter);
      THError("Mask tensor can take 0 and 1 values only");
    }
    else if (*mask_data == 1)
    {
      *tensor_data = *src_data;
      tensor_data++;
    });
}

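/* Semantics sketch for the masked ops above: maskedCopy walks the destination
 * and the mask together and consumes src elements in order wherever the mask
 * is 1, while maskedSelect resizes the destination to the number of ones in
 * the mask and copies out the selected src elements. For example, assuming
 * the float specialization:
 *
 *   // src = [10, 20, 30, 40], mask = [1, 0, 1, 0]
 *   THFloatTensor_maskedSelect(out, src, mask);   // out becomes [10, 30]
 */
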
// Finds non-zero elements of a tensor and returns their subscripts
void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor)
{
  ptrdiff_t numel = 0;
  int64_t *subscript_data;
  int64_t i = 0;
#ifdef TH_REAL_IS_HALF
#define IS_NONZERO(val) ((val.x & 0x7fff) != 0)
#else
#define IS_NONZERO(val) ((val)!=0)
#endif

  /* First pass to determine the size of subscripts */
  TH_TENSOR_APPLY(scalar_t, tensor,
    if IS_NONZERO(*tensor_data) {
      ++numel;
    });
#ifdef DEBUG
  THAssert(numel <= LONG_MAX);
#endif
  THLongTensor_resize2d(subscript, numel, tensor->dim());
  if (numel <= 0) {
    return;
  }
  int64_t dimensions = tensor->dim();
  // The +1 is faster than an additional condition check inside the loop
  int64_t *sizes = new int64_t[dimensions+1];
  int64_t *idx = new int64_t[dimensions+1];
  int64_t *ii;
  int64_t *ss;
  std::fill(idx, idx+dimensions+1, 0);
  for (i = 0; i < dimensions; ++i) {
    sizes[dimensions - i - 1] = THTensor_(size)(tensor, i); // reverse order important
  }
  sizes[dimensions] = 0;
  /* Second pass populates subscripts */
  subscript_data = THLongTensor_data(subscript);
  auto subscript_strides = THTensor_stridesLegacyNoScalars(subscript);
  subscript_strides[0] -= subscript_strides[1] * tensor->dim();
  TH_TENSOR_APPLY(scalar_t, tensor,
    if IS_NONZERO(*tensor_data) {
      ii = idx + dimensions;
      for (int64_t dim = dimensions - 1; dim >= 0; dim--) {
        --ii;
        *subscript_data = *ii;
        subscript_data += subscript_strides[1];
      }
      subscript_data += subscript_strides[0];
    }
    ii = idx;
    ss = sizes;
    ++(*ii);
    while (*ii == *ss) {
      *ii = 0;
      ++ii;
      ++ss;
      ++(*ii);
    }
  );
  delete [] sizes;
  delete [] idx;
}

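/* Shape sketch: subscript is resized to numel x tensor->dim(), one row of
 * coordinates per non-zero element, visited in row-major order. For example,
 * assuming the float specialization:
 *
 *   // tensor = [[0, 7],
 *   //           [5, 0]]
 *   THFloatTensor_nonzero(subscript, tensor);
 *   // subscript is the 2x2 LongTensor [[0, 1],
 *   //                                  [1, 0]]
 */
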
void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)
{
  ptrdiff_t i, numel;
  THTensor *tSlice, *sSlice;
  int64_t *index_data;
  scalar_t *tensor_data, *src_data;

  THArgCheck(THTensor_nDimensionLegacyNoScalars(index) == 1, 3, "Index is supposed to be 1-dimensional");
  THArgCheck(dim < THTensor_nDimensionLegacyNoScalars(src), 4, "Indexing dim %d is out of bounds of tensor", dim);

  numel = THLongTensor_nElement(index);

  std::vector<int64_t> newSize = THTensor_sizesLegacyNoScalars(src);
#ifdef DEBUG
  THAssert(numel <= LONG_MAX);
#endif
  newSize[dim] = numel;
  THTensor_(resize)(tensor, newSize, {});

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (dim == 0 && THTensor_(isContiguous)(src) && THTensor_(isContiguous)(tensor))
  {
    tensor_data = tensor->data<scalar_t>();
    src_data = src->data<scalar_t>();
    auto src_size0 = THTensor_sizeLegacyNoScalars(src, 0);
    ptrdiff_t rowsize = src_size0 == 0 ? 1 : THTensor_(nElement)(src) / src_size0;

    // check that the indices are within range
    int64_t max = src_size0 - 1;
    for (i=0; i<numel; i++) {
      if (index_data[i] < 0 || index_data[i] > max) {
        THLongTensor_free(index);
        THError("index out of range");
      }
    }

    // When src is empty, tensor_data may be nullptr and the memcpy would trigger
    // ubsan, so we skip copying entirely when every slice to copy is empty.
    if (rowsize > 0) {
      if (src->dim() <= 1) {
        #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i)
        for (i=0; i<numel; i++)
          tensor_data[i] = src_data[index_data[i]];
      } else {
        #pragma omp parallel for if(numel*rowsize > TH_OMP_OVERHEAD_THRESHOLD) private(i)
        for (i=0; i<numel; i++)
          memcpy(tensor_data + i*rowsize, src_data + index_data[i] * rowsize, rowsize*sizeof(scalar_t));
      }
    }
  }
  else if (src->dim() <= 1)
  {
    for (i=0; i<numel; i++)
      THTensor_(set1d)(tensor, i, THTensor_(get1d)(src, index_data[i]));
  }
  else
  {
    for (i=0; i<numel; i++)
    {
      tSlice = THTensor_(new)();
      sSlice = THTensor_(new)();
      THTensor_(select)(tSlice, tensor, dim, i);
      THTensor_(select)(sSlice, src, dim, index_data[i]);
      at::Tensor tSlice_wrap = THTensor_wrap(tSlice);
      at::Tensor sSlice_wrap = THTensor_wrap(sSlice);
      at::_copy_same_type_(tSlice_wrap, sSlice_wrap);
      c10::raw::intrusive_ptr::decref(tSlice);
      c10::raw::intrusive_ptr::decref(sSlice);
    }
  }

  THLongTensor_free(index);
}

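/* Semantics sketch, in pseudo-notation: along dimension dim, slice i of the
 * output is slice index[i] of src, i.e.
 *
 *   tensor.select(dim, i) == src.select(dim, index[i])   for every i
 *
 * so with dim == 0 and a 2-D float src, THFloatTensor_indexSelect(out, src, 0, idx)
 * gathers whole rows of src in the order given by idx.
 */
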
void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  ptrdiff_t i, numel;
  THTensor *tSlice, *sSlice;
  int64_t *index_data;

  // Error checking for this function has moved to ATen!!

  numel = THLongTensor_nElement(index);

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (tensor->dim() > 1)
  {
    tSlice = THTensor_(new)();
    sSlice = THTensor_(new)();

    for (i=0; i<numel; i++)
    {
      THTensor_(select)(tSlice, tensor, dim, index_data[i]);
      THTensor_(select)(sSlice, src, dim, i);
      at::Tensor tSlice_wrap = THTensor_wrap(tSlice);
      at::Tensor sSlice_wrap = THTensor_wrap(sSlice);
      at::_copy_same_type_(tSlice_wrap, sSlice_wrap);
    }

    c10::raw::intrusive_ptr::decref(tSlice);
    c10::raw::intrusive_ptr::decref(sSlice);
  }
  else
  {
    for (i=0; i<numel; i++)
    {
      THTensor_(set1d)(tensor, index_data[i], THTensor_(get1d)(src, i));
    }
  }
  THLongTensor_free(index);
}

static ptrdiff_t THTensor_(dataOffset)(THTensor* tensor, ptrdiff_t linearIndex) {
  auto size = THTensor_sizesLegacyNoScalars(tensor);
  auto stride = THTensor_stridesLegacyNoScalars(tensor);
  int nDim = THTensor_nDimensionLegacyAll(tensor);
  ptrdiff_t dataOffset = 0;
  for (int i = nDim - 1; i >= 0; i--) {
    dataOffset += (linearIndex % size[i]) * stride[i];
    linearIndex /= size[i];
  }
  return dataOffset;
}

static inline void THTensor_(checkLinearIndex)(int64_t linearIndex, int64_t numel) {
  THArgCheck(linearIndex < numel && linearIndex >= -numel, 2, "out of range: %d out of %d", (int)linearIndex, (int)numel);
}

static inline int64_t THTensor_(wrapLinearIndex)(int64_t linearIndex, int64_t numel) {
  return linearIndex < 0 ? linearIndex + numel : linearIndex;
}

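/* Worked example for the helpers above: for a non-contiguous tensor with
 * sizes [2, 3] and strides [1, 2] (a transposed 3x2 buffer), linear index 4
 * corresponds to coordinates (1, 1), so
 *
 *   dataOffset = (4 % 3) * 2 + ((4 / 3) % 2) * 1 = 2 + 1 = 3
 *
 * wrapLinearIndex turns a negative index i into i + numel, after
 * checkLinearIndex has verified that -numel <= i < numel.
 */
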
void THTensor_(take)(THTensor *r_, THTensor *src, THLongTensor *index)
{
  THTensor_(resizeNd)(r_, index->dim(), THTensor_getSizePtr(index), NULL);
  THTensor* dst = THTensor_(newContiguous)(r_);

  index = THLongTensor_newContiguous(index);
  int64_t* index_data = THLongTensor_data(index);
  ptrdiff_t srcElements = THTensor_(nElement)(src);
  scalar_t* src_data = src->data<scalar_t>();
  scalar_t* dst_data = dst->data<scalar_t>();
  ptrdiff_t nIndices = THLongTensor_nElement(index);
  int isContiguous = THTensor_(isContiguous)(src);

  // Exceptions must not be thrown across OpenMP parallel sections, so we
  // record the position of the invalid index and throw the exception after the
  // loop.
  std::atomic<int64_t> invalidIdxPos(-1);

  ptrdiff_t i;
  #pragma omp parallel for if(nIndices > TH_OMP_OVERHEAD_THRESHOLD) private(i)
  for (i = 0; i < nIndices; i++) {
    int64_t idx = index_data[i];
    if (idx < srcElements && idx >= -srcElements) {
      idx = THTensor_(wrapLinearIndex)(idx, srcElements);
      if (isContiguous) {
        dst_data[i] = src_data[idx];
      } else {
        dst_data[i] = src_data[THTensor_(dataOffset)(src, idx)];
      }
    } else {
      int64_t tmp = -1;
      invalidIdxPos.compare_exchange_strong(tmp, i);
    }
  }

  if (invalidIdxPos >= 0) {
    THTensor_(checkLinearIndex)(index_data[invalidIdxPos], srcElements);
  }

  THLongTensor_free(index);
  THTensor_(freeCopyTo)(dst, r_);
}

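/* Semantics sketch: take treats src as if it were flattened and gathers by
 * linear index, producing a result shaped like index; negative indices count
 * from the end. For example:
 *
 *   // src (2x2) = [[1, 2],
 *   //              [3, 4]],  index = [3, 0, -1]
 *   // take(r_, src, index)  ->  r_ = [4, 1, 4]
 */
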
void THTensor_(put)(THTensor *tensor, THLongTensor *index, THTensor *src, int accumulate)
{
  THArgCheck(THLongTensor_nElement(index) == THTensor_(nElement)(src), 3,
             "src should have the same number of elements as index");

  index = THLongTensor_newContiguous(index);
  src = THTensor_(newContiguous)(src);
  scalar_t* data = tensor->data<scalar_t>();
  ptrdiff_t numel = THTensor_(nElement)(tensor);
  int is_contiguous = THTensor_(isContiguous)(tensor);

  TH_TENSOR_APPLY2(int64_t, index, scalar_t, src,
    THTensor_(checkLinearIndex)(*index_data, numel);
    int64_t linearIndex = THTensor_(wrapLinearIndex)(*index_data, numel);
    int64_t dataOffset = is_contiguous ? linearIndex : THTensor_(dataOffset)(tensor, linearIndex);
    if (accumulate) {
      data[dataOffset] += *src_data;
    } else {
      data[dataOffset] = *src_data;
    }
  );

  c10::raw::intrusive_ptr::decref(src);
  THLongTensor_free(index);
}

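/* Semantics sketch: put is the scatter counterpart of take; it writes src
 * elements into tensor at the given linear indices, and with accumulate != 0
 * it adds instead of overwriting:
 *
 *   // tensor = [1, 1, 1, 1], index = [1, 3], src = [5, 7]
 *   //   put(tensor, index, src, accumulate=0)  ->  tensor = [1, 5, 1, 7]
 *   //   put(tensor, index, src, accumulate=1)  ->  tensor = [1, 6, 1, 8]
 */
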
void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  ptrdiff_t i, numel;
  THTensor *tSlice, *sSlice;
  int64_t *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(THTensor_nDimensionLegacyNoScalars(index) == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < THTensor_nDimensionLegacyNoScalars(src), 4, "Indexing dim %d is out of bounds of tensor", dim);
  THArgCheck(numel == THTensor_sizeLegacyNoScalars(src, dim), 4, "Number of indices should be equal to source:size(dim)");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (tensor->dim() > 1)
  {
    tSlice = THTensor_(new)();
    sSlice = THTensor_(new)();

    for (i=0; i<numel; i++)
    {
      THTensor_(select)(tSlice, tensor, dim, index_data[i]);
      THTensor_(select)(sSlice, src, dim, i);
      THTensor_(cadd)(tSlice, tSlice, 1.0, sSlice);
    }

    c10::raw::intrusive_ptr::decref(tSlice);
    c10::raw::intrusive_ptr::decref(sSlice);
  }
  else
  {
    for (i=0; i<numel; i++)
    {
      THTensor_(set1d)(tensor,
                       index_data[i],
                       THTensor_(get1d)(src, i) + THTensor_(get1d)(tensor, index_data[i]));
    }
  }
  THLongTensor_free(index);
}

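/* Semantics sketch, in pseudo-notation: along dim, slice i of src is
 * accumulated into slice index[i] of tensor, i.e.
 *
 *   tensor.select(dim, index[i]) += src.select(dim, i)   for every i
 *
 * Unlike indexCopy, repeated entries in index therefore add up.
 */
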
void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, scalar_t val)
{
  ptrdiff_t i, numel;
  THTensor *tSlice;
  int64_t *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(THTensor_nDimensionLegacyNoScalars(index) == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < THTensor_nDimensionLegacyNoScalars(tensor), 4, "Indexing dim %d is out of bounds of tensor", dim);

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i=0; i<numel; i++)
  {
    if (tensor->dim() > 1)
    {
      tSlice = THTensor_(new)();
      THTensor_(select)(tSlice, tensor, dim, index_data[i]);
      THTensor_(fill)(tSlice, val);
      c10::raw::intrusive_ptr::decref(tSlice);
    }
    else
    {
      THTensor_(set1d)(tensor, index_data[i], val);
    }
  }
  THLongTensor_free(index);
}

void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)
{
  int64_t elems_per_row, i, idx;

  THArgCheck(THLongTensor_nDimensionLegacyNoScalars(index) == THTensor_(nDimensionLegacyNoScalars)(src), 4,
             "Index tensor must have same dimensions as input tensor");
  THArgCheck(dim >= 0 && dim < THTensor_(nDimensionLegacyNoScalars)(tensor), 3,
             "Index dimension is out of bounds");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) == THTensor_(nDimensionLegacyNoScalars)(tensor), 2,
             "Input tensor must have same dimensions as output tensor");

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY3(scalar_t, tensor, scalar_t, src, int64_t, index, dim,
                       TH_TENSOR_DIM_APPLY3_SIZE_EQ_EXCEPT_DIM,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= src_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in gather");
                         }
                         *(tensor_data + i*tensor_stride) = src_data[idx * src_stride];
                       })
}

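/* Semantics sketch: tensor and index share their shape, and the gathered
 * dimension of src is indexed element by element. For a 2-D case,
 *
 *   dim == 1:  tensor[i][j] = src[i][ index[i][j] ]
 *   dim == 0:  tensor[i][j] = src[ index[i][j] ][j]
 */
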
void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  int64_t elems_per_row, i, idx;
  int index_ndim_legacy_all = THTensor_nDimensionLegacyAll(index);

  THArgCheck(dim < THTensor_(nDimensionLegacyNoScalars)(tensor), 2, "Index dimension is out of bounds");
  THArgCheck(index_ndim_legacy_all == 0
             || THLongTensor_nDimensionLegacyNoScalars(index) == THTensor_(nDimensionLegacyNoScalars)(tensor), 3,
             "Index tensor must be either empty or have same dimensions as output tensor");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) == THTensor_(nDimensionLegacyNoScalars)(tensor), 4,
             "Input tensor must have same dimensions as output tensor");

  // no-op if index is empty
  if (index_ndim_legacy_all == 0)
    return;

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY3(scalar_t, tensor, scalar_t, src, int64_t, index, dim,
                       TH_TENSOR_DIM_APPLY3_SIZE_SCATTER,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= tensor_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in scatter");
                         }
                         tensor_data[idx * tensor_stride] = *(src_data + i*src_stride);
                       })
}

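/* Semantics sketch: scatter uses the inverse addressing of gather. For a 2-D
 * case with dim == 1,
 *
 *   tensor[i][ index[i][j] ] = src[i][j]
 *
 * scatterAdd below accumulates (+=) at the same positions, and scatterFill
 * writes a single scalar value instead of reading from src.
 */
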
void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
{
  int64_t elems_per_row, i, idx;
  int index_ndim_legacy_all = THTensor_nDimensionLegacyAll(index);

  THArgCheck(dim < THTensor_(nDimensionLegacyNoScalars)(tensor), 2, "Index dimension is out of bounds");
  THArgCheck(index_ndim_legacy_all == 0
             || THLongTensor_nDimensionLegacyNoScalars(index) == THTensor_(nDimensionLegacyNoScalars)(tensor), 3,
             "Index tensor must have same dimensions as output tensor");
  THArgCheck(THTensor_(nDimensionLegacyNoScalars)(src) == THTensor_(nDimensionLegacyNoScalars)(tensor), 4,
             "Input tensor must have same dimensions as output tensor");

  // no-op if index is empty
  if (index_ndim_legacy_all == 0)
    return;

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY3(scalar_t, tensor, scalar_t, src, int64_t, index, dim,
                       TH_TENSOR_DIM_APPLY3_SIZE_SCATTER,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= tensor_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in scatterAdd");
                         }
                         tensor_data[idx * tensor_stride] += *(src_data + i*src_stride);
                       })
}

void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, scalar_t val)
{
  int64_t elems_per_row, i, idx;
  int index_ndim_legacy_all = THLongTensor_nDimensionLegacyAll(index);

  THArgCheck(dim < THTensor_(nDimensionLegacyAll)(tensor), 2, "Index dimension is out of bounds");
  THArgCheck(index_ndim_legacy_all == 0 || index_ndim_legacy_all == THLongTensor_nDimensionLegacyAll(tensor), 3,
             "Index tensor must either be empty or have same dimensions as output tensor");

  // no-op if index is empty
  if (index_ndim_legacy_all == 0)
    return;

  elems_per_row = THTensor_sizeLegacyNoScalars(index, dim);

  TH_TENSOR_DIM_APPLY2(scalar_t, tensor, int64_t, index, dim,
                       for (i = 0; i < elems_per_row; ++i)
                       {
                         idx = *(index_data + i*index_stride);
                         if (idx < 0 || idx >= tensor_size)
                         {
                           THFree(TH_TENSOR_DIM_APPLY_counter);
                           THError("Invalid index in scatter");
                         }
                         tensor_data[idx * tensor_stride] = val;
                       })
}

accreal THTensor_(dot)(THTensor *tensor, THTensor *src)
{
  accreal sum = 0;
  /* we use a trick here. careful with that. */
  TH_TENSOR_APPLY2(scalar_t, tensor, scalar_t, src,
    int64_t sz = (tensor_size-tensor_i < src_size-src_i ? tensor_size-tensor_i : src_size-src_i);
    sum += THBlas_(dot)(sz, src_data, src_stride, tensor_data, tensor_stride);
    tensor_i += sz;
    src_i += sz;
    tensor_data += sz*tensor_stride;
    src_data += sz*src_stride;
    break;);
  return sum;
}

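/* Note on the "trick" above: TH_TENSOR_APPLY2 normally advances element by
 * element, but here each macro iteration hands the whole current contiguous
 * run of both tensors to THBlas_(dot), manually advances the counters and
 * data pointers by sz, and breaks out of the per-element loop. The result is
 * sum_k tensor[k] * src[k], computed in BLAS-sized chunks rather than with a
 * scalar loop.
 */
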
scalar_t THTensor_(minall)(THTensor *tensor)
{
  scalar_t theMin;
  scalar_t value;

  THArgCheck(THTensor_nDimensionLegacyAll(tensor) > 0, 1, "tensor must have one dimension");
  theMin = tensor->data<scalar_t>()[0];
  TH_TENSOR_APPLY(scalar_t, tensor,
    value = *tensor_data;
    /* This is not the same as value<theMin in the case of NaNs */
    if(!(value >= theMin))
    {
      theMin = value;
      th_isnan_break(value)
    });
  return theMin;
}

scalar_t THTensor_(maxall)(THTensor *tensor)
{
  scalar_t theMax;
  scalar_t value;

  THArgCheck(THTensor_nDimensionLegacyAll(tensor) > 0, 1, "tensor must have one dimension");
  theMax = tensor->data<scalar_t>()[0];
  TH_TENSOR_APPLY(scalar_t, tensor,
    value = *tensor_data;
    /* This is not the same as value>theMax in the case of NaNs */
    if(!(value <= theMax))
    {
      theMax = value;
      th_isnan_break(value)
    });
  return theMax;
}

accreal THTensor_(sumall)(THTensor *tensor)
{
  accreal sum = 0;
  int serial_path = 0;
#ifdef _OPENMP
  int inOMP = omp_in_parallel();
  if(inOMP) {
    serial_path = 1;
  } else {
    TH_TENSOR_APPLY_REDUCTION_OMP(scalar_t, tensor, +:sum, sum += *tensor_data;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
  }
#else
  serial_path = 1;
#endif
  if (serial_path) {
    TH_TENSOR_APPLY(scalar_t, tensor, sum += *tensor_data;);
  }
  return sum;
}

void THTensor_(add)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(adds)(r__data, t_data, value, r__len););
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data + value;, ORDIN_TH_OMP_OVERHEAD_THRESHOLD)
    }
#else
    (void)r_Size;
    serial_path = 1;
#endif
  }
  if (serial_path) {
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data + value;);
  }
}

void THTensor_(sub)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(add)(r_, t, -value);
}

void THTensor_(add_scaled)(THTensor *r_, THTensor *t, scalar_t value, scalar_t alpha)
{
  THTensor_(add)(r_, t, value * alpha);
}

void THTensor_(sub_scaled)(THTensor *r_, THTensor *t, scalar_t value, scalar_t alpha)
{
  THTensor_(add)(r_, t, -value * alpha);
}

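/* Note: the whole scalar-add family above reduces to a single kernel:
 *
 *   add(r_, t, v)            ->  r_[i] = t[i] + v
 *   sub(r_, t, v)            ->  add(r_, t, -v)
 *   add_scaled(r_, t, v, a)  ->  add(r_, t, v * a)
 *   sub_scaled(r_, t, v, a)  ->  add(r_, t, -v * a)
 *
 * The contiguous fast path dispatches to the vectorized THVector_(adds);
 * otherwise the element-wise apply macros (OpenMP or serial) are used.
 */
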
void THTensor_(mul)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(muls)(r__data, t_data, value, r__len););
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data * value;, ORDIN_TH_OMP_OVERHEAD_THRESHOLD)
    }
#else
    (void)r_Size;
    serial_path = 1;
#endif
  }
  if (serial_path) {
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data * value;);
  }
}

void THTensor_(div)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    TH_TENSOR_APPLY2_CONTIG(scalar_t, r_, scalar_t, t, THVector_(divs)(r__data, t_data, value, r__len););
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data / value;, ORDIN_TH_OMP_OVERHEAD_THRESHOLD)
    }
#else
    (void)r_Size;
    serial_path = 1;
#endif
  }
  if (serial_path) {
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data / value;);
  }
}

void THTensor_(lshift)(THTensor *r_, THTensor *t, scalar_t value)
{
#if defined(TH_REAL_IS_FLOAT)
  return THTensor_(mul)(r_, t, powf(2, value));
#elif defined(TH_REAL_IS_DOUBLE)
  return THTensor_(mul)(r_, t, pow(2, value));
#elif defined(TH_REAL_IS_HALF)
  return THError("lshift is not supported for torch.HalfTensor");
#else
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_BYTE)
      rp[i] = ((scalar_t) tp[i]) << value;
#else
      rp[i] = ((ureal) tp[i]) << value;
#endif
    }
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
#if defined(TH_REAL_IS_BYTE)
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) << value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) << value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    }
#else
    serial_path = 1;
#endif
  }
  if (serial_path) {
#if defined(TH_REAL_IS_BYTE)
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) << value););
#else
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) << value););
#endif
  }
#endif
}

void THTensor_(rshift)(THTensor *r_, THTensor *t, scalar_t value)
{
#if defined(TH_REAL_IS_FLOAT)
  return THTensor_(div)(r_, t, powf(2, value));
#elif defined(TH_REAL_IS_DOUBLE)
  return THTensor_(div)(r_, t, pow(2, value));
#elif defined(TH_REAL_IS_HALF)
  return THError("rshift is not supported for torch.HalfTensor");
#else
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_BYTE)
      rp[i] = ((scalar_t) tp[i]) >> value;
#else
      rp[i] = ((ureal) tp[i]) >> value;
#endif
    }
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
#if defined(TH_REAL_IS_BYTE)
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) >> value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) >> value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    }
#else
    serial_path = 1;
#endif
  }
  if (serial_path) {
#if defined(TH_REAL_IS_BYTE)
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((scalar_t) *t_data) >> value););
#else
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (((ureal) *t_data) >> value););
#endif
  }
#endif
}

void THTensor_(fmod)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
    for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      rp[i] = fmod(tp[i], value);
#else
      rp[i] = tp[i] % value;
#endif
    }
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = fmod(*t_data, value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (*t_data % value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    }
#else
    serial_path = 1;
#endif
  }
  if (serial_path) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = fmod(*t_data, value););
#else
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (*t_data % value););
#endif
  }
}

// Wrap if the C remainder (a) has a different sign than the divisor (b) and is not 0.
static inline bool modulo_wrap(scalar_t a, scalar_t b) {
  return (a != 0) && (a < 0) != (b < 0);
}

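/* Worked example: C's % truncates toward zero, so -7 % 3 == -1, whereas a
 * floored (Python-style) modulo gives 2. remainder() below computes the C
 * remainder and, when modulo_wrap reports a sign mismatch (-1 vs. 3 here),
 * adds the divisor back: -1 + 3 == 2. fmod() above keeps the truncated
 * behavior.
 */
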
void THTensor_(remainder)(THTensor *r_, THTensor *t, scalar_t value)
{
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int tContig = THTensor_(isContiguous)(t);
  int serial_path = 0;
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD) private(i)
    for (i=0; i<r_Size; i++) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      rp[i] = (value == 0)? NAN : tp[i] - value * floor(tp[i] / value);
#else
      // There is no NAN for integers
      rp[i] = tp[i] % value;
      if (modulo_wrap(rp[i], value))
        rp[i] += value;
#endif
    }
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value);, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#else
      // There is no NAN for integers
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data % value;
                           if (modulo_wrap(*r__data, value)) *r__data += value;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
#endif
    }
#else
    serial_path = 1;
#endif
  }
  if (serial_path) {
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value););
#else
    // There is no NAN for integers
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data % value;
                     if (modulo_wrap(*r__data, value)) *r__data += value;);
#endif
  }
}

void THTensor_(bitand)(THTensor *r_, THTensor *t, scalar_t value)
{
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
  (void)r_;
  (void)t;
  (void)value;
  return THError("bitand is only supported for integer type tensors");
#else
  THTensor_(resizeAs)(r_, t);
  int64_t r_Size = THTensor_(nElement)(r_);
  int r_Contig = THTensor_(isContiguous)(r_);
  int serial_path = 0;
  int tContig = THTensor_(isContiguous)(t);
  if (r_Contig && tContig) {
    scalar_t *tp = t->data<scalar_t>();
    scalar_t *rp = r_->data<scalar_t>();
    int64_t i;
    #pragma omp parallel for if(r_Size > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
    for (i=0; i<r_Size; i++) {
      rp[i] = tp[i] & value;
    }
  } else {
#ifdef _OPENMP
    int inOMP = omp_in_parallel();
    if (inOMP) {
      serial_path = 1;
    } else {
      TH_TENSOR_APPLY2_OMP(r_Size, r_Contig, tContig, scalar_t, r_, scalar_t, t, *r__data = *t_data & value;, UNCERTAIN_TH_OMP_OVERHEAD_THRESHOLD);
    }
#else
    serial_path = 1;
#endif
  }
  if (serial_path) {
    TH_TENSOR_APPLY2(scalar_t, r_, scalar_t, t, *r__data = *t_data & value;);
  }
#endif
}

#endif /* TH_GENERIC_FILE */