// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// math.h
17 #ifndef CAFFE2_UTILS_MATH_H_
18 #define CAFFE2_UTILS_MATH_H_
19 // This is a simple translation from the old Caffe math interfaces. We aim to
20 // still keep it simple, so all platforms would be able to support it fairly
21 // easily.
22 
23 // We include the cblas header here so that we can obtain the macros from cblas.
24 extern "C" {
25 #include "caffe2/utils/cblas.h"
26 }
27 
28 #ifdef CAFFE2_USE_ACCELERATE
29 #include <Accelerate/Accelerate.h>
30 #endif // CAFFE2_USE_ACCELERATE
31 
32 #include "caffe2/core/common.h"
33 #include "caffe2/core/types.h"
34 
35 #ifndef __CUDACC__
36 #include "Eigen/Core"
37 #include "Eigen/Dense"
38 #endif
39 
40 namespace caffe2 {
41 
// Forward declaration of the device-templated Tensor class (defined in
// caffe2/core/tensor.h). It is used in this header only as an opaque
// scratch-space parameter type, so the full definition is not needed.
template <class Context>
class Tensor;

// An empty class as a placeholder for a math function that has no specific
// engine specified.
class DefaultEngine {};
48 
49 #ifndef __CUDACC__
50 // Common Eigen types that we will often use
// Common Eigen types that we will often use. Eigen::Map wraps an existing
// raw buffer in place (no copy, no ownership), so Caffe2 tensor data can be
// viewed as Eigen matrices/arrays. The "Matrix" aliases carry linear-algebra
// semantics, the "Array" aliases carry coefficient-wise semantics, and the
// "Const" aliases are read-only views over const data.
template <typename T>
using EigenMatrixMap =
    Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using EigenArrayMap =
    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1> >;
template <typename T>
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1> >;
template <typename T>
using ConstEigenMatrixMap =
    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using ConstEigenArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> >;
template <typename T>
using ConstEigenVectorMap =
    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1> >;
template <typename T>
using ConstEigenVectorArrayMap =
    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1> >;
73 #endif
74 
75 namespace math {
76 
// Elementwise unary functions, y[i] = f(x[i]) for i in [0, N). Each takes a
// device context that the per-device implementation dispatches on.
template <typename T, class Context>
void Exp(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Log(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Cos(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Sin(const int N, const T* x, T* y, Context* context);
// Computes sin and cos of each element in a single pass; ys receives the
// sines and yc the cosines.
template <typename T, class Context>
void SinCos(const int N, const T* x, T* ys, T* yc, Context* context);
template <typename T, class Context>
void Abs(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Sqrt(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void InvSqrt(const int N, const T* x, T* y, Context* context);
template <typename T, class Context>
void Sqr(const int N, const T* x, T* y, Context* context);

// Elementwise logical negation, y[i] = !x[i].
template <typename T, class Context>
void Not(const int N, const T* x, T* y, Context* context);

// Elementwise power with a scalar exponent, y[i] = pow(a[i], b).
template <typename T, class Context>
void Powx(const int N, const T* a, const T b, T* y, Context* context);

// Declares a binary op producing a boolean result: the elementwise form
// name(N, a, b, y, ctx) plus a name##ToRow broadcast form that — presumably —
// applies a row vector b of length N against an M x N matrix a; confirm
// against the per-device implementations.
#define CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(name)                         \
  template <typename T, class Context>                                       \
  void name(const int N, const T* a, const T* b, bool* y, Context* context); \
  template <typename T, class Context>                                       \
  void name##ToRow(                                                          \
      const int M,                                                           \
      const int N,                                                           \
      const T* a,                                                            \
      const T* b,                                                            \
      bool* y,                                                               \
      Context* context);

// Elementwise comparisons.
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(LE);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GT);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(GE);

// Elementwise logical ops.
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(And);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Or);
CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT(Xor);

#undef CAFFE2_DECLARE_BINARY_OP_BINARY_RESULT
124 
// Declares an arithmetic binary op with result type T: the elementwise form
// name(N, a, b, y, ctx), a two-input name##ToRow broadcast form, and
// single-input name##ToRow / name##ToCol forms that take an M x N buffer x
// and reduce/broadcast in place into y — exact broadcast semantics live in
// the per-device implementations.
#define CAFFE2_DECLARE_BINARY_OP(name)                                    \
  template <typename T, class Context>                                    \
  void name(const int N, const T* a, const T* b, T* y, Context* context); \
  template <typename T, class Context>                                    \
  void name##ToRow(                                                       \
      const int M,                                                        \
      const int N,                                                        \
      const T* a,                                                         \
      const T* b,                                                         \
      T* y,                                                               \
      Context* context);                                                  \
  template <typename T, class Context>                                    \
  void name##ToRow(                                                       \
      const int M, const int N, const T* x, T* y, Context* context);      \
  template <typename T, class Context>                                    \
  void name##ToCol(                                                       \
      const int M, const int N, const T* x, T* y, Context* context);

CAFFE2_DECLARE_BINARY_OP(Add);
CAFFE2_DECLARE_BINARY_OP(Sub);
CAFFE2_DECLARE_BINARY_OP(Mul);
CAFFE2_DECLARE_BINARY_OP(Div);

#undef CAFFE2_DECLARE_BINARY_OP
149 
// Reduces the N elements of x to their minimum/maximum, written as a single
// value to y. scratch_ptr provides device scratch space for implementations
// that need intermediate storage (e.g. multi-pass GPU reductions).
template <typename T, class Context>
void ReduceMin(
    const int N,
    const T* x,
    T* y,
    Tensor<Context>* scratch_ptr,
    Context* context);
template <typename T, class Context>
void ReduceMax(
    const int N,
    const T* x,
    T* y,
    Tensor<Context>* scratch_ptr,
    Context* context);

// Adds batch sub-tensors elementwise to output. Stripe is the stripe length
// and N is the number of elements to add (size of Y).
template <typename T, class Context>
void AddStripedBatch(
    const int N,
    const T* first,
    T* y,
    const int stripe,
    const int batch,
    Context* context);

// Compute the row-wise max of a N*D matrix X, and write it to a N
// dimensional vector y.
template <typename T, class Context>
void RowwiseMax(const int N, const int D, const T* x, T* y,
                Context* context);

// Compute the column-wise max of a N*D matrix X, and write it to a D
// dimensional vector y.
template <typename T, class Context>
void ColwiseMax(const int N, const int D, const T* x, T* y,
                Context* context);

// Elemwise maximum of vector x and vector y. z[i] = max(x[i], y[i])
template <typename T, class Context>
void ElemwiseMax(const int N, const T* x, const T* y, T* z, Context* context);

// Elemwise maximum of vector x and scalar alpha. y[i] = max(x[i], alpha)
template <typename T, class Context>
void Maximum(
    const int N,
    const float alpha,
    const T* x,
    T* y,
    Context* context);

// Transpose tensor X with x_dims by axes and write the result to tensor Y with
// y_dims. num_axes is the rank; axes gives the permutation; data_size is the
// total element count.
template <typename T, class Context>
void Transpose(
    const int num_axes,
    const int* x_dims,
    const int* y_dims,
    const int* axes,
    const int data_size,
    const T* X,
    T* Y,
    Context* context);
213 
// Decaf gemm provides a simpler interface to the gemm functions, with the
// limitation that the data has to be contiguous in memory (leading
// dimensions are derived from M, N, K and the transpose flags).
// math_type is a TensorProto::DataType defaulting to FLOAT; presumably it
// selects the internal compute/accumulation precision — confirm against the
// per-device implementations.
template <typename T, class Context, class Engine = DefaultEngine>
void Gemm(
    const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB,
    const int M,
    const int N,
    const int K,
    const float alpha,
    const T* A,
    const T* B,
    const float beta,
    T* C,
    Context* context,
    TensorProto::DataType math_type = TensorProto_DataType_FLOAT);

// We also provide a gemm that has explicit lda, ldb and ldc specified.
// In most cases you probably want to use the function above, though.
// Note: unlike Gemm, alpha and beta here are of type T rather than float.
template <typename T, class Context, class Engine = DefaultEngine>
void GemmEx(
    const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB,
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* A,
    const int lda,
    const T* B,
    const int ldb,
    const T beta,
    T* C,
    const int ldc,
    Context* context);

// GemmBatched provides a simple abstraction into library routines: it runs
// batch_size independent M x K by K x N multiplies. The optional scratch
// tensor is device workspace for implementations that need it.
template <typename T, class Context, class Engine = DefaultEngine>
void GemmBatched(
    const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB,
    const int batch_size,
    const int M,
    const int N,
    const int K,
    const float alpha,
    const T* A,
    const T* B,
    const float beta,
    T* C,
    Context* context,
    Tensor<Context>* scratch = nullptr,
    TensorProto::DataType math_type = TensorProto_DataType_FLOAT);

// Gemv always takes in a M*N matrix A, and depending on whether we set TransA
// to Trans, the output is:
// CblasNoTrans: x is an N dim vector and y is an M dim vector.
// CblasTrans: x is an M dim vector and y is an N dim vector.
template <typename T, class Context, class Engine = DefaultEngine>
void Gemv(
    const CBLAS_TRANSPOSE TransA,
    const int M,
    const int N,
    const float alpha,
    const T* A,
    const T* x,
    const float beta,
    T* y,
    Context* context,
    TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
284 
// Fills X[0..N) with the scalar alpha.
template <typename T, class Context>
void Set(const size_t N, const T alpha, T* X, Context* context);

// Fills r[0..n) with samples drawn uniformly from [a, b] using the context's
// random source (closed vs half-open interval depends on the per-device
// implementation — confirm there).
template <typename T, class Context>
void RandUniform(const size_t n, const T a, const T b, T* r, Context* context);

// Like RandUniform, but additionally avoids the m values in `avoid`;
// presumably the n generated values are also unique among themselves —
// confirm against the implementation.
template <typename T, class Context>
void RandUniformUnique(
    const size_t n,
    const T a,
    const T b,
    T* r,
    const size_t m,
    const T* avoid,
    Context* context);

// Fills r[0..n) with samples from a Gaussian with the given mean and
// standard deviation.
template <typename T, class Context>
void RandGaussian(
    const size_t n,
    const T mean,
    const T std,
    T* r,
    Context* context);
308 
// Dot product of vector a and b, and writes the result to a single value y.
template <typename T, class Context>
void Dot(const int N, const T* a, const T* b, T* y, Context* context);

// Sum of vector x, and writes the result to a single value y. scratch_ptr is
// optional device workspace for implementations that reduce in passes.
template <typename T, class Context>
void Sum(const int N, const T* x, T* y, Context* context,
         Tensor<Context>* scratch_ptr = nullptr);

// Sum of squares of vector x, and writes the result to a single value y.
template <typename T, class Context>
void SumSqr(
    const int N,
    const T* x,
    T* y,
    Context* context,
    Tensor<Context>* scratch_ptr = nullptr);

// Select does index selection of the rows a N*D matrix x, and gives the N
// dimensional vector y that contains the selected data: y[i] = x[i, idx[i]].
template <typename T, class Context>
void Select(const int N, const int D, const T* x, const int* idx, T* y,
            Context* context);

// Scales x by alpha into y: y[i] = alpha * x[i].
template <typename T, class Context>
void Scale(const int N, const float alpha, const T* x, T* y, Context* context);

// Different from the Scale function above, if alpha is passed in
// as a pointer, we will assume that it lives on the Context device,
// for example on GPU.
template <typename T, class Context>
void Scale(const int N, const float* alpha, const T* x, T* y, Context* context);

// BLAS-style axpy: y[i] += alpha * x[i].
template <typename T, class Context>
void Axpy(const int N, const float alpha, const T* x, T* y, Context* context);

// Different from the Axpy function above, if alpha is passed in
// as a pointer, we will assume that it lives on the Context device,
// for example on GPU.
template <typename T, class Context>
void Axpy(const int N, const float* alpha, const T* x, T* y, Context* context);

// Scaled sum: y[i] = alpha * x[i] + b * y[i].
template <typename T, class Context>
void Axpby(
    const int N,
    const float alpha,
    const T* x,
    const T b,
    T* y,
    Context* context);
359 
// N-dimensional im2col: unfolds patches of data_img (shape im_shape, with
// img_size total elements) into the column buffer data_col (shape col_shape,
// col_size elements) for a convolution described by kernel_shape, stride,
// dilation and pad over N spatial axes. The `order` template parameter is
// the storage order (NCHW vs NHWC). When accumulate_output is true the
// result is added into data_col instead of overwriting it.
template <typename T, class Context, int order>
void Im2colNd(
    const T* data_img,
    const int* im_shape,
    const int* col_shape,
    const int img_size,
    const int col_size,
    const int* kernel_shape,
    const int* stride,
    const int* dilation,
    const int* pad,
    const int N,
    T* data_col,
    Context* context,
    bool accumulate_output = false);

// Inverse of Im2colNd: folds the column buffer data_col back into the image
// buffer data_img, accumulating overlapping patch contributions.
template <typename T, class Context, int order>
void Col2imNd(
    const T* data_col,
    const int* img_shape,
    const int* col_shape,
    const int img_size,
    const int col_size,
    const int* kernel_shape,
    const int* stride,
    const int* dilation,
    const int* pad,
    const int N,
    T* data_img,
    Context* context);

// 2-D im2col with explicit per-side padding (top/left/bottom/right),
// per-axis stride and dilation.
template <typename T, class Context, int order>
void Im2col(
    const T* data_im,
    const int channels,
    const int height,
    const int width,
    const int kernel_h,
    const int kernel_w,
    const int dilation_h,
    const int dilation_w,
    const int pad_t,
    const int pad_l,
    const int pad_b,
    const int pad_r,
    const int stride_h,
    const int stride_w,
    T* data_col,
    Context* context);

// Inverse of Im2col for the 2-D case.
template <typename T, class Context, int order>
void Col2im(
    const T* data_col,
    const int channels,
    const int height,
    const int width,
    const int patch_h,
    const int patch_w,
    const int dilation_h,
    const int dilation_w,
    const int pad_t,
    const int pad_l,
    const int pad_b,
    const int pad_r,
    const int stride_h,
    const int stride_w,
    T* data_im,
    Context* context);
428 
// Applies a per-channel bias value to each channel of the input
// image. image_size is H * W.
template <typename T, class Context>
void BiasCHW(
    const T* bias,
    const int bias_channels,
    const int image_size,
    T* image,
    Context* context);

// Copies an M x N matrix of item_size-byte elements from A (row stride lda)
// to B (row stride ldb). The optional TypedCopy callback handles elements
// that need more than a raw memcpy (e.g. non-POD types).
template <class Context>
void CopyMatrix(
    const size_t item_size,
    const int M,
    const int N,
    const void* A,
    const int lda,
    void* B,
    const int ldb,
    Context* context,
    TypeMeta::TypedCopy copy = nullptr);

// Copies the N elements of A into B.
template <typename T, class Context>
void CopyVector(const int N, const T* A, T* B, Context* context);
453 
// Tests 0 <= a < b with a single comparison. Casting to unsigned maps any
// negative `a` onto a value >= 0x8000'0000; since `b` is always a positive
// signed value it stays below that threshold, so one unsigned `<` covers
// both the lower and the upper bound at once.
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
  const unsigned ua = static_cast<unsigned>(a);
  const unsigned ub = static_cast<unsigned>(b);
  return ua < ub;
}
465 
// Integer ceiling division, ceil(a / b), for integral T. The caller must
// guarantee that a + b - 1 does not overflow (or underflow) T.
template <typename T>
constexpr T divUp(T a, T b) {
  return (a + (b - static_cast<T>(1))) / b;
}

// Rounds a up to the nearest multiple of b. Implemented via divUp, so the
// same overflow caveat on a + b - 1 applies.
template <typename T>
constexpr T roundUp(T a, T b) {
  return b * divUp<T>(a, b);
}
480 
// Returns floor(log2(n)) for a positive integer type (integerLog2(1) == 0).
// The tail recursion keeps this a valid C++11 single-return constexpr; `p`
// is the accumulator and should be left at its default by callers.
template <typename T>
constexpr int integerLog2(T n, int p = 0) {
  return (n <= 1) ? p : integerLog2(n / 2, p + 1);
}

// Returns the next power of 2 strictly greater than v (for positive v):
// a power of 2 doubles, anything else rounds up to 1 << (floor(log2(v)) + 1).
// The power-of-2 test (v && !(v & (v - 1))) is written inline because no
// integerIsPowerOf2 helper is declared in this header; the previous code
// called that undeclared name and failed to compile on instantiation.
template <typename T>
constexpr T integerNextHighestPowerOf2(T v) {
  return (v && !(v & (v - 1)))
      ? static_cast<T>(2) * v
      : (static_cast<T>(1) << (integerLog2(v) + 1));
}
492 
493 } // namespace math
494 } // namespace caffe2
495 
496 #include "caffe2/utils/math-detail.h"
497 #endif // CAFFE2_UTILS_MATH_H_
// Doxygen cross-reference residue from the documentation extraction:
// Definition: types.h:88
// Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information.
// Definition: tensor.h:109
// Copyright (c) 2016-present, Facebook, Inc.