Caffe2 - C++ API
A deep learning, cross-platform ML framework
math.h
1 #ifndef CAFFE2_UTILS_MATH_H_
2 #define CAFFE2_UTILS_MATH_H_
3 // This is a simple translation from the old Caffe math interfaces. We aim to
4 // still keep it simple, so all platforms would be able to support it fairly
5 // easily.
6 
7 // We include the cblas header here so that we can obtain the macros from cblas.
8 extern "C" {
9 #include "caffe2/utils/cblas.h"
10 }
11 
12 #ifdef CAFFE2_USE_ACCELERATE
13 #include <Accelerate/Accelerate.h>
14 #endif // CAFFE2_USE_ACCELERATE
15 
16 #include "caffe2/core/common.h"
17 #include "caffe2/core/types.h"
18 #include "caffe2/utils/math/broadcast.h"
19 #include "caffe2/utils/math/elementwise.h"
20 #include "caffe2/utils/math/reduce.h"
21 #include "caffe2/utils/math/transpose.h"
22 #include "caffe2/utils/math/utils.h"
23 
24 namespace caffe2 {
25 
26 // TODO: Change dims related arguments to int64_t?

// Forward declaration only: in the declarations below Tensor is used solely as
// the pointee type of the optional scratch_ptr arguments of Sum and SumSqr.
27 class Tensor;
28 
29 // An empty class used as a placeholder for a math function that has no
30 // specific engine specified. It is the default argument of the Engine
// template parameter of Gemm, GemmEx, GemmBatched, GemmStridedBatched and Gemv.
31 class CAFFE2_API DefaultEngine {};
32 
33 namespace math {
34 
// Declares the comparison entry points for one operator name `Comp`. Each
// expansion produces three function-template declarations:
//   - Rowwise##Comp(rows, cols, A, B, C, context)
//   - Colwise##Comp(rows, cols, A, B, C, context)
//   - Comp(A_ndim, A_dims, B_ndim, B_dims, A, B, C, context)
// All three take operands of type T through A and B and an output pointer
// bool* C. The Rowwise/Colwise variants carry an extra bool template
// parameter, kBroadcast1st, which defaults to false.
// NOTE(review): presumably kBroadcast1st selects whether the first operand (A)
// is the one being broadcast -- confirm against the definitions.
35 #define C10_DECLARE_COMPARE_OP(Comp) \
36  template <typename T, class Context, bool kBroadcast1st = false> \
37  void Rowwise##Comp( \
38  const int rows, \
39  const int cols, \
40  const T* A, \
41  const T* B, \
42  bool* C, \
43  Context* context); \
44  \
45  template <typename T, class Context, bool kBroadcast1st = false> \
46  void Colwise##Comp( \
47  const int rows, \
48  const int cols, \
49  const T* A, \
50  const T* B, \
51  bool* C, \
52  Context* context); \
53  \
54  template <typename T, class Context> \
55  void Comp( \
56  const int A_ndim, \
57  const int* A_dims, \
58  const int B_ndim, \
59  const int* B_dims, \
60  const T* A, \
61  const T* B, \
62  bool* C, \
63  Context* context);
64 
// Comparison operators declared through the macro above.
65 C10_DECLARE_COMPARE_OP(EQ)
66 C10_DECLARE_COMPARE_OP(NE)
67 C10_DECLARE_COMPARE_OP(LT)
68 C10_DECLARE_COMPARE_OP(LE)
69 C10_DECLARE_COMPARE_OP(GT)
70 C10_DECLARE_COMPARE_OP(GE)
71 
// The helper macro is undefined here, so it is not visible past this point.
72 #undef C10_DECLARE_COMPARE_OP
73 
// Declares the binary-operation entry points for one operator name `Func`.
// Each expansion produces three function-template declarations:
//   - Rowwise##Func(rows, cols, A, B, C, context)
//   - Colwise##Func(rows, cols, A, B, C, context)
//   - Func(A_ndim, A_dims, B_ndim, B_dims, A, B, C, context)
// Unlike the comparison declarations, the output pointer C has the same
// element type T as the inputs A and B. The Rowwise/Colwise variants carry an
// extra bool template parameter, kBroadcast1st, which defaults to false.
// NOTE(review): presumably kBroadcast1st selects whether the first operand (A)
// is the one being broadcast -- confirm against the definitions.
74 #define C10_DECLARE_BINARY_OP(Func) \
75  template <typename T, class Context, bool kBroadcast1st = false> \
76  void Rowwise##Func( \
77  const int rows, \
78  const int cols, \
79  const T* A, \
80  const T* B, \
81  T* C, \
82  Context* context); \
83  \
84  template <typename T, class Context, bool kBroadcast1st = false> \
85  void Colwise##Func( \
86  const int rows, \
87  const int cols, \
88  const T* A, \
89  const T* B, \
90  T* C, \
91  Context* context); \
92  \
93  template <typename T, class Context> \
94  void Func( \
95  const int A_ndim, \
96  const int* A_dims, \
97  const int B_ndim, \
98  const int* B_dims, \
99  const T* A, \
100  const T* B, \
101  T* C, \
102  Context* context);
103 
// Arithmetic operations.
104 C10_DECLARE_BINARY_OP(Add)
105 C10_DECLARE_BINARY_OP(Sub)
106 C10_DECLARE_BINARY_OP(Mul)
107 C10_DECLARE_BINARY_OP(Div)
108 
// Logical operations.
109 C10_DECLARE_BINARY_OP(And)
110 C10_DECLARE_BINARY_OP(Or)
111 C10_DECLARE_BINARY_OP(Xor)
112 
// Bitwise operations.
113 C10_DECLARE_BINARY_OP(BitwiseAnd)
114 C10_DECLARE_BINARY_OP(BitwiseOr)
115 C10_DECLARE_BINARY_OP(BitwiseXor)
116 
// The helper macro is undefined here, so it is not visible past this point.
117 #undef C10_DECLARE_BINARY_OP
118 
119 // Broadcasts X with X_dims to Y with Y_dims.
// NOTE(review): X_ndim / Y_ndim are presumably the lengths of X_dims / Y_dims,
// and alpha is presumably a scale factor applied to the values while
// broadcasting -- neither is established by this declaration; confirm against
// the definitions.
120 template <typename T, class Context>
121 CAFFE2_API void Broadcast(
122  const int X_ndim,
123  const int* X_dims,
124  const int Y_ndim,
125  const int* Y_dims,
126  const T alpha,
127  const T* X,
128  T* Y,
129  Context* context);
130 
131 // Computes inv_std from variance.
// NOTE(review): presumably inv_std[i] = 1 / sqrt(var[i] + epsilon) for the N
// entries of var -- the exact formula is not visible here; confirm.
132 template <typename T, class Context>
133 CAFFE2_API void InvStd(
134  const int N,
135  const T epsilon,
136  const T* var,
137  T* inv_std,
138  Context* context);
139 
140 // Adds batch sub-tensors elementwise to the output y. stripe is the stripe
141 // length and N is the number of elements to add (the size of y).
// NOTE(review): presumably `first` points at the first sub-tensor and the
// k-th sub-tensor starts at first + k * stripe, for k in [0, batch) -- confirm.
142 template <typename T, class Context>
143 CAFFE2_API void AddStripedBatch(
144  const int N,
145  const T* first,
146  T* y,
147  const int stripe,
148  const int batch,
149  Context* context);
150 
151 // Computes the row-wise max of an N*D matrix x, and writes it to an
152 // N-dimensional vector y.
153 template <typename T, class Context>
154 CAFFE2_API void
155 RowwiseMax(const int N, const int D, const T* x, T* y, Context* context);
156 
157 // Computes the column-wise max of an N*D matrix x, and writes it to a
158 // D-dimensional vector y.
159 template <typename T, class Context>
160 CAFFE2_API void
161 ColwiseMax(const int N, const int D, const T* x, T* y, Context* context);
162 
163 // Elementwise maximum of vector x and scalar alpha: y[i] = max(x[i], alpha).
// Note that alpha is declared as float regardless of the element type T.
164 template <typename T, class Context>
165 CAFFE2_API void
166 Maximum(const int N, const float alpha, const T* x, T* y, Context* context);
167 
168 // Decaf gemm provides a simpler interface to the gemm functions, with the
169 // limitation that the data has to be contiguous in memory.
// alpha and beta are declared as float regardless of the element type T, and
// math_type defaults to TensorProto_DataType_FLOAT.
// NOTE(review): presumably follows the BLAS gemm convention
// C = alpha * op(A) * op(B) + beta * C, with op(A) being M x K and op(B) being
// K x N -- confirm against the definitions.
170 template <typename T, class Context, class Engine = DefaultEngine>
171 CAFFE2_API void Gemm(
172  const CBLAS_TRANSPOSE trans_A,
173  const CBLAS_TRANSPOSE trans_B,
174  const int M,
175  const int N,
176  const int K,
177  const float alpha,
178  const T* A,
179  const T* B,
180  const float beta,
181  T* C,
182  Context* context,
183  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
184 
185 // We also provide a gemm that has explicit lda, ldb and ldc specified.
186 // In most cases you probably want to use the function above, though.
// Unlike Gemm, alpha and beta are of the element type T here, and there is no
// math_type argument.
// NOTE(review): lda / ldb / ldc are presumably the BLAS-style leading
// dimensions of A / B / C -- confirm.
187 template <typename T, class Context, class Engine = DefaultEngine>
188 CAFFE2_API void GemmEx(
189  const CBLAS_TRANSPOSE trans_A,
190  const CBLAS_TRANSPOSE trans_B,
191  const int M,
192  const int N,
193  const int K,
194  const T alpha,
195  const T* A,
196  const int lda,
197  const T* B,
198  const int ldb,
199  const T beta,
200  T* C,
201  const int ldc,
202  Context* context);
203 
204 // GemmBatched provides a simple abstraction into library routines.
// A, B and C are passed as arrays of pointers (const T** / T**). alpha and
// beta are float regardless of T, and math_type defaults to
// TensorProto_DataType_FLOAT.
// NOTE(review): presumably A[i], B[i] and C[i] are the operands of the i-th of
// batch_size independent gemm problems -- confirm.
205 template <typename T, class Context, class Engine = DefaultEngine>
206 CAFFE2_API void GemmBatched(
207  const CBLAS_TRANSPOSE trans_A,
208  const CBLAS_TRANSPOSE trans_B,
209  const int batch_size,
210  const int M,
211  const int N,
212  const int K,
213  const float alpha,
214  const T** A,
215  const T** B,
216  const float beta,
217  T** C,
218  Context* context,
219  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
220 
// Batched gemm variant that takes a single base pointer per operand plus an
// integer stride (A_stride, B_stride, C_stride) instead of arrays of pointers.
// alpha and beta are float regardless of T, and math_type defaults to
// TensorProto_DataType_FLOAT.
// NOTE(review): presumably the i-th problem uses A + i * A_stride,
// B + i * B_stride and C + i * C_stride, with strides counted in elements --
// confirm.
221 template <typename T, class Context, class Engine = DefaultEngine>
222 CAFFE2_API void GemmStridedBatched(
223  const CBLAS_TRANSPOSE trans_A,
224  const CBLAS_TRANSPOSE trans_B,
225  const int batch_size,
226  const int M,
227  const int N,
228  const int K,
229  const float alpha,
230  const T* A,
231  const int A_stride,
232  const T* B,
233  const int B_stride,
234  const float beta,
235  T* C,
236  const int C_stride,
237  Context* context,
238  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
239 
240 // Gemv always takes in an M*N matrix A, and depending on whether we set
241 // trans_A to CblasTrans, the output is:
242 // CblasNoTrans: x is an N dim vector and y is an M dim vector.
243 // CblasTrans: x is an M dim vector and y is an N dim vector.
// alpha and beta are float regardless of T, and math_type defaults to
// TensorProto_DataType_FLOAT.
244 template <typename T, class Context, class Engine = DefaultEngine>
245 CAFFE2_API void Gemv(
246  const CBLAS_TRANSPOSE trans_A,
247  const int M,
248  const int N,
249  const float alpha,
250  const T* A,
251  const T* x,
252  const float beta,
253  T* y,
254  Context* context,
255  TensorProto::DataType math_type = TensorProto_DataType_FLOAT);
256 
// Random-number helper taking a count n, two bounds a and b, and an output
// pointer r.
// NOTE(review): presumably fills r[0..n) with samples drawn uniformly between
// a and b; whether the bounds are inclusive is not visible here -- confirm.
257 template <typename T, class Context>
258 CAFFE2_API void
259 RandUniform(const size_t n, const T a, const T b, T* r, Context* context);
260 
261 // Generates n values that sum up to a fixed sum, subject to the
262 // restriction a <= x <= b for each value x generated.
// NOTE(review): the values are presumably written to r[0..n) -- confirm.
263 template <typename T, class Context>
264 CAFFE2_API void RandFixedSum(
265  const size_t n,
266  const T a,
267  const T b,
268  const T sum,
269  T* r,
270  Context* context);
271 
272 template <typename T, class Context>
273 CAFFE2_API void RandUniformUnique(
274  const size_t n,
275  const T a,
276  const T b,
277  T* r,
278  const size_t m,
279  const T* avoid,
280  Context* context);
281 
282 // Generates n values from a synthetic data distribution,
283 // defined by unique accesses and stack distances.
284 template <typename T, class Context>
285 CAFFE2_API void
286 RandSyntheticData(const size_t n, const T a, const T b, T* r, Context* context);
287 
// Random-number helper taking a count n, a mean, a std and an output
// pointer r.
// NOTE(review): presumably fills r[0..n) with samples from a normal
// distribution with the given mean and standard deviation -- confirm.
288 template <typename T, class Context>
289 CAFFE2_API void
290 RandGaussian(const size_t n, const T mean, const T std, T* r, Context* context);
291 
292 // Dot product of vectors a and b; writes the result to a single value y.
293 template <typename T, class Context>
294 CAFFE2_API void
295 Dot(const int N, const T* a, const T* b, T* y, Context* context);
296 
297 // Sum of vector x; writes the result to a single value y.
// scratch_ptr is optional and defaults to nullptr.
// NOTE(review): presumably a caller-provided scratch buffer that some Context
// implementations use for the reduction -- confirm.
298 template <typename T, class Context>
299 CAFFE2_API void Sum(
300  const int N,
301  const T* x,
302  T* y,
303  Context* context,
304  Tensor* scratch_ptr = nullptr);
305 
306 // Sum of squares of vector x; writes the result to a single value y.
// scratch_ptr is optional and defaults to nullptr.
// NOTE(review): presumably a caller-provided scratch buffer that some Context
// implementations use for the reduction -- confirm.
307 template <typename T, class Context>
308 CAFFE2_API void SumSqr(
309  const int N,
310  const T* x,
311  T* y,
312  Context* context,
313  Tensor* scratch_ptr = nullptr);
314 
315 // Select does index selection of the rows of an N*D matrix x, and gives the
316 // N-dimensional vector y that contains the selected data.
// NOTE(review): presumably y[i] = x[i * D + idx[i]], i.e. idx holds one column
// index per row -- confirm against the definitions.
317 template <typename T, class Context>
318 CAFFE2_API void Select(
319  const int N,
320  const int D,
321  const T* x,
322  const int* idx,
323  T* y,
324  Context* context);
325 
// Axpy overload that takes the scalar alpha by value. alpha is declared as
// float regardless of the element type T.
// NOTE(review): presumably the BLAS axpy update y[i] += alpha * x[i] over N
// elements -- confirm.
326 template <typename T, class Context>
327 CAFFE2_API void
328 Axpy(const int N, const float alpha, const T* x, T* y, Context* context);
329 
330 // Different from the Axpy function above, if alpha is passed in
331 // as a pointer, we will assume that it lives on the Context device,
332 // for example on GPU.
333 template <typename T, class Context>
334 CAFFE2_API void
335 Axpy(const int N, const float* alpha, const T* x, T* y, Context* context);
336 
// Axpby overload that takes the coefficients alpha and b by value. The
// coefficient type TCoeff is a separate template parameter from the data type
// TData of x and y.
// NOTE(review): presumably computes y[i] = alpha * x[i] + b * y[i] over N
// elements -- confirm.
337 template <typename TCoeff, typename TData, class Context>
338 CAFFE2_API void Axpby(
339  const int N,
340  const TCoeff alpha,
341  const TData* x,
342  const TCoeff b,
343  TData* y,
344  Context* context);
345 
// Axpby overload that takes the coefficients alpha and b through pointers.
// NOTE(review): by analogy with the pointer-alpha Axpy overload, the
// coefficients presumably live on the Context device -- confirm.
346 template <typename TCoeff, typename TData, class Context>
347 CAFFE2_API void Axpby(
348  const int N,
349  const TCoeff* alpha,
350  const TData* x,
351  const TCoeff* b,
352  TData* y,
353  Context* context);
354 
355 // groups must be 1 for GPU.
356 // For NHWC order with groups > 1, the result is laid out in
357 // NHW G RS C/G order so that data within the same group is contiguous.
358 // For NCHW order, groups doesn't make any difference because we're doing Im2Col
359 // for each N and C is the slowest moving dimension among CHW.
// The storage order is selected at compile time through the kOrder template
// parameter; groups defaults to 1.
360 template <typename T, class Context, StorageOrder kOrder>
361 CAFFE2_API void Im2Col(
362  const int channels,
363  const int height,
364  const int width,
365  const int kernel_h,
366  const int kernel_w,
367  const int dilation_h,
368  const int dilation_w,
369  const int pad_t,
370  const int pad_l,
371  const int pad_b,
372  const int pad_r,
373  const int stride_h,
374  const int stride_w,
375  const T* img_data,
376  T* col_data,
377  Context* context,
378  const int groups = 1);
379 
380 // groups must be 1 for GPU.
// Variant of Im2Col that takes shapes, kernel, stride, dilation and pad as
// int arrays instead of separate height/width scalars. groups defaults to 1.
// NOTE(review): N is presumably the number of spatial dimensions (the length
// of kernel_shape / stride / dilation), and img_size / col_size the element
// counts of img_data / col_data -- confirm.
381 template <typename T, class Context, StorageOrder kOrder>
382 CAFFE2_API void Im2ColNd(
383  const int N,
384  const int img_size,
385  const int col_size,
386  const int* img_shape,
387  const int* col_shape,
388  const int* kernel_shape,
389  const int* stride,
390  const int* dilation,
391  const int* pad,
392  const T* img_data,
393  T* col_data,
394  Context* context,
395  const int groups = 1);
396 
397 // groups must be 1 for GPU.
398 // For NHWC order with groups > 1, the result is laid out in
399 // NHW G RS C/G order so that data within the same group is contiguous.
400 // For NCHW order, groups doesn't make any difference because we're doing Im2Col
401 // for each N and C is the slowest moving dimension among CHW.
// Compared with Im2Col the data pointers are swapped: col_data is the const
// input and img_data is the output. groups defaults to 1.
402 template <typename T, class Context, StorageOrder kOrder>
403 CAFFE2_API void Col2Im(
404  const int channels,
405  const int height,
406  const int width,
407  const int patch_h,
408  const int patch_w,
409  const int dilation_h,
410  const int dilation_w,
411  const int pad_t,
412  const int pad_l,
413  const int pad_b,
414  const int pad_r,
415  const int stride_h,
416  const int stride_w,
417  const T* col_data,
418  T* img_data,
419  Context* context,
420  const int groups = 1);
421 
422 // groups must be 1 for GPU.
423 // For NHWC order with groups > 1, the result is laid out in
424 // NHW G RS C/G order so that data within the same group is contiguous.
425 // For NCHW order, groups doesn't make any difference because we're doing Im2Col
426 // for each N and C is the slowest moving dimension among CHW.
// Compared with Im2ColNd the data pointers are swapped: col_data is the const
// input and img_data is the output. groups defaults to 1.
427 template <typename T, class Context, StorageOrder kOrder>
428 CAFFE2_API void Col2ImNd(
429  const int N,
430  const int img_size,
431  const int col_size,
432  const int* img_shape,
433  const int* col_shape,
434  const int* kernel_shape,
435  const int* stride,
436  const int* dilation,
437  const int* pad,
438  const T* col_data,
439  T* img_data,
440  Context* context,
441  const int groups = 1);
442 
443 // Applies a per-channel bias value to each channel of the input
444 // image. image_size is H * W.
// The image is modified through the non-const pointer `image`.
// NOTE(review): bias_multiplier is presumably a vector of ones of length
// image_size used to expand the bias via a matrix product -- confirm.
445 template <typename T, class Context>
446 CAFFE2_API void BiasCHW(
447  const T* bias,
448  const T* bias_multiplier,
449  const int bias_channels,
450  const int image_size,
451  T* image,
452  Context* context);
453 
// Untyped CopyMatrix overload: A and B are void pointers and the element size
// is passed explicitly as item_size. The `copy` argument is optional and
// defaults to nullptr.
// NOTE(review): presumably copies an M x N matrix from A (leading dimension
// lda) to B (leading dimension ldb), using `copy` for element types that are
// not trivially copyable -- confirm against the definitions.
454 template <class Context>
455 CAFFE2_API void CopyMatrix(
456  const size_t item_size,
457  const int M,
458  const int N,
459  const void* A,
460  const int lda,
461  void* B,
462  const int ldb,
463  Context* context,
464  TypeMeta::Copy copy = nullptr);
465 
// Typed CopyMatrix overload taking one stride-like argument per matrix
// (lda for A, ldb for B).
// NOTE(review): presumably copies an M x N matrix from A to B, with lda / ldb
// being the leading dimensions in elements -- confirm.
466 template <typename T, class Context>
467 CAFFE2_API void CopyMatrix(
468  const int M,
469  const int N,
470  const T* A,
471  const int lda,
472  T* B,
473  const int ldb,
474  Context* context);
475 
// Typed CopyMatrix overload taking separate outer and inner strides for both
// the source A and the destination B.
// NOTE(review): presumably element (i, j) is read from
// A[i * A_outer_stride + j * A_inner_stride] and written to
// B[i * B_outer_stride + j * B_inner_stride] -- confirm.
476 template <typename T, class Context>
477 CAFFE2_API void CopyMatrix(
478  const int M,
479  const int N,
480  const T* A,
481  const int A_outer_stride,
482  const int A_inner_stride,
483  T* B,
484  const int B_outer_stride,
485  const int B_inner_stride,
486  Context* context);
487 
// Vector counterpart of CopyMatrix: takes a length N, a const source A and a
// destination B.
// NOTE(review): presumably copies N elements from A to B -- confirm.
488 template <typename T, class Context>
489 CAFFE2_API void CopyVector(const int N, const T* A, T* B, Context* context);
490 
491 
492 } // namespace math
493 } // namespace caffe2
494 
495 #include "caffe2/utils/math-detail.h"
496 #endif // CAFFE2_UTILS_MATH_H_
Definition: OpClasses.h:414
Definition: any.cpp:108
Definition: static.cpp:52
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:64
Definition: static.cpp:58
Definition: static.cpp:70
Definition: OpClasses.h:659