1 #include "caffe2/utils/math/transpose.h"    10 #endif // CAFFE2_USE_MKL    12 #ifdef CAFFE2_USE_HPTT    14 #endif // CAFFE2_USE_HPTT    16 #include "caffe2/core/context.h"    17 #include "caffe2/utils/eigen_utils.h"    18 #include "caffe2/utils/math/utils.h"    25 template <
typename TIndex, 
typename TData>
    31   EigenMatrixMap<TData>(Y, rows, cols) =
    32       ConstEigenMatrixMap<TData>(X, cols, rows).transpose();
    37 #define DELEGATE_TRANSPOSE_2D(TIndex, TData, MKLFunc)                   \    39   void Transpose2D<TIndex, TData>(                                      \    40       const TIndex rows, const TIndex cols, const TData* X, TData* Y) { \    41     MKLFunc('R', 'T', rows, cols, TData(1), X, cols, Y, rows);          \    43 DELEGATE_TRANSPOSE_2D(std::int32_t, 
float, mkl_somatcopy);
    44 DELEGATE_TRANSPOSE_2D(std::int64_t, 
float, mkl_somatcopy);
    45 DELEGATE_TRANSPOSE_2D(std::int32_t, 
double, mkl_domatcopy);
    46 DELEGATE_TRANSPOSE_2D(std::int64_t, 
double, mkl_domatcopy);
    47 #undef DELEGATE_TRANSPOSE_2D    49 #endif // CAFFE2_USE_MKL    51 #ifdef CAFFE2_USE_HPTT    53 template <
typename TIndex, 
typename TData>
    60   for (
int i = 0; i < ndim; ++i) {
    61     if (dims[i] <= 0 || dims[i] > std::numeric_limits<int>::max()) {
    66   std::vector<int> axes_cm(ndim);
    67   std::vector<int> dims_cm(ndim);
    69   const auto cm_fn = [ndim](
const int i) { 
return ndim - i - 1; };
    70   for (
int i = 0; i < ndim; ++i) {
    71     axes_cm[i] = cm_fn(axes[cm_fn(i)]);
    72     dims_cm[i] = dims[cm_fn(i)];
    74   auto plan = hptt::create_plan(
    86   if (plan == 
nullptr) {
    93 #endif // CAFFE2_USE_HPTT    95 template <
typename TIndex, 
typename TData>
   102   std::vector<TIndex> Y_dims(ndim);
   103   for (
int i = 0; i < ndim; ++i) {
   104     Y_dims[i] = dims[axes[i]];
   107   int pivot = ndim - 1;
   108   TIndex block_size = 1;
   109   for (; pivot >= 0 && axes[pivot] == pivot; --pivot) {
   110     block_size *= Y_dims[pivot];
   113   const TIndex num_blocks = std::accumulate(
   115       Y_dims.cbegin() + pivot,
   117       std::multiplies<TIndex>());
   118   std::vector<TIndex> X_strides(pivot);
   119   utils::ComputeTransposedStrides<TIndex>(pivot, dims, axes, X_strides.data());
   120   std::vector<TIndex> index(pivot, 0);
   121   for (TIndex Y_index = 0; Y_index < num_blocks; ++Y_index) {
   122     const TIndex X_index = std::inner_product(
   123         X_strides.cbegin(), X_strides.cend(), index.cbegin(), TIndex(0));
   124     if (block_size == 1) {
   125       Y[Y_index] = X[X_index];
   128           Y + block_size * Y_index,
   129           X + block_size * X_index,
   130           block_size * 
sizeof(TData));
   132     utils::IncreaseIndexInDims<TIndex>(pivot, Y_dims.data(), index.data());
   136 template <
typename TIndex, 
typename TData>
   144       std::accumulate(dims, dims + ndim, TIndex(1), std::multiplies<TIndex>());
   148   if (utils::IsIdentityPermutation(ndim, axes)) {
   149     std::memcpy(Y, X, size * 
sizeof(TData));
   152   if (utils::IsBatchTranspose2D(ndim, axes)) {
   153     const TIndex H = dims[ndim - 2];
   154     const TIndex W = dims[ndim - 1];
   155     const TIndex N = size / (H * W);
   156     for (TIndex i = 0; i < N; ++i) {
   157       Transpose2D<TIndex, TData>(H, W, X + i * H * W, Y + i * H * W);
   161   TransposeND<TIndex, TData>(ndim, dims, axes, X, Y);
   164 #ifdef CAFFE2_USE_HPTT   166 #define CAFFE2_SPECIALIZED_TRANSPOSE_IMPL(TIndex, TData)                \   168   void TransposeImpl<TIndex, TData>(                                    \   170       const TIndex* dims,                                               \   174     const TIndex size = std::accumulate(                                \   175         dims, dims + ndim, TIndex(1), std::multiplies<TIndex>());       \   179     if (utils::IsIdentityPermutation(ndim, axes)) {                     \   180       std::memcpy(Y, X, size * sizeof(TData));                          \   183     if (TransposeByHPTT(ndim, dims, axes, X, Y)) {                      \   186     if (utils::IsBatchTranspose2D(ndim, axes)) {                        \   187       const TIndex H = dims[ndim - 2];                                  \   188       const TIndex W = dims[ndim - 1];                                  \   189       const TIndex N = size / (H * W);                                  \   190       for (TIndex i = 0; i < N; ++i) {                                  \   191         Transpose2D<TIndex, TData>(H, W, X + i * H * W, Y + i * H * W); \   195     TransposeND<TIndex, TData>(ndim, dims, axes, X, Y);                 \   197 CAFFE2_SPECIALIZED_TRANSPOSE_IMPL(std::int32_t, 
float)
   198 CAFFE2_SPECIALIZED_TRANSPOSE_IMPL(
std::int64_t, 
float)
   199 CAFFE2_SPECIALIZED_TRANSPOSE_IMPL(
std::int32_t, 
double)
   200 CAFFE2_SPECIALIZED_TRANSPOSE_IMPL(
std::int64_t, 
double)
   201 #undef CAFFE2_SPECIALIZED_TRANSPOSE_IMPL   203 #endif // CAFFE2_USE_HPTT   207 #define CAFFE2_SPECIALIZED_TRANSPOSE(TIndex, TData)       \   209   C10_EXPORT void Transpose<TIndex, TData, CPUContext>(   \   211       const TIndex* dims,                                 \   216     TransposeImpl<TIndex, TData>(ndim, dims, axes, X, Y); \   218 CAFFE2_SPECIALIZED_TRANSPOSE(std::int32_t, 
float)
   219 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int64_t, 
float)
   220 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int32_t, 
double)
   221 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int64_t, 
double)
   222 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int32_t, 
std::int32_t)
   223 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int64_t, 
std::int32_t)
   224 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int32_t, 
std::int64_t)
   225 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int64_t, 
std::int64_t)
   226 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int32_t, 
std::uint8_t)
   227 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int64_t, 
std::uint8_t)
   228 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int32_t, 
std::uint16_t)
   229 CAFFE2_SPECIALIZED_TRANSPOSE(
std::int64_t, 
std::uint16_t)
   230 #undef CAFFE2_SPECIALIZED_TRANSPOSE   232 #define CAFFE2_SPECIALIZED_NCHW2NHWC(T)                       \   234   C10_EXPORT void NCHW2NHWC<T, CPUContext>(                   \   241     const int stride = C * HxW;                               \   242     for (int i = 0; i < N; ++i) {                             \   243       Transpose2D<T>(C, HxW, X + i * stride, Y + i * stride); \   246 CAFFE2_SPECIALIZED_NCHW2NHWC(
float)
   247 #undef CAFFE2_SPECIALIZED_NCHW2NHWC   249 #define CAFFE2_SPECIALIZED_NHWC2NCHW(T)                       \   251   C10_EXPORT void NHWC2NCHW<T, CPUContext>(                   \   258     const int stride = HxW * C;                               \   259     for (int i = 0; i < N; ++i) {                             \   260       Transpose2D<T>(HxW, C, X + i * stride, Y + i * stride); \   263 CAFFE2_SPECIALIZED_NHWC2NCHW(
float)
   264 #undef CAFFE2_SPECIALIZED_NHWC2NCHW 
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...