1 #include "caffe2/operators/resize_op.h"     3 #include "caffe2/utils/cpu_neon.h"     4 #include "caffe2/utils/math.h"     6 #ifdef CAFFE2_USE_MKLDNN     7 #include "caffe2/ideep/operators/operator_fallback_ideep.h"     8 #include "caffe2/ideep/utils/ideep_operator.h"    13 void resizeNearestNCHW2x(
    // resizeNearestNCHW2x: nearest-neighbor upsampling for an exact 2x scale.
    // Each output plane is twice as tall and twice as wide as its input plane;
    // output row y reads input row y / 2 and input column x maps to output
    // column x * 2.
    // NOTE(review): the parameter list and the scalar stores are not visible
    // in this listing; presumably each value is also written to column
    // x * 2 + 1 -- confirm against the full source.
    20   const int output_height = input_height * 2;
    21   const int output_width = input_width * 2;
    22   for (
int n = 0; n < batch_size; ++n) {
    23     for (
int c = 0; c < num_channels; ++c) {
    24       for (
int y = 0; y < output_height; ++y) {
    25         const int in_y = y / 2;
    27 #if defined(__ARM_NEON__) || defined(__ARM_NEON)    28         int vecW = (input_width / 4) * 4; 
        // NEON path: load four consecutive inputs and store them interleaved
        // with themselves, so each input value appears twice in the output row.
    30         for (; x < vecW; x += 4) {
    32           float32x4_t v = vld1q_f32(input + in_y * input_width + x);
    33           const int oidx = output_width * y + x * 2;
    34           float32x4x2_t v2 = {{v, v}};
    36           vst2q_f32(output + oidx + 0, v2);
        // Scalar tail: columns left over after the 4-wide loop (x continues
        // from vecW up to input_width).
    40         for (; x < input_width; ++x) {
    41           const float v = input[in_y * input_width + x];
    42           const int oidx = output_width * y + x * 2;
        // Loop over every input column one element at a time.
        // NOTE(review): presumably the non-NEON branch -- the #else line is
        // not visible in this listing.
    47         for (
int x = 0; x < input_width; ++x) {
    48           const float v = input[in_y * input_width + x];
    49           const int oidx = output_width * y + x * 2;
      // Advance both pointers by one plane: the next channel (or next batch item).
    55       input += input_height * input_width;
    56       output += output_height * output_width;
    // Nearest-neighbor resize of Input(0), whose four dimensions are read as
    // (batch, channels, height, width). Every output element copies the input
    // element at (y / height_scale_, x / width_scale_), clamped to the last
    // valid input row/column.
    62 bool ResizeNearestOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
    63   const auto& X = Input(0);
    65   const int batch_size = X.dim32(0),
    66             num_channels = X.dim32(1),
    67             input_height = X.dim32(2),
    68             input_width = X.dim32(3);
  // An optional second input overrides the scale members; it must be a 1-D
  // tensor holding exactly two floats, ordered [height_scale, width_scale].
    69   if (InputSize() == 2) {
    70     const auto& scales = Input(1);
    71     CAFFE_ENFORCE_EQ(scales.dim(), 1);
    72     CAFFE_ENFORCE_EQ(scales.numel(), 2);
    73     const float* scales_data = scales.data<
float>();
    74     height_scale_ = scales_data[0];
    75     width_scale_ = scales_data[1];
  // Output extents: input extent times scale, stored in an int.
    78   int output_width = input_width * width_scale_;
    79   int output_height = input_height * height_scale_;
    82       {batch_size, num_channels, output_height, output_width},
    85   const float* Xdata = X.data<
float>();
    86   float* Ydata = Y->template mutable_data<float>();
  // Exact 2x scaling in both directions takes a dedicated path.
  // NOTE(review): the callee name is not visible in this listing; presumably
  // resizeNearestNCHW2x -- confirm against the full source.
    89   if (width_scale_ == 2.0 && height_scale_ == 2.0) {
    91         batch_size, num_channels, input_height, input_width, Xdata, Ydata);
  // General path: walk the output one (n, c) plane at a time.
    95   for (
int n = 0; n < batch_size; ++n) {
    96     for (
int c = 0; c < num_channels; ++c) {
    97       for (
int y = 0; y < output_height; ++y) {
        // Source row for this output row, clamped to input_height - 1.
    98         const int in_y = std::min((
int)(y / height_scale_), (input_height - 1));
    99         for (
int x = 0; x < output_width; ++x) {
          // Source column for this output column, clamped to input_width - 1.
   100           const int in_x = std::min((
int)(x / width_scale_), (input_width - 1));
   101           Ydata[output_width * y + x] = Xdata[input_width * in_y + in_x];
      // Step both pointers to the next plane.
   104       Xdata += input_height * input_width;
   105       Ydata += output_width * output_height;
   // Nearest-neighbor resize of Input(0), whose four dimensions are read as
   // (batch, height, width, channels). For every output pixel the source pixel
   // is (y / height_scale_, x / width_scale_), clamped to the last valid input
   // row/column, and num_channels floats are transferred for that pixel.
   113 bool ResizeNearestOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
   114   const auto& X = Input(0);
   116   const int batch_size = X.dim32(0), input_height = X.dim32(1),
   117             input_width = X.dim32(2), num_channels = X.dim32(3);
  // An optional second input overrides the scale members; it must be a 1-D
  // tensor holding exactly two floats, ordered [height_scale, width_scale].
   118   if (InputSize() == 2) {
   119     const auto& scales = Input(1);
   120     CAFFE_ENFORCE_EQ(scales.dim(), 1);
   121     CAFFE_ENFORCE_EQ(scales.numel(), 2);
   122     const float* scales_data = scales.data<
float>();
   123     height_scale_ = scales_data[0];
   124     width_scale_ = scales_data[1];
  // Output extents: input extent times scale, stored in an int.
   127   int output_width = input_width * width_scale_;
   128   int output_height = input_height * height_scale_;
  // Number of floats in one image row (width * channels) for each tensor.
   130   const int output_width_stride = output_width * num_channels;
   131   const int input_width_stride = input_width * num_channels;
   135       {batch_size, output_height, output_width, num_channels},
   138   const float* Xdata = X.data<
float>();
   139   float* Ydata = Y->template mutable_data<float>();
   141   for (
int n = 0; n < batch_size; ++n) {
   142     for (
int y = 0; y < output_height; ++y) {
      // Source row for this output row, clamped to input_height - 1.
   143       const int in_y = std::min((
int)(y / height_scale_), (input_height - 1));
   144       for (
int x = 0; x < output_width; ++x) {
        // Source column for this output column, clamped to input_width - 1.
   145         const int in_x = std::min((
int)(x / width_scale_), (input_width - 1));
        // Arguments: destination pixel, source pixel, byte count covering all
        // channels of one pixel.
        // NOTE(review): the callee is not visible in this listing; presumably
        // a memcpy-style copy -- confirm against the full source.
   147             &Ydata[output_width_stride * y + num_channels * x],
   148             &Xdata[input_width_stride * in_y + num_channels * in_x],
   149             num_channels * 
sizeof(
float));
    // Step both pointers to the next image in the batch.
   152     Xdata += input_height * input_width_stride;
   153     Ydata += output_height * output_width_stride;
   // Entry point: forwards to the NHWC or NCHW implementation and returns its
   // result. The remaining path raises via CAFFE_THROW, reporting order_.
   // NOTE(review): the switch expression and default label are not visible in
   // this listing; presumably the switch is on order_ -- confirm.
   160 bool ResizeNearestOp<float, CPUContext>::RunOnDevice() {
   162     case StorageOrder::NHWC:
   163       return RunOnDeviceWithOrderNHWC();
   164     case StorageOrder::NCHW:
   165       return RunOnDeviceWithOrderNCHW();
   167       CAFFE_THROW(
"Unknown Storage order: ", order_);
   // Backward pass for the NCHW layout. Input(0) is dY and Input(1) is X. dX is
   // zero-filled, then every dY element is added to the dX element at
   // (y / height_scale_, x / width_scale_), clamped to dX's last row/column.
   // Naming note: the "input_*" extents are read from dY and the "output_*"
   // extents are read from X.
   172 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
   173   const auto& dY = Input(0);
   174   const auto& X = Input(1);
   176   const auto inputDims = dY.sizes();
  // dY must be 4-dimensional.
   177   CAFFE_ENFORCE_EQ(4, inputDims.size());
   178   const int batch_size = dY.dim32(0),
   179             num_channels = dY.dim32(1),
   180             input_height = dY.dim32(2),
   181             input_width = dY.dim32(3);
   182   const int output_height = X.dim32(2);
   183   const int output_width = X.dim32(3);
  // An optional third input overrides the scale members; it must be a 1-D
  // tensor holding exactly two floats, ordered [height_scale, width_scale].
   184   if (InputSize() == 3) {
   185     const auto& scales = Input(2);
   186     CAFFE_ENFORCE_EQ(scales.dim(), 1);
   187     CAFFE_ENFORCE_EQ(scales.numel(), 2);
   188     const float* scales_data = scales.data<
float>();
   189     height_scale_ = scales_data[0];
   190     width_scale_ = scales_data[1];
   194       {batch_size, num_channels, output_height, output_width},
  // Zero dX first: the loops below accumulate into it with +=.
   196   math::Set<float, CPUContext>(
   197       dX->numel(), 0.0f, dX->template mutable_data<float>(), &context_);
   199   const float* dYdata = dY.data<
float>();
   200   float* dXdata = dX->template mutable_data<float>();
   202   for (
int n = 0; n < batch_size; ++n) {
   203     for (
int c = 0; c < num_channels; ++c) {
   204       for (
int y = 0; y < input_height; ++y) {
        // dX row that receives this dY row, clamped to output_height - 1.
   205         const int out_y = std::min((
int)(y / height_scale_),
   206                                    (output_height - 1));
   207         for (
int x = 0; x < input_width; ++x) {
          // dX column that receives this dY column.
          // NOTE(review): the second std::min argument is not visible in this
          // listing; presumably output_width - 1 -- confirm.
   208           const int out_x = std::min((
int)(x / width_scale_),
   210           dXdata[output_width * out_y + out_x] += dYdata[input_width * y + x];
      // Step both pointers to the next (n, c) plane.
   213       dYdata += input_height * input_width;
   214       dXdata += output_height * output_width;
   // Backward pass for the NHWC layout. Input(0) is dY and Input(1) is X. dX is
   // zero-filled, then for every dY pixel all of its channels are added to the
   // dX pixel at (y / height_scale_, x / width_scale_), clamped to dX's last
   // row/column.
   // Naming note: the "input_*" extents are read from dY and the "output_*"
   // extents are read from X.
   222 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
   223   const auto& dY = Input(0);
   224   const auto& X = Input(1);
   226   const auto inputDims = dY.sizes();
  // dY must be 4-dimensional.
   227   CAFFE_ENFORCE_EQ(4, inputDims.size());
   228   const int batch_size = dY.dim32(0), input_height = dY.dim32(1),
   229             input_width = dY.dim32(2), num_channels = dY.dim32(3);
   230   const int output_height = X.dim32(1);
   231   const int output_width = X.dim32(2);
  // An optional third input overrides the scale members; it must be a 1-D
  // tensor holding exactly two floats, ordered [height_scale, width_scale].
   232   if (InputSize() == 3) {
   233     const auto& scales = Input(2);
   234     CAFFE_ENFORCE_EQ(scales.dim(), 1);
   235     CAFFE_ENFORCE_EQ(scales.numel(), 2);
   236     const float* scales_data = scales.data<
float>();
   237     height_scale_ = scales_data[0];
   238     width_scale_ = scales_data[1];
   242       {batch_size, output_height, output_width, num_channels},
  // Zero dX first: the loops below accumulate into it with +=.
   244   math::Set<float, CPUContext>(
   245       dX->numel(), 0.0f, dX->template mutable_data<float>(), &context_);
  // Number of floats in one image row (width * channels) for each tensor.
   247   const int output_width_stride = output_width * num_channels;
   248   const int input_width_stride = input_width * num_channels;
   250   const float* dYdata = dY.data<
float>();
   251   float* dXdata = dX->template mutable_data<float>();
   253   for (
int n = 0; n < batch_size; ++n) {
   254     for (
int y = 0; y < input_height; ++y) {
      // dX row that receives this dY row, clamped to output_height - 1.
   255       const int out_y = std::min((
int)(y / height_scale_), (output_height - 1));
   256       for (
int x = 0; x < input_width; ++x) {
        // dX column that receives this dY column, clamped to output_width - 1.
   257         const int out_x = std::min((
int)(x / width_scale_), (output_width - 1));
        // Address of channel 0 of the destination (dX) pixel.
        // NOTE(review): the declaration this expression initialises is not
        // visible in this listing; presumably dXdata_c0 -- confirm.
   260             dXdata + output_width_stride * out_y + num_channels * out_x;
        // Address of channel 0 of the source (dY) pixel.
   261         const float* dYdata_c0 =
   262             dYdata + input_width_stride * y + num_channels * x;
        // Accumulate every channel of the dY pixel into the dX pixel.
   264         for (
int c = 0; c < num_channels; ++c) {
   265           dXdata_c0[c] += dYdata_c0[c];
    // Step both pointers to the next image in the batch.
   269     dYdata += input_height * input_width_stride;
   270     dXdata += output_height * output_width_stride;
   // Entry point: forwards to the NHWC or NCHW gradient implementation and
   // returns its result. The remaining path raises via CAFFE_THROW, reporting
   // order_.
   // NOTE(review): the switch expression and default label are not visible in
   // this listing; presumably the switch is on order_ -- confirm.
   277 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDevice() {
   279     case StorageOrder::NHWC:
   280       return RunOnDeviceWithOrderNHWC();
   281     case StorageOrder::NCHW:
   282       return RunOnDeviceWithOrderNCHW();
   284       CAFFE_THROW(
"Unknown Storage order: ", order_);
   // Registers the float/CPUContext forward op under the name ResizeNearest and
   // the float/CPUContext gradient op under the name ResizeNearestGradient.
   287 REGISTER_CPU_OPERATOR(ResizeNearest, ResizeNearestOp<float, CPUContext>);
   288 REGISTER_CPU_GRADIENT_OPERATOR(
   289     ResizeNearestGradient,
   290     ResizeNearestGradientOp<float, CPUContext>);
   // Under CAFFE2_USE_MKLDNN, an IDEEP operator is also registered that wraps
   // the CPU forward op in IDEEPFallbackOp.
   // NOTE(review): the registered operator name is not visible in this
   // listing.
   292 #ifdef CAFFE2_USE_MKLDNN   293 REGISTER_IDEEP_OPERATOR(
   295     IDEEPFallbackOp<ResizeNearestOp<float, CPUContext>>);
   // Schema for ResizeNearest: declares the width_scale / height_scale
   // arguments, the X input, the optional two-element scales tensor and the Y
   // output, and inherits the ONNX "Upsample" schema. The documented size
   // formula matches the implementation, which derives both output extents
   // from the corresponding input extents.
   299 OPERATOR_SCHEMA(ResizeNearest)
   302     .Arg(
"width_scale", 
"Scale along width dimension")
   303     .Arg(
"height_scale", 
"Scale along height dimension")
   305 Resizes the spatial dimensions of the input using nearest neighbor   306 interpolation. The `width_scale` and `height_scale` arguments   307 control the size of the output, which is given by:   308 output_width = floor(input_width * width_scale)   309 output_height = floor(input_height * height_scale)   311     .Input(0, "X", 
"Input tensor")
   315         "1D, 2-element, Scales tensor, [height_scale, width_scale]")
   316     .Output(0, 
"Y", 
"Output tensor")
   317     .InheritOnnxSchema(
"Upsample");
   // Schema for ResizeNearestGradient: declares the same width_scale and
   // height_scale arguments as the forward operator.
   // NOTE(review): input/output count declarations are not visible in this
   // listing.
   320 GRADIENT_OPERATOR_SCHEMA(ResizeNearestGradient)
   323     .Arg(
"width_scale", 
"Scale along width dimension")
   324     .Arg(
"height_scale", 
"Scale along height dimension");
   // Gradient maker body: reuses GradientMakerBase's constructors and builds
   // the gradient operator definition for the forward op.
   327   using GradientMakerBase::GradientMakerBase;
   // When the forward op definition has two inputs, the gradient op receives
   // GO(0), I(0) and I(1); otherwise it receives only GO(0) and I(0). Either
   // way the single output is GI(0).
   // NOTE(review): the calls that wrap these argument lists are not visible
   // in this listing; presumably SingleGradientDef -- confirm.
   328   vector<OperatorDef> GetGradientDefs()
 override {
   329     if (def_.input().size() == 2) {
   333           "ResizeNearestGradient",
   335           vector<string>{GO(0), I(0), I(1)},
   336           vector<string>{GI(0)});
   340                              vector<string>{GO(0), I(0)},
   341                              vector<string>{GI(0)});
 
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
 
static vector< OperatorDef > SingleGradientDef(const Args &...args)
A helper function that creates a single operator def, which is usually the case for many ...