Caffe2 - C++ API
A deep learning, cross-platform ML framework
reduction_ops.cc
#include "caffe2/operators/reduction_ops.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>);

REGISTER_CPU_OPERATOR(
    SumElementsGradient,
    SumElementsGradientOp<float, CPUContext>);

REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    RowwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    ColwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, false>);
REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>);

OPERATOR_SCHEMA(SumElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the elements of the input tensor.")
    .Arg("average", "If true, averages the elements (divides the sum by the element count) instead of summing them.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");

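For orientation, here is a minimal sketch of driving this operator from C++ (illustrative only; the workspace setup, function name, and blob names are assumptions, not part of this file):

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

// Sketch: run SumElements on a 4-element tensor.
// X = [1, 2, 3, 4] -> sum = 10; with average=true the result would be 2.5.
void RunSumElementsExample() {
  caffe2::Workspace ws;
  auto* X = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  X->Resize(4);
  float* data = X->mutable_data<float>();
  for (int i = 0; i < 4; ++i) {
    data[i] = static_cast<float>(i + 1);
  }

  caffe2::OperatorDef def;
  def.set_type("SumElements");
  def.add_input("X");
  def.add_output("sum");

  auto op = caffe2::CreateOperator(def, &ws);
  op->Run();  // The "sum" blob now holds the scalar 10.
}
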
OPERATOR_SCHEMA(SumSqrElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the squares of the elements of the input tensor.")
    .Arg("average", "If true, averages the squared elements instead of summing them.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum of squares");

OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1);

class GetSumElementsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SumElementsGradient",
        "",
        vector<string>{I(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(SumElements, GetSumElementsGradient);

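In this gradient maker, I(0) is the forward input X, GO(0) is the gradient flowing into the forward output, and GI(0) is the gradient to be produced for X. Schematically, the generated def wires up as follows (an illustrative reading, not additional code):

// SumElementsGradient(inputs: {X, d_sum}, outputs: {dX})
// X is only needed for its shape (and element count when average is set).
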
OPERATOR_SCHEMA(RowwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute row-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute rowwise-max.")
    .Output(0, "Y", "batch_size x M rowwise-max results matrix.");

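Concretely, RowwiseMax reduces over the last axis of each batch entry (an illustrative example, not from the source):

// X (batch_size=1, M=2, N=3):
//   [[1, 5, 2],
//    [7, 0, 3]]
// RowwiseMax(X) -> Y (1 x 2): [5, 7]
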
OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1);

class GetRowwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "RowwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient);

OPERATOR_SCHEMA(ColwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute column-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute colwise-max.")
    .Output(0, "Y", "batch_size x N colwise-max results matrix.");

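By analogy with the row-wise case, ColwiseMax reduces over the middle axis (again an illustrative example):

// X (batch_size=1, M=2, N=3):
//   [[1, 5, 2],
//    [7, 0, 3]]
// ColwiseMax(X) -> Y (1 x 3): [7, 5, 3]
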
OPERATOR_SCHEMA(ColwiseMaxGradient).NumInputs(3).NumOutputs(1);

class GetColwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ColwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient);

template <typename T, class Context>
bool SumElementsGradientOp<T, Context>::RunOnDevice()
// TODO: T21635077 fix float-divide-by-zero undefined behavior
#if defined(__has_feature)
#if __has_feature(__address_sanitizer__)
    __attribute__((__no_sanitize__("float-divide-by-zero")))
#endif
#endif
{
  auto& X = Input(0);
  TensorCPU sum_grad = TensorCPU(Input(1));
  auto* dX = Output(0);
  dX->ResizeLike(X);
  DCHECK_EQ(sum_grad.size(), 1);
  // Broadcast the scalar upstream gradient to every element of dX,
  // dividing by the element count when the forward pass averaged.
  math::Set<T, Context>(
      dX->size(),
      static_cast<T>(sum_grad.data<T>()[0] * (average_ ? 1.0 / X.size() : 1)),
      dX->template mutable_data<T>(),
      &context_);
  return true;
}

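In other words, for an input with n elements the kernel sets every entry of dX to dY, or to dY / n when average is set. A worked instance (illustrative):

// X has 4 elements, upstream gradient d_sum = 1.0:
//   average = false -> dX = [1.0, 1.0, 1.0, 1.0]
//   average = true  -> dX = [0.25, 0.25, 0.25, 0.25]
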
template <typename T, class Context, bool ROWWISE>
bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);

  auto* dX = Output(0);
  dX->ResizeLike(X);

  CAFFE_ENFORCE_EQ(X.ndim(), 3);

  const int batch_size = X.dim32(0);
  const int M = X.dim32(1);
  const int N = X.dim32(2);

  const T* Xdata = X.template data<T>();
  const T* Ydata = Y.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dXdata = dX->template mutable_data<T>();

  const int input_size = M * N;
  for (int i = 0; i < batch_size; ++i) {
    const T* Xdata_i = Xdata + i * input_size;
    T* dXdata_i = dXdata + i * input_size;
    if (ROWWISE) {
      const T* Ydata_i = Ydata + i * M;
      const T* dYdata_i = dYdata + i * M;
      for (int m = 0; m < M; ++m) {
        const T* Xdata_m = Xdata_i + m * N;
        T* dXdata_m = dXdata_i + m * N;
        for (int n = 0; n < N; ++n) {
          // Route the row's gradient to the element(s) equal to the row max.
          if (Xdata_m[n] == Ydata_i[m]) {
            dXdata_m[n] = dYdata_i[m];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    } else {
      const T* Ydata_i = Ydata + i * N;
      const T* dYdata_i = dYdata + i * N;
      for (int n = 0; n < N; ++n) {
        for (int m = 0; m < M; ++m) {
          const T* Xdata_m = Xdata_i + m * N;
          T* dXdata_m = dXdata_i + m * N;
          // Route the column's gradient to the element(s) equal to the column max.
          if (Xdata_m[n] == Ydata_i[n]) {
            dXdata_m[n] = dYdata_i[n];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    }
  }

  return true;
}
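
Note the equality test against Y: if several elements tie for the max, each tied element receives the full upstream gradient rather than a share of it. An illustrative trace:

// Rowwise case, one row X_m = [3, 7, 7] with Y_m = 7 and dY_m = 1.0:
//   dX_m = [0.0, 1.0, 1.0]  // both tied elements get the whole gradient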

} // namespace caffe2