Caffe2 - C++ API
A deep learning, cross-platform ML framework
spatial_softmax_with_loss_op.cc
#include "spatial_softmax_with_loss_op.h"
#include "softmax_shared.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(
    SpatialSoftmaxWithLoss,
    SpatialSoftmaxWithLossOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    SpatialSoftmaxWithLossGradient,
    SpatialSoftmaxWithLossGradientOp<float, CPUContext>);

// Input: X (logits), T (labels); Output: P (probs), loss (averaged)
OPERATOR_SCHEMA(SpatialSoftmaxWithLoss)
    .NumInputs(2, 3)
    .NumOutputs(2)
    .TensorInferenceFunction([](const OperatorDef& def,
                                const vector<TensorShape>& in) {
      ArgumentHelper helper(def);
      vector<TensorShape> out(2);

      auto logits = in[0]; // Tensor with shape [N, num_classes, H, W]
      auto labels = in[1]; // Tensor with shape [N, H, W]
      auto batch_size = logits.dims().Get(0);
      auto num_classes = logits.dims().Get(1);

      CAFFE_ENFORCE_EQ(logits.dims_size(), 4);
      CAFFE_ENFORCE_EQ(labels.dims_size(), 3);
      out[0].set_data_type(logits.data_type());
      out[0].add_dims(batch_size);
      out[0].add_dims(num_classes);
      out[0].add_dims(in[0].dims(2));
      out[0].add_dims(in[0].dims(3));
      // Output 1 (the loss) is a scalar, so no dims are added for out[1]
      return out;
    })
    .SetDoc(R"DOC(
Combined Spatial Softmax and Cross-Entropy loss operator.
Similar to SoftmaxWithLoss, this operator computes the softmax normalized
values independently at each spatial location of the given input, after which
cross-entropy loss is computed. This operator is numerically more stable than
separate Softmax and CrossEntropy ops. The inputs are a 4-D tensor of logits
with shape (N x D x H x W) and a tensor of ground-truth labels with shape
(N x H x W).
Output is a tensor with the probability for each label at each pixel for each
example (N x D x H x W) and the averaged loss (scalar).
For spatial softmax, weighting is by the x,y position of the input.
)DOC")
    .Input(0, "logits", "Unscaled log probabilities (N x D x H x W)")
    .Input(1, "labels", "Ground truth labels (N x H x W)")
    .Input(
        2,
        "weight_tensor",
        "Optional blob to be used to weight the samples for the loss. With "
        "spatial set, weighting is by the x,y position of the input")
    .Output(0, "softmax", "Tensor with softmax probabilities (N x D x H x W)")
    .Output(1, "loss", "Average loss");
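
// Shape summary, restating the schema above:
//   logits X:      (N, D, H, W) float
//   labels T:      (N, H, W) int, with -1 marking "don't care" pixels
//   weight_tensor: (N, H, W) float, optional
//   softmax P:     (N, D, H, W) float
//   loss:          scalar float
//
// A minimal self-contained sketch of the per-pixel loss computed below,
// useful as a reference while reading the kernels. Illustrative only:
// `PixelXentReference` is a hypothetical helper, not part of the original
// file or of the Caffe2 API, and it assumes the D class scores for one
// (n, y, x) location have been gathered contiguously (in the NCHW layout
// used below they are strided by H * W).
static float PixelXentReference(const float* logits, int D, int label) {
  // -log(softmax(logits)[label]), in the same max-subtracted,
  // numerically stable form the kernel below uses.
  float max_val = logits[0];
  for (int c = 1; c < D; ++c) {
    max_val = std::max(max_val, logits[c]);
  }
  float expsum = 0.0f;
  for (int c = 0; c < D; ++c) {
    expsum += std::exp(logits[c] - max_val);
  }
  // log p_label = (x_label - max) - log(sum_c exp(x_c - max))
  return -((logits[label] - max_val) - std::log(expsum));
}
// The operator's reported loss is the weighted average of this quantity
// over all pixels whose label is not "don't care".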

// Input: X, T, [weight_tensor], P, d_avg_loss; Output: dX
OPERATOR_SCHEMA(SpatialSoftmaxWithLossGradient).NumOutputs(1);

#define DONT_CARE (-1)

template <>
bool SpatialSoftmaxWithLossOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0); // Logits
  auto& T = Input(1); // Labels / targets

  int N, D;
  N = X.dim32(0);
  D = X.dim32(1);
  auto* P =
      Output(0, X.sizes(), at::dtype<float>()); // Probabilities from softmax

  if (!sum_multiplier_.defined()) {
    sum_multiplier_ = caffe2::empty({D}, at::dtype<float>().device(CPU));
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  } else if (sum_multiplier_.numel() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }

  float* Pdata = P->template mutable_data<float>();
  const float* weights = (InputSize() > 2 ? Input(2).data<float>() : nullptr);
  CAFFE_ENFORCE_EQ(X.dim(), 4);
  CAFFE_ENFORCE_EQ(T.dim(), 3);
  CAFFE_ENFORCE_EQ(T.dim32(0), N);

  int H = X.dim32(2);
  int W = X.dim32(3);

  const float* Xdata = X.data<float>();

  for (int i = 0; i < N; ++i) {
    for (int y = 0; y < H; ++y) {
      for (int x = 0; x < W; ++x) {
        // Subtract the max in each cell for numerical stability
        float max_val = -1e20f;
        for (int c = 0; c < D; ++c) {
          // TODO: optimize
          int idx = i * (H * W * D) + c * (H * W) + y * W + x;
          max_val = std::max(max_val, Xdata[idx]);
        }

        // Exponentiate
        float expsum = 0.0f;
        for (int c = 0; c < D; ++c) {
          int idx = i * (H * W * D) + c * (H * W) + y * W + x;
          float expx = std::exp(Xdata[idx] - max_val);
          Pdata[idx] = expx;
          expsum += expx;
        }

        // Normalize
        for (int c = 0; c < D; ++c) {
          int idx = i * (H * W * D) + c * (H * W) + y * W + x;
          Pdata[idx] /= expsum;
        }
      }
    }
  }

  // Compute the average cross-entropy loss
  auto* avg_loss =
      Output(1, vector<int64_t>(), at::dtype<float>()); // Average loss
  float* avg_loss_data = avg_loss->template mutable_data<float>();
  const int* label_data = T.data<int>();

  float sum_label_xent = 0.0f;
  float total_weight = 0.0f;

  for (int y = 0; y < H; y++) {
    for (int x = 0; x < W; x++) {
      for (int i = 0; i < N; i++) {
        int label_idx = i * H * W + y * W + x;
        int label = label_data[label_idx];
        if (label != DONT_CARE) {
          CAFFE_ENFORCE(
              label < D && label >= 0,
              "Label seems incorrect: label value larger than number "
              "of classes: ",
              label,
              " vs ",
              D);
          int idx = i * (H * W * D) + label * (H * W) + y * W + x;
          float w = weights ? weights[label_idx] : 1.0f;
          total_weight += w;
          sum_label_xent += -std::log(std::max(Pdata[idx], 1e-20f)) * w;
        }
      }
    }
  }
  if (total_weight != 0.0f) {
    *avg_loss_data = sum_label_xent / total_weight;
  } else {
    *avg_loss_data = 0.0f;
  }
  return true;
}

template <>
bool SpatialSoftmaxWithLossGradientOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0); // Logits
  auto& T = Input(1); // Labels / targets
  // Input(2) is weights, if given
  auto& P = Input(InputSize() - 2); // Probabilities from softmax
  auto& d_avg_loss = Input(InputSize() - 1); // Gradient w.r.t. avg loss

  const float* weights = (InputSize() > 4 ? Input(2).data<float>() : nullptr);
  int N, D;
  N = X.dim32(0);
  D = X.dim32(1);
  auto* dX = Output(0, X.sizes(), at::dtype<float>());
  CAFFE_ENFORCE_EQ(T.dim32(0), N);
  CAFFE_ENFORCE_EQ(X.dim(), 4);
  CAFFE_ENFORCE_EQ(T.dim(), 3);

  int H = X.dim32(2);
  int W = X.dim32(3);

  const float* Pdata = P.data<float>();
  float* dX_data = dX->template mutable_data<float>();
  const int* label_data = T.data<int>();

  // Copy the softmax probabilities into dX: for every class except the
  // one corresponding to the correct label, the gradient equals p_j, the
  // probability under softmax.
  context_.CopyFromCPU<float>(P.numel(), Pdata, dX_data);

  float total_weight = 0.0f;
  for (int y = 0; y < H; ++y) {
    for (int x = 0; x < W; ++x) {
      for (int i = 0; i < N; ++i) {
        int label_idx = i * H * W + y * W + x;
        int label = label_data[label_idx];

        if (label != DONT_CARE) {
          int idx = i * (H * W * D) + label * (H * W) + y * W + x;

          dX_data[idx] -= 1.0f;

          if (weights != nullptr) {
            float weight = weights[label_idx];
            for (int c = 0; c < D; ++c) {
              int k = i * (H * W * D) + c * (H * W) + y * W + x;
              dX_data[k] *= weight;
            }
            total_weight += weight;
          } else {
            total_weight += 1.0f;
          }
        } else {
          // Zero the gradient at coordinates marked "don't care"
          for (int c = 0; c < D; ++c) {
            int idx = i * (H * W * D) + c * (H * W) + y * W + x;
            dX_data[idx] = 0;
          }
        }
      }
    }
  }

  if (total_weight > 0) {
    math::Scale<float, float, CPUContext>(
        dX->numel(),
        scale_ / total_weight,
        dX->data<float>(),
        dX_data,
        &context_);
  }
  math::Scale<float, float, CPUContext>(
      dX->numel(),
      d_avg_loss.data<float>(),
      dX->data<float>(),
      dX->template mutable_data<float>(),
      &context_);
  return true;
}
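
// Gradient sanity sketch: at one pixel with softmax probabilities p_c and
// label t, d(-log p_t)/dx_c = p_c - [c == t], which is exactly the P-copy
// plus the "-1 at the label" adjustment performed above (before the weight
// and d_avg_loss scaling). Illustrative only: `PixelGradReference` is a
// hypothetical helper, not part of the original file, and it assumes the
// D probabilities for one (n, y, x) location are contiguous.
static void PixelGradReference(
    const float* probs, // D softmax probabilities for one (n, y, x)
    int D,
    int label,
    float* grad) {
  for (int c = 0; c < D; ++c) {
    grad[c] = probs[c] - (c == label ? 1.0f : 0.0f);
  }
}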

namespace {
class GetSoftmaxWithLossGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    vector<string> blob_names{I(0), I(1), O(0), GO(1)};

    // Add the weight blob, if given
    if (def_.input_size() == 3) {
      blob_names.emplace(blob_names.begin() + 2, I(2));
    }
    return SingleGradientDef(
        "SpatialSoftmaxWithLossGradient",
        "",
        blob_names,
        vector<string>{GI(0)});
  }
};
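
// For reference, the gradient op's inputs as wired above:
//   without weights: X, T, P, d(avg_loss)                  (4 inputs)
//   with weights:    X, T, weight_tensor, P, d(avg_loss)   (5 inputs)
// This matches the InputSize()-relative indexing in
// SpatialSoftmaxWithLossGradientOp::RunOnDevice, where P is always
// Input(InputSize() - 2) and d_avg_loss is Input(InputSize() - 1).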

REGISTER_GRADIENT(SpatialSoftmaxWithLoss, GetSoftmaxWithLossGradient);
} // namespace
} // namespace caffe2