Caffe2 - C++ API
A deep learning, cross-platform ML framework
rank_loss_op.cc
#include "caffe2/operators/rank_loss_op.h"

namespace caffe2 {

namespace {

// Computes log(1 + exp(x)) in a way that avoids early over-/under-flow
template <class T>
inline T logLogit(T x) {
  static const auto kMinLogDiff = std::log(std::numeric_limits<T>::epsilon());

  if (x < kMinLogDiff) {
    return 0;
  }
  if (x > -kMinLogDiff) {
    return x;
  }
  return std::log(std::exp(x) + 1);
}
} // namespace

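The two cutoffs follow directly from machine precision: for x < log(epsilon), exp(x) is below epsilon, so 1 + exp(x) rounds to 1 and the result rounds to 0; for x > -log(epsilon), log(1 + exp(x)) = x + log(1 + exp(-x)) and the correction term rounds away, so returning x is exact and also avoids overflowing exp(x). A minimal standalone sketch (separate from this file, using a float-only copy of the helper) illustrating the three regimes:

#include <cmath>
#include <cstdio>
#include <limits>

// Float-only copy of the helper above, for illustration.
static float logLogitDemo(float x) {
  static const float kMinLogDiff =
      std::log(std::numeric_limits<float>::epsilon()); // about -15.9
  if (x < kMinLogDiff) {
    return 0.0f; // exp(x) < epsilon: result rounds to 0 anyway
  }
  if (x > -kMinLogDiff) {
    return x; // exp(-x) < epsilon: result rounds to x anyway
  }
  return std::log(std::exp(x) + 1);
}

int main() {
  std::printf("%g\n", logLogitDemo(-20.0f)); // 0; naive formula also gives 0
  std::printf("%g\n", logLogitDemo(100.0f)); // 100; naive exp(100.0f) overflows
  std::printf("%g\n", logLogitDemo(0.5f));   // ~0.974; middle regime
  return 0;
}
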
template <typename T, class Context>
bool PairWiseLossOp<T, Context>::RunOnDevice() {
  auto& X = Input(XVALUE);
  auto& label = Input(LABEL);
  auto* Y = Output(YVALUE);

  int N = X.ndim() > 0 ? X.dim32(0) : 0;
  if (N == 0) {
    Y->Resize(0);
    Y->template mutable_data<T>();
    return true;
  }

  const int32_t* lengths_vec;
  int len_size = 1;
  if (InputSize() > LENGTHS) {
    auto& lengths = Input(LENGTHS);
    CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
    len_size = lengths.size();
    lengths_vec = lengths.template data<int32_t>();
    int len_sum = 0;
    if (len_size > 0) {
      math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
    }
    CAFFE_ENFORCE_EQ(len_sum, N);
  } else {
    lengths_vec = &N;
  }
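  // For example (hypothetical values): with N == 5 and lengths == [2, 3],
  // rows {0, 1} form the first session, rows {2, 3, 4} the second, and
  // pairs are formed only within a session.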

  // a total of len_size sessions
  Y->Resize(len_size);
  auto* Ydata = Y->template mutable_data<T>();

  int D = X.size() / N;
  CAFFE_ENFORCE(
      (label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1));
  CAFFE_ENFORCE_EQ(label.dim32(0), N);
  CAFFE_ENFORCE_EQ(1, D); // only support one class at the moment

  const auto* Xdata = X.template data<T>();
  const auto* labelData = label.template data<T>();
  int offset = 0;
  for (int idx = 0; idx < len_size; ++idx) {
    Ydata[idx] = 0;
    int numPairs = 0;
    for (int i = offset; i < offset + lengths_vec[idx]; ++i) {
      for (int j = offset; j < i; ++j) {
        if (std::abs(labelData[i] - labelData[j]) <
            std::numeric_limits<T>::epsilon()) {
          continue;
        }
        ++numPairs;
        // only use sigmoid loss function at the moment
        auto sign = labelData[i] > labelData[j] ? 1 : -1;
        Ydata[idx] += logLogit(sign * (Xdata[j] - Xdata[i]));
      }
    }
    if (numPairs > 0) {
      Ydata[idx] /= numPairs;
    }
    offset += lengths_vec[idx];
  }
  return true;
}

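Put together, for each session s the loop above evaluates the averaged pairwise logit loss

  Y[s] = (1 / |P_s|) * sum_{(i,j) in P_s} log(1 + exp(sign_ij * (x_j - x_i)))

where P_s is the set of index pairs within session s whose labels differ by at least epsilon, and sign_ij is +1 when label_i > label_j and -1 otherwise. Sessions with no label-distinct pairs yield a loss of 0.
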
template <class T, class Context>
bool PairWiseLossGradientOp<T, Context>::RunOnDevice() {
  auto& X = Input(XVALUE);
  auto& label = Input(LABEL);
  auto& dY = Input(DYVALUE);
  auto* dX = Output(DXVALUE);
  int N = X.ndim() > 0 ? X.dim32(0) : 0;
  CAFFE_ENFORCE_EQ(N, X.size());
  CAFFE_ENFORCE(
      (label.ndim() == 1) || (label.ndim() == 2 && label.dim32(1) == 1));
  CAFFE_ENFORCE_EQ(label.dim32(0), N);
  dX->ResizeLike(X);
  math::Set<T, CPUContext>(
      dX->size(), 0.f, dX->template mutable_data<T>(), &context_);

  if (N == 0) {
    return true;
  }

  const int32_t* lengths_vec;
  int len_size = 1;
  if (InputSize() > LENGTHS) {
    auto& lengths = Input(LENGTHS);
    CAFFE_ENFORCE_EQ(lengths.ndim(), 1);
    len_size = lengths.size();
    lengths_vec = lengths.template data<int32_t>();
    int len_sum = 0;
    if (len_size > 0) {
      math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
    }
    CAFFE_ENFORCE_EQ(len_sum, N);
  } else {
    lengths_vec = &N;
  }

  CAFFE_ENFORCE_EQ(dY.ndim(), 1);
  CAFFE_ENFORCE_EQ(dY.dim32(0), len_size);

  const T* Xdata = X.template data<T>();
  const T* dYdata = dY.template data<T>();
  const T* labelData = label.template data<T>();
  T* dXdata = dX->template mutable_data<T>();
  int offset = 0;
  for (int idx = 0; idx < len_size; ++idx) {
    int numPairs = 0;
    for (int i = offset; i < offset + lengths_vec[idx]; ++i) {
      for (int j = offset; j < i; ++j) {
        if (std::abs(labelData[i] - labelData[j]) <
            std::numeric_limits<T>::epsilon()) {
          continue;
        }
        ++numPairs;
        // only use sigmoid loss function at the moment
        auto sign = labelData[i] > labelData[j] ? 1 : -1;
        auto grad =
            sign * dYdata[idx] / (1 + std::exp(-sign * (Xdata[j] - Xdata[i])));
        dXdata[i] -= grad;
        dXdata[j] += grad;
      }
    }
    if (numPairs > 0) {
      for (int i = offset; i < offset + lengths_vec[idx]; ++i) {
        dXdata[i] /= numPairs;
      }
    }
    offset += lengths_vec[idx];
  }
  return true;
}

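The update mirrors the forward pass: writing z = sign * (x_j - x_i), each pair contributes log(1 + exp(z)) to the loss, and d/dz log(1 + exp(z)) = 1 / (1 + exp(-z)). Since z carries coefficient +sign on x_j and -sign on x_i, chaining through the upstream gradient dY[idx] gives

  grad = sign * dY[idx] / (1 + exp(-sign * (x_j - x_i)))

which is added to dX[j] and subtracted from dX[i], then divided by the session's pair count, matching the averaging in the forward pass.
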
namespace {
REGISTER_CPU_OPERATOR(PairWiseLoss, PairWiseLossOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    PairWiseLossGradient,
    PairWiseLossGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(PairWiseLoss)
    .NumInputs(2, 3)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Operator computes the pairwise loss between all pairs within a batch
using the logit loss function on the difference in scores between pairs.
)DOC")
    .Input(
        0,
        "X",
        "Input blob from the previous layer, which is almost always "
        "the result of a softmax operation; X is a 2D array of size N x 1 "
        "where N is the batch size. For more info: "
        "D. Sculley, Large Scale Learning to Rank. "
        "https://www.eecs.tufts.edu/~dsculley/papers/large-scale-rank.pdf")
    .Input(1, "label", "Blob containing the labels used to compare the input")
    .Input(
        2,
        "lengths",
        "Optional input blob that contains the lengths "
        "of multiple sessions. The summation of this blob must be equal "
        "to the size of blob X. If the lengths blob is provided, the output "
        "blob has the same size as the lengths blob, and the loss "
        "is computed within each session.")
    .Output(0, "Y", "Output blob after the pairwise loss computation");
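For reference, a minimal C++ usage sketch (hypothetical blob names and values; it assumes the classic Caffe2 Workspace/TensorCPU API and omits error handling):

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

void RunPairWiseLossExample() {
  caffe2::Workspace ws;

  // Scores for a batch of 3 examples (one session), shape 3 x 1.
  auto* X = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  X->Resize(3, 1);
  float* xData = X->mutable_data<float>();
  xData[0] = 0.5f; xData[1] = 1.5f; xData[2] = 0.1f;

  // Binary relevance labels, shape 3.
  auto* label = ws.CreateBlob("label")->GetMutable<caffe2::TensorCPU>();
  label->Resize(3);
  float* lData = label->mutable_data<float>();
  lData[0] = 1.f; lData[1] = 0.f; lData[2] = 0.f;

  caffe2::OperatorDef def;
  def.set_type("PairWiseLoss");
  def.add_input("X");
  def.add_input("label");
  def.add_output("Y");

  auto op = caffe2::CreateOperator(def, &ws);
  op->Run();
  // "Y" now holds one averaged pairwise logit loss for the single session.
}
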
OPERATOR_SCHEMA(PairWiseLossGradient).NumInputs(3, 4).NumOutputs(1);

class GetPairWiseLossGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    vector<string> blob_names{I(0), I(1), GO(0)};

    // Add lengths blob if given
    if (def_.input_size() == 3) {
      blob_names.push_back(I(2));
    }
    return SingleGradientDef(
        "PairWiseLossGradient", "", blob_names, vector<string>{GI(0)});
  }
};
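Concretely, for a PairWiseLoss op with inputs (X, label) and output Y, the generated gradient definition looks roughly like the following (a sketch assuming Caffe2's usual "_grad" suffix for gradient blob names):

op {
  type: "PairWiseLossGradient"
  input: "X"
  input: "label"
  input: "Y_grad"
  output: "X_grad"
}

When a lengths blob is present as a third input, it is appended after Y_grad.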
REGISTER_GRADIENT(PairWiseLoss, GetPairWiseLossGradient);

} // namespace
} // namespace caffe2