// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// rank_loss_op.cc
1 #include "caffe2/operators/rank_loss_op.h"
2 
3 namespace caffe2 {
4 
namespace {

// Computes log(1 + exp(x)) in a way that avoids early over-/under-flow.
//
// For x far below log(epsilon) the result underflows to 0; for x far above
// -log(epsilon) the "1 +" is absorbed and the result is x itself. In the
// middle band, std::log1p(std::exp(x)) is used instead of
// std::log(std::exp(x) + 1) because log1p stays accurate when exp(x) is
// tiny relative to 1.
template <class T>
inline T logLogit(T x) {
  static const auto kMinLogDiff = std::log(std::numeric_limits<T>::epsilon());

  if (x < kMinLogDiff) {
    return 0;
  }
  if (x > -kMinLogDiff) {
    return x;
  }
  return std::log1p(std::exp(x));
}
} // namespace
21 
22 template <typename T, class Context>
23 bool PairWiseLossOp<T, Context>::RunOnDevice() {
24  auto& X = Input(XVALUE);
25  auto& label = Input(LABEL);
26 
27  int N = X.dim() > 0 ? X.dim32(0) : 0;
28  if (N == 0) {
29  // Set correct data type for output
30  Output(YVALUE, {0}, at::dtype<T>());
31  return true;
32  }
33 
34  const int32_t* lengths_vec;
35  int len_size = 1;
36  if (InputSize() > LENGTHS) {
37  auto& lengths = Input(LENGTHS);
38  CAFFE_ENFORCE_EQ(lengths.dim(), 1);
39  len_size = lengths.numel();
40  lengths_vec = lengths.template data<int32_t>();
41  int len_sum = 0;
42  if (len_size > 0) {
43  math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
44  }
45  CAFFE_ENFORCE_EQ(len_sum, N);
46  } else {
47  lengths_vec = &N;
48  }
49 
50  // a total of len_size sessions
51  auto* Y = Output(YVALUE, {len_size}, at::dtype<T>());
52  auto* Ydata = Y->template mutable_data<T>();
53 
54  int D = X.numel() / N;
55  CAFFE_ENFORCE(
56  (label.dim() == 1) || (label.dim() == 2 && label.dim32(1) == 1));
57  CAFFE_ENFORCE_EQ(label.dim32(0), N);
58  CAFFE_ENFORCE_EQ(1, D); // only support one class at the moment
59 
60  const auto* Xdata = X.template data<T>();
61  const auto* labelData = label.template data<T>();
62  int offset = 0;
63  for (int idx = 0; idx < len_size; ++idx) {
64  Ydata[idx] = 0;
65  int numPairs = 0;
66  for (int i = offset; i < offset + lengths_vec[idx]; ++i) {
67  for (int j = offset; j < i; ++j) {
68  if (std::abs(labelData[i] - labelData[j]) <
69  std::numeric_limits<T>::epsilon()) {
70  continue;
71  }
72  ++numPairs;
73  // only use sigmoid loss function at the moment
74  auto sign = labelData[i] > labelData[j] ? 1 : -1;
75  Ydata[idx] += logLogit(sign * (Xdata[j] - Xdata[i]));
76  }
77  }
78  if (numPairs > 0) {
79  Ydata[idx] /= numPairs;
80  }
81  offset += lengths_vec[idx];
82  }
83  return true;
84 }
85 
86 template <class T, class Context>
87 bool PairWiseLossGradientOp<T, Context>::RunOnDevice() {
88  auto& X = Input(XVALUE);
89  auto& label = Input(LABEL);
90  auto& dY = Input(DYVALUE);
91 
92  int N = X.dim() > 0 ? X.dim32(0) : 0;
93  CAFFE_ENFORCE_EQ(N, X.numel());
94  CAFFE_ENFORCE(
95  (label.dim() == 1) || (label.dim() == 2 && label.dim32(1) == 1));
96  CAFFE_ENFORCE_EQ(label.dim32(0), N);
97  auto* dX = Output(DXVALUE, X.sizes(), at::dtype<T>());
98  math::Set<T, CPUContext>(
99  dX->numel(), 0.f, dX->template mutable_data<T>(), &context_);
100 
101  if (N == 0) {
102  return true;
103  }
104 
105  const int32_t* lengths_vec;
106  int len_size = 1;
107  if (InputSize() > LENGTHS) {
108  auto& lengths = Input(LENGTHS);
109  CAFFE_ENFORCE_EQ(lengths.dim(), 1);
110  len_size = lengths.numel();
111  lengths_vec = lengths.template data<int32_t>();
112  int len_sum = 0;
113  if (len_size > 0) {
114  math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
115  }
116  CAFFE_ENFORCE_EQ(len_sum, N);
117  } else {
118  lengths_vec = &N;
119  }
120 
121  CAFFE_ENFORCE_EQ(dY.dim(), 1);
122  CAFFE_ENFORCE_EQ(dY.dim32(0), len_size);
123 
124  const T* Xdata = X.template data<T>();
125  const T* dYdata = dY.template data<T>();
126  const T* labelData = label.template data<T>();
127  T* dXdata = dX->template mutable_data<T>();
128  int offset = 0;
129  for (int idx = 0; idx < len_size; ++idx) {
130  int numPairs = 0;
131  for (int i = offset; i < offset + lengths_vec[idx]; ++i) {
132  for (int j = offset; j < i; ++j) {
133  if (std::abs(labelData[i] - labelData[j]) <
134  std::numeric_limits<T>::epsilon()) {
135  continue;
136  }
137  ++numPairs;
138  // only use sigmoid loss function at the moment
139  auto sign = labelData[i] > labelData[j] ? 1 : -1;
140  auto grad =
141  sign * dYdata[idx] / (1 + exp(-sign * (Xdata[j] - Xdata[i])));
142  dXdata[i] -= grad;
143  dXdata[j] += grad;
144  }
145  }
146  if (numPairs > 0) {
147  for (int i = offset; i < offset + lengths_vec[idx]; ++i) {
148  dXdata[i] /= numPairs;
149  }
150  }
151  offset += lengths_vec[idx];
152  }
153  return true;
154 }
155 
156 namespace {
157 REGISTER_CPU_OPERATOR(PairWiseLoss, PairWiseLossOp<float, CPUContext>);
158 REGISTER_CPU_OPERATOR(
159  PairWiseLossGradient,
160  PairWiseLossGradientOp<float, CPUContext>);
161 
162 OPERATOR_SCHEMA(PairWiseLoss)
163  .NumInputs(2, 3)
164  .NumOutputs(1)
165  .SetDoc(R"DOC(
166 Operator computes the pair wise loss between all pairs within a batch
167  using the logit loss function on the difference in scores between pairs
168 )DOC")
169  .Input(
170  0,
171  "X",
172  "Input blob from the previous layer, which is almost always "
173  "the result of a softmax operation; X is a 2D array of size N x 1"
174  "where N is the batch size. For more info: "
175  "D. Sculley, Large Scale Learning to Rank. "
176  "https://www.eecs.tufts.edu/~dsculley/papers/large-scale-rank.pdf")
177  .Input(1, "label", "Blob containing the labels used to compare the input")
178  .Input(
179  2,
180  "lengths",
181  "Optional input blob that contains the lengths"
182  "of multiple sessions. The summation of this blob must be equal"
183  "to the size of blob X. If lengths blob is provided, the output"
184  "blob has the same size as lengths blob, and the cross entropy"
185  "is computed within each session.")
186  .Output(0, "Y", "Output blob after the cross entropy computation");
187 OPERATOR_SCHEMA(PairWiseLossGradient).NumInputs(3, 4).NumOutputs(1);
188 
189 class GetPairWiseLossGradient : public GradientMakerBase {
190  using GradientMakerBase::GradientMakerBase;
191  vector<OperatorDef> GetGradientDefs() override {
192  vector<string> blob_names{I(0), I(1), GO(0)};
193 
194  // Add lengths blob if given
195  if (def_.input_size() == 3) {
196  blob_names.push_back(I(2));
197  }
198  return SingleGradientDef(
199  "PairWiseLossGradient", "", blob_names, vector<string>{GI(0)});
200  }
201 };
202 REGISTER_GRADIENT(PairWiseLoss, GetPairWiseLossGradient);
203 
204 } // namespace
205 } // namespace caffe2
// (doxygen extraction residue — index text, not part of this operator)
// A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
// Definition: blob.h:13
// Definition: static.cpp:70