1 #include "caffe2/operators/rank_loss_op.h" 9 inline T logLogit(
T x) {
10 static const auto kMinLogDiff = std::log(std::numeric_limits<T>::epsilon());
12 if (x < kMinLogDiff) {
15 if (x > -kMinLogDiff) {
18 return std::log(std::exp(x) + 1);
22 template <
typename T,
class Context>
23 bool PairWiseLossOp<T, Context>::RunOnDevice() {
24 auto& X = Input(XVALUE);
25 auto& label = Input(LABEL);
27 int N = X.dim() > 0 ? X.dim32(0) : 0;
30 Output(YVALUE, {0}, at::dtype<T>());
34 const int32_t* lengths_vec;
36 if (InputSize() > LENGTHS) {
37 auto& lengths = Input(LENGTHS);
38 CAFFE_ENFORCE_EQ(lengths.dim(), 1);
39 len_size = lengths.numel();
40 lengths_vec = lengths.template data<int32_t>();
43 math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
45 CAFFE_ENFORCE_EQ(len_sum, N);
51 auto* Y = Output(YVALUE, {len_size}, at::dtype<T>());
52 auto* Ydata = Y->template mutable_data<T>();
54 int D = X.numel() / N;
56 (label.dim() == 1) || (label.dim() == 2 && label.dim32(1) == 1));
57 CAFFE_ENFORCE_EQ(label.dim32(0), N);
58 CAFFE_ENFORCE_EQ(1, D);
60 const auto* Xdata = X.template data<T>();
61 const auto* labelData = label.template data<T>();
63 for (
int idx = 0; idx < len_size; ++idx) {
66 for (
int i = offset; i < offset + lengths_vec[idx]; ++i) {
67 for (
int j = offset; j < i; ++j) {
68 if (std::abs(labelData[i] - labelData[j]) <
69 std::numeric_limits<T>::epsilon()) {
74 auto sign = labelData[i] > labelData[j] ? 1 : -1;
75 Ydata[idx] += logLogit(sign * (Xdata[j] - Xdata[i]));
79 Ydata[idx] /= numPairs;
81 offset += lengths_vec[idx];
86 template <
class T,
class Context>
87 bool PairWiseLossGradientOp<T, Context>::RunOnDevice() {
88 auto& X = Input(XVALUE);
89 auto& label = Input(LABEL);
90 auto& dY = Input(DYVALUE);
92 int N = X.dim() > 0 ? X.dim32(0) : 0;
93 CAFFE_ENFORCE_EQ(N, X.numel());
95 (label.dim() == 1) || (label.dim() == 2 && label.dim32(1) == 1));
96 CAFFE_ENFORCE_EQ(label.dim32(0), N);
97 auto* dX = Output(DXVALUE, X.sizes(), at::dtype<T>());
98 math::Set<T, CPUContext>(
99 dX->numel(), 0.f, dX->template mutable_data<T>(), &context_);
105 const int32_t* lengths_vec;
107 if (InputSize() > LENGTHS) {
108 auto& lengths = Input(LENGTHS);
109 CAFFE_ENFORCE_EQ(lengths.dim(), 1);
110 len_size = lengths.numel();
111 lengths_vec = lengths.template data<int32_t>();
114 math::Sum<int, Context>(len_size, lengths_vec, &len_sum, &context_);
116 CAFFE_ENFORCE_EQ(len_sum, N);
121 CAFFE_ENFORCE_EQ(dY.dim(), 1);
122 CAFFE_ENFORCE_EQ(dY.dim32(0), len_size);
124 const T* Xdata = X.template data<T>();
125 const T* dYdata = dY.template data<T>();
126 const T* labelData = label.template data<T>();
127 T* dXdata = dX->template mutable_data<T>();
129 for (
int idx = 0; idx < len_size; ++idx) {
131 for (
int i = offset; i < offset + lengths_vec[idx]; ++i) {
132 for (
int j = offset; j < i; ++j) {
133 if (std::abs(labelData[i] - labelData[j]) <
134 std::numeric_limits<T>::epsilon()) {
139 auto sign = labelData[i] > labelData[j] ? 1 : -1;
141 sign * dYdata[idx] / (1 + exp(-sign * (Xdata[j] - Xdata[i])));
147 for (
int i = offset; i < offset + lengths_vec[idx]; ++i) {
148 dXdata[i] /= numPairs;
151 offset += lengths_vec[idx];
157 REGISTER_CPU_OPERATOR(PairWiseLoss, PairWiseLossOp<float, CPUContext>);
158 REGISTER_CPU_OPERATOR(
159 PairWiseLossGradient,
160 PairWiseLossGradientOp<float, CPUContext>);
162 OPERATOR_SCHEMA(PairWiseLoss)
166 Operator computes the pair wise loss between all pairs within a batch 167 using the logit loss function on the difference in scores between pairs 172 "Input blob from the previous layer, which is almost always " 173 "the result of a softmax operation; X is a 2D array of size N x 1" 174 "where N is the batch size. For more info: " 175 "D. Sculley, Large Scale Learning to Rank. " 176 "https://www.eecs.tufts.edu/~dsculley/papers/large-scale-rank.pdf")
177 .Input(1,
"label",
"Blob containing the labels used to compare the input")
181 "Optional input blob that contains the lengths" 182 "of multiple sessions. The summation of this blob must be equal" 183 "to the size of blob X. If lengths blob is provided, the output" 184 "blob has the same size as lengths blob, and the cross entropy" 185 "is computed within each session.")
186 .Output(0,
"Y",
"Output blob after the cross entropy computation");
187 OPERATOR_SCHEMA(PairWiseLossGradient).NumInputs(3, 4).NumOutputs(1);
189 class GetPairWiseLossGradient :
public GradientMakerBase {
190 using GradientMakerBase::GradientMakerBase;
191 vector<OperatorDef> GetGradientDefs()
override {
192 vector<string> blob_names{I(0), I(1), GO(0)};
195 if (def_.input_size() == 3) {
196 blob_names.push_back(I(2));
198 return SingleGradientDef(
199 "PairWiseLossGradient",
"", blob_names, vector<string>{GI(0)});
202 REGISTER_GRADIENT(PairWiseLoss, GetPairWiseLossGradient);
// NOTE(review): stray, truncated prose ("A global dictionary that holds
// information about what Caffe2 modules have been loaded in the current
// runtime...") — presumably an extraction artifact from another file's
// documentation; it is unrelated to this operator. Confirm and remove.