1 #include "caffe2/operators/listwise_l2r_op.h" 2 #include "caffe2/core/context.h" 3 #include "caffe2/core/operator.h" 4 #include "caffe2/utils/eigen_utils.h" 14 template <
typename TDATA,
typename TIDX>
15 void arg_sort(
const TDATA* data, TIDX* idx,
const size_t N,
bool reverse) {
16 std::function<bool(size_t, size_t)> cmp_lambda;
18 cmp_lambda = [data](
size_t i,
size_t j) {
return data[i] > data[j]; };
20 cmp_lambda = [data](
size_t i,
size_t j) {
return data[i] < data[j]; };
23 std::generate(idx, idx + N, [&n] {
return n++; });
24 std::sort(idx, idx + N, cmp_lambda);
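
// Usage sketch (hypothetical values, for illustration only):
//   float scores[4] = {0.3f, 0.9f, 0.1f, 0.5f};
//   int idx[4];
//   arg_sort(scores, idx, 4, /*reverse=*/true);  // idx == {1, 3, 0, 2}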
// PAIRWISE_DIFF(vec, N)(i, j) == vec[i] - vec[j].
#define PAIRWISE_DIFF(vec, N)                               \
  ((vec.matrix() * Eigen::MatrixXf::Ones(1, N) -            \
    Eigen::MatrixXf::Ones(N, 1) * vec.matrix().transpose()) \
       .array())

#define CWISE_SIGM(vec) (1. / (1. + (-(vec)).exp()))

#define CWISE_GT(vec1, vec2) ((vec1) > (vec2))

#define CWISE_LT(vec1, vec2) ((vec1) < (vec2))

// Maps x > 0 to +1 and x <= 0 to -1.
#define CWISE_SIGN(vec) (CWISE_GT((vec), 0).cast<float>() * 2. - 1.)

// Numerically stable log(sigmoid(x)): ~0 for x > huge, ~x for x < -huge.
#define CWISE_LOG_SIGM(vec, huge) \
  (CWISE_GT((vec), (huge))        \
       .select(                   \
           0, CWISE_LT((vec), -(huge)).select(vec, CWISE_SIGM((vec)).log())))

} // namespace

template <>
void LambdaRankNdcgOp<float, CPUContext>::ResizeInvLogITensor(int size) {
  int old_size = inv_log_i_.numel();
  int new_size = std::max(old_size, 1);
  // Grow capacity by doubling so resizes are amortized across calls.
  while (new_size < size) {
    new_size <<= 1;
  }
  if (new_size != old_size) {
    ReinitializeTensor(
        &inv_log_i_, {new_size}, at::dtype<float>().device(CPU));
    auto* data = inv_log_i_.template mutable_data<float>();
    EigenVectorArrayMap<float> vec(data, inv_log_i_.numel());
    const float log2f_ = std::log(2.f);
    // Entry i (0-based) is log(2) / log(i + 2) == 1 / log2(i + 2), the
    // standard DCG position discount.
    vec = log2f_ *
        (Eigen::ArrayXf::LinSpaced(new_size, 2, 1 + new_size).log().inverse());
  }
  return;
}
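
// Worked example: after ResizeInvLogITensor(4), inv_log_i_ holds
// 1 / log2({2, 3, 4, 5}) ~= {1.0, 0.6309, 0.5, 0.4307}.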
template <>
void LambdaRankNdcgOp<float, CPUContext>::ComputeDiscounts(int* idx, int N) {
  ReinitializeTensor(&discount_, {N}, at::dtype<float>().device(CPU));
  auto* discount_data = discount_.template mutable_data<float>();
  auto* inv_log_i_data = inv_log_i_.template mutable_data<float>();
  // idx[i] is the document at rank i, so that document gets rank i's discount.
  for (int i = 0; i < N; i++) {
    discount_data[idx[i]] = inv_log_i_data[i];
  }
  return;
}
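
// For example, with idx == {1, 3, 0, 2} (document 1 ranked first), the
// scatter above yields discount_ ==
// {inv_log_i_[2], inv_log_i_[0], inv_log_i_[3], inv_log_i_[1]}.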
template <>
float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
    int start_index,
    int end_index,
    const Tensor& y,
    const Tensor& r,
    Tensor** dy) {
  CAFFE_ENFORCE(start_index >= 0);
  CAFFE_ENFORCE(start_index < y.numel());
  const auto* y_data = y.template data<float>();
  const auto* r_data = r.template data<float>();

  int N = end_index - start_index + 1;

  ConstEigenVectorArrayMap<float> y_vec(&y_data[start_index], N);
  ConstEigenVectorArrayMap<float> r_vec(&r_data[start_index], N);

  if (N <= 0) {
    return 0;
  }
  ReinitializeTensor(&ideal_idx_, {N}, at::dtype<int>().device(CPU));
  ReinitializeTensor(&rank_idx_, {N}, at::dtype<int>().device(CPU));
  auto* rank_idx_data = rank_idx_.template mutable_data<int>();
  auto* ideal_idx_data = ideal_idx_.template mutable_data<int>();

  // The current ranking sorts documents by predicted score, descending.
  arg_sort(&y_data[start_index], rank_idx_data, N, true);
  // The ideal ranking sorts documents by true relevance, descending.
  arg_sort(&r_data[start_index], ideal_idx_data, N, true);
  auto* dy_data = (*dy)->template mutable_data<float>();
  EigenVectorArrayMap<float> dy_vec(&dy_data[start_index], N);
  float loss = 0;
  dy_vec = 0;
  // If every document in the session has zero relevance, there is nothing to
  // rank: leave the gradient at zero and return zero loss.
  if (r_vec.abs().sum() < 1e-6) {
    return 0;
  }
  const double log2f_ = std::log(2.f);
  ReinitializeTensor(&gain_, {N}, at::dtype<float>().device(CPU));
  auto* gain_data = gain_.template mutable_data<float>();
  EigenVectorArrayMap<float> gain_vec(gain_data, gain_.numel());

  if (use_ndcg_as_loss_ && !use_exp_gain_) {
    // Linear gain: gain_i = r_i.
    gain_vec = r_vec;
  } else {
    // Exponential gain: gain_i = 2^{r_i} = exp(r_i * log 2).
    gain_vec = (r_vec * log2f_).exp();
  }
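
  // For example, r_vec == {3, 1, 2} under the exponential gain gives
  // gain_vec == {8, 2, 4}.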
  ResizeInvLogITensor(N);
  ComputeDiscounts(ideal_idx_data, N);
  auto* ideal_discount_data = discount_.template mutable_data<float>();
  EigenVectorArrayMap<float> ideal_discount_vec(
      ideal_discount_data, discount_.numel());
  // Ideal DCG: sum_i gain_i * ideal_discount_i.
  double idcg = (gain_vec * ideal_discount_vec).sum();
  if (idcg < 1e-5) {
    idcg = 1e-5; // avoid dividing by (near) zero below
  }
  ComputeDiscounts(rank_idx_data, N);
  auto* discount_data = discount_.template mutable_data<float>();
  EigenVectorArrayMap<float> discount_vec(discount_data, discount_.numel());
  // DCG of the current ranking: sum_i gain_i * discount_i.
  double dcg = (gain_vec * discount_vec).sum();
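
  // For nonnegative gains, dcg <= idcg by the rearrangement inequality (the
  // ideal ordering pairs the largest gains with the largest discounts), so
  // the ratio dcg / idcg used below is the session NDCG in [0, 1].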
  ReinitializeTensor(&lambda_, {N * N}, at::dtype<float>().device(CPU));
  auto* lambda_data = lambda_.template mutable_data<float>();
  EigenArrayMap<float> lambda_mat(lambda_data, N, N);
  // Lambda weight (i, j) = |discount_i - discount_j| * |gain_i - gain_j|,
  // i.e. how much the DCG would change if documents i and j swapped ranks.
  lambda_mat =
      (PAIRWISE_DIFF(discount_vec, N) * PAIRWISE_DIFF(gain_vec, N)).abs();
  // dy_i = -sum_j lambda_{i, j} * sign(r_i - r_j)
  //                             * sigmoid(-sign(r_i - r_j) * (y_i - y_j)),
  // the lambda-weighted gradient of the pairwise logistic rank loss.
  dy_vec =
      -(lambda_mat * CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) *
        CWISE_SIGM(
            -CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N)))
           .rowwise()
           .sum();
  if (use_ndcg_as_loss_) {
    loss = 1 - dcg / idcg;
  } else {
    // loss = -sum_{i, j} lambda_{i, j}
    //                  * log(sigmoid(sign(r_i - r_j) * (y_i - y_j)))
    loss = -(lambda_mat *
             CWISE_LOG_SIGM(
                 CWISE_SIGN(PAIRWISE_DIFF(r_vec, N)) * PAIRWISE_DIFF(y_vec, N),
                 100))
                .sum();
  }

  return loss;
}
template <>
bool LambdaRankNdcgOp<float, CPUContext>::RunOnDevice() {
  auto& y = Input(PRED);
  auto& r = Input(REL);
  auto& sid = Input(SESSION_LENS);

  auto* dy = Output(DPRED);

  const auto* session_lengths = sid.template data<int>();
  CAFFE_ENFORCE(y.dim() == 1);
  CAFFE_ENFORCE(y.numel() == r.numel());
  dy->Resize(y.numel());
  auto* loss = Output(LOSS, {sid.numel()}, at::dtype<float>());
  auto loss_vec = loss->template mutable_data<float>();
  int start_id = 0;
  // Each session occupies a contiguous slice of y and r; walk the slices and
  // compute one loss value (and gradient cache) per session.
  for (int i = 0; i < sid.numel(); i++) {
    loss_vec[i] = LambdaRankNdcgSession(
        start_id, session_lengths[i] + start_id - 1, y, r, &dy);
    start_id += session_lengths[i];
  }

  return true;
}
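
// For example, session_lengths == {3, 2} splits inputs of length 5 into the
// index ranges [0, 2] and [3, 4], producing a two-element LOSS output.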
template <>
bool LambdaRankNdcgGradientOp<float, CPUContext>::RunOnDevice() {
  auto& y = Input(Y);
  auto& sids = Input(SESSION_LENS);
  auto& dy_cache = Input(DY_CACHE);
  auto& dLoss = Input(DLOSS);

  CAFFE_ENFORCE(y.dim() == 1);
  CAFFE_ENFORCE(dy_cache.dim() == 1);
  CAFFE_ENFORCE(dy_cache.numel() > 0);
  CAFFE_ENFORCE(y.numel() == dy_cache.numel());

  const auto* session_lengths = sids.template data<int>();
  CAFFE_ENFORCE(dLoss.numel() == sids.numel());

  ConstEigenVectorArrayMap<float> dy_cache_vec(
      dy_cache.template data<float>(), dy_cache.numel());
  auto* dy = Output(DY, {dy_cache.numel()}, at::dtype<float>());
  EigenVectorArrayMap<float> dy_vec(
      dy->template mutable_data<float>(), dy->numel());
  auto multiplier = dLoss.template data<float>();
  int count = 0;
  // Scale each session's cached gradient by the incoming gradient of that
  // session's loss (chain rule).
  for (int j = 0; j < sids.numel(); j++) {
    dy_vec.segment(count, session_lengths[j]) =
        multiplier[j] * dy_cache_vec.segment(count, session_lengths[j]);
    count += session_lengths[j];
  }

  return true;
}
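
// For example, session_lengths == {3, 2} and dLoss == {a, b} give
// DY == {a, a, a, b, b} * DY_CACHE elementwise.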
namespace {

REGISTER_CPU_OPERATOR(LambdaRankNdcg, LambdaRankNdcgOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    LambdaRankNdcgGradient,
    LambdaRankNdcgGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(LambdaRankNdcg).NumInputs(3).NumOutputs(2).SetDoc(R"DOC(
Implements LambdaRank as described in Wu, Qiang, et al. "Adapting boosting
for information retrieval measures." Information Retrieval 13.3 (2010): 254-270.

This method heuristically optimizes the NDCG.
)DOC");
OPERATOR_SCHEMA(LambdaRankNdcgGradient).NumInputs(4).NumOutputs(1);

class GetLambdaRankNdcgGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "LambdaRankNdcgGradient",
        "",
        vector<string>{I(0), I(2), O(1), GO(0)},
        vector<string>{GI(0)});
  }
};
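
// Wiring of the gradient: the forward op's PRED input (I(0)) becomes Y,
// SESSION_LENS (I(2)) is passed through, the cached DPRED output (O(1))
// becomes DY_CACHE, and the incoming LOSS gradient (GO(0)) becomes DLOSS.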
REGISTER_GRADIENT(LambdaRankNdcg, GetLambdaRankNdcgGradient);

} // namespace

} // namespace caffe2