1 #include "caffe2/operators/batch_box_cox_op.h" 3 #include "caffe2/core/operator.h" 4 #include "caffe2/core/tensor.h" 8 #endif // CAFFE2_USE_MKL 17 void TileArrayIntoVector(
const T* a,
int D,
int K, vector<T>* b) {
19 for (
int k = 0; k < K; k++) {
20 std::copy(a, a + D, b->begin() + k * D);
// Tiles the index vector `*v` (n entries) into K copies in place, where copy
// k has k * D added to every index. With D equal to the row width this turns
// per-row column indices into indices addressing K consecutive rows.
// NOTE(review): the lines declaring `n` and resizing `*v` were missing from
// this mangled copy; restored — `n` was used undeclared and the writes below
// would be out of bounds without the resize.
void TileIndicesInPlace(std::vector<int>* v, int D, int K) {
  int n = v->size();
  v->resize(K * n);
  for (int k = 1; k < K; k++) {
    for (int j = 0; j < n; j++) {
      (*v)[k * n + j] = (*v)[j] + k * D;
    }
  }
}
// Declarations of thin wrappers around MKL gather/scatter/power routines;
// specialized for float/double by the DELEGATE_* macros below.
// NOTE(review): the `template <typename T>` headers were missing from this
// mangled copy; restored.

// Gathers N elements through an index vector: y[i] = a[ia[i]].
template <typename T>
void PackV(const int N, const T* a, const int* ia, T* y);

// Scatters N elements through an index vector: y[iy[i]] = a[i].
template <typename T>
void UnpackV(const int N, const T* a, T* y, const int* iy);

// Element-wise power: y[i] = pow(a[i], b[i]).
template <typename T>
void Pow(const int N, const T* a, const T* b, T* y);
42 #define DELEGATE_PACKV_FUNCTION(T, OriginalFunc) \ 44 void PackV<T>(const int N, const T* a, const int* ia, T* y) { \ 45 OriginalFunc(N, a, ia, y); \ 47 DELEGATE_PACKV_FUNCTION(
float, vsPackV)
48 DELEGATE_PACKV_FUNCTION(
double, vdPackV)
49 #undef DELEGATE_PACKV_FUNCTION 51 #define DELEGATE_UNPACKV_FUNCTION(T, OriginalFunc) \ 53 void UnpackV<T>(const int N, const T* a, T* y, const int* iy) { \ 54 OriginalFunc(N, a, y, iy); \ 56 DELEGATE_UNPACKV_FUNCTION(
float, vsUnpackV)
57 DELEGATE_UNPACKV_FUNCTION(
double, vdUnpackV)
58 #undef DELEGATE_UNPACKV_FUNCTION 60 #define DELEGATE_SIMPLE_BINARY_FUNCTION(T, Funcname, OriginalFunc) \ 62 void Funcname<T>(const int N, const T* a, const T* b, T* y) { \ 63 OriginalFunc(N, a, b, y); \ 65 DELEGATE_SIMPLE_BINARY_FUNCTION(
float, Pow, vsPow)
66 DELEGATE_SIMPLE_BINARY_FUNCTION(
double, Pow, vdPow)
// DoRunWithType: CPU BatchBoxCox for one dtype T (float or double).
// NOTE(review): this copy of the file has lost many physical lines (the
// leading integers are original line numbers; gaps in them mark lost code —
// loop bodies, else branches, the final `return true;`). Code below is kept
// byte-identical; comments describe only the surviving fragments.
67 #undef DELEGATE_SIMPLE_BINARY_FUNCTION 70 #endif // CAFFE2_USE_MKL 74 bool BatchBoxCoxOp<CPUContext>::DoRunWithType() {
// Inputs: DATA is treated as an N x D matrix (N = dim 0, D = all remaining
// dims flattened); LAMBDA1/LAMBDA2 hold one parameter per column.
75 auto& data = Input(DATA);
76 auto& lambda1 = Input(LAMBDA1);
77 auto& lambda2 = Input(LAMBDA2);
78 CAFFE_ENFORCE_GE(data.dim(), 1);
79 auto N = data.size(0);
80 auto D = data.size_from_dim(1);
// Output matches the input's shape and dtype (schema allows in-place).
82 auto* output = Output(0, Input(DATA).sizes(), at::dtype<T>());
83 auto* output_ptr = output->template mutable_data<T>();
// Early-out for empty tensors; the body (presumably `return true;`) is lost
// in this copy — confirm against the upstream file.
85 if (data.numel() <= 0) {
89 CAFFE_ENFORCE_EQ(lambda1.numel(), D);
90 CAFFE_ENFORCE_EQ(lambda2.numel(), D);
92 const auto* data_ptr = data.template data<T>();
93 const auto* lambda1_ptr = lambda1.template data<T>();
94 const auto* lambda2_ptr = lambda2.template data<T>();
// Lower clamp applied to (x + lambda2) before log/pow in all kernels.
96 const T k_eps =
static_cast<T>(1e-6);
// min_block_size_ < 1 disables the blocked MKL path entirely.
99 if (min_block_size_ < 1) {
100 BoxCoxNaive(N, D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
// Partition columns by lambda1 == 0 vs != 0; the zeros_ branch lines are
// lost in this copy.
105 nonzeros_.reserve(D);
107 for (int64_t j = 0; j < D; j++) {
108 if (lambda1_ptr[j] == 0) {
111 nonzeros_.push_back(j);
// Process K rows per vectorized call so short rows amortize call overhead.
116 const int K = std::min(N, (min_block_size_ + D - 1) / D);
// Per-dtype cached scratch buffers (see GetBuffers below).
121 TypedCachedBuffers<T>& b = GetBuffers<T>();
// Case 1: every lambda1 is nonzero — tile the lambda arrays K times and run
// whole K-row blocks, then finish remaining rows one at a time.
122 if (nonzeros_.size() == D) {
125 TileArrayIntoVector(lambda1_ptr, D, K, &b.lambda1_);
126 TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_);
127 DCHECK_EQ(K * D, b.lambda1_.size());
128 DCHECK_EQ(K * D, b.lambda2_.size());
129 for (; i < N - K + 1; i += K, data_ptr += K * D, output_ptr += K * D) {
139 for (; i < N; i++, data_ptr += D, output_ptr += D) {
141 D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
143 }
// Case 2: every lambda1 is zero — only lambda2 needs tiling (log path).
else if (zeros_.size() == D) {
146 TileArrayIntoVector(lambda2_ptr, D, K, &b.lambda2_z_);
147 DCHECK_EQ(K * D, b.lambda2_z_.size());
148 for (; i < N - K + 1; i += K, data_ptr += K * D, output_ptr += K * D) {
150 K * D, data_ptr, b.lambda2_z_.data(), k_eps, output_ptr);
153 for (; i < N; i++, data_ptr += D, output_ptr += D) {
154 BoxCoxZeroLambda(D, data_ptr, lambda2_ptr, k_eps, output_ptr);
// Case 3: mixed zero/nonzero lambda1 values.
157 int n = nonzeros_.size();
// Tiling with D == 0 duplicates the index sets K times with no row offset,
// so PackV below gathers the per-set lambda values in K-row tiled layout.
159 TileIndicesInPlace(&nonzeros_, 0, K);
160 TileIndicesInPlace(&zeros_, 0, K);
164 b.lambda1_.resize(nonzeros_.size());
165 b.lambda2_.resize(nonzeros_.size());
166 b.lambda2_z_.resize(zeros_.size());
167 PackV(nonzeros_.size(), lambda1_ptr, nonzeros_.data(), b.lambda1_.data());
168 PackV(nonzeros_.size(), lambda2_ptr, nonzeros_.data(), b.lambda2_.data());
169 PackV(zeros_.size(), lambda2_ptr, zeros_.data(), b.lambda2_z_.data());
// Shared scratch buffer sized for the larger of the two column sets.
172 b.accumulator_.resize(std::max(nonzeros_.size(), zeros_.size()));
// Index vectors are shrunk back and re-tiled with the real row stride D so
// they address K consecutive rows of data (the nonzeros_ resize line for
// this step is lost in this copy).
176 zeros_.resize(D - n);
177 TileIndicesInPlace(&nonzeros_, D, K);
178 TileIndicesInPlace(&zeros_, D, K);
179 DCHECK_EQ(nonzeros_.size(), b.lambda1_.size());
180 DCHECK_EQ(nonzeros_.size(), b.lambda2_.size());
181 DCHECK_EQ(zeros_.size(), b.lambda2_z_.size());
// Whole K-row blocks via BoxCoxMixedLambda (most call arguments lost here).
182 for (; i < N - K + 1; i += K, data_ptr += K * D, output_ptr += K * D) {
191 b.accumulator_.data(),
// Restore single-row index sets for the remainder loop.
196 zeros_.resize(D - n);
198 for (; i < N; i++, data_ptr += D, output_ptr += D) {
207 b.accumulator_.data(),
// Non-MKL builds fall back to the scalar implementation for everything.
// The closing `return true; }` of this function is lost in this copy.
212 #else // CAFFE2_USE_MKL 213 BoxCoxNaive(N, D, data_ptr, lambda1_ptr, lambda2_ptr, k_eps, output_ptr);
214 #endif // CAFFE2_USE_MKL 219 template <
typename T>
220 void BatchBoxCoxOp<CPUContext>::BoxCoxNaive(
224 const T* lambda1_ptr,
225 const T* lambda2_ptr,
228 for (int64_t i = 0; i < N; i++) {
229 for (int64_t j = 0; j < D; j++, data_ptr++, output_ptr++) {
230 T lambda1_v = lambda1_ptr[j];
231 T lambda2_v = lambda2_ptr[j];
232 T tmp = std::max(*data_ptr + lambda2_v, k_eps);
233 if (lambda1_v == 0) {
234 *output_ptr = std::log(tmp);
236 *output_ptr = (std::pow(tmp, lambda1_v) - 1) / lambda1_v;
242 #ifdef CAFFE2_USE_MKL 245 template <
typename T>
246 void BatchBoxCoxOp<CPUContext>::BoxCoxNonzeroLambda(
253 caffe2::math::Add(D, data_ptr, lambda2, out, &context_);
254 for (int64_t j = 0; j < D; j++) {
255 out[j] = std::max(out[j], k_eps);
257 Pow(D, out, lambda1, out);
258 for (int64_t j = 0; j < D; j++) {
261 caffe2::math::Div(D, out, lambda1, out, &context_);
265 template <
typename T>
266 void BatchBoxCoxOp<CPUContext>::BoxCoxZeroLambda(
272 caffe2::math::Add(D, data_ptr, lambda2, output_ptr, &context_);
273 for (int64_t j = 0; j < D; j++) {
274 output_ptr[j] = std::max(output_ptr[j], k_eps);
276 caffe2::math::Log(D, output_ptr, output_ptr, &context_);
280 template <
typename T>
281 void BatchBoxCoxOp<CPUContext>::BoxCoxMixedLambda(
283 const vector<int>& nonzeros,
284 const vector<int>& zeros,
291 PackV(nonzeros.size(), data_ptr, nonzeros.data(), buffer);
292 BoxCoxNonzeroLambda(nonzeros.size(), buffer, lambda1, lambda2, k_eps, buffer);
293 UnpackV(nonzeros.size(), buffer, output_ptr, nonzeros.data());
295 PackV(zeros.size(), data_ptr, zeros.data(), buffer);
296 BoxCoxZeroLambda(zeros.size(), buffer, lambda2_z, k_eps, buffer);
297 UnpackV(zeros.size(), buffer, output_ptr, zeros.data());
301 #define DEFINE_CACHED_BUFFERS(T, tag) \ 304 BatchBoxCoxOp<CPUContext>::TypedCachedBuffers<T>& \ 305 BatchBoxCoxOp<CPUContext>::GetBuffers<T>() { \ 306 if (!buffers_ || buffers_->type_ != tag) { \ 307 buffers_.reset(new BatchBoxCoxOp<CPUContext>::TypedCachedBuffers<T>()); \ 308 buffers_->type_ = tag; \ 310 return *static_cast<TypedCachedBuffers<T>*>(buffers_.get()); \ 312 DEFINE_CACHED_BUFFERS(
float, 1);
313 DEFINE_CACHED_BUFFERS(
double, 2);
314 #undef DEFINE_CACHED_BUFFERS 316 #endif // CAFFE2_USE_MKL 320 REGISTER_CPU_OPERATOR(BatchBoxCox, BatchBoxCoxOp<CPUContext>);
321 OPERATOR_SCHEMA(BatchBoxCox)
324 .IdenticalTypeAndShapeOfInput(0)
325 .AllowInplace({{0, 0}})
327 Input `data` is a N * D matrix. Apply box-cox transform for each column. 328 `lambda1` and `lambda2` is of size D that defines the hyper-parameters for 329 the transform of each column `x` of the input `data`: 331 ln(x + lambda2), if lambda1 == 0 332 ((x + lambda2)^lambda1 - 1)/lambda1, if lambda1 != 0 335 .Input(0, "data",
"input float or double N * D matrix")
336 .Input(1,
"lambda1",
"tensor of size D with the same type as data")
337 .Input(2,
"lambda2",
"tensor of size D with the same type as data")
338 .Output(0,
"output",
"output matrix that applied box-cox transform");
340 GRADIENT_NOT_IMPLEMENTED_YET(BatchBoxCox);
// NOTE(review): stray text fused in from another file's documentation
// (caffe2 module registry); kept as a comment so it is not bare prose in a
// .cc file: "A global dictionary that holds information about what Caffe2
// modules have been loaded in the current runtime."