// Include guard and dependencies, followed by the helper
// pre_calc_for_bilinear_interpolate. The helper walks every pooled output
// bin (ph, pw) and every sampling point (iy, ix) inside it, and stores a
// PreCalc record (four flattened input positions plus four uint8_t bilinear
// weights) into `pre_calc`.
// NOTE(review): this listing is not contiguous (the embedded source line
// numbers skip), so part of the parameter list, the declarations of x, y,
// y_low, x_low, y_high, x_high and pc, and the closing braces are not visible
// here. The comments below describe only the statements that are shown.
1 #ifndef CAFFE2_OPERATORS_INT8_ROI_ALIGN_OP_H_ 2 #define CAFFE2_OPERATORS_INT8_ROI_ALIGN_OP_H_ 4 #include "caffe2/core/common.h" 5 #include "caffe2/core/context.h" 6 #include "caffe2/core/logging.h" 7 #include "caffe2/core/operator.h" 8 #include "caffe2/core/operator_schema.h" 9 #include "caffe2/core/tensor_int8.h" 10 #include "caffe2/operators/quantized/int8_utils.h" 11 #include "caffe2/utils/math.h" 30 void pre_calc_for_bilinear_interpolate(
33 const int pooled_height,
34 const int pooled_width,
// Output table, written by index below; the caller must have sized it already.
43 std::vector<PreCalc>& pre_calc) {
// Slot in pre_calc that the current sample is written to.
// NOTE(review): the increment is not visible in this listing; presumably it
// advances once per (ph, pw, iy, ix) sample.
44 int pre_calc_index = 0;
// Every bilinear weight is multiplied by this factor before being rounded
// and narrowed to uint8_t.
46 const float w_multiplier = 255.0;
// Loop over output bins (rows, then columns).
47 for (
int ph = 0; ph < pooled_height; ph++) {
48 for (
int pw = 0; pw < pooled_width; pw++) {
// Loop over the vertical sampling points of the bin.
49 for (
int iy = 0; iy < iy_upper; iy++) {
// Vertical sample coordinate: start of bin ph plus (iy + 0.5) sub-cells of
// height bin_size_h / roi_bin_grid_h, i.e. the centre of sub-cell iy.
50 const float yy = roi_start_h + ph * bin_size_h +
51 static_cast<float>(iy + .5f) * bin_size_h /
52 static_cast<float>(roi_bin_grid_h);
// Loop over the horizontal sampling points of the bin.
53 for (
int ix = 0; ix < ix_upper; ix++) {
// Horizontal sample coordinate, computed the same way as yy.
54 const float xx = roi_start_w + pw * bin_size_w +
55 static_cast<float>(ix + .5f) * bin_size_w /
56 static_cast<float>(roi_bin_grid_w);
// Sample falls outside the [-1, height] x [-1, width] window.
// NOTE(review): x and y are declared on lines not shown, presumably as
// copies of xx and yy; the contents given to pc in this branch are likewise
// not visible.
61 if (y < -1.0 || y > height || x < -1.0 || x > width) {
72 pre_calc[pre_calc_index] = pc;
// When the low row index reaches the last row, collapse both neighbours
// onto that row.
89 if (y_low >= height - 1) {
90 y_high = y_low = height - 1;
// Same collapse for the column index.
96 if (x_low >= width - 1) {
97 x_high = x_low = width - 1;
// Distances of the sample from the low neighbour (ly, lx) and their
// complements (hy, hx); these form the bilinear weights.
103 float ly = y - y_low;
104 float lx = x - x_low;
105 float hy = 1. - ly, hx = 1. - lx;
// Quantized weights for the four neighbours: the product of the two
// 1-D weights, scaled by w_multiplier, passed through Round (defined
// elsewhere) and narrowed to uint8_t.
// NOTE(review): the narrowing cast assumes each product stays within
// 0..255 — confirm ly/lx are always in [0, 1] at this point.
107 uint8_t w1 =
static_cast<uint8_t
>(Round(hy * hx * w_multiplier));
108 uint8_t w2 =
static_cast<uint8_t
>(Round(hy * lx * w_multiplier));
109 uint8_t w3 =
static_cast<uint8_t
>(Round(ly * hx * w_multiplier));
110 uint8_t w4 =
static_cast<uint8_t
>(Round(ly * lx * w_multiplier));
// Flattened spatial indices (row * width + column) of the four neighbours:
// (low, low), (low, high), (high, low), (high, high).
114 pc.pos1 = y_low * width + x_low;
115 pc.pos2 = y_low * width + x_high;
116 pc.pos3 = y_high * width + x_low;
117 pc.pos4 = y_high * width + x_high;
// Store the record for this sample.
123 pre_calc[pre_calc_index] = pc;
// Quantized RoIAlign forward pass. For each RoI it scales the box by
// spatial_scale, derives bin sizes and a sampling grid, precomputes the
// bilinear sample positions/weights, accumulates weighted uint8_t inputs
// into per-channel int32_t sums, and requantizes each sum to a uint8_t
// output clamped to [0, 255].
// NOTE(review): this listing is not contiguous (the embedded source line
// numbers skip); several parameters (e.g. nthreads, channels, height, width,
// roi_cols, top_data, x_scale, y_scale), some declarations and the closing
// braces are not visible. Comments describe only what is shown.
132 void ROIAlignForward(
134 const uint8_t* bottom_data,
135 const float& spatial_scale,
139 const int pooled_height,
140 const int pooled_width,
141 const int sampling_ratio,
142 const float* bottom_rois,
147 const int32_t x_offset,
148 const int32_t y_offset,
149 StorageOrder order) {
// Each RoI row must have either 4 or 5 columns.
150 DCHECK(roi_cols == 4 || roi_cols == 5);
// nthreads is treated as the total number of output elements, so dividing
// by channels and the pooled extent gives the number of RoIs.
152 int n_rois = nthreads / channels / pooled_width / pooled_height;
154 for (
int n = 0; n < n_rois; n++) {
// Offset of the first output element belonging to RoI n.
155 int index_n = n * channels * pooled_width * pooled_height;
// Row n of the RoI table.
158 const float* offset_bottom_rois = bottom_rois + n * roi_cols;
159 int roi_batch_ind = 0;
// Take the batch index from the first column and step past it.
// NOTE(review): presumably guarded by a roi_cols == 5 check on a line not
// shown — confirm. The float value is implicitly converted to int.
161 roi_batch_ind = offset_bottom_rois[0];
162 offset_bottom_rois++;
// Box corners mapped into feature-map coordinates.
166 float roi_start_w = offset_bottom_rois[0] * spatial_scale;
167 float roi_start_h = offset_bottom_rois[1] * spatial_scale;
168 float roi_end_w = offset_bottom_rois[2] * spatial_scale;
169 float roi_end_h = offset_bottom_rois[3] * spatial_scale;
// Force the RoI to be at least 1 x 1.
172 float roi_width = std::max(roi_end_w - roi_start_w, (
float)1.);
173 float roi_height = std::max(roi_end_h - roi_start_h, (
float)1.);
// RoI extent divided by the pooled extent (the declarations these
// initializers belong to are not visible; presumably bin_size_h / bin_size_w).
175 static_cast<float>(roi_height) / static_cast<float>(pooled_height);
177 static_cast<float>(roi_width) / static_cast<float>(pooled_width);
// Sampling points per bin in each direction: sampling_ratio when it is
// positive, otherwise ceil(roi extent / pooled extent).
180 int roi_bin_grid_h = (sampling_ratio > 0)
182 : ceil(roi_height / pooled_height);
184 (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
// Number of samples accumulated per output bin.
187 const float count = roi_bin_grid_h * roi_bin_grid_w;
// Requantization factor: input scale over output scale, divided by count
// (turns the sum into a mean) and by 255.0 (the accumulation below treats
// the uint8_t weights as being scaled by 255.0).
190 double real_multiplier = x_scale / (y_scale * 255.0 * count);
191 int32_t Y_multiplier;
// Convert real_multiplier into an integer multiplier and shift.
// NOTE(review): the helper's name suggests real_multiplier is expected to
// be < 1 — TODO confirm against the helper's contract.
193 QuantizeMultiplierSmallerThanOne(real_multiplier, &Y_multiplier, &Y_shift);
// One PreCalc record per sampling point per output bin of this RoI.
197 std::vector<PreCalc> pre_calc(
198 roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
199 pre_calc_for_bilinear_interpolate(
// Start of the input data for the batch element this RoI refers to.
214 const uint8_t* offset_bottom_data =
215 bottom_data + roi_batch_ind * channels * height * width;
// Read cursor into pre_calc.
// NOTE(review): its increment is not visible in this listing.
216 int pre_calc_index = 0;
217 for (
int ph = 0; ph < pooled_height; ph++) {
218 for (
int pw = 0; pw < pooled_width; pw++) {
// Fresh zeroed per-channel accumulators for this output bin.
219 vector<int32_t> acc_buffer(channels, 0);
221 for (
int iy = 0; iy < roi_bin_grid_h; iy++) {
222 for (
int ix = 0; ix < roi_bin_grid_w; ix++) {
223 PreCalc pc = pre_calc[pre_calc_index];
// Channel vectors of the four neighbouring pixels; the spatial index is
// multiplied by channels, so channels are the innermost dimension.
225 const uint8_t* data_1 = offset_bottom_data + channels * pc.pos1;
226 const uint8_t* data_2 = offset_bottom_data + channels * pc.pos2;
227 const uint8_t* data_3 = offset_bottom_data + channels * pc.pos3;
228 const uint8_t* data_4 = offset_bottom_data + channels * pc.pos4;
229 for (
int c = 0; c < channels; ++c) {
// Add the four weight * value products for this sample.
230 acc_buffer[c] += (uint32_t)(pc.w1) * (uint32_t)(data_1[c]);
231 acc_buffer[c] += (uint32_t)(pc.w2) * (uint32_t)(data_2[c]);
232 acc_buffer[c] += (uint32_t)(pc.w3) * (uint32_t)(data_3[c]);
233 acc_buffer[c] += (uint32_t)(pc.w4) * (uint32_t)(data_4[c]);
// Remove the input zero point, scaled by 255.0 to match the weights.
// NOTE(review): x_offset * 255.0 is a double, so this compound assignment
// converts the int32_t accumulator to double and back.
236 acc_buffer[c] -= x_offset * 255.0;
// Output position of bin (ph, pw) for this RoI; channels are innermost.
242 int index_nhw = index_n + (ph * pooled_width + pw) * channels;
243 uint8_t* out_ptr = top_data + index_nhw;
244 for (
int c = 0; c < channels; ++c) {
// Requantize the accumulated sum with the integer multiplier/shift; the
// term added after the call is on a line not shown (presumably y_offset).
245 int32_t a_mul = MultiplyByQuantizedMultiplierSmallerThanOne(
246 acc_buffer[c], Y_multiplier, Y_shift) +
// Clamp to the uint8_t range before storing.
249 std::min<int32_t>(255, std::max<int32_t>(0, a_mul));
250 out_ptr[c] =
static_cast<uint8_t
>(clamped_a);
// Operator constructor (variadic template). Reads the operator arguments
// "order" (default "NHWC"), "spatial_scale" (default 1.), "pooled_h"
// (default 1), "pooled_w" (default 1) and "sampling_ratio" (default -1),
// then validates them.
// NOTE(review): the constructor name and some member-initializer heads are
// on lines not shown in this listing.
261 template <
class... Args>
264 order_(StringToStorageOrder(
265 this->
template GetSingleArgument<string>(
"order",
"NHWC"))),
267 this->
template GetSingleArgument<float>(
"spatial_scale", 1.)),
268 pooled_height_(this->
template GetSingleArgument<int>(
"pooled_h", 1)),
269 pooled_width_(this->
template GetSingleArgument<int>(
"pooled_w", 1)),
271 this->
template GetSingleArgument<int>(
"sampling_ratio", -1)) {
// Sanity checks on the parsed arguments.
272 DCHECK_GT(spatial_scale_, 0);
273 DCHECK_GT(pooled_height_, 0);
274 DCHECK_GT(pooled_width_, 0);
// NOTE(review): the "sampling_ratio" argument above defaults to -1; if that
// value initializes sampling_ratio_ (initializer head not visible), an
// operator created without the argument would not satisfy this check —
// confirm the intended default.
275 DCHECK_GE(sampling_ratio_, 0);
// Only the NHWC storage order is accepted.
277 CAFFE_ENFORCE(order_ == StorageOrder::NHWC);
// Runs the operator: fetches the quantized input and output tensors, reads
// the output quantization arguments, handles the empty-RoI case, validates
// the RoI tensor shape and sizes the output.
// NOTE(review): this listing is not contiguous; the declaration of R, the
// call that receives the output shape, and the call that receives the
// output buffer are only partly visible.
280 bool RunOnDevice()
override {
// Input 0 is the quantized feature map.
281 const auto& X = Inputs()[0]->template Get<Int8TensorCPU>();
// Output 0 is the quantized result.
283 auto* Y = Outputs()[0]->template GetMutable<Int8TensorCPU>();
// Output quantization parameters come from operator arguments
// (defaults: zero point 0, scale 1).
285 int32_t Y_offset = this->
template GetSingleArgument<int>(
"Y_zero_point", 0);
286 auto Y_scale = this->
template GetSingleArgument<float>(
"Y_scale", 1);
288 Y->zero_point = Y_offset;
// No RoIs: give Y a zero-length leading dimension, keeping the pooled size
// and X's dimension 3, and request its data pointer.
// NOTE(review): R is declared on a line not shown — presumably the RoI
// input tensor; confirm.
290 if (R.numel() == 0) {
292 Y->t.Resize(0, pooled_height_, pooled_width_, X.t.dim32(3));
294 Y->t.mutable_data<uint8_t>();
// The RoI tensor must be 2-D with 4 or 5 columns.
298 CAFFE_ENFORCE_EQ(R.dim(), 2);
300 CAFFE_ENFORCE(R.dim32(1) == 4 || R.dim32(1) == 5);
302 assert(sampling_ratio_ >= 0);
// Output shape (number of RoIs, pooled_h, pooled_w, X's dimension 3) with
// uint8_t elements on CPU; the call taking these arguments is not visible.
307 {R.dim32(0), pooled_height_, pooled_width_, X.t.dim32(3)},
308 at::dtype<uint8_t>().device(CPU));
// Total number of output elements.
309 int output_size = Y->t.numel();
// Output buffer argument of a call whose head is not visible (presumably
// the forward-pass routine).
323 Y->t.mutable_data<uint8_t>(),
// Scale factor member; the constructor checks that it is > 0.
// NOTE(review): presumably initialized from the "spatial_scale" operator
// argument — the initializer head is not visible in this listing.
335 float spatial_scale_;
345 #endif // CAFFE2_OPERATORS_INT8_ROI_ALIGN_OP_H_ void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if ...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...