Caffe2 - C++ API
A deep learning, cross platform ML framework
convert_image_to_tensor.cc
1 
#include <opencv2/opencv.hpp>
#include <cmath>
#include <fstream>
#include <limits>
#include <utility>

#include "caffe2/core/common.h"
#include "caffe2/core/db.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/timer.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"
29 
30 
31 C10_DEFINE_int(
32  batch_size,
33  -1,
34  "Specify the batch size of the input. The number of items in the "
35  "input needs to be multiples of the batch size. If the batch size "
36  "is less than 0, all inputs are in one batch.")
37 C10_DEFINE_bool(color, true, "If set, load images in color.");
38 C10_DEFINE_string(
39  crop,
40  "-1,-1",
41  "The center cropped hight and width. If the value is less than zero, "
42  "it is not cropped.");
43 C10_DEFINE_string(input_images, "", "Comma separated images");
44 C10_DEFINE_string(input_image_file, "", "The file containing imput images");
45 C10_DEFINE_string(input_text_file, "", "the text file to be written to blobs");
46 C10_DEFINE_string(
47  output_tensor,
48  "",
49  "The output tensor file in NCHW for input images");
50 C10_DEFINE_string(
51  output_text_tensor,
52  "",
53  "The output tensor file for the text input specified in input_text_file");
54 C10_DEFINE_string(
55  preprocess,
56  "",
57  "Options to specify the preprocess routines. The available options are "
58  "subtract128, normalize, mean, std, bgrtorgb. If multiple steps are provided, they "
59  "are separated by comma (,) in sequence.");
60 C10_DEFINE_string(
61  report_time,
62  "",
63  "Report the conversion stage time to screen. "
64  "The format of the string is <type>|<identifier>. "
65  "The valid type is 'json'. "
66  "The valid identifier is nothing or an identifer that prefix every line");
67 C10_DEFINE_string(
68  scale,
69  "-1,-1",
70  "Scale the images to be within the min,max box. The shorter edge is "
71  "min pixels. But if the other edge is more than the max pixels, the "
72  "other edge and scaled to max pixels (and the shorter edge can be less "
73  "than the min pixels");
74 C10_DEFINE_bool(text_output, false, "Write the output in text format.");
75 C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
76 
77 namespace caffe2 {
78 
79 void reportTime(
80  std::string type,
81  double ts,
82  std::string metric,
83  std::string unit) {
84  if (FLAGS_report_time == "") {
85  return;
86  }
87  vector<string> s = caffe2::split('|', FLAGS_report_time);
88  assert(s[0] == "json");
89  std::string identifier = "";
90  if (s.size() > 1) {
91  identifier = s[1];
92  }
93  std::cout << identifier << "{\"type\": \"" << type << "\", \"value\": " << ts
94  << ", \"metric\": \"" << metric << "\", \"unit\": \"" << unit
95  << "\"}" << std::endl;
96 }
97 
98 void splitSizes(const std::string& arg, int* ptr0, int* ptr1) {
99  vector<string> sizes = caffe2::split(',', arg);
100  if (sizes.size() == 2) {
101  *ptr0 = std::stoi(sizes[0]);
102  *ptr1 = std::stoi(sizes[1]);
103  } else if (sizes.size() == 1) {
104  *ptr0 = std::stoi(sizes[0]);
105  *ptr1 = std::stoi(sizes[0]);
106  } else {
107  assert(false);
108  }
109 }
110 
111 
112 cv::Mat resizeImage(cv::Mat& img) {
113  int min_size, max_size;
114  splitSizes(FLAGS_scale, &min_size, &max_size);
115  if ((min_size <= 0) && (max_size <= 0)) {
116  return img;
117  }
118  if (max_size < 0) {
119  max_size = INT_MAX;
120  }
121  assert(min_size <= max_size);
122 
123  int im_min_size = img.rows > img.cols ? img.cols : img.rows;
124  int im_max_size = img.rows > img.cols ? img.rows : img.cols;
125 
126  double im_scale = 1.0 * min_size / im_min_size;
127  if (im_scale * im_max_size > max_size) {
128  im_scale = 1.0 * max_size / im_max_size;
129  }
130  int scaled_width = int(round(img.cols * im_scale));
131  int scaled_height = int(round(img.rows * im_scale));
132  assert((scaled_width <= max_size) && (scaled_height <= max_size));
133  if ((scaled_width < min_size) || (scaled_height < min_size)) {
134  assert((scaled_width == max_size) || (scaled_height == max_size));
135  } else {
136  assert((scaled_width == min_size) || (scaled_height == min_size));
137  }
138  cv::Mat resized_img;
139  cv::resize(
140  img,
141  resized_img,
142  cv::Size(),
143  im_scale,
144  im_scale,
145  cv::INTER_LINEAR);
146  return resized_img;
147 }
148 
149 cv::Mat cropToRec(cv::Mat& img, int* height_ptr, int* width_ptr) {
150  int height = *height_ptr;
151  int width = *width_ptr;
152  if ((height > 0) && (width > 0) &&
153  ((img.rows != height) || (img.cols != width))) {
154  cv::Mat cropped_img, cimg;
155  cv::Rect roi;
156  roi.x = int((img.cols - width) / 2);
157  roi.y = int((img.rows - height) / 2);
158  roi.x = roi.x < 0 ? 0 : roi.x;
159  roi.y = roi.y < 0 ? 0 : roi.y;
160  width = width > img.cols ? img.cols : width;
161  height = height > img.rows ? img.rows : height;
162  roi.width = width;
163  roi.height = height;
164  assert(
165  0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= img.cols &&
166  0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= img.rows);
167  cropped_img = img(roi);
168  // Make the image in continuous space in memory
169  cimg = cropped_img.clone();
170  *height_ptr = height;
171  *width_ptr = width;
172  return cimg;
173  } else {
174  return img;
175  }
176 }
177 
178 std::vector<float> convertToVector(cv::Mat& img) {
179  std::vector<float> normalize(3, 1);
180  std::vector<float> mean(3, 0);
181  std::vector<float> std(3, 1);
182  bool bgrtorgb = false;
183  int size = img.cols * img.rows;
184  vector<string> steps = caffe2::split(',', FLAGS_preprocess);
185  for (int i = 0; i < steps.size(); i++) {
186  auto step = steps[i];
187  if (step == "subtract128") {
188  mean = {128, 128, 128};
189  std = {1, 1, 1};
190  normalize = {1, 1, 1};
191  } else if (step == "normalize") {
192  normalize = {255, 255, 255};
193  } else if (step == "mean") {
194  mean = {0.406f, 0.456f, 0.485f};
195  } else if (step == "std") {
196  std = {0.225f, 0.224f, 0.229f};
197  } else if (step == "bgrtorgb") {
198  bgrtorgb = true;
199  } else {
200  CAFFE_ENFORCE(
201  false,
202  "Unsupported preprocess step. The supported steps are: subtract128, "
203  "normalize,mean, std, swaprb.");
204  }
205  }
206 
207  int C = FLAGS_color ? 3 : 1;
208  int total_size = C * size;
209  std::vector<float> values(total_size);
210  if (C == 1) {
211  cv::MatIterator_<float> it, end;
212  int idx = 0;
213  for (it = img.begin<float>(), end = img.end<float>(); it != end; ++it) {
214  values[idx++] = (*it / normalize[0] - mean[0]) / std[0];
215  }
216  } else {
217  int i = 0;
218  cv::MatIterator_<cv::Vec3f> it, end;
219  int b = bgrtorgb ? 2 : 0;
220  int g = 1;
221  int r = bgrtorgb ? 0 : 2;
222  for (it = img.begin<cv::Vec3f>(), end = img.end<cv::Vec3f>(); it != end;
223  ++it, i++) {
224  values[i] = (((*it)[b] / normalize[0] - mean[0]) / std[0]);
225  int offset = size + i;
226  values[offset] = (((*it)[g] / normalize[1] - mean[1]) / std[1]);
227  offset = size + offset;
228  values[offset] = (((*it)[r] / normalize[2] - mean[2]) / std[2]);
229  }
230  }
231  return values;
232 }
233 
234 std::vector<float> convertOneImage(
235  std::string& filename,
236  int* height_ptr,
237  int* width_ptr) {
238  assert(filename[0] != '~');
239 
240  std::cout << "Converting " << filename << std::endl;
241 
242  // Load image
243  cv::Mat img_uint8 = cv::imread(
244 #if CV_MAJOR_VERSION <= 3
245  filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
246 #else
247  filename, FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
248 #endif
249  caffe2::Timer timer;
250  timer.Start();
251  cv::Mat img;
252  // Convert image to floating point values
253  img_uint8.convertTo(img, CV_32F);
254  // Resize image
255  cv::Mat resized_img = resizeImage(img);
256 
257  int height, width;
258  splitSizes(FLAGS_crop, &height, &width);
259  if ((height <= 0) || (width <= 0)) {
260  height = resized_img.rows;
261  width = resized_img.cols;
262  }
263  cv::Mat crop = cropToRec(resized_img, &height, &width);
264 
265  // Assert we don't have to deal with alignment
266  DCHECK(crop.isContinuous());
267  assert(crop.rows == height);
268  assert(crop.cols == width);
269  std::vector<float> one_image_values = convertToVector(crop);
270  *height_ptr = height;
271  *width_ptr = width;
272  double ts = timer.MicroSeconds();
273  reportTime("image_preprocess", ts, "convert", "us");
274  return one_image_values;
275 }
276 
277 int getBatchSize(int num_items) {
278  int batch_size = FLAGS_batch_size;
279  if (batch_size < 0) {
280  batch_size = num_items;
281  } else {
282  assert(num_items % batch_size == 0);
283  }
284  return batch_size;
285 }
286 
287 void writeValues(
288  std::vector<std::vector<std::vector<float>>>& values,
289  std::vector<std::vector<int>>& dims,
290  std::string output_file) {
291 
292  caffe2::Timer timer;
293  timer.Start();
294 
295  assert(dims.size() == values.size());
296  int num_batches = dims.size();
297 
298  TensorProtos protos;
299  for (int k = 0; k < num_batches; k++) {
300  TensorProto* data;
301  data = protos.add_protos();
302  data->set_data_type(TensorProto::FLOAT);
303  auto one_dim = dims[k];
304  for (int dim : one_dim) {
305  data->add_dims(dim);
306  }
307  int batch_size = one_dim[0];
308  long long int entry_size = 1;
309  for (int i = 1; i < one_dim.size(); i++) {
310  entry_size *= one_dim[i];
311  }
312 
313  // Not optimized
314  for (int i = 0; i < batch_size; i++) {
315  assert(values[k][i].size() == entry_size);
316  for (int j = 0; j < values[k][i].size(); j++) {
317  data->add_float_data(values[k][i][j]);
318  }
319  }
320  }
321  double ts = timer.MicroSeconds();
322  reportTime("preprocess", ts, "data_pack", "us");
323 
324  if (FLAGS_text_output) {
325  caffe2::WriteProtoToTextFile(protos, output_file);
326  } else {
327  caffe2::WriteProtoToBinaryFile(protos, output_file);
328  }
329 }
330 
331 void convertImages() {
332  vector<string> file_names;
333  if (FLAGS_input_images != "") {
334  file_names = caffe2::split(',', FLAGS_input_images);
335  } else if (FLAGS_input_image_file != "") {
336  std::ifstream infile(FLAGS_input_image_file);
337  std::string line;
338  while (std::getline(infile, line)) {
339  vector<string> file_name = caffe2::split(',', line);
340  string name;
341  if (file_name.size() == 3) {
342  name = file_name[2];
343  } else {
344  name = line;
345  }
346  file_names.push_back(name);
347  }
348  } else {
349  return;
350  }
351  int batch_size = getBatchSize(file_names.size());
352  int num_batches = file_names.size() / batch_size;
353  assert(file_names.size() == batch_size * num_batches);
354  std::vector<std::vector<std::vector<float>>> values;
355  std::vector<std::vector<int>> dims;
356  int C = FLAGS_color ? 3 : 1;
357  for (int k = 0; k < num_batches; k++) {
358  std::vector<std::vector<float>> one_value;
359  int height = -1;
360  int width = -1;
361  for (int i = 0; i < batch_size; i++) {
362  int idx = k * batch_size + i;
363  int one_height, one_width;
364  std::vector<float> one_image_values =
365  convertOneImage(file_names[idx], &one_height, &one_width);
366  if (height < 0 && width < 0) {
367  height = one_height;
368  width = one_width;
369  } else {
370  assert(height == one_height);
371  assert(width == one_width);
372  }
373  one_value.push_back(one_image_values);
374  }
375  vector<int> one_dim = {batch_size, C, height, width};
376  dims.push_back(one_dim);
377  values.push_back(one_value);
378  }
379  writeValues(values, dims, FLAGS_output_tensor);
380 }
381 
382 template <class TYPE>
383 vector<TYPE> splitString(std::string& line) {
384  vector<string> vector_str = caffe2::split(',', line);
385  vector<TYPE> vector_int;
386  for (string str : vector_str) {
387  vector_int.push_back((TYPE)std::stod(str));
388  }
389  return vector_int;
390 }
391 
392 /* Convert the values in a json file to blobs
393  The format of the json file should be:
394  <number of items>, <dim2>.... (dimensions of items)
395  <entry>, <entry>, <entry>... (all entries in one item)
396  <entry>, <entry>, <entry>...
397  ....
398 */
399 void convertValues() {
400  if (FLAGS_input_text_file == "") {
401  return;
402  }
403  std::ifstream infile(FLAGS_input_text_file);
404  std::string line;
405  std::getline(infile, line);
406  vector<int> file_dims = splitString <int>(line);
407  assert(file_dims.size() >= 2);
408 
409  int num_items = file_dims[0];
410  int batch_size = getBatchSize(num_items);
411  int num_batches = num_items / batch_size;
412  assert(num_items == batch_size * num_batches);
413  vector<string> lines;
414  while (std::getline(infile, line)) {
415  lines.push_back(line);
416  }
417  assert(lines.size() == num_items);
418  std::vector<std::vector<std::vector<float>>> values;
419  std::vector<std::vector<int>> dims;
420  for (int i = 0; i < num_batches; i++) {
421  std::vector<std::vector<float>> one_value;
422  int num = -1;
423  for (int j = 0; j < batch_size; j++) {
424  int idx = i * batch_size + j;
425  std::string line = lines[idx];
426  vector<float> item = splitString<float>(line);
427  if (num < 0) {
428  num = item.size();
429  } else {
430  assert(num == item.size());
431  }
432  one_value.push_back(item);
433  }
434  vector<int> batch_dims = file_dims;
435  batch_dims[0] = batch_size;
436  dims.push_back(batch_dims);
437  values.push_back(one_value);
438  }
439 
440  writeValues(values, dims, FLAGS_output_text_tensor);
441 }
442 
443 } // namespace caffe2
444 
445 int main(int argc, char** argv) {
446  caffe2::GlobalInit(&argc, &argv);
447  caffe2::convertImages();
448  caffe2::convertValues();
449  return 0;
450 }
void Start()
Starts a timer.
Definition: timer.h:24
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:64
float MicroSeconds()
Returns the elapsed time in microseconds.
Definition: timer.h:36
bool GlobalInit(int *pargc, char ***pargv)
Initialize the global environment of caffe2.
Definition: init.cc:44
A simple timer object for measuring time.
Definition: timer.h:16