Caffe2 - C++ API
A deep learning, cross platform ML framework
convert_encoded_to_raw_leveldb.cc
1 
17 // This script converts a leveldb of encoded images into a leveldb of raw
18 // (decoded and resized) images.
19 //
20 // FLAGS_input_db_name is the leveldb whose values are serialized
21 // TensorProtos holding the encoded image bytes and the label, and
22 // FLAGS_output_db_name is the leveldb to create, which receives the decoded
23 // pixels and the label under the same key.
24 
25 #include <opencv2/opencv.hpp>
26 
27 #include <fstream> // NOLINT(readability/streams)
28 #include <memory>
29 #include <random>
30 #include <string>
31 
32 #include "caffe2/core/init.h"
33 #include "caffe2/proto/caffe2_pb.h"
34 #include "caffe2/core/logging.h"
35 #include "leveldb/db.h"
36 #include "leveldb/write_batch.h"
37 
38 C10_DEFINE_string(input_db_name, "", "The input image file name.");
39 C10_DEFINE_string(output_db_name, "", "The output training leveldb name.");
40 C10_DEFINE_bool(color, true, "If set, load images in color.");
41 C10_DEFINE_int(
42  scale,
43  256,
44  "If FLAGS_raw is set, scale all the images' shorter edge to the given "
45  "value.");
46 C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
47 
48 namespace caffe2 {
49 
50 using std::string;
51 using std::unique_ptr;
52 
53 void ConvertToRawDataset(
54  const string& input_db_name, const string& output_db_name) {
55  // input leveldb
56  std::unique_ptr<leveldb::DB> input_db;
57  LOG(INFO) << "Opening input leveldb " << input_db_name;
58  {
59  leveldb::Options options;
60  options.create_if_missing = false;
61  leveldb::DB* db_temp;
62  leveldb::Status status = leveldb::DB::Open(
63  options, input_db_name, &db_temp);
64  CAFFE_ENFORCE(status.ok(), "Failed to open leveldb ", input_db_name, ".");
65  input_db.reset(db_temp);
66  }
67 
68  // output leveldb
69  std::unique_ptr<leveldb::DB> output_db;
70  std::unique_ptr<leveldb::WriteBatch> batch;
71  LOG(INFO) << "Opening leveldb " << output_db_name;
72  {
73  leveldb::Options options;
74  options.error_if_exists = true;
75  options.create_if_missing = true;
76  options.write_buffer_size = 268435456;
77  leveldb::DB* db_temp;
78  leveldb::Status status = leveldb::DB::Open(
79  options, output_db_name, &db_temp);
80  CAFFE_ENFORCE(
81  status.ok(),
82  "Failed to open leveldb ",
83  output_db_name,
84  ". Is it already existing?");
85  output_db.reset(db_temp);
86  }
87  batch.reset(new leveldb::WriteBatch());
88 
89  TensorProtos input_protos;
90  TensorProtos output_protos;
91  TensorProto* data = output_protos.add_protos();
92  TensorProto* label = output_protos.add_protos();
93  data->set_data_type(TensorProto::BYTE);
94  data->add_dims(0);
95  data->add_dims(0);
96  if (FLAGS_color) {
97  data->add_dims(3);
98  }
99  string value;
100 
101  unique_ptr<leveldb::Iterator> iter;
102  iter.reset(input_db->NewIterator(leveldb::ReadOptions()));
103  iter->SeekToFirst();
104  int count = 0;
105  for (; iter->Valid(); iter->Next()) {
106  CAFFE_ENFORCE(input_protos.ParseFromString(iter->value().ToString()));
107  label->CopyFrom(input_protos.protos(1));
108  const string& encoded_image = input_protos.protos(0).string_data(0);
109  int encoded_size = encoded_image.size();
110  cv::Mat img = cv::imdecode(
111  cv::Mat(
112  1, &encoded_size, CV_8UC1, const_cast<char*>(encoded_image.data())),
113  FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
114  cv::Mat resized_img;
115  int scaled_width, scaled_height;
116  if (FLAGS_warp) {
117  scaled_width = FLAGS_scale;
118  scaled_height = FLAGS_scale;
119  } else if (img.rows > img.cols) {
120  scaled_width = FLAGS_scale;
121  scaled_height = static_cast<float>(img.rows) * FLAGS_scale / img.cols;
122  } else {
123  scaled_height = FLAGS_scale;
124  scaled_width = static_cast<float>(img.cols) * FLAGS_scale / img.rows;
125  }
126  cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height), 0, 0,
127  cv::INTER_LINEAR);
128  data->set_dims(0, scaled_height);
129  data->set_dims(1, scaled_width);
130  DCHECK(resized_img.isContinuous());
131  data->set_byte_data(
132  resized_img.ptr(),
133  scaled_height * scaled_width * (FLAGS_color ? 3 : 1));
134  output_protos.SerializeToString(&value);
135  // Put in db
136  batch->Put(iter->key(), value);
137  if (++count % 1000 == 0) {
138  output_db->Write(leveldb::WriteOptions(), batch.get());
139  batch.reset(new leveldb::WriteBatch());
140  LOG(INFO) << "Processed " << count << " files.";
141  }
142  }
143  // write the last batch
144  if (count % 1000 != 0) {
145  output_db->Write(leveldb::WriteOptions(), batch.get());
146  }
147  LOG(INFO) << "Processed a total of " << count << " files.";
148 }
149 
150 } // namespace caffe2
151 
152 
153 int main(int argc, char** argv) {
154  caffe2::GlobalInit(&argc, &argv);
155  caffe2::ConvertToRawDataset(FLAGS_input_db_name, FLAGS_output_db_name);
156  return 0;
157 }
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
bool GlobalInit(int *pargc, char ***pargv)
Initialize the global environment of caffe2.
Definition: init.cc:44