#include <opencv2/opencv.hpp>

#include "caffe2/core/common.h"
#include "caffe2/core/db.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/timer.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"
#include "caffe2/utils/bench_utils.h"

#include "binaries/benchmark_args.h"
#include "binaries/benchmark_helper.h"

#include <observers/net_observer_reporter_print.h>
#include <observers/observer_config.h>
#include <observers/perf_observer.h>

using std::map;
using std::shared_ptr;
using std::string;
using std::vector;

C10_DEFINE_int(
    batch_size,
    -1,
    "Specify the batch size of the input. The number of items in the "
    "input needs to be multiples of the batch size. If the batch size "
    "is less than 0, all inputs are in one batch.");
C10_DEFINE_bool(color, true, "If set, load images in color.");
C10_DEFINE_string(
    crop,
    "-1,-1",
    "The center cropped height and width. If the value is less than zero, "
    "it is not cropped.");
C10_DEFINE_string(input_image_files, "", "Files containing input images");
C10_DEFINE_string(input_text_files, "", "Text files to be written to blobs");
C10_DEFINE_string(
    preprocess,
    "",
    "Options to specify the preprocess routines. The available options are "
    "subtract128, normalize, mean, std, bgrtorgb. If multiple steps are "
    "provided, they are separated by comma (,) in sequence.");
C10_DEFINE_string(
    report_time,
    "",
    "Report the conversion stage time to screen. "
    "The format of the string is <type>|<identifier>. "
    "The valid type is 'json'. "
    "The valid identifier is nothing or an identifier that prefixes every line.");
C10_DEFINE_string(
    scale,
    "-1,-1",
    "Scale the images to be within the min,max box. The shorter edge is "
    "scaled to min pixels. But if that would make the longer edge exceed max "
    "pixels, the longer edge is scaled to max pixels instead (and the shorter "
    "edge can then be less than min pixels).");
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
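// Example invocation (a sketch; the binary name and all file paths are
// hypothetical, and --net/--init_net come from binaries/benchmark_args.h):
//
//   ./convert_and_benchmark \
//       --net predict_net.pb --init_net init_net.pb \
//       --input_image_files "data,image_list.txt" \
//       --scale 256,320 --crop 224,224 \
//       --preprocess "normalize,mean,std,bgrtorgb" \
//       --batch_size 4 --report_time "json|preproc"
//
// With that --report_time setting, reportTime() below prints one JSON line per
// stage, e.g. (values illustrative):
//   preproc{"type": "image_preprocess", "value": 1234.5, "metric": "convert", "unit": "us"}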
namespace caffe2 {

void reportTime(
    std::string type,
    double ts,
    std::string metric,
    std::string unit) {
  if (FLAGS_report_time == "") {
    return;
  }
  vector<string> s = caffe2::split('|', FLAGS_report_time);
  assert(s[0] == "json");
  std::string identifier = "";
  if (s.size() > 1) {
    identifier = s[1];
  }
  std::cout << identifier << "{\"type\": \"" << type << "\", \"value\": " << ts
            << ", \"metric\": \"" << metric << "\", \"unit\": \"" << unit
            << "\"}" << std::endl;
}
void splitSizes(const std::string& arg, int* ptr0, int* ptr1) {
  vector<string> sizes = caffe2::split(',', arg);
  if (sizes.size() == 2) {
    *ptr0 = std::stoi(sizes[0]);
    *ptr1 = std::stoi(sizes[1]);
  } else if (sizes.size() == 1) {
    *ptr0 = std::stoi(sizes[0]);
    *ptr1 = std::stoi(sizes[0]);
  }
}
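// Parsing examples: "256,320" yields (256, 320); a single value such as "224"
// is duplicated into (224, 224). Other forms leave the outputs untouched.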
cv::Mat resizeImage(cv::Mat& img) {
  // Scale the image to fit the min/max box given by FLAGS_scale.
  int min_size, max_size;
  splitSizes(FLAGS_scale, &min_size, &max_size);
  if ((min_size <= 0) && (max_size <= 0)) {
    // No scaling requested.
    return img;
  }
  assert(min_size <= max_size);

  int im_min_size = img.rows > img.cols ? img.cols : img.rows;
  int im_max_size = img.rows > img.cols ? img.rows : img.cols;

  double im_scale = 1.0 * min_size / im_min_size;
  if (im_scale * im_max_size > max_size) {
    im_scale = 1.0 * max_size / im_max_size;
  }
  int scaled_width = int(round(img.cols * im_scale));
  int scaled_height = int(round(img.rows * im_scale));
  assert((scaled_width <= max_size) && (scaled_height <= max_size));
  if ((scaled_width < min_size) || (scaled_height < min_size)) {
    assert((scaled_width == max_size) || (scaled_height == max_size));
  } else {
    assert((scaled_width == min_size) || (scaled_height == min_size));
  }
  cv::Mat resized_img;
  cv::resize(img, resized_img, cv::Size(scaled_width, scaled_height));
  return resized_img;
}
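// Worked example: with --scale 256,320 and a 480x640 (rows x cols) image, the
// shorter edge gives im_scale = 256/480 ~= 0.533, but 0.533 * 640 > 320, so
// im_scale becomes 320/640 = 0.5 and the image is resized to 240x320.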
cv::Mat cropToRec(cv::Mat& img, int* height_ptr, int* width_ptr) {
  int height = *height_ptr;
  int width = *width_ptr;
  if ((height > 0) && (width > 0) &&
      ((img.rows != height) || (img.cols != width))) {
    cv::Mat cropped_img, cimg;
    cv::Rect roi;
    roi.x = int((img.cols - width) / 2);
    roi.y = int((img.rows - height) / 2);
    roi.x = roi.x < 0 ? 0 : roi.x;
    roi.y = roi.y < 0 ? 0 : roi.y;
    width = width > img.cols ? img.cols : width;
    height = height > img.rows ? img.rows : height;
    roi.width = width;
    roi.height = height;
    assert(
        0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= img.cols &&
        0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= img.rows);
    cropped_img = img(roi);
    // Clone so the cropped data is contiguous in memory.
    cimg = cropped_img.clone();
    *height_ptr = height;
    *width_ptr = width;
    return cimg;
  }
  return img;
}
std::vector<float> convertToVector(cv::Mat& img) {
  std::vector<float> normalize(3, 1);
  std::vector<float> mean(3, 0);
  std::vector<float> std(3, 1);
  bool bgrtorgb = false;
  int size = img.cols * img.rows;
  vector<string> steps = caffe2::split(',', FLAGS_preprocess);
  for (int i = 0; i < steps.size(); i++) {
    auto step = steps[i];
    if (step == "subtract128") {
      mean = {128, 128, 128};
      std = {1, 1, 1};
      normalize = {1, 1, 1};
    } else if (step == "normalize") {
      normalize = {255, 255, 255};
    } else if (step == "mean") {
      mean = {0.406f, 0.456f, 0.485f};
    } else if (step == "std") {
      std = {0.225f, 0.224f, 0.229f};
    } else if (step == "bgrtorgb") {
      bgrtorgb = true;
    } else {
      CAFFE_THROW(
          "Unsupported preprocess step. The supported steps are: subtract128, "
          "normalize, mean, std, bgrtorgb.");
    }
  }

  int C = FLAGS_color ? 3 : 1;
  int total_size = C * size;
  std::vector<float> values(total_size);
  if (C == 1) {
    cv::MatIterator_<float> it, end;
    int idx = 0;
    for (it = img.begin<float>(), end = img.end<float>(); it != end; ++it) {
      values[idx++] = (*it / normalize[0] - mean[0]) / std[0];
    }
  } else {
    int i = 0;
    cv::MatIterator_<cv::Vec3f> it, end;
    int b = bgrtorgb ? 2 : 0;
    int g = 1;
    int r = bgrtorgb ? 0 : 2;
    for (it = img.begin<cv::Vec3f>(), end = img.end<cv::Vec3f>(); it != end;
         ++it, i++) {
      values[i] = (((*it)[b] / normalize[0] - mean[0]) / std[0]);
      int offset = size + i;
      values[offset] = (((*it)[g] / normalize[1] - mean[1]) / std[1]);
      offset = size + offset;
      values[offset] = (((*it)[r] / normalize[2] - mean[2]) / std[2]);
    }
  }
  return values;
}
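// The returned vector is in planar CHW order: all values of the first channel,
// then the second, then the third. With bgrtorgb set, OpenCV's BGR channels are
// written out in RGB order; otherwise the planes stay in BGR order.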
std::vector<float> convertOneImage(
    std::string& filename,
    int* height_ptr,
    int* width_ptr) {
  assert(filename[0] != '~');

  std::cout << "Converting " << filename << std::endl;

  // Load the image with OpenCV.
  cv::Mat img_uint8 = cv::imread(
#if CV_MAJOR_VERSION <= 3
      filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
#else
      filename, FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
#endif
  caffe2::Timer timer;
  timer.Start();
  // Convert the image to floating point values.
  cv::Mat img;
  img_uint8.convertTo(img, CV_32F);
  // Resize to fit the FLAGS_scale box.
  cv::Mat resized_img = resizeImage(img);

  int height, width;
  splitSizes(FLAGS_crop, &height, &width);
  if ((height <= 0) || (width <= 0)) {
    height = resized_img.rows;
    width = resized_img.cols;
  }
  cv::Mat crop = cropToRec(resized_img, &height, &width);

  // Assert we don't have to deal with alignment.
  DCHECK(crop.isContinuous());
  assert(crop.rows == height);
  assert(crop.cols == width);
  std::vector<float> one_image_values = convertToVector(crop);
  *height_ptr = height;
  *width_ptr = width;
  double ts = timer.MicroSeconds();
  reportTime("image_preprocess", ts, "convert", "us");
  return one_image_values;
}
int getBatchSize(int num_items) {
  int batch_size = FLAGS_batch_size;
  if (batch_size < 0) {
    batch_size = num_items;
  } else {
    assert(num_items % batch_size == 0);
  }
  return batch_size;
}
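// For example, 8 input items with --batch_size 4 are packed into two batches
// of 4; with a negative batch size, all 8 items form a single batch.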
TensorProtos writeValues(
    std::vector<std::vector<std::vector<float>>>& values,
    std::vector<std::vector<int>>& dims) {
  caffe2::Timer timer;
  timer.Start();

  assert(dims.size() == values.size());
  TensorProtos protos;
  int num_batches = dims.size();
  for (int k = 0; k < num_batches; k++) {
    TensorProto* data = protos.add_protos();
    data->set_data_type(TensorProto::FLOAT);
    auto one_dim = dims[k];
    for (int dim : one_dim) {
      data->add_dims(dim);
    }
    int batch_size = one_dim[0];
    long long int entry_size = 1;
    for (int i = 1; i < one_dim.size(); i++) {
      entry_size *= one_dim[i];
    }
    for (int i = 0; i < batch_size; i++) {
      assert(values[k][i].size() == entry_size);
      for (int j = 0; j < values[k][i].size(); j++) {
        data->add_float_data(values[k][i][j]);
      }
    }
  }
  double ts = timer.MicroSeconds();
  reportTime("preprocess", ts, "data_pack", "us");
  return protos;
}
TensorProtos convertImages(std::string& image_file) {
  vector<string> file_names;
  if (image_file != "") {
    std::ifstream infile(image_file);
    std::string line;
    while (std::getline(infile, line)) {
      vector<string> file_name = caffe2::split(',', line);
      string name;
      if (file_name.size() == 3) {
        name = file_name[2];
      } else {
        name = line;
      }
      file_names.push_back(name);
    }
  }
  int batch_size = getBatchSize(file_names.size());
  int num_batches = file_names.size() / batch_size;
  assert(file_names.size() == batch_size * num_batches);
  std::vector<std::vector<std::vector<float>>> values;
  std::vector<std::vector<int>> dims;
  int C = FLAGS_color ? 3 : 1;
  for (int k = 0; k < num_batches; k++) {
    std::vector<std::vector<float>> one_value;
    int height = -1;
    int width = -1;
    for (int i = 0; i < batch_size; i++) {
      int idx = k * batch_size + i;
      int one_height, one_width;
      std::vector<float> one_image_values =
          convertOneImage(file_names[idx], &one_height, &one_width);
      if (height < 0 && width < 0) {
        height = one_height;
        width = one_width;
      } else {
        assert(height == one_height);
        assert(width == one_width);
      }
      one_value.push_back(one_image_values);
    }
    vector<int> one_dim = {batch_size, C, height, width};
    dims.push_back(one_dim);
    values.push_back(one_value);
  }
  return writeValues(values, dims);
}
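// The image list file is assumed (a sketch based on the parsing above) to hold
// one entry per line: either just an image path, or a comma-separated record
// whose third field is the path, e.g.
//   label,id,/path/to/image.jpg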
template <class TYPE>
vector<TYPE> splitString(std::string& line) {
  vector<string> vector_str = caffe2::split(',', line);
  vector<TYPE> vector_int;
  for (string str : vector_str) {
    vector_int.push_back((TYPE)std::stod(str));
  }
  return vector_int;
}
/* Convert the values in a text file to blobs. The first line gives the dims
   (<number of items>,<dim1>,...,<dimn>); each following line holds the
   comma-separated entries of one item. */
TensorProtos convertValues(std::string& file_name) {
  if (file_name == "") {
    assert(false);
  }
  std::ifstream infile(file_name);
  std::string line;
  std::getline(infile, line);
  vector<int> file_dims = splitString<int>(line);
  assert(file_dims.size() >= 2);

  int num_items = file_dims[0];
  int batch_size = getBatchSize(num_items);
  int num_batches = num_items / batch_size;
  assert(num_items == batch_size * num_batches);
  vector<string> lines;
  while (std::getline(infile, line)) {
    lines.push_back(line);
  }
  assert(lines.size() == num_items);
  std::vector<std::vector<std::vector<float>>> values;
  std::vector<std::vector<int>> dims;
  for (int i = 0; i < num_batches; i++) {
    std::vector<std::vector<float>> one_value;
    int num = -1;
    for (int j = 0; j < batch_size; j++) {
      int idx = i * batch_size + j;
      std::string line = lines[idx];
      vector<float> item = splitString<float>(line);
      if (num < 0) {
        num = item.size();
      } else {
        assert(num == item.size());
      }
      one_value.push_back(item);
    }
    vector<int> batch_dims = file_dims;
    batch_dims[0] = batch_size;
    dims.push_back(batch_dims);
    values.push_back(one_value);
  }
  return writeValues(values, dims);
}
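// Example input file for a blob of four 10-dimensional rows (hypothetical
// values): the first line is "4,10", followed by four lines each containing
// ten comma-separated floats.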
} // namespace caffe2

void observerConfig() {
  caffe2::ClearGlobalNetObservers();
  caffe2::AddGlobalNetObserverCreator([](caffe2::NetBase* subject) {
    return caffe2::make_unique<caffe2::PerfNetObserver>(subject);
  });
  caffe2::ObserverConfig::setReporter(
      caffe2::make_unique<caffe2::NetObserverReporterPrint>());
}
bool backendCudaSet(const string& backend) {
  bool run_on_gpu = false;
  if (backend == "cuda") {
#ifdef __CUDA_ARCH__
    if (caffe2::HasCudaGPU()) {
      run_on_gpu = true;
    } else {
      CAFFE_THROW("NO GPU support on this host machine");
    }
#else
    CAFFE_THROW("NO GPU support");
#endif
  }
  return run_on_gpu;
}
void setOperatorEngine(caffe2::NetDef* net_def, const string& backend) {
  if (backend != "builtin") {
    string engine;
    if (backend == "nnpack") {
      engine = "NNPACK";
    } else if (backend == "eigen") {
      engine = "EIGEN";
    } else if (backend == "mkl") {
      engine = "MKLDNN";
    } else if (backend == "cuda") {
      engine = "CUDA";
    } else if (backend == "dnnlowp") {
      engine = "DNNLOWP";
    } else if (backend == "dnnlowp_acc16") {
      engine = "DNNLOWP_ACC16";
    } else if (backend == "default") {
      engine = "";
    } else {
      engine = "NONE";
    }
    CAFFE_ENFORCE(engine != "NONE", "Backend is not supported");
    for (int i = 0; i < net_def->op_size(); i++) {
      caffe2::OperatorDef* op_def = net_def->mutable_op(i);
      op_def->set_engine(engine);
    }
  }
}
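// Usage note: with --backend nnpack every operator in the NetDef gets its
// engine field set (to "NNPACK" in the mapping sketched above, where the
// engine strings are reconstructed); "builtin" leaves the net untouched, and
// unrecognized backends trip the CAFFE_ENFORCE.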
void fillInputBlob(
    shared_ptr<caffe2::Workspace> workspace,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    int iteration) {
  if (tensor_protos_map.empty()) {
    return;
  }
  caffe2::TensorDeserializer deserializer;
  for (auto& tensor_kv : tensor_protos_map) {
    caffe2::Blob* blob = workspace->GetBlob(tensor_kv.first);
    if (blob == nullptr) {
      blob = workspace->CreateBlob(tensor_kv.first);
    }
    int protos_size = tensor_kv.second.protos_size();
    if (protos_size == 1 && iteration > 0) {
      // With a single input proto the blob is already filled; skip so that
      // caches are not needlessly cleared.
      continue;
    }
    caffe2::TensorProto* tensor_proto =
        tensor_kv.second.mutable_protos(iteration % protos_size);
    BlobSetTensor(blob, deserializer.Deserialize(*tensor_proto));
  }
}
void writeOutput(
    shared_ptr<caffe2::Workspace> workspace,
    const bool run_on_gpu,
    const string& output,
    const string& output_folder,
    const bool text_output,
    const int index,
    const int num_blobs) {
  if (output.size() == 0) {
    return;
  }
  string output_prefix = output_folder.size() ? output_folder + "/" : "";
  vector<string> output_names = caffe2::split(',', output);
  if (output == "*") {
    output_names = workspace->Blobs();
  }
  for (const string& name : output_names) {
    CAFFE_ENFORCE(
        workspace->HasBlob(name),
        "You requested a non-existing blob: ",
        name);
    if (text_output) {
      if (run_on_gpu) {
#ifdef __CUDA_ARCH__
        writeTextOutput<caffe2::CUDAContext, caffe2::TensorCUDA>(
            workspace->GetBlob(name)->GetMutable<caffe2::TensorCUDA>(),
            output_prefix,
            name,
            index,
            num_blobs);
#else
        CAFFE_THROW("Not support GPU.");
#endif
      } else {
        writeTextOutput<caffe2::CPUContext, caffe2::TensorCPU>(
            BlobGetMutableTensor(workspace->GetBlob(name), caffe2::CPU),
            output_prefix,
            name,
            index,
            num_blobs);
      }
    } else {
      // Binary output: serialize the whole blob.
      CAFFE_ENFORCE(index == 0, "Binary file only support one output.");
      string serialized = SerializeBlob(*workspace->GetBlob(name), name);
      string output_filename = output_prefix + name;
      caffe2::WriteStringToFile(serialized, output_filename.c_str());
    }
  }
}
void runNetwork(
    shared_ptr<caffe2::Workspace> workspace,
    caffe2::NetDef& net_def,
    map<string, caffe2::TensorProtos>& tensor_protos_map,
    const bool wipe_cache,
    const bool run_individual,
    const bool run_on_gpu,
    const bool text_output,
    const int warmup,
    const int iter,
    const int num_blobs,
    const int sleep_before_run,
    const int sleep_between_iteration,
    const int sleep_between_net_and_operator,
    const std::string& output,
    const std::string& output_folder) {
  if (!net_def.has_name()) {
    net_def.set_name("benchmark");
  }

  caffe2::NetBase* net = workspace->CreateNet(net_def);
  CAFFE_ENFORCE(net);

  LOG(INFO) << "Starting benchmark.";
  caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
  LOG(INFO) << "Running warmup runs.";
  for (int i = 0; i < warmup; ++i) {
    fillInputBlob(workspace, tensor_protos_map, i);
    CAFFE_ENFORCE(net->Run(), "Warmup run ", i, " has failed.");
  }

  if (wipe_cache) {
    caffe2::wipe_cache();
  }
  if (sleep_before_run > 0) {
    std::this_thread::sleep_for(std::chrono::seconds(sleep_before_run));
  }
  LOG(INFO) << "Main runs.";
  CAFFE_ENFORCE(
      iter >= 0,
      "Number of main runs should be non negative, provided ",
      iter,
      ".");
  LOG(INFO) << "net runs.";
  for (int i = 0; i < iter; ++i) {
    caffe2::ObserverConfig::initSampleRate(1, 1, 1, 0, warmup);
    fillInputBlob(workspace, tensor_protos_map, i);
    if (wipe_cache) {
      caffe2::wipe_cache();
    }
    CAFFE_ENFORCE(net->Run(), "Main run ", i, " has failed.");
    // Dump the requested output blobs for this iteration.
    writeOutput(
        workspace,
        run_on_gpu,
        output,
        output_folder,
        text_output,
        i,
        num_blobs);
    if (wipe_cache) {
      caffe2::wipe_cache();
    }
    if (sleep_between_iteration > 0) {
      std::this_thread::sleep_for(
          std::chrono::seconds(sleep_between_iteration));
    }
  }
  if (run_individual) {
    LOG(INFO) << "operator runs.";
    if (sleep_between_net_and_operator > 0) {
      std::this_thread::sleep_for(
          std::chrono::seconds(sleep_between_net_and_operator));
    }
    for (int i = 0; i < iter; ++i) {
      caffe2::ObserverConfig::initSampleRate(1, 1, 1, 1, warmup);
      fillInputBlob(workspace, tensor_protos_map, i);
      CAFFE_ENFORCE(net->Run(), "Main run ", i, " with operator has failed.");
      if (wipe_cache) {
        caffe2::wipe_cache();
      }
      if (sleep_between_iteration > 0) {
        std::this_thread::sleep_for(
            std::chrono::seconds(sleep_between_iteration));
      }
    }
  }
}
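// Run order: `warmup` warmup runs, then `iter` whole-net runs (with optional
// cache wiping, sleeps between iterations, and output dumping), and, if
// run_individual is set, another `iter` runs instrumented per operator.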
int benchmark(
    int argc,
    char* argv[],
    const string& FLAGS_backend,
    const string& FLAGS_init_net,
    const string& FLAGS_input_dims,
    int FLAGS_iter,
    const string& FLAGS_net,
    const string& FLAGS_output,
    const string& FLAGS_output_folder,
    bool FLAGS_run_individual,
    int FLAGS_sleep_before_run,
    int FLAGS_sleep_between_iteration,
    int FLAGS_sleep_between_net_and_operator,
    bool FLAGS_text_output,
    int FLAGS_warmup,
    bool FLAGS_wipe_cache) {
  // Check that the net and init_net files exist before reading them.
  {
    std::ifstream net_file(FLAGS_net);
    CAFFE_ENFORCE(net_file.good());
    net_file.close();

    std::ifstream init_net_file(FLAGS_init_net);
    CAFFE_ENFORCE(init_net_file.good());
    init_net_file.close();
  }

  observerConfig();
  caffe2::ShowLogInfoToStderr();

  auto workspace = std::make_shared<caffe2::Workspace>();
  bool run_on_gpu = backendCudaSet(FLAGS_backend);

  // Run the initialization network.
  caffe2::NetDef init_net_def;
  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
  setOperatorEngine(&init_net_def, FLAGS_backend);
  CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));

  // Load the main network.
  caffe2::NetDef net_def;
  CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
  setOperatorEngine(&net_def, FLAGS_backend);

  map<string, caffe2::TensorProtos> tensor_protos_map;

  int num_blobs = 1;
  vector<string> images = caffe2::split(';', FLAGS_input_image_files);
  for (int i = 0; i < images.size(); ++i) {
    vector<string> mapping = caffe2::split(',', images[i]);
    caffe2::TensorProtos proto_images = caffe2::convertImages(mapping[1]);
    workspace->CreateBlob(mapping[0]);
    tensor_protos_map.insert(std::make_pair(mapping[0], proto_images));
    num_blobs = proto_images.protos_size();
  }

  vector<string> values = caffe2::split(';', FLAGS_input_text_files);
  for (int i = 0; i < values.size(); ++i) {
    vector<string> mapping = caffe2::split(',', values[i]);
    caffe2::TensorProtos proto_values = caffe2::convertValues(mapping[1]);
    workspace->CreateBlob(mapping[0]);
    tensor_protos_map.insert(std::make_pair(mapping[0], proto_values));
    num_blobs = proto_values.protos_size();
  }

  runNetwork(
      workspace,
      net_def,
      tensor_protos_map,
      FLAGS_wipe_cache,
      FLAGS_run_individual,
      run_on_gpu,
      FLAGS_text_output,
      FLAGS_warmup,
      FLAGS_iter,
      num_blobs,
      FLAGS_sleep_before_run,
      FLAGS_sleep_between_iteration,
      FLAGS_sleep_between_net_and_operator,
      FLAGS_output,
      FLAGS_output_folder);

  return 0;
}
int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  benchmark(
      argc, argv,
      FLAGS_backend, FLAGS_init_net, FLAGS_input_dims, FLAGS_iter, FLAGS_net,
      FLAGS_output, FLAGS_output_folder,
      FLAGS_run_individual,
      FLAGS_sleep_before_run,
      FLAGS_sleep_between_iteration,
      FLAGS_sleep_between_net_and_operator,
      FLAGS_text_output, FLAGS_warmup, FLAGS_wipe_cache);
  return 0;
}