#include <binaries/benchmark_helper.h>

#include <chrono>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <thread>
#include <vector>

#include <observers/net_observer_reporter_print.h>
#include <observers/observer_config.h>
#include <observers/perf_observer.h>

#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/utils/bench_utils.h"
#include "caffe2/utils/string_utils.h"

using std::map;
using std::shared_ptr;
using std::string;
using std::unique_ptr;
using std::vector;
44 void observerConfig() {
45 caffe2::ClearGlobalNetObservers();
47 return caffe2::make_unique<caffe2::PerfNetObserver>(subject);
49 caffe2::ObserverConfig::setReporter(
50 caffe2::make_unique<caffe2::NetObserverReporterPrint>());
53 bool backendCudaSet(
const string& backend) {
54 bool run_on_gpu =
false;
55 if (backend ==
"cuda") {
60 CAFFE_THROW(
"NO GPU support on this host machine");
63 CAFFE_THROW(
"NO GPU support");
69 void setDeviceType(caffe2::NetDef* net_def, caffe2::DeviceType& run_dev) {
70 for (
int j = 0; j < net_def->op_size(); j++) {
71 caffe2::OperatorDef* op = net_def->mutable_op(j);
72 op->mutable_device_option()->set_device_type(caffe2::TypeToProto(run_dev));
76 void setOperatorEngine(caffe2::NetDef* net_def,
const string& backend) {
77 if (backend !=
"builtin") {
78 string engine = backend ==
"nnpack" 80 : backend ==
"eigen" ?
"EIGEN" 81 : backend ==
"mkl" ?
"MKLDNN" 84 : backend ==
"dnnlowp" ?
"DNNLOWP" 85 : backend ==
"dnnlowp_acc16" 87 : backend ==
"default" ?
"" :
"NONE";
88 CAFFE_ENFORCE(engine !=
"NONE",
"Backend is not supported");
89 for (
int i = 0; i < net_def->op_size(); i++) {
90 caffe2::OperatorDef* op_def = net_def->mutable_op(i);
91 op_def->set_engine(engine);
97 shared_ptr<caffe2::Workspace> workspace,
98 const bool run_on_gpu,
99 map<string, caffe2::TensorProtos>& tensor_protos_map,
101 const string& input_file,
102 const string& input_dims,
103 const string& input_type) {
108 vector<string> input_names = caffe2::split(
',', input);
109 if (input_file.size()) {
110 vector<string> input_files = caffe2::split(
',', input_file);
114 "Input name and file should have the same number.");
115 for (
int i = 0; i < input_names.size(); ++i) {
116 caffe2::TensorProtos tensor_protos;
118 caffe2::ReadProtoFromFile(input_files[i], &tensor_protos));
119 workspace->CreateBlob(input_names[i]);
120 tensor_protos_map.insert(std::make_pair(input_names[i], tensor_protos));
123 blob_num = tensor_protos_map[input_names[0]].protos_size();
124 for (
int i = 1; i < input_names.size(); ++i) {
125 int bnum = tensor_protos_map[input_names[i]].protos_size();
129 "Number of blobs are not the same for all inputs");
131 }
else if (input_dims.size() || input_type.size()) {
135 "Input dims must be specified when input tensors are used.");
139 "Input type must be specified when input tensors are used.");
141 vector<string> input_dims_list = caffe2::split(
';', input_dims);
144 input_dims_list.size(),
145 "Input name and dims should have the same number of items.");
146 vector<string> input_type_list = caffe2::split(
';', input_type);
149 input_type_list.size(),
150 "Input name and type should have the same number of items.");
151 for (
size_t i = 0; i < input_names.size(); ++i) {
152 vector<string> input_dims_str = caffe2::split(
',', input_dims_list[i]);
153 vector<int> input_dims;
154 for (
const string& s : input_dims_str) {
155 input_dims.push_back(c10::stoi(s));
157 caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
158 if (blob ==
nullptr) {
159 blob = workspace->CreateBlob(input_names[i]);
162 LOG(INFO) <<
"Running on GPU.";
165 CHECK_NOTNULL(tensor);
166 tensor->Resize(input_dims);
167 if (input_type_list[i] ==
"uint8_t") {
168 tensor->mutable_data<uint8_t>();
169 }
else if (input_type_list[i] ==
"float") {
170 tensor->mutable_data<
float>();
172 CAFFE_THROW(
"Unsupported input type: ", input_type_list[i]);
175 CAFFE_THROW(
"Not support GPU on mobile.");
178 if (input_type_list[i] ==
"uint8_t") {
181 CHECK_NOTNULL(tensor);
182 tensor->t.Resize(input_dims);
183 tensor->t.mutable_data<uint8_t>();
184 }
else if (input_type_list[i] ==
"float") {
186 CHECK_NOTNULL(tensor);
187 tensor->Resize(input_dims);
188 tensor->mutable_data<
float>();
189 }
else if (input_type_list[i] ==
"int") {
191 CHECK_NOTNULL(tensor);
192 tensor->Resize(input_dims);
193 tensor->mutable_data<
int>();
195 CAFFE_THROW(
"Unsupported input type: ", input_type_list[i]);
201 "You requested input tensors, but neither input_file nor " 202 "input_dims is set.");
209 shared_ptr<caffe2::Workspace> workspace,
210 map<string, caffe2::TensorProtos>& tensor_protos_map,
212 if (tensor_protos_map.empty()) {
216 for (
auto& tensor_kv : tensor_protos_map) {
217 caffe2::Blob* blob = workspace->GetBlob(tensor_kv.first);
218 if (blob ==
nullptr) {
219 blob = workspace->CreateBlob(tensor_kv.first);
222 int protos_size = tensor_kv.second.protos_size();
223 if (protos_size == 1 && iteration > 0) {
229 caffe2::TensorProto* tensor_proto =
230 tensor_kv.second.mutable_protos(iteration % protos_size);
231 BlobSetTensor(blob, deserializer.Deserialize(*tensor_proto));
237 shared_ptr<caffe2::Workspace> workspace,
238 caffe2::NetDef& net_def,
239 map<string, caffe2::TensorProtos>& tensor_protos_map,
240 const bool wipe_cache,
241 const bool run_individual,
242 const bool run_on_gpu,
243 const bool text_output,
247 const int sleep_before_run,
248 const int sleep_between_iteration,
249 const int sleep_between_net_and_operator,
250 const std::string& output,
251 const std::string& output_folder) {
253 if (!net_def.has_name()) {
254 net_def.set_name(
"benchmark");
260 LOG(INFO) <<
"Starting benchmark.";
261 caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
262 LOG(INFO) <<
"Running warmup runs.";
263 for (
int i = 0; i < warmup; ++i) {
264 fillInputBlob(workspace, tensor_protos_map, i);
265 CAFFE_ENFORCE(net->Run(),
"Warmup run ", i,
" has failed.");
269 caffe2::wipe_cache();
271 if (sleep_before_run > 0) {
272 std::this_thread::sleep_for(std::chrono::seconds(sleep_before_run));
274 LOG(INFO) <<
"Main runs.";
277 "Number of main runs should be non negative, provided ",
280 LOG(INFO) <<
"net runs.";
281 for (
int i = 0; i < iter; ++i) {
282 caffe2::ObserverConfig::initSampleRate(1, 1, 1, 0, warmup);
283 fillInputBlob(workspace, tensor_protos_map, i);
285 caffe2::wipe_cache();
287 CAFFE_ENFORCE(net->Run(),
"Main run ", i,
" has failed.");
298 caffe2::wipe_cache();
300 if (sleep_between_iteration > 0) {
301 std::this_thread::sleep_for(
302 std::chrono::seconds(sleep_between_iteration));
305 if (run_individual) {
306 LOG(INFO) <<
"operator runs.";
307 if (sleep_between_net_and_operator > 0) {
308 std::this_thread::sleep_for(
309 std::chrono::seconds(sleep_between_net_and_operator));
311 for (
int i = 0; i < iter; ++i) {
312 caffe2::ObserverConfig::initSampleRate(1, 1, 1, 1, warmup);
313 fillInputBlob(workspace, tensor_protos_map, i);
314 CAFFE_ENFORCE(net->Run(),
"Main run ", i,
" with operator has failed.");
316 caffe2::wipe_cache();
318 if (sleep_between_iteration > 0) {
319 std::this_thread::sleep_for(
320 std::chrono::seconds(sleep_between_iteration));
327 shared_ptr<caffe2::Workspace> workspace,
328 const bool run_on_gpu,
329 const string& output,
330 const string& output_folder,
331 const bool text_output,
333 const int num_blobs) {
334 if (output.size() == 0) {
337 string output_prefix = output_folder.size() ? output_folder +
"/" :
"";
338 vector<string> output_names = caffe2::split(
',', output);
340 output_names = workspace->Blobs();
342 for (
const string& name : output_names) {
344 workspace->HasBlob(name),
345 "You requested a non-existing blob: ",
350 writeTextOutput<caffe2::CUDAContext, caffe2::TensorCUDA>(
357 CAFFE_THROW(
"Not support GPU.");
360 writeTextOutput<caffe2::CPUContext, caffe2::TensorCPU>(
361 BlobGetMutableTensor(workspace->GetBlob(name), caffe2::CPU),
371 "Binary file only support one output.");
372 string serialized =
SerializeBlob(*workspace->GetBlob(name), name);
373 string output_filename = output_prefix + name;
374 caffe2::WriteStringToFile(serialized, output_filename.c_str());
382 const string& FLAGS_backend,
383 const string& FLAGS_init_net,
384 const string& FLAGS_input,
385 const string& FLAGS_input_dims,
386 const string& FLAGS_input_file,
387 const string& FLAGS_input_type,
389 const string& FLAGS_net,
390 const string& FLAGS_output,
391 const string& FLAGS_output_folder,
392 bool FLAGS_run_individual,
393 int FLAGS_sleep_before_run,
394 int FLAGS_sleep_between_iteration,
395 int FLAGS_sleep_between_net_and_operator,
396 bool FLAGS_text_output,
398 bool FLAGS_wipe_cache) {
403 std::ifstream net_file(FLAGS_net);
404 CAFFE_ENFORCE(net_file.good());
407 std::ifstream init_net_file(FLAGS_init_net);
408 CAFFE_ENFORCE(init_net_file.good());
409 init_net_file.close();
411 if (FLAGS_input_file.size() > 0) {
412 vector<string> input_files = caffe2::split(
',', FLAGS_input_file);
413 for (
auto input_file : input_files) {
414 std::ifstream ifile(input_file);
415 CAFFE_ENFORCE(ifile.good());
422 caffe2::ShowLogInfoToStderr();
425 bool run_on_gpu = backendCudaSet(FLAGS_backend);
427 caffe2::NetDef init_net_def;
428 CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
429 setOperatorEngine(&init_net_def, FLAGS_backend);
430 CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));
433 caffe2::NetDef net_def;
434 CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
435 setOperatorEngine(&net_def, FLAGS_backend);
437 map<string, caffe2::TensorProtos> tensor_protos_map;
439 int num_blobs = loadInput(
453 FLAGS_run_individual,
459 FLAGS_sleep_before_run,
460 FLAGS_sleep_between_iteration,
461 FLAGS_sleep_between_net_and_operator,
463 FLAGS_output_folder);
Blob is a general container that hosts a typed pointer.
bool HasCudaGPU()
Checks whether the current running session has a CUDA GPU present.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
TensorDeserializer is the deserializer for Tensors.
T * GetMutable()
Gets a mutable pointer to the stored object.
void SerializeBlob(const Blob &blob, const string &name, BlobSerializerBase::SerializationAcceptor acceptor, int chunk_size)
Serializes the given blob, if possible.