1 from __future__
import absolute_import
2 from __future__
import division
3 from __future__
import print_function
4 from __future__
import unicode_literals
6 from caffe2.proto
import caffe2_pb2
15 def GetArgumentParser():
16 parser = argparse.ArgumentParser(description=
"Caffe2 benchmark.")
21 help=
"The batch size." 23 parser.add_argument(
"--model", type=str, help=
"The model to benchmark.")
28 help=
"The order to evaluate." 34 help=
"device to evaluate on." 39 help=
"The cudnn workspace size." 45 help=
"Number of iterations to run the network." 48 "--warmup_iterations",
51 help=
"Number of warm-up iterations before benchmarking." 56 help=
"If set, only run the forward pass." 59 "--layer_wise_benchmark",
61 help=
"If True, run the layer-wise benchmark as well." 67 help=
"If set, blindly prefer the given engine(s) for every op.")
71 help=
"If True, dump the model prototxts to disk." 73 parser.add_argument(
"--net_type", type=str, default=
"simple")
74 parser.add_argument(
"--num_workers", type=int, default=2)
75 parser.add_argument(
"--use-nvtx", default=
False, action=
'store_true')
76 parser.add_argument(
"--htrace_span_log_path", type=str)
81 print(
'Batch size: {}'.format(args.batch_size))
82 mf = ModelDownloader()
83 init_net, pred_net, value_info = mf.get_c2_model(args.model)
84 input_shapes = {k : [args.batch_size] + v[-1][1:]
for (k, v)
in value_info.items()}
85 print(
"input info: {}".format(input_shapes))
87 for k, v
in input_shapes.items():
88 external_inputs[k] = np.random.randn(*v).astype(np.float32)
90 if args.device ==
'CPU':
91 device_option = core.DeviceOption(caffe2_pb2.CPU)
92 elif args.device ==
'MKL':
93 device_option = core.DeviceOption(caffe2_pb2.MKLDNN)
94 elif args.device ==
'IDEEP':
95 device_option = core.DeviceOption(caffe2_pb2.IDEEP)
97 raise Exception(
"Unknown device: {}".format(args.device))
98 print(
"Device option: {}, {}".format(args.device, device_option))
99 pred_net.device_option.CopyFrom(device_option)
100 for op
in pred_net.op:
101 op.device_option.CopyFrom(device_option)
104 workspace.RunNetOnce(init_net)
105 bb = workspace.Blobs()
108 weights[b] = workspace.FetchBlob(b)
109 for k, v
in external_inputs.items():
111 workspace.ResetWorkspace()
113 with core.DeviceScope(device_option):
114 for name, blob
in weights.items():
116 workspace.FeedBlob(name, blob, device_option)
117 workspace.CreateNet(pred_net)
119 res = workspace.BenchmarkNet(pred_net.name,
120 args.warmup_iterations,
122 args.layer_wise_benchmark)
123 print(
"FPS: {:.2f}".format(1/res[0]*1000*args.batch_size))
125 if __name__ ==
'__main__':
126 args, extra_args = GetArgumentParser().parse_known_args()
128 not args.batch_size
or not args.model
or not args.order
130 GetArgumentParser().print_help()