"""
Benchmark for common convnets.

Speed on Titan X, with 10 warmup steps and 10 main steps and with different
versions of cudnn, is as follows (time reported below is per-batch time in
milliseconds, forward / forward+backward):

                     CuDNN v3         CuDNN v4
AlexNet           32.5 / 108.0      27.4 / 90.1
OverFeat         113.0 / 342.3     91.7 / 276.5
Inception        134.5 / 485.8    125.7 / 450.6
VGG (batch 64)   200.8 / 650.0    164.1 / 551.7

Speed on Inception with varied batch sizes and CuDNN v4 is as follows:

Batch Size    Speed per batch    Speed per image
 16             22.8 /  72.7       1.43 / 4.54
 32             38.0 / 127.5       1.19 / 3.98
 64             67.2 / 233.6       1.05 / 3.65
128            125.7 / 450.6       0.98 / 3.52

Speed on Tesla M40, with 10 warmup steps and 10 main steps and with CuDNN
v4, is as follows:

OverFeat         210.5 / 630.3
Inception        300.2 / 1122.2
VGG (batch 64)   405.8 / 1327.7

(Note that these numbers involve a "full" backprop, i.e. the gradient
with respect to the input image is also computed.)

To get the numbers, simply run:

for MODEL in AlexNet OverFeat Inception; do
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size 128 --model $MODEL --forward_only
done
for MODEL in AlexNet OverFeat Inception; do
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size 128 --model $MODEL
done
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  --batch_size 64 --model VGGA --forward_only
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  --batch_size 64 --model VGGA

for BS in 16 32 64 128; do
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size $BS --model Inception --forward_only
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size $BS --model Inception
done

Note that VGG needs to be run at batch 64 due to memory limit on the backward
pass.
"""

import argparse

from caffe2.python import brew, model_helper, workspace


def MLP(order, cudnn_ws):
    model = model_helper.ModelHelper(name="MLP")
    # The layer sizes were lost in this fragment; the values below are
    # assumptions: a depth x width grid of d-dimensional FC layers.
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(
                model,
                current,
                next_,
                dim_in=d,
                dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}),
            )
    brew.sum(
        model, ["fc_{}_{}".format(depth, j) for j in range(width)], ["sum"]
    )
    brew.fc(
        model,
        "sum",
        "last",
        dim_in=d,
        dim_out=1000,
        weight_init=('XavierFill', {}),
        bias_init=('XavierFill', {}),
    )
    xent = model.net.LabelCrossEntropy(["last", "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, d
def AlexNet(order, cudnn_ws):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="alexnet",
        arg_scope=my_arg_scope,
    )
    # Channel counts, strides and pads missing from this fragment are assumed
    # to follow the standard one-tower AlexNet (64-192-384-256-256); they are
    # consistent with the 256 * 6 * 6 fc6 input below.
    conv1 = brew.conv(
        model, "data", "conv1", 3, 64, 11,
        ('XavierFill', {}), ('ConstantFill', {}),
        stride=4, pad=2,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2)
    conv2 = brew.conv(
        model, pool1, "conv2", 64, 192, 5,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=2,
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2)
    conv3 = brew.conv(
        model, pool2, "conv3", 192, 384, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model, relu3, "conv4", 384, 256, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(
        model, relu4, "conv5", 256, 256, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
    fc6 = brew.fc(
        model, pool5, "fc6", 256 * 6 * 6, 4096,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(
        model, relu6, "fc7", 4096, 4096,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(
        model, relu7, "fc8", 4096, 1000,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    pred = brew.softmax(model, fc8, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 224

def OverFeat(order, cudnn_ws):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="overfeat",
        arg_scope=my_arg_scope,
    )
    # Parameters missing from this fragment are assumed to follow the "fast"
    # OverFeat configuration (96-256-512-1024-1024), which matches the
    # 1024 * 6 * 6 fc6 input below.
    conv1 = brew.conv(
        model, "data", "conv1", 3, 96, 11,
        ('XavierFill', {}), ('ConstantFill', {}),
        stride=4,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model, pool1, "conv2", 96, 256, 5,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model, pool2, "conv3", 256, 512, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model, relu3, "conv4", 512, 1024, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(
        model, relu4, "conv5", 1024, 1024, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=2, stride=2)
    fc6 = brew.fc(
        model, pool5, "fc6", 1024 * 6 * 6, 3072,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(
        model, relu6, "fc7", 3072, 4096,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(
        model, relu7, "fc8", 4096, 1000,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    pred = brew.softmax(model, fc8, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 231

def VGGA(order, cudnn_ws):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="vgga",
        arg_scope=my_arg_scope,
    )
    # Channel counts missing from this fragment are assumed to follow the
    # VGG-A (VGG11) configuration: 64-128-256-256-512-512-512-512, all 3x3
    # kernels with pad 1, consistent with the 512 * 7 * 7 fcix input below.
    conv1 = brew.conv(
        model, "data", "conv1", 3, 64, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model, pool1, "conv2", 64, 128, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model, pool2, "conv3", 128, 256, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model, relu3, "conv4", 256, 256, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu4 = brew.relu(model, conv4, "conv4")
    pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)
    conv5 = brew.conv(
        model, pool4, "conv5", 256, 512, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu5 = brew.relu(model, conv5, "conv5")
    conv6 = brew.conv(
        model, relu5, "conv6", 512, 512, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu6 = brew.relu(model, conv6, "conv6")
    pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)
    conv7 = brew.conv(
        model, pool6, "conv7", 512, 512, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu7 = brew.relu(model, conv7, "conv7")
    conv8 = brew.conv(
        model, relu7, "conv8", 512, 512, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu8 = brew.relu(model, conv8, "conv8")
    pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)
    fcix = brew.fc(
        model, pool8, "fcix", 512 * 7 * 7, 4096,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    reluix = brew.relu(model, fcix, "fcix")
    fcx = brew.fc(
        model, reluix, "fcx", 4096, 4096,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    relux = brew.relu(model, fcx, "fcx")
    fcxi = brew.fc(
        model, relux, "fcxi", 4096, 1000,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    pred = brew.softmax(model, fcxi, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 224

def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    # path 1: 1x1 conv
    conv1 = brew.conv(
        model, input_blob, output_name + ":conv1", input_depth, conv1_depth,
        1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv1 = brew.relu(model, conv1, conv1)
    # path 2: 1x1 conv + 3x3 conv
    conv3_reduce = brew.conv(
        model, input_blob, output_name + ":conv3_reduce", input_depth,
        conv3_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv3_reduce = brew.relu(model, conv3_reduce, conv3_reduce)
    conv3 = brew.conv(
        model, conv3_reduce, output_name + ":conv3",
        conv3_depths[0], conv3_depths[1], 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    conv3 = brew.relu(model, conv3, conv3)
    # path 3: 1x1 conv + 5x5 conv
    conv5_reduce = brew.conv(
        model, input_blob, output_name + ":conv5_reduce", input_depth,
        conv5_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv5_reduce = brew.relu(model, conv5_reduce, conv5_reduce)
    conv5 = brew.conv(
        model, conv5_reduce, output_name + ":conv5",
        conv5_depths[0], conv5_depths[1], 5,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=2,
    )
    conv5 = brew.relu(model, conv5, conv5)
    # path 4: 3x3 max pool (stride 1, pad 1, so the spatial size is preserved
    # and the four paths can be concatenated) + 1x1 conv
    pool = brew.max_pool(
        model, input_blob, output_name + ":pool",
        kernel=3, stride=1, pad=1,
    )
    pool_proj = brew.conv(
        model, pool, output_name + ":pool_proj", input_depth, pool_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    pool_proj = brew.relu(model, pool_proj, pool_proj)
    output = brew.concat(model, [conv1, conv3, conv5, pool_proj], output_name)
    return output
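
# Illustrative note (not in the original file): the concat above makes the
# module's output depth conv1_depth + conv3_depths[1] + conv5_depths[1] +
# pool_depth. For "inc3" in Inception() below, that is 64 + 128 + 32 + 32 =
# 256, which is exactly the input_depth handed to "inc4".
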

def Inception(order, cudnn_ws):
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(
        name="inception",
        arg_scope=my_arg_scope,
    )
    # Stem parameters missing from this fragment are assumed to follow the
    # GoogLeNet stem: a 7x7/2 conv to 64 channels, then 1x1 and 3x3 convs
    # up to 192 channels.
    conv1 = brew.conv(
        model, "data", "conv1", 3, 64, 7,
        ('XavierFill', {}), ('ConstantFill', {}),
        stride=2, pad=3,
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = brew.conv(
        model, pool1, "conv2a", 64, 64, 1,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    conv2a = brew.relu(model, conv2a, conv2a)
    conv2 = brew.conv(
        model, conv2a, "conv2", 64, 192, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1,
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2, pad=1)
    inc3 = _InceptionModule(
        model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
    )
    inc4 = _InceptionModule(
        model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
    )
    pool5 = brew.max_pool(model, inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(
        model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
    )
    inc6 = _InceptionModule(
        model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
    )
    inc7 = _InceptionModule(
        model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
    )
    inc8 = _InceptionModule(
        model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
    )
    inc9 = _InceptionModule(
        model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
    )
    pool9 = brew.max_pool(model, inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(
        model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
    )
    inc11 = _InceptionModule(
        model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
    )
    pool11 = brew.average_pool(model, inc11, "pool11", kernel=7, stride=1)
    fc = brew.fc(
        model, pool11, "fc", 1024, 1000,
        ('XavierFill', {}), ('ConstantFill', {}),
    )
    pred = brew.softmax(model, fc, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 224

def AddParameterUpdate(model):
    """ Simple plain SGD update -- not tuned to actually train the models """
    ITER = brew.iter(model, "iter")
    LR = model.net.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.net.WeightedSum([param, ONE, param_grad, LR], param)
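
# Illustrative note (not in the original file): WeightedSum computes
# param <- 1.0 * param + LR * param_grad, and base_lr is negative, so the
# update descends the gradient. In plain Python the same step would read:
#
#     def sgd_step(param, grad, lr=-1e-8):
#         # equivalent of WeightedSum([param, ONE, grad, LR], param)
#         return 1.0 * param + lr * grad
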

def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order, arg.cudnn_ws)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # Add the data and label blobs to the parameter initialization net so the
    # benchmark can run without feeding anything externally.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    if arg.model == "MLP":
        input_shape = [arg.batch_size, input_size]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,
        std=1.0,
    )
    model.param_init_net.UniformIntFill(
        [],
        "label",
        shape=[arg.batch_size, ],
        min=0,
        max=999,
    )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)
        if arg.order == 'NHWC':
            print(
                '==WARNING==\n'
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.'
            )

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.engine:
        for op in model.net.Proto().op:
            op.engine = arg.engine

    if arg.dump_model:
        # Dump the model prototxts to disk, e.g. for mobile benchmarks.
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        with open("{0}.pbtxt".format(arg.model), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    workspace.BenchmarkNet(
        model.net.Proto().name, arg.warmup_iterations, arg.iterations,
        arg.layer_wise_benchmark)
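
# Illustrative sketch (not in the original file): Benchmark() can be driven
# from Python as well as from the shell; the flag names come from
# GetArgumentParser() below.
#
#     args, _ = GetArgumentParser().parse_known_args(
#         ["--model", "AlexNet", "--batch_size", "128", "--forward_only"]
#     )
#     Benchmark(AlexNet, args)
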

def GetArgumentParser():
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    # Defaults below restore values lost in this fragment; the iteration
    # counts follow the documented 10 warmup / 10 main steps.
    parser.add_argument(
        "--batch_size", type=int, default=128, help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order", type=str, default="NCHW", help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws", type=int, help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations", type=int, default=10,
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations", type=int, default=10,
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only", action='store_true',
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark", action='store_true',
        help="If True, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu", action='store_true',
        help="If True, run testing on CPU instead of GPU."
    )
    parser.add_argument(
        "--engine", type=str, default="",
        help="If set, blindly prefer the given engine(s) for every op."
    )
    parser.add_argument(
        "--dump_model", action='store_true',
        help="If True, dump the model prototxts to disk."
    )
    parser.add_argument("--net_type", type=str, default="dag")
    parser.add_argument("--num_workers", type=int, default=2)
    parser.add_argument("--use-nvtx", default=False, action='store_true')
    parser.add_argument("--htrace_span_log_path", type=str)
    return parser

if __name__ == '__main__':
    args, extra_args = GetArgumentParser().parse_known_args()
    if (
        not args.batch_size or not args.model or not args.order
    ):
        GetArgumentParser().print_help()
    else:
        workspace.GlobalInit(
            ['caffe2', '--caffe2_log_level=0'] + extra_args +
            (['--caffe2_use_nvtx'] if args.use_nvtx else []) +
            (['--caffe2_htrace_span_log_path=' + args.htrace_span_log_path]
             if args.htrace_span_log_path else []))

        model_map = {
            'AlexNet': AlexNet,
            'OverFeat': OverFeat,
            'VGGA': VGGA,
            'Inception': Inception,
            'MLP': MLP,
        }
        Benchmark(model_map[args.model], args)