18 from __future__
import absolute_import
19 from __future__
import division
20 from __future__
import print_function
21 from __future__
import unicode_literals
23 Benchmark for common convnets. 25 (NOTE: the numbers below were measured before the parameter-update step was added; TODO: update them) 27 Speed on Titan X, with 10 warmup steps and 10 main steps and with different 28 versions of cudnn, is as follows (time reported below is per-batch time, 29 forward / forward+backward): 32 AlexNet 32.5 / 108.0 27.4 / 90.1 33 OverFeat 113.0 / 342.3 91.7 / 276.5 34 Inception 134.5 / 485.8 125.7 / 450.6 35 VGG (batch 64) 200.8 / 650.0 164.1 / 551.7 37 Speed on Inception with varied batch sizes and CuDNN v4 is as follows: 39 Batch Size Speed per batch Speed per image 40 16 22.8 / 72.7 1.43 / 4.54 41 32 38.0 / 127.5 1.19 / 3.98 42 64 67.2 / 233.6 1.05 / 3.65 43 128 125.7 / 450.6 0.98 / 3.52 45 Speed on Tesla M40, with 10 warmup steps and 10 main steps and with cudnn 49 OverFeat 210.5 / 630.3 50 Inception 300.2 / 1122.2 51 VGG (batch 64) 405.8 / 1327.7 53 (Note that these numbers involve a "full" backprop, i.e. the gradient 54 with respect to the input image is also computed.) 56 To get the numbers, simply run: 58 for MODEL in AlexNet OverFeat Inception; do 59 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 60 --batch_size 128 --model $MODEL --forward_only True 62 for MODEL in AlexNet OverFeat Inception; do 63 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 64 --batch_size 128 --model $MODEL 66 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 67 --batch_size 64 --model VGGA --forward_only True 68 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 69 --batch_size 64 --model VGGA 71 for BS in 16 32 64 128; do 72 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 73 --batch_size $BS --model Inception --forward_only True 74 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 75 --batch_size $BS --model Inception 78 Note that VGG needs to be run at batch 64 due to memory limit on the backward 87 import caffe2.python.SparseTransformer
as SparseTransformer
91 model = cnn.CNNModelHelper()
95 for i
in range(depth):
96 for j
in range(width):
97 current =
"fc_{}_{}".format(i, j)
if i > 0
else "data" 98 next_ =
"fc_{}_{}".format(i + 1, j)
102 weight_init=model.XavierInit,
103 bias_init=model.XavierInit)
104 model.Sum([
"fc_{}_{}".format(depth, j)
105 for j
in range(width)], [
"sum"])
106 model.FC(
"sum",
"last",
107 dim_in=d, dim_out=1000,
108 weight_init=model.XavierInit,
109 bias_init=model.XavierInit)
110 xent = model.LabelCrossEntropy([
"last",
"label"],
"xent")
111 model.AveragedLoss(xent,
"loss")
116 model = cnn.CNNModelHelper(order, name=
"alexnet",
117 use_cudnn=
True, cudnn_exhaustive_search=
True)
125 (
'ConstantFill', {}),
130 relu1 = model.Relu(conv1,
"conv1")
131 pool1 = model.MaxPool(relu1,
"pool1", kernel=3, stride=2)
139 (
'ConstantFill', {}),
142 relu2 = model.Relu(conv2,
"conv2")
143 pool2 = model.MaxPool(relu2,
"pool2", kernel=3, stride=2)
151 (
'ConstantFill', {}),
154 relu3 = model.Relu(conv3,
"conv3")
162 (
'ConstantFill', {}),
165 relu4 = model.Relu(conv4,
"conv4")
173 (
'ConstantFill', {}),
176 relu5 = model.Relu(conv5,
"conv5")
177 pool5 = model.MaxPool(relu5,
"pool5", kernel=3, stride=2)
179 pool5,
"fc6", 256 * 6 * 6, 4096, (
'XavierFill', {}),
182 relu6 = model.Relu(fc6,
"fc6")
184 relu6,
"fc7", 4096, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
186 relu7 = model.Relu(fc7,
"fc7")
188 relu7,
"fc8", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
190 pred = model.Softmax(fc8,
"pred")
191 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
192 model.AveragedLoss(xent,
"loss")
197 model = cnn.CNNModelHelper(order, name=
"overfeat",
198 use_cudnn=
True, cudnn_exhaustive_search=
True)
206 (
'ConstantFill', {}),
209 relu1 = model.Relu(conv1,
"conv1")
210 pool1 = model.MaxPool(relu1,
"pool1", kernel=2, stride=2)
212 pool1,
"conv2", 96, 256, 5, (
'XavierFill', {}), (
'ConstantFill', {})
214 relu2 = model.Relu(conv2,
"conv2")
215 pool2 = model.MaxPool(relu2,
"pool2", kernel=2, stride=2)
223 (
'ConstantFill', {}),
226 relu3 = model.Relu(conv3,
"conv3")
234 (
'ConstantFill', {}),
237 relu4 = model.Relu(conv4,
"conv4")
245 (
'ConstantFill', {}),
248 relu5 = model.Relu(conv5,
"conv5")
249 pool5 = model.MaxPool(relu5,
"pool5", kernel=2, stride=2)
251 pool5,
"fc6", 1024 * 6 * 6, 3072, (
'XavierFill', {}),
254 relu6 = model.Relu(fc6,
"fc6")
256 relu6,
"fc7", 3072, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
258 relu7 = model.Relu(fc7,
"fc7")
260 relu7,
"fc8", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
262 pred = model.Softmax(fc8,
"pred")
263 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
264 model.AveragedLoss(xent,
"loss")
269 model = cnn.CNNModelHelper(order, name=
'vgg-a',
270 use_cudnn=
True, cudnn_exhaustive_search=
True)
278 (
'ConstantFill', {}),
281 relu1 = model.Relu(conv1,
"conv1")
282 pool1 = model.MaxPool(relu1,
"pool1", kernel=2, stride=2)
290 (
'ConstantFill', {}),
293 relu2 = model.Relu(conv2,
"conv2")
294 pool2 = model.MaxPool(relu2,
"pool2", kernel=2, stride=2)
302 (
'ConstantFill', {}),
305 relu3 = model.Relu(conv3,
"conv3")
313 (
'ConstantFill', {}),
316 relu4 = model.Relu(conv4,
"conv4")
317 pool4 = model.MaxPool(relu4,
"pool4", kernel=2, stride=2)
325 (
'ConstantFill', {}),
328 relu5 = model.Relu(conv5,
"conv5")
336 (
'ConstantFill', {}),
339 relu6 = model.Relu(conv6,
"conv6")
340 pool6 = model.MaxPool(relu6,
"pool6", kernel=2, stride=2)
348 (
'ConstantFill', {}),
351 relu7 = model.Relu(conv7,
"conv7")
359 (
'ConstantFill', {}),
362 relu8 = model.Relu(conv8,
"conv8")
363 pool8 = model.MaxPool(relu8,
"pool8", kernel=2, stride=2)
366 pool8,
"fcix", 512 * 7 * 7, 4096, (
'XavierFill', {}),
369 reluix = model.Relu(fcix,
"fcix")
371 reluix,
"fcx", 4096, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
373 relux = model.Relu(fcx,
"fcx")
375 relux,
"fcxi", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
377 pred = model.Softmax(fcxi,
"pred")
378 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
379 model.AveragedLoss(xent,
"loss")
383 def net_DAG_Builder(model):
384 print(
"====================================================")
385 print(
" Start Building DAG ")
386 print(
"====================================================")
387 net_root = SparseTransformer.netbuilder(model)
391 def _InceptionModule(
392 model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
393 conv5_depths, pool_depth
397 input_blob, output_name +
":conv1", input_depth, conv1_depth, 1,
398 (
'XavierFill', {}), (
'ConstantFill', {})
400 conv1 = model.Relu(conv1, conv1)
402 conv3_reduce = model.Conv(
403 input_blob, output_name +
404 ":conv3_reduce", input_depth, conv3_depths[0],
405 1, (
'XavierFill', {}), (
'ConstantFill', {})
407 conv3_reduce = model.Relu(conv3_reduce, conv3_reduce)
410 output_name +
":conv3",
415 (
'ConstantFill', {}),
418 conv3 = model.Relu(conv3, conv3)
420 conv5_reduce = model.Conv(
421 input_blob, output_name +
422 ":conv5_reduce", input_depth, conv5_depths[0],
423 1, (
'XavierFill', {}), (
'ConstantFill', {})
425 conv5_reduce = model.Relu(conv5_reduce, conv5_reduce)
428 output_name +
":conv5",
433 (
'ConstantFill', {}),
436 conv5 = model.Relu(conv5, conv5)
438 pool = model.MaxPool(
440 output_name +
":pool",
445 pool_proj = model.Conv(
446 pool, output_name +
":pool_proj", input_depth, pool_depth, 1,
447 (
'XavierFill', {}), (
'ConstantFill', {})
449 pool_proj = model.Relu(pool_proj, pool_proj)
450 output = model.Concat([conv1, conv3, conv5, pool_proj], output_name)
454 def Inception(order):
455 model = cnn.CNNModelHelper(order, name=
"inception",
456 use_cudnn=
True, cudnn_exhaustive_search=
True)
464 (
'ConstantFill', {}),
468 relu1 = model.Relu(conv1,
"conv1")
469 pool1 = model.MaxPool(relu1,
"pool1", kernel=3, stride=2, pad=1)
471 pool1,
"conv2a", 64, 64, 1, (
'XavierFill', {}), (
'ConstantFill', {})
473 conv2a = model.Relu(conv2a, conv2a)
481 (
'ConstantFill', {}),
484 relu2 = model.Relu(conv2,
"conv2")
485 pool2 = model.MaxPool(relu2,
"pool2", kernel=3, stride=2, pad=1)
487 inc3 = _InceptionModule(
488 model, pool2, 192,
"inc3", 64, [96, 128], [16, 32], 32
490 inc4 = _InceptionModule(
491 model, inc3, 256,
"inc4", 128, [128, 192], [32, 96], 64
493 pool5 = model.MaxPool(inc4,
"pool5", kernel=3, stride=2, pad=1)
494 inc5 = _InceptionModule(
495 model, pool5, 480,
"inc5", 192, [96, 208], [16, 48], 64
497 inc6 = _InceptionModule(
498 model, inc5, 512,
"inc6", 160, [112, 224], [24, 64], 64
500 inc7 = _InceptionModule(
501 model, inc6, 512,
"inc7", 128, [128, 256], [24, 64], 64
503 inc8 = _InceptionModule(
504 model, inc7, 512,
"inc8", 112, [144, 288], [32, 64], 64
506 inc9 = _InceptionModule(
507 model, inc8, 528,
"inc9", 256, [160, 320], [32, 128], 128
509 pool9 = model.MaxPool(inc9,
"pool9", kernel=3, stride=2, pad=1)
510 inc10 = _InceptionModule(
511 model, pool9, 832,
"inc10", 256, [160, 320], [32, 128], 128
513 inc11 = _InceptionModule(
514 model, inc10, 832,
"inc11", 384, [192, 384], [48, 128], 128
516 pool11 = model.AveragePool(inc11,
"pool11", kernel=7, stride=1)
518 pool11,
"fc", 1024, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
523 pred = model.Softmax(fc,
"pred")
524 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
525 model.AveragedLoss(xent,
"loss")
529 def AddInput(model, batch_size, db, db_type):
530 """Adds the data input part.""" 531 data_uint8, label = model.TensorProtosDBInput(
532 [], [
"data_uint8",
"label"], batch_size=batch_size,
533 db=db, db_type=db_type
535 data = model.Cast(data_uint8,
"data_nhwc", to=core.DataType.FLOAT)
536 data = model.NHWC2NCHW(data,
"data")
537 data = model.Scale(data, data, scale=float(1. / 256))
538 data = model.StopGradient(data, data)
def AddParameterUpdate(model):
    """Append a simple plain SGD update for every parameter of ``model``.

    The update is not tuned to actually train the models; it only adds a
    parameter-update step to the net being benchmarked.

    Args:
        model: model helper providing ``Iter``, ``LearningRate`` and
            ``WeightedSum`` operators, a ``param_init_net`` with
            ``ConstantFill``, the iterable ``params`` and the mapping
            ``param_to_grad`` (parameter -> gradient blob).

    Returns:
        None. Operators are added to ``model`` as a side effect.
    """
    ITER = model.Iter("iter")
    # NOTE(review): base_lr is negative, presumably because the update below
    # is a weighted sum (param * ONE + grad * LR) rather than a subtraction --
    # confirm against the WeightedSum operator documentation.
    LR = model.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    # Constant weight applied to the parameter itself; created in the init net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        # The output blob is the parameter itself, i.e. it is updated in place.
        model.WeightedSum([param, ONE, param_grad, LR], param)
553 def Benchmark(model_gen, arg):
554 model, input_size = model_gen(arg.order)
555 model.Proto().type = arg.net_type
556 model.Proto().num_workers = arg.num_workers
561 if arg.order ==
"NCHW":
562 input_shape = [arg.batch_size, 3, input_size, input_size]
564 input_shape = [arg.batch_size, input_size, input_size, 3]
565 if arg.model ==
"MLP":
566 input_shape = [arg.batch_size, input_size]
568 model.param_init_net.GaussianFill(
575 model.param_init_net.UniformIntFill(
578 shape=[arg.batch_size, ],
584 print(
'{}: running forward only.'.format(arg.model))
586 print(
'{}: running forward-backward.'.format(arg.model))
587 model.AddGradientOperators([
"loss"])
588 AddParameterUpdate(model)
590 if arg.order ==
'NHWC':
593 'NHWC order with CuDNN may not be supported yet, so I might\n' 598 model.param_init_net.RunAllOnGPU()
599 model.net.RunAllOnGPU()
604 "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size),
"w" 606 fid.write(str(model.param_init_net.Proto()))
607 with open(
"{0}.pbtxt".format(arg.model,
608 arg.batch_size),
"w")
as fid:
609 fid.write(str(model.net.Proto()))
611 workspace.RunNetOnce(model.param_init_net)
612 workspace.CreateNet(model.net)
613 for i
in range(arg.warmup_iterations):
614 workspace.RunNet(model.net.Proto().name)
616 plan = core.Plan(
"plan")
617 plan.AddStep(core.ExecutionStep(
"run", model.net, arg.iterations))
619 workspace.RunPlan(plan)
620 print(
'Spent: {}'.format((time.time() - start) / arg.iterations))
621 if arg.layer_wise_benchmark:
622 print(
'Layer-wise benchmark.')
623 workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations,
True)
626 def GetArgumentParser():
627 parser = argparse.ArgumentParser(description=
"Caffe2 benchmark.")
632 help=
"The batch size." 634 parser.add_argument(
"--model", type=str, help=
"The model to benchmark.")
639 help=
"The order to evaluate." 645 help=
"The cudnn workspace size." 651 help=
"Number of iterations to run the network." 654 "--warmup_iterations",
657 help=
"Number of warm-up iterations before benchmarking." 662 help=
"If set, only run the forward pass." 665 "--layer_wise_benchmark",
667 help=
"If True, run the layer-wise benchmark as well." 672 help=
"If True, run testing on CPU instead of GPU." 677 help=
"If True, dump the model prototxts to disk." 679 parser.add_argument(
"--net_type", type=str, default=
"dag")
680 parser.add_argument(
"--num_workers", type=int, default=2)
684 if __name__ ==
'__main__':
685 args = GetArgumentParser().parse_args()
687 not args.batch_size
or not args.model
or not args.order
or 690 GetArgumentParser().print_help()
692 workspace.GlobalInit([
'caffe2',
'--caffe2_log_level=0'])
695 'OverFeat': OverFeat,
697 'Inception': Inception,
700 Benchmark(model_map[args.model], args)