Caffe2 - Python API
A deep learning, cross platform ML framework
convnet_benchmarks.py
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package convnet_benchmarks
# Module caffe2.experiments.python.convnet_benchmarks
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
22 """
23 Benchmark for common convnets.
24 
25 (NOTE: Numbers below prior with missing parameter=update step, TODO to update)
26 
27 Speed on Titan X, with 10 warmup steps and 10 main steps and with different
28 versions of cudnn, are as follows (time reported below is per-batch time,
29 forward / forward+backward):
30 
31  CuDNN V3 CuDNN v4
32  AlexNet 32.5 / 108.0 27.4 / 90.1
33  OverFeat 113.0 / 342.3 91.7 / 276.5
34  Inception 134.5 / 485.8 125.7 / 450.6
35  VGG (batch 64) 200.8 / 650.0 164.1 / 551.7
36 
37 Speed on Inception with varied batch sizes and CuDNN v4 is as follows:
38 
39 Batch Size Speed per batch Speed per image
40 16 22.8 / 72.7 1.43 / 4.54
41 32 38.0 / 127.5 1.19 / 3.98
42 64 67.2 / 233.6 1.05 / 3.65
43 128 125.7 / 450.6 0.98 / 3.52
44 
45 Speed on Tesla M40, which 10 warmup steps and 10 main steps and with cudnn
46 v4, is as follows:
47 
48 AlexNet 68.4 / 218.1
49 OverFeat 210.5 / 630.3
50 Inception 300.2 / 1122.2
51 VGG (batch 64) 405.8 / 1327.7
52 
53 (Note that these numbers involve a "full" backprop, i.e. the gradient
54 with respect to the input image is also computed.)
55 
56 To get the numbers, simply run:
57 
58 for MODEL in AlexNet OverFeat Inception; do
59 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
60  --batch_size 128 --model $MODEL --forward_only True
61 done
62 for MODEL in AlexNet OverFeat Inception; do
63 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
64  --batch_size 128 --model $MODEL
65 done
66 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
67  --batch_size 64 --model VGGA --forward_only True
68 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
69  --batch_size 64 --model VGGA
70 
71 for BS in 16 32 64 128; do
72 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
73  --batch_size $BS --model Inception --forward_only True
74 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
75  --batch_size $BS --model Inception
76 done
77 
78 Note that VGG needs to be run at batch 64 due to memory limit on the backward
79 pass.
80 """

import argparse
import sys
import time

from caffe2.python import cnn, workspace, core

import caffe2.python.SparseTransformer as SparseTransformer


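# A toy multi-layer-perceptron benchmark: a `depth` x `width` grid of
# 256-wide FC layers (all three parallel columns read from "data" at the
# first layer), whose final-layer outputs are summed and fed through one
# more FC into a 1000-way cross-entropy loss.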
def MLP(order):
    model = cnn.CNNModelHelper()
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            model.FC(
                current, next_,
                dim_in=d, dim_out=d,
                weight_init=model.XavierInit,
                bias_init=model.XavierInit)
    model.Sum(["fc_{}_{}".format(depth, j)
               for j in range(width)], ["sum"])
    model.FC("sum", "last",
             dim_in=d, dim_out=1000,
             weight_init=model.XavierInit,
             bias_init=model.XavierInit)
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, d


def AlexNet(order):
    model = cnn.CNNModelHelper(order, name="alexnet",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4,
        pad=2
    )

    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        192,
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        192,
        384,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        384,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224


def OverFeat(order):
    model = cnn.CNNModelHelper(order, name="overfeat",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        96,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    conv2 = model.Conv(
        pool1, "conv2", 96, 256, 5, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=2, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        512,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        1024,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=2, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 1024 * 6 * 6, 3072, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 3072, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231


def VGGA(order):
    model = cnn.CNNModelHelper(order, name='vgg-a',
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        128,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=2, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        128,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    pool4 = model.MaxPool(relu4, "pool4", kernel=2, stride=2)
    conv5 = model.Conv(
        pool4,
        "conv5",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    conv6 = model.Conv(
        relu5,
        "conv6",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu6 = model.Relu(conv6, "conv6")
    pool6 = model.MaxPool(relu6, "pool6", kernel=2, stride=2)
    conv7 = model.Conv(
        pool6,
        "conv7",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu7 = model.Relu(conv7, "conv7")
    conv8 = model.Conv(
        relu7,
        "conv8",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu8 = model.Relu(conv8, "conv8")
    pool8 = model.MaxPool(relu8, "pool8", kernel=2, stride=2)

    fcix = model.FC(
        pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    reluix = model.Relu(fcix, "fcix")
    fcx = model.FC(
        reluix, "fcx", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relux = model.Relu(fcx, "fcx")
    fcxi = model.FC(
        relux, "fcxi", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fcxi, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231


def net_DAG_Builder(model):
    print("====================================================")
    print("                 Start Building DAG                 ")
    print("====================================================")
    net_root = SparseTransformer.netbuilder(model)
    return net_root


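# Each Inception module concatenates four parallel paths along the channel
# axis, so its output depth is
#     conv1_depth + conv3_depths[1] + conv5_depths[1] + pool_depth
# (e.g. inc3 below: 64 + 128 + 32 + 32 = 256).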
def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    # path 1: 1x1 conv
    conv1 = model.Conv(
        input_blob, output_name + ":conv1", input_depth, conv1_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    conv1 = model.Relu(conv1, conv1)
    # path 2: 1x1 conv + 3x3 conv
    conv3_reduce = model.Conv(
        input_blob, output_name + ":conv3_reduce", input_depth,
        conv3_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv3_reduce = model.Relu(conv3_reduce, conv3_reduce)
    conv3 = model.Conv(
        conv3_reduce,
        output_name + ":conv3",
        conv3_depths[0],
        conv3_depths[1],
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    conv3 = model.Relu(conv3, conv3)
    # path 3: 1x1 conv + 5x5 conv
    conv5_reduce = model.Conv(
        input_blob, output_name + ":conv5_reduce", input_depth,
        conv5_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv5_reduce = model.Relu(conv5_reduce, conv5_reduce)
    conv5 = model.Conv(
        conv5_reduce,
        output_name + ":conv5",
        conv5_depths[0],
        conv5_depths[1],
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    conv5 = model.Relu(conv5, conv5)
    # path 4: pool + 1x1 conv
    pool = model.MaxPool(
        input_blob,
        output_name + ":pool",
        kernel=3,
        stride=1,
        pad=1
    )
    pool_proj = model.Conv(
        pool, output_name + ":pool_proj", input_depth, pool_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    pool_proj = model.Relu(pool_proj, pool_proj)
    output = model.Concat([conv1, conv3, conv5, pool_proj], output_name)
    return output


def Inception(order):
    model = cnn.CNNModelHelper(order, name="inception",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        7,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=2,
        pad=3
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = model.Conv(
        pool1, "conv2a", 64, 64, 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv2a = model.Relu(conv2a, conv2a)
    conv2 = model.Conv(
        conv2a,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2, pad=1)
    # Inception modules
    inc3 = _InceptionModule(
        model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
    )
    inc4 = _InceptionModule(
        model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
    )
    pool5 = model.MaxPool(inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(
        model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
    )
    inc6 = _InceptionModule(
        model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
    )
    inc7 = _InceptionModule(
        model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
    )
    inc8 = _InceptionModule(
        model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
    )
    inc9 = _InceptionModule(
        model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
    )
    pool9 = model.MaxPool(inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(
        model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
    )
    inc11 = _InceptionModule(
        model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
    )
    pool11 = model.AveragePool(inc11, "pool11", kernel=7, stride=1)
    fc = model.FC(
        pool11, "fc", 1024, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We will add it anyway so we can have a proper
    # backward pass.
    pred = model.Softmax(fc, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224


def AddInput(model, batch_size, db, db_type):
    """Adds the data input part."""
    data_uint8, label = model.TensorProtosDBInput(
        [], ["data_uint8", "label"], batch_size=batch_size,
        db=db, db_type=db_type
    )
    # Cast the raw uint8 image data to float, convert NHWC -> NCHW,
    # and scale pixel values into [0, 1).
    data = model.Cast(data_uint8, "data_nhwc", to=core.DataType.FLOAT)
    data = model.NHWC2NCHW(data, "data")
    data = model.Scale(data, data, scale=float(1. / 256))
    # The input data is not learnable, so stop gradients here.
    data = model.StopGradient(data, data)
    return data, label


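# WeightedSum computes param := 1.0 * param + LR * grad in place; the base
# learning rate is negative, so this is a plain (descending) SGD step.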
def AddParameterUpdate(model):
    """ Simple plain SGD update -- not tuned to actually train the models """
    ITER = model.Iter("iter")
    LR = model.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)


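# Benchmark builds the chosen model with synthetic Gaussian "data" and
# random integer "label" blobs, optionally adds gradient and SGD-update
# operators, runs warmup iterations, then times the main iterations via a
# Caffe2 execution plan.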
def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as
    # well.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    if arg.model == "MLP":
        input_shape = [arg.batch_size, input_size]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,
        std=1.0
    )
    model.param_init_net.UniformIntFill(
        [],
        "label",
        shape=[arg.batch_size, ],
        min=0,
        max=999
    )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)

    if arg.order == 'NHWC':
        print(
            '==WARNING==\n'
            'NHWC order with CuDNN may not be supported yet, so I might\n'
            'exit suddenly.'
        )

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.dump_model:
        # Writes out the pbtxt for benchmarks on e.g. Android
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        with open("{0}.pbtxt".format(arg.model), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    for i in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("run", model.net, arg.iterations))
    start = time.time()
    workspace.RunPlan(plan)
    print('Spent: {}'.format((time.time() - start) / arg.iterations))
    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)


def GetArgumentParser():
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=128,
        help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order",
        type=str,
        default="NCHW",
        help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws",
        type=int,
        default=-1,
        help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=10,
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations",
        type=int,
        default=10,
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only",
        action='store_true',
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark",
        action='store_true',
        help="If set, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu",
        action='store_true',
        help="If set, run testing on CPU instead of GPU."
    )
    parser.add_argument(
        "--dump_model",
        action='store_true',
        help="If set, dump the model prototxts to disk."
    )
    parser.add_argument("--net_type", type=str, default="dag")
    parser.add_argument("--num_workers", type=int, default=2)
    return parser


if __name__ == '__main__':
    args = GetArgumentParser().parse_args()
    if (
        not args.batch_size or not args.model or not args.order or
        not args.cudnn_ws
    ):
        GetArgumentParser().print_help()
        # Required arguments are missing; don't fall through to the benchmark.
        sys.exit(1)

    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    model_map = {
        'AlexNet': AlexNet,
        'OverFeat': OverFeat,
        'VGGA': VGGA,
        'Inception': Inception,
        'MLP': MLP,
    }
    Benchmark(model_map[args.model], args)