# Caffe2 - Python API
# A deep learning, cross platform ML framework
# resnet.py
1 ## @package resnet
2 # Module caffe2.python.models.resnet
3 
4 from __future__ import absolute_import
5 from __future__ import division
6 from __future__ import print_function
7 
8 from caffe2.python import brew
9 import logging
10 
11 '''
12 Utility for creating ResNe(X)t
13 "Deep Residual Learning for Image Recognition" by He, Zhang et. al. 2015
14 "Aggregated Residual Transformations for Deep Neural Networks" by Xie et. al. 2016
15 '''
16 
17 
class ResNetBuilder():
    '''
    Helper class for constructing residual blocks.

    Tracks the most recent output blob (``prev_blob``) and a running
    component/layer index so that every created operator gets a unique,
    stable name ('comp_%d_conv_%d', 'comp_%d_spatbn_%d', ...).
    '''

    def __init__(
        self,
        model,
        prev_blob,
        no_bias,
        is_test,
        bn_epsilon=1e-5,
        bn_momentum=0.9,
    ):
        self.model = model
        # Number of finished high-level components (bottleneck/simple blocks).
        self.comp_count = 0
        # Layer index inside the component currently being built.
        self.comp_idx = 0
        self.prev_blob = prev_blob
        self.is_test = is_test
        self.bn_epsilon = bn_epsilon
        self.bn_momentum = bn_momentum
        # Normalize to an int flag; the conv op expects 0/1, not a bool.
        self.no_bias = 1 if no_bias else 0

    def add_conv(
        self,
        in_filters,
        out_filters,
        kernel,
        stride=1,
        group=1,
        pad=0,
    ):
        '''Append a convolution; its output becomes the new prev_blob.'''
        self.comp_idx += 1
        self.prev_blob = brew.conv(
            self.model,
            self.prev_blob,
            'comp_%d_conv_%d' % (self.comp_count, self.comp_idx),
            in_filters,
            out_filters,
            weight_init=("MSRAFill", {}),
            kernel=kernel,
            stride=stride,
            group=group,
            pad=pad,
            no_bias=self.no_bias,
        )
        return self.prev_blob

    def add_relu(self):
        '''Append an in-place ReLU on prev_blob.'''
        self.prev_blob = brew.relu(
            self.model,
            self.prev_blob,
            self.prev_blob,  # in-place
        )
        return self.prev_blob

    def add_spatial_bn(self, num_filters):
        '''Append spatial batch normalization on prev_blob.'''
        self.prev_blob = brew.spatial_bn(
            self.model,
            self.prev_blob,
            'comp_%d_spatbn_%d' % (self.comp_count, self.comp_idx),
            num_filters,
            epsilon=self.bn_epsilon,
            momentum=self.bn_momentum,
            is_test=self.is_test,
        )
        return self.prev_blob

    '''
    Add a "bottleneck" component as described in He et. al. Figure 3 (right)
    '''

    def add_bottleneck(
        self,
        input_filters,   # num of feature maps from preceding layer
        base_filters,    # num of filters internally in the component
        output_filters,  # num of feature maps to output
        stride=1,
        group=1,
        spatial_batch_norm=True,
    ):
        '''Append a 1x1 -> 3x3 -> 1x1 residual bottleneck; returns
        output_filters so callers can chain the next block's input size.'''
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(
            input_filters,
            base_filters,
            kernel=1,
            stride=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(
            base_filters,
            base_filters,
            kernel=3,
            stride=stride,
            group=group,
            pad=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # When the number of feature maps mismatch between the input
        # and output (this usually happens when the residual stage
        # changes), we need to do a projection for the short cut
        if output_filters != input_filters:
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=stride,
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=self.bn_epsilon,
                    momentum=self.bn_momentum,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of number of high level components in this ResNetBuilder
        self.comp_count += 1

        return output_filters

    def add_simple_block(
        self,
        input_filters,
        num_filters,
        down_sampling=False,
        spatial_batch_norm=True
    ):
        '''Append a plain 3x3 -> 3x3 residual block (He et al. sec 4.2);
        returns num_filters for symmetry with add_bottleneck.'''
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 3x3
        self.add_conv(
            input_filters,
            num_filters,
            kernel=3,
            stride=(1 if down_sampling is False else 2),
            pad=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(num_filters)
        self.add_relu()

        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(num_filters)

        # Increase of dimensions, need a projection for the shortcut
        if (num_filters != input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                num_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                # Fixed: previously hard-coded epsilon=1e-3 and dropped the
                # momentum, inconsistent with every other spatial_bn this
                # builder creates (which use self.bn_epsilon/self.bn_momentum).
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    num_filters,
                    epsilon=self.bn_epsilon,
                    momentum=self.bn_momentum,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of number of high level components in this ResNetBuilder
        self.comp_count += 1

        return num_filters
232 
233 
def create_resnet_32x32(
    model, data, num_input_channels, num_groups, num_labels, is_test=False
):
    '''
    Build the small-image residual net of He et al. (2015), sec 4.2.
    num_groups corresponds to 'n' in the paper; each of the three stages
    contains 2n simple blocks.
    '''
    # Stem: 3x3 conv + batch norm + ReLU.
    brew.conv(
        model, data, 'conv1', num_input_channels, 16, kernel=3, stride=1
    )
    brew.spatial_bn(
        model, 'conv1', 'conv1_spatbn', 16, epsilon=1e-3, is_test=is_test
    )
    brew.relu(model, 'conv1_spatbn', 'relu1')

    # Per-stage filter counts as described in sec 4.2.
    filters = [16, 32, 64]

    builder = ResNetBuilder(model, 'relu1', no_bias=0, is_test=is_test)
    prev_filters = 16
    for groupidx, stage_filters in enumerate(filters):
        for blockidx in range(2 * num_groups):
            is_first = blockidx == 0
            # Down-sample at the first block of every stage but the first.
            builder.add_simple_block(
                prev_filters if is_first else stage_filters,
                stage_filters,
                down_sampling=bool(is_first and groupidx > 0))
            prev_filters = stage_filters

    # Head: global 8x8 average pool, FC classifier, softmax.
    brew.average_pool(
        model, builder.prev_blob, 'final_avg', kernel=8, stride=1
    )
    brew.fc(model, 'final_avg', 'last_out', 64, num_labels)
    return brew.softmax(model, 'last_out', 'softmax')
271 
272 
# Number of residual blocks in each of the four stages, keyed by total
# network depth (the supported ResNe(X)t variants).
RESNEXT_BLOCK_CONFIG = {
    18: (2, 2, 2, 2),
    34: (3, 4, 6, 3),
    50: (3, 4, 6, 3),
    101: (3, 4, 23, 3),
    152: (3, 8, 36, 3),
    200: (3, 24, 36, 3),
}

# Stride of the first convolution in each of the four residual stages.
RESNEXT_STRIDES = [1, 2, 2, 2]

# Module-level logger for this builder.
logging.basicConfig()
log = logging.getLogger("resnext_builder")
log.setLevel(logging.DEBUG)
287 
288 
289 # The conv1 and final_avg kernel/stride args provide a basic mechanism for
290 # adapting resnet50 for different sizes of input images.
def create_resnext(
    model,
    data,
    num_input_channels,
    num_labels,
    num_layers,
    num_groups,
    num_width_per_group,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=1,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
    log=None,
    bn_epsilon=1e-5,
    bn_momentum=0.9,
):
    '''
    Build a ResNeXt network (Xie et al. 2016) of the given depth.

    num_layers must be a key of RESNEXT_BLOCK_CONFIG. Returns last_out when
    no_loss is True, (softmax, loss) when a label blob is supplied, and the
    softmax blob otherwise. Raises ValueError on an unsupported num_layers.
    '''
    if num_layers not in RESNEXT_BLOCK_CONFIG:
        error_msg = "{}-layer is invalid for resnext config".format(num_layers)
        if log is not None:
            log.error(error_msg)
        # Fixed: previously this only logged (crashing with AttributeError
        # when log=None, the default) and then fell through to a bare
        # KeyError on the lookup below. Fail fast with a clear error.
        raise ValueError(error_msg)

    num_blocks = RESNEXT_BLOCK_CONFIG[num_layers]
    strides = RESNEXT_STRIDES
    num_filters = [64, 256, 512, 1024, 2048]

    # The shallow variants use the narrower basic-block filter schedule.
    if num_layers in [18, 34]:
        num_filters = [64, 64, 128, 256, 512]

    # the number of features before the last FC layer
    num_features = num_filters[-1]

    # conv1 + maxpool
    conv_blob = brew.conv(
        model,
        data,
        'conv1',
        num_input_channels,
        num_filters[0],
        weight_init=("MSRAFill", {}),
        kernel=conv1_kernel,
        stride=conv1_stride,
        pad=3,
        no_bias=no_bias
    )

    bn_blob = brew.spatial_bn(
        model,
        conv_blob,
        'conv1_spatbn_relu',
        num_filters[0],
        epsilon=bn_epsilon,
        momentum=bn_momentum,
        is_test=is_test
    )
    relu_blob = brew.relu(model, bn_blob, bn_blob)
    max_pool = brew.max_pool(model, relu_blob, 'pool1', kernel=3, stride=2, pad=1)

    # Residual blocks...
    # Fixed: the caller's bn_epsilon/bn_momentum are now forwarded to the
    # builder; previously 1e-5 and 0.9 were hard-coded here, silently
    # ignoring these parameters for every residual block.
    builder = ResNetBuilder(model, max_pool, no_bias=no_bias,
                            is_test=is_test, bn_epsilon=bn_epsilon,
                            bn_momentum=bn_momentum)

    inner_dim = num_groups * num_width_per_group

    # 4 different kinds of residual blocks
    for residual_idx in range(4):
        residual_num = num_blocks[residual_idx]
        residual_stride = strides[residual_idx]
        dim_in = num_filters[residual_idx]

        for blk_idx in range(residual_num):
            dim_in = builder.add_bottleneck(
                dim_in,
                inner_dim,
                num_filters[residual_idx + 1],  # dim out
                # Only the first block of a stage down-samples.
                stride=residual_stride if blk_idx == 0 else 1,
                group=num_groups,
            )

        inner_dim *= 2

    # Final layers
    final_avg = brew.average_pool(
        model,
        builder.prev_blob,
        'final_avg',
        kernel=final_avg_kernel,
        stride=1,
        global_pooling=True,
    )

    # Final dimension of the "image" is reduced to 7x7
    last_out = brew.fc(
        model, final_avg, 'last_out_L{}'.format(num_labels), num_features, num_labels
    )

    if no_loss:
        return last_out

    # If we create model for training, use softmax-with-loss
    if (label is not None):
        (softmax, loss) = model.SoftmaxWithLoss(
            [last_out, label],
            ["softmax", "loss"],
        )

        return (softmax, loss)
    else:
        # For inference, we just return softmax
        return brew.softmax(model, last_out, "softmax")
401 
402 
403 # The conv1 and final_avg kernel/stride args provide a basic mechanism for
404 # adapting resnet50 for different sizes of input images.
def create_resnet50(
    model,
    data,
    num_input_channels,
    num_labels,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=0,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
):
    '''
    Build a plain ResNet-50. This is the special case ResNeXt50-1x64d:
    a single group of width 64, so we simply delegate to create_resnext
    with those settings pinned.
    '''
    resnext_config = dict(
        num_layers=50,
        num_groups=1,
        num_width_per_group=64,
        label=label,
        is_test=is_test,
        no_loss=no_loss,
        no_bias=no_bias,
        conv1_kernel=conv1_kernel,
        conv1_stride=conv1_stride,
        final_avg_kernel=final_avg_kernel,
    )
    return create_resnext(
        model,
        data,
        num_input_channels,
        num_labels,
        **resnext_config
    )
# NOTE(review): the lines below are cross-reference residue from a
# documentation export, not executable code; kept as comments.
# def add_conv(self, in_filters, out_filters, kernel, stride=1, group=1, pad=0)
#     Definition: resnet.py:49
# def add_spatial_bn(self, num_filters)
#     Definition: resnet.py:74