Caffe2 - Python API
A deep learning, cross-platform ML framework
resnet.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package resnet
17 # Module caffe2.python.models.resnet
18 
19 from __future__ import absolute_import
20 from __future__ import division
21 from __future__ import print_function
22 
23 from caffe2.python import brew
24 '''
25 Utility for creating ResNets
26 See "Deep Residual Learning for Image Recognition" by He, Zhang et. al. 2015
27 '''
28 
29 
class ResNetBuilder():
    '''
    Helper class for constructing residual blocks.

    Tracks the most recently produced blob (``prev_blob``) so layers can be
    chained, and numbers each high-level component (``comp_count``) and each
    layer within a component (``comp_idx``) to generate unique blob names.
    '''

    def __init__(self, model, prev_blob, no_bias, is_test, spatial_bn_mom=0.9):
        # model:          model helper the layers are added to
        # prev_blob:      blob holding the input to the first residual block
        # no_bias:        truthy -> convolutions are built without bias terms
        #                 (normalized to 0/1 for brew.conv's `no_bias` arg)
        # is_test:        forwarded to spatial batch norm (inference mode)
        # spatial_bn_mom: momentum for every spatial batch-norm layer
        self.model = model
        self.comp_count = 0
        self.comp_idx = 0
        self.prev_blob = prev_blob
        self.is_test = is_test
        self.spatial_bn_mom = spatial_bn_mom
        self.no_bias = 1 if no_bias else 0

    def add_conv(self, in_filters, out_filters, kernel, stride=1, pad=0):
        '''Append an MSRA-initialized convolution and advance prev_blob.'''
        self.comp_idx += 1
        self.prev_blob = brew.conv(
            self.model,
            self.prev_blob,
            'comp_%d_conv_%d' % (self.comp_count, self.comp_idx),
            in_filters,
            out_filters,
            weight_init=("MSRAFill", {}),
            kernel=kernel,
            stride=stride,
            pad=pad,
            no_bias=self.no_bias,
        )
        return self.prev_blob

    def add_relu(self):
        '''Append an in-place ReLU on the current blob.'''
        self.prev_blob = brew.relu(
            self.model,
            self.prev_blob,
            self.prev_blob,  # in-place
        )
        return self.prev_blob

    def add_spatial_bn(self, num_filters):
        '''Append a spatial batch-norm layer on the current blob.'''
        self.prev_blob = brew.spatial_bn(
            self.model,
            self.prev_blob,
            'comp_%d_spatbn_%d' % (self.comp_count, self.comp_idx),
            num_filters,
            epsilon=1e-3,
            momentum=self.spatial_bn_mom,
            is_test=self.is_test,
        )
        return self.prev_blob

    '''
    Add a "bottleneck" component as described in He et. al. Figure 3 (right)
    '''

    def add_bottleneck(
        self,
        input_filters,    # num of feature maps from preceding layer
        base_filters,     # num of filters internally in the component
        output_filters,   # num of feature maps to output
        down_sampling=False,
        spatial_batch_norm=True,
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(
            input_filters,
            base_filters,
            kernel=1,
            stride=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(
            base_filters,
            base_filters,
            kernel=3,
            stride=(1 if down_sampling is False else 2),
            pad=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # If we need to increase dimensions (feature maps), need to
        # do a projection for the short cut
        if (output_filters > input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=1e-3,
                    momentum=self.spatial_bn_mom,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of number of high level components in this ResNetBuilder
        self.comp_count += 1

    def add_simple_block(
        self,
        input_filters,
        num_filters,
        down_sampling=False,
        spatial_batch_norm=True
    ):
        '''Add a plain two-layer 3x3 residual block (He et al. Fig. 3 left).'''
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 3x3
        self.add_conv(
            input_filters,
            num_filters,
            kernel=3,
            stride=(1 if down_sampling is False else 2),
            pad=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(num_filters)
        self.add_relu()

        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(num_filters)

        # Increase of dimensions, need a projection for the shortcut
        if (num_filters != input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                num_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    num_filters,
                    epsilon=1e-3,
                    # Fix: pass the configured momentum, matching the
                    # otherwise-parallel shortcut path in add_bottleneck
                    # (previously this BN silently used the default momentum).
                    momentum=self.spatial_bn_mom,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of number of high level components in this ResNetBuilder
        self.comp_count += 1
221 
222 
# The conv1 and final_avg kernel/stride args provide a basic mechanism for
# adapting resnet50 for different sizes of input images.
def create_resnet50(
    model,
    data,
    num_input_channels,
    num_labels,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=0,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
):
    '''
    Build a ResNet-50 (He et al. 2015, Table 1) on top of `data`.

    Returns `last_out` when no_loss is set; otherwise (softmax, loss) when a
    `label` blob is given, or just the softmax blob for inference.
    '''
    # Stem: conv1 -> spatial BN -> ReLU -> 3x3/2 max pool
    brew.conv(
        model,
        data,
        'conv1',
        num_input_channels,
        64,
        weight_init=("MSRAFill", {}),
        kernel=conv1_kernel,
        stride=conv1_stride,
        pad=3,
        no_bias=no_bias
    )
    brew.spatial_bn(
        model,
        'conv1',
        'conv1_spatbn_relu',
        64,
        epsilon=1e-3,
        momentum=0.1,
        is_test=is_test
    )
    brew.relu(model, 'conv1_spatbn_relu', 'conv1_spatbn_relu')
    brew.max_pool(model, 'conv1_spatbn_relu', 'pool1', kernel=3, stride=2)

    builder = ResNetBuilder(model, 'pool1', no_bias=no_bias,
                            is_test=is_test, spatial_bn_mom=0.1)

    # Bottleneck stages conv2_x..conv5_x (ref Table 1 in He et al. (2015)):
    # (input filters, internal filters, output filters, #blocks, downsample?)
    stage_specs = [
        (64, 64, 256, 3, False),      # conv2_x
        (256, 128, 512, 4, True),     # conv3_x
        (512, 256, 1024, 6, True),    # conv4_x
        (1024, 512, 2048, 3, True),   # conv5_x
    ]
    for in_filters, base_filters, out_filters, blocks, downsample in stage_specs:
        builder.add_bottleneck(in_filters, base_filters, out_filters,
                               down_sampling=downsample)
        # Remaining blocks of the stage keep the output width.
        for _ in range(blocks - 1):
            builder.add_bottleneck(out_filters, base_filters, out_filters)

    # Head: global average pool, then a fully-connected classifier.
    final_avg = brew.average_pool(
        model,
        builder.prev_blob,
        'final_avg',
        kernel=final_avg_kernel,
        stride=1,
        global_pooling=True,
    )
    last_out = brew.fc(
        model, final_avg, 'last_out_L{}'.format(num_labels), 2048, num_labels
    )

    if no_loss:
        return last_out

    # If we create model for training, use softmax-with-loss
    if (label is not None):
        (softmax, loss) = model.SoftmaxWithLoss(
            [last_out, label],
            ["softmax", "loss"],
        )
        return (softmax, loss)

    # For inference, we just return softmax
    return brew.softmax(model, last_out, "softmax")
317 
318 
def create_resnet_32x32(
    model, data, num_input_channels, num_groups, num_labels, is_test=False
):
    '''
    Create residual net for smaller images (sec 4.2 of He et. al (2015))
    num_groups = 'n' in the paper
    '''
    # Stem: 3x3 conv -> spatial BN -> ReLU
    brew.conv(
        model, data, 'conv1', num_input_channels, 16, kernel=3, stride=1
    )
    brew.spatial_bn(
        model, 'conv1', 'conv1_spatbn', 16, epsilon=1e-3, is_test=is_test
    )
    brew.relu(model, 'conv1_spatbn', 'relu1')

    # Three groups of 2n simple blocks with 16/32/64 feature maps (sec 4.2);
    # each group after the first downsamples on its first block.
    filters = [16, 32, 64]

    builder = ResNetBuilder(model, 'relu1', is_test=is_test)
    in_channels = 16
    for group_idx, out_channels in enumerate(filters):
        for block_idx in range(2 * num_groups):
            is_first_block = (block_idx == 0)
            builder.add_simple_block(
                in_channels if is_first_block else out_channels,
                out_channels,
                down_sampling=is_first_block and group_idx > 0,
            )
        in_channels = out_channels

    # Head: average pool, fully-connected classifier, softmax.
    brew.average_pool(
        model, builder.prev_blob, 'final_avg', kernel=8, stride=1
    )
    brew.fc(model, 'final_avg', 'last_out', 64, num_labels)
    softmax = brew.softmax(model, 'last_out', 'softmax')
    return softmax
# Cross-reference residue from the documentation generator (not code):
#   def add_spatial_bn(self, num_filters) — definition: resnet.py:68
#   def add_conv(self, in_filters, out_filters, kernel, stride=1, pad=0) — definition: resnet.py:44