Caffe2 - Python API
A deep learning, cross-platform ML framework
conv.py
1 ## @package conv
2 # Module caffe2.python.layers.conv
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 from __future__ import unicode_literals
7 
8 from caffe2.python import schema
9 from caffe2.python.layers.layers import (
10  ModelLayer,
11 )
12 import numpy as np
13 
14 
class Conv(ModelLayer):
    """
    Convolutional layer

    Input:
    - input_record: at least has the shape info of C (num_channels)
    - output_dim: number of convolutional filters
    - kernel_h, kernel_w: kernel size for h and w
    - stride_h, stride_w: stride for h and w
    - pad_b, pad_l, pad_r, pad_t: padding sizes; when stride == 1, a 'None'
      value triggers automatic "same" padding of (kernel - 1) / 2
    - order: either 'NHWC' or 'NCHW'
    """

    def __init__(self, model, input_record, output_dim, kernel_h, kernel_w,
                 stride_h, stride_w, pad_b=None, pad_l=None, pad_r=None,
                 pad_t=None, order='NHWC', kernel_init=None, bias_init=None,
                 kernel_optim=None, bias_optim=None,
                 name='conv', **kwargs):

        super(Conv, self).__init__(model, name, input_record, **kwargs)
        assert isinstance(input_record, schema.Scalar), "Incorrect input type"
        # input num_channels (C) is needed
        input_dims = input_record.field_type().shape

        assert (kernel_h > 0 and isinstance(kernel_h, int)), (
            "kernel_h should be positive integer")
        assert (kernel_w > 0 and isinstance(kernel_w, int)), (
            "kernel_w should be positive integer")
        self.kernel_h = kernel_h
        self.kernel_w = kernel_w

        assert (stride_h > 0 and isinstance(stride_h, int)), (
            "stride_h should be positive integer")
        assert (stride_w > 0 and isinstance(stride_w, int)), (
            "stride_w should be positive integer")
        self.stride_h = stride_h
        self.stride_w = stride_w

        # output_dim calculation (http://cs231n.github.io/convolutional-networks/)
        #   output_dim_w = (input_dim_w - kernel_w + pad_r + pad_l) / stride_w + 1
        # so keeping output_dim_w == input_dim_w ("same" padding) requires
        #   pad_r + pad_l = (output_dim_w - 1) * stride_w - input_dim_w + kernel_w
        # which simplifies to kernel_w - 1 when stride_w == 1, i.e.
        # (kernel_w - 1) / 2 on each side; similar for pad_t and pad_b with
        # kernel_h. Here we only do auto padding for the stride == 1 case.
        if stride_h == 1:
            pad_t = int((kernel_h - 1) / 2) if pad_t is None else pad_t
            pad_b = int((kernel_h - 1) / 2) if pad_b is None else pad_b
        else:
            pad_t = 0 if pad_t is None else pad_t
            pad_b = 0 if pad_b is None else pad_b

        if stride_w == 1:
            pad_r = int((kernel_w - 1) / 2) if pad_r is None else pad_r
            pad_l = int((kernel_w - 1) / 2) if pad_l is None else pad_l
        else:
            pad_r = 0 if pad_r is None else pad_r
            pad_l = 0 if pad_l is None else pad_l

        assert (pad_t >= 0 and isinstance(pad_t, int)), "pad_t should be int >= 0"
        assert (pad_b >= 0 and isinstance(pad_b, int)), "pad_b should be int >= 0"
        assert (pad_r >= 0 and isinstance(pad_r, int)), "pad_r should be int >= 0"
        assert (pad_l >= 0 and isinstance(pad_l, int)), "pad_l should be int >= 0"
        self.pad_t = pad_t
        self.pad_b = pad_b
        self.pad_r = pad_r
        self.pad_l = pad_l

        assert order in ['NHWC', 'NCHW'], "order should either 'NHWC' or 'NCHW'"
        self.order = order

        # Kernel blob layout depends on storage order, with M = output_dim:
        # NHWC -> (M, KH, KW, C); NCHW -> (M, C, KH, KW).
        if order == 'NHWC':
            input_c = input_dims[-1]
            kernel_shape = [output_dim, kernel_h, kernel_w, input_c]
        elif order == 'NCHW':
            input_c = input_dims[0]
            kernel_shape = [output_dim, input_c, kernel_h, kernel_w]
        assert input_c > 0, (
            "Number of input channels in conv parameters should be positive")

        # Default initializers when none are supplied.
        kernel_init = kernel_init if kernel_init else (
            'XavierFill', {}
        )
        bias_init = bias_init if bias_init else (
            'ConstantFill', {'value': 0.0}
        )

        self.kernel = self.create_param(
            param_name='conv_kernel',
            shape=kernel_shape,
            initializer=kernel_init,
            optimizer=kernel_optim,
        )

        self.bias = self.create_param(
            param_name='conv_bias',
            shape=[output_dim],
            initializer=bias_init,
            optimizer=bias_optim,
        )

        # The output_schema only has the num of output channels;
        # output_h and output_w would be inferred internally by the Conv op.
        # NOTE(fix): the `self.output_schema = schema.Scalar(` assignment was
        # missing here, leaving a dangling expression (syntax error) and no
        # output_schema attribute for add_ops to use.
        self.output_schema = schema.Scalar(
            (np.float32, (output_dim,)),
            self.get_next_blob_reference('output')
        )

    def add_ops(self, net):
        """Add the Conv operator (input, kernel, bias -> output) to `net`."""
        net.Conv(
            self.input_record.field_blobs() + [self.kernel, self.bias],
            self.output_schema.field_blobs(),
            kernel_h=self.kernel_h,
            kernel_w=self.kernel_w,
            stride_h=self.stride_h,
            stride_w=self.stride_w,
            pad_t=self.pad_t,
            pad_l=self.pad_l,
            pad_b=self.pad_b,
            pad_r=self.pad_r,
            order=self.order
        )