Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_op.cc
#include "caffe2/operators/conv_op.h"
#include "caffe2/operators/conv_op_impl.h"
#include "caffe2/operators/conv_pool_op_base.h"

namespace caffe2 {

const char kConvDoc[] = R"DOC(
The Conv2D operator computes a 2D convolution over an input blob $(X)$, with a filter blob $(filter)$ and a bias blob $(bias)$, and outputs a single output blob $(Y)$. Although there are several options for order, the convention is that the input $(X)$ is a blob of shape $(N,C_{in},H_{in},W_{in})$ and the output $(Y)$ is a blob of shape $(N,C_{out},H_{out},W_{out})$. Here, $N$ is the batch size, $C$ is the number of channels, $H$ is the spatial height, and $W$ is the spatial width. For example, if your input data were a batch of five 100x120 pixel RGB images, $X$ would have shape $(5,3,120,100)$.
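
To make the layout concrete, here is a small illustrative NumPy sketch (not part of the operator itself) of that example batch in NCHW order:

```

import numpy as np

# Five RGB images, 120 pixels high and 100 pixels wide, in NCHW order.
X = np.zeros((5, 3, 120, 100), dtype=np.float32)
print(X.shape)  # (5, 3, 120, 100)

```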

The $filter$ input blob may contain multiple filters and has shape $(M, C_{in}, K_H, K_W)$. Here, $M$ is the number of individual filters contained in the blob, $C_{in}$ is the number of channels of each filter (by convention in 2D convolution it is the same as the number of channels in the input), $K_H$ is the spatial height of the kernel, and $K_W$ is the spatial width of the kernel. The $bias$ blob is a vector of length $M$, where there is one bias for each filter in the $filter$ blob.

Given the shape of the input blob and the filter blob, we can calculate the shape of the output blob as follows. The number of items in the batch $N$ will stay the same. The number of channels in the output will equal the number of kernels in the filter blob, so $C_{out} = M.$ With stride and pad defined below, the spatial height and width of the output ($H_{out}$ and $W_{out}$) are calculated as

$$H_{out} = \left \lfloor{\frac{H_{in} - K_H + 2*pad}{stride}+1}\right \rfloor$$


$$W_{out} = \left \lfloor{\frac{W_{in} - K_W + 2*pad}{stride}+1}\right \rfloor$$

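For example, with the $8 \times 8$ input, $5 \times 5$ kernel, $pad=1$, and $stride=2$ used in the code sample below, the output spatial size works out to

$$H_{out} = W_{out} = \left \lfloor{\frac{8 - 5 + 2*1}{2}+1}\right \rfloor = \left \lfloor{3.5}\right \rfloor = 3$$
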
Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_op.cc
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h

<details>

<summary> <b>Example</b> </summary>

**Code**

```

from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Conv",
    ["X", "filter", "bias"],
    ["Y"],
    kernel=5,
    pad=1,
    stride=2
)

# Create X: (N,C,H,W)
data = np.random.randn(1,1,8,8).astype(np.float32)
print("Data shape: ", data.shape)

# Create W: (M,C,Kh,Kw)
filters = np.random.randn(3,1,5,5).astype(np.float32)
print("Filter shape: ", filters.shape)

# Create b: M
bias = np.array([1.,1.,1.]).astype(np.float32)
print("Bias shape: ", bias.shape)

# Put the inputs into the workspace
workspace.FeedBlob("X", data)
workspace.FeedBlob("filter", filters)
workspace.FeedBlob("bias", bias)

# Run the operator
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

Data shape: (1, 1, 8, 8)
Filter shape: (3, 1, 5, 5)
Bias shape: (3,)
Y:
 [[[[  0.6406407    0.8620521    0.56461596]
   [ -1.5042953   -0.79549205 -10.683343  ]
   [ -0.5240259    3.4538248   -3.9564204 ]]

  [[  0.6876496    4.8328524   -1.9525816 ]
   [  1.2995434   -2.3895378    7.2670045 ]
   [  3.9929862    1.8126237    5.4699917 ]]

  [[  3.55949      4.7934155    0.76086235]
   [  3.9588015   -1.3251319    4.413117  ]
   [ -1.5296054   -1.4924102   -3.2552304 ]]]]

```
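
As a quick sanity check on the example (a minimal sketch added for illustration; the helper `conv_out_size` is not part of the Caffe2 API), the $3 \times 3$ spatial size of $Y$ follows directly from the output-size formula above:

```

import math

# Output-size formula from the operator documentation above.
def conv_out_size(in_size, kernel, pad, stride):
    return math.floor((in_size - kernel + 2 * pad) / stride + 1)

# 8x8 input, 5x5 kernel, pad=1, stride=2  ->  3 (matching the 3x3 Y printed above)
print(conv_out_size(8, 5, 1, 2))

```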

</details>


)DOC";

std::function<void(OpSchema&)> ConvDocGenerator(const char* dim) {
  return [=](OpSchema& schema) {
    string doc = R"DOC(
The convolution operator consumes an input blob, a {dim}filter blob
and a bias blob and computes the output. {conv_doc})DOC";
    c10::ReplaceAll(doc, "{dim}", dim);
    c10::ReplaceAll(doc, "{conv_doc}", kConvDoc);
    schema.SetDoc(doc);
    schema.Input(
        0,
        "X",
        "Input data blob, of shape $(N, C_{in}, H_{in}, W_{in})$, to be convolved with the kernels in the filter blob."
    );
    schema.Input(
        1,
        "filter",
        "The filter blob, of shape $(M, C_{in}, K_H, K_W)$, containing the filters to be convolved with the data."
    );
    schema.Input(
        2,
        "bias",
        "The bias blob, of length $M$, containing the biases for the convolution, one bias per filter."
    );
    schema.Output(
        0,
        "Y",
        "Output data blob, of shape $(N, C_{out}, H_{out}, W_{out})$, that contains the result of the convolution."
    );
    /*
    schema.Arg(
        "kernel",
        "*(type: int; default: 0)* Desired kernel size. If left at default the kernel size will be inferred from the input $filter$ blob.",
        0
    );
    schema.Arg(
        "stride",
        "*(type: int; default: 1)* Controls the stride of the kernel as it traverses the input blob.",
        0
    );
    schema.Arg(
        "dilation",
        "*(type: int; default: 1)* Controls spacing between kernel points. If dilation is greater than one, the kernel does not operate on a contiguous spatial region. For a visualization click [here](https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md).",
        0
    );
    schema.Arg(
        "pad",
        "*(type: int; default: 0)* Controls the amount of padding to apply to the input feature map before computing the convolution.",
        0
    );
    schema.Arg(
        "float16_compute",
        "*(type: bool; default: False)* Whether to use float-16 compute kernel.",
        0
    );
    schema.Arg(
        "group",
        "*(type: int; default: 1)* Controls level of group convolution. For more info click [here](https://blog.yani.io/filter-group-tutorial/).",
        0
    );
    schema.Arg(
        "order",
        "*(type: string; default: \"NCHW\")* Specifies the order of the input data blob, where $N$ is batch size, $C$ is number of channels, $H$ is spatial height, and $W$ is spatial width. The only other valid option is \"NHWC\".",
        0
    );
    schema.Arg(
        "shared_buffer",
        "*(type: int; default: 0)*",
        0
    );
    */
  };
}
REGISTER_CPU_OPERATOR(Conv, ConvOp<float, CPUContext>);

OPERATOR_SCHEMA(Conv)
    .NumInputs(2, 3)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForConv)
    .CostInferenceFunction(OpSchema::CostInferenceFunctionType(
        ConvPoolOpBase<CPUContext>::CostInferenceForConv))
    .FillUsing(ConvDocGenerator(""))
    .InheritOnnxSchema();

REGISTER_CPU_OPERATOR(Conv1D, ConvOp<float, CPUContext>);

OPERATOR_SCHEMA(Conv1D)
    .NumInputs(2, 3)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForConv)
    .FillUsing(ConvDocGenerator("1D "))
    .InheritOnnxSchema("Conv");

REGISTER_CPU_OPERATOR(Conv2D, ConvOp<float, CPUContext>);

OPERATOR_SCHEMA(Conv2D)
    .NumInputs(2, 3)
    .NumOutputs(1)
    .CostInferenceFunction(OpSchema::CostInferenceFunctionType(
        ConvPoolOpBase<CPUContext>::CostInferenceForConv))
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForConv)
    .FillUsing(ConvDocGenerator("2D "))
    .InheritOnnxSchema("Conv");

REGISTER_CPU_OPERATOR(Conv3D, ConvOp<float, CPUContext>);

OPERATOR_SCHEMA(Conv3D)
    .NumInputs(2, 3)
    .NumOutputs(1)
    .CostInferenceFunction(OpSchema::CostInferenceFunctionType(
        ConvPoolOpBase<CPUContext>::CostInferenceForConv))
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForConv)
    .FillUsing(ConvDocGenerator("3D "))
    .InheritOnnxSchema("Conv");

} // namespace caffe2