// Caffe2 - C++ API
// A deep learning, cross-platform ML framework
// fully_connected_op.cc
1 #include "caffe2/operators/fully_connected_op.h"
2 
3 #include <functional>
4 
5 #include "caffe2/operators/fc_inference.h"
6 
7 namespace caffe2 {
8 
9 REGISTER_CPU_OPERATOR(FC, FullyConnectedOp<CPUContext>);
10 REGISTER_CPU_GRADIENT_OPERATOR(
11  FCGradient,
12  FullyConnectedGradientOp<CPUContext>);
13 
14 REGISTER_CPU_OPERATOR(
15  FCTransposed,
16  FullyConnectedOp<
17  CPUContext,
18  DefaultEngine,
19  false /* don't transpose weight */>);
20 REGISTER_CPU_GRADIENT_OPERATOR(
21  FCTransposedGradient,
22  FullyConnectedGradientOp<
23  CPUContext,
24  DefaultEngine,
25  false /* don't transpose weight */>);
26 
27 namespace {
28 std::vector<TensorShape> FCGradientShapeInference(
29  const OperatorDef& def,
30  const vector<TensorShape>& in,
31  bool pretransposed_weight) {
32  vector<TensorShape> out(2);
33  ArgumentHelper helper(def);
34 
35  auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
36  const int canonical_axis_w =
37  canonical_axis_index_(axis_w, in[1].dims().size());
38  const int N = pretransposed_weight
39  ? size_from_dim_(canonical_axis_w, GetDimsVector(in[1]))
40  : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
41 
42  vector<int> dW_shape(in[1].dims().begin(), in[1].dims().end());
43  out[0] = CreateTensorShape(dW_shape, in[1].data_type());
44  out[1] = CreateTensorShape(vector<int>{N}, in[1].data_type()); // db
45  if (def.output_size() == 3) {
46  vector<int> dX_shape(in[0].dims().begin(), in[0].dims().end());
47  out.push_back(CreateTensorShape(dX_shape, in[0].data_type()));
48  }
49  return out;
50 }
51 
52 OpSchema::Cost CostInferenceForFCGradient(
53  const OperatorDef& def,
54  const vector<TensorShape>& in,
55  bool pretransposed_weight) {
56  struct OpSchema::Cost c;
57  ArgumentHelper helper(def);
58  std::vector<TensorShape> out =
59  FCGradientShapeInference(def, in, pretransposed_weight);
60 
61  CAFFE_ENFORCE_LT(0, out.size());
62  const TensorShape dW = out[0];
63  const TensorShape db = out[1];
64 
65  auto axis = helper.GetSingleArgument<int32_t>("axis", 1);
66  const auto canonical_axis = canonical_axis_index_(axis, in[0].dims().size());
67  const uint64_t M = size_to_dim_(canonical_axis, GetDimsVector(in[0]));
68  const uint64_t K = size_from_dim_(canonical_axis, GetDimsVector(in[0]));
69  auto axis_w = helper.GetSingleArgument<int32_t>("axis_w", 1);
70  const int canonical_axis_w =
71  canonical_axis_index_(axis_w, in[1].dims().size());
72  const uint64_t N = pretransposed_weight
73  ? size_from_dim_(canonical_axis_w, GetDimsVector(in[1]))
74  : size_to_dim_(canonical_axis_w, GetDimsVector(in[1]));
75 
76  uint64_t size_dW = nElemFromDim(dW);
77  uint64_t size_db = nElemFromDim(db);
78 
79  c.flops = M * N * (2 * K + 1);
80  c.bytes_written = (size_dW + size_db) * sizeof(float);
81  c.params_bytes = (K * N + N) * sizeof(float);
82 
83  if (out.size() == 3) {
84  const TensorShape dX = out[2];
85  uint64_t size_dX = nElemFromDim(dX);
86 
87  c.flops += 2 * M * N * K;
88  c.bytes_written += size_dX * sizeof(float);
89  }
90  return c;
91 }
92 
93 } // namespace
94 
95 using namespace std::placeholders;
96 OPERATOR_SCHEMA(FCTransposed)
97  .NumInputs(3)
98  .NumOutputs(1)
99  .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, true))
100  .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, true))
101  .SetDoc(R"DOC(
102 Same as FC, but weight matrix is supposed to be already pretransposed.
103 FCTransposed stands for calling blass with no noTrans, noTrans
104 )DOC")
105  .InheritOnnxSchema();
106 
107 OPERATOR_SCHEMA(FC)
108  .NumInputs(3)
109  .NumOutputs(1)
110  .TensorInferenceFunction(std::bind(FCShapeInference, _1, _2, false))
111  .CostInferenceFunction(std::bind(CostInferenceForFC, _1, _2, false))
112  .SetDoc(R"DOC(
113 The FC operator computes an output $(Y)$ as a linear combination of the input data blob $(X)$ with a weight blob $(W)$ and bias blob $(b)$. More formally,
114 
115 $$Y = XW^T+b$$
116 
117 Here, $X$ is a matrix of shape $(M,K)$, $W$ is a matrix of shape $(N,K)$, $b$ is a vector of length $N$, and $Y$ is a matrix of shape $(M,N)$. $N$ can be thought of as the number of nodes in the layer, $M$ is the batch size, and $K$ is the number of features in an input observation.
118 
119 *NOTE: $X$ does not need to explicitly be a 2-dimensional matrix, however, if it is not it will be coerced into one. For an arbitrary $n$-dimensional tensor $X$, e.g. $[a_0, a_1, \ldots ,a_{k-1}, a_k, \ldots , a_{n-1}]$, where $a_i$ in $N$, and $k$ is the $axis$ arg provided, then $X$ will be coerced into a 2-dimensional tensor with dimensions $[a_0 * \ldots * a_{k-1}, a_k * \ldots * a_{n-1}]$. For the default case where axis=1, this means the $X$ tensor will be coerced into a 2D tensor of dimensions $[a_0, a_1 * \ldots * a_{n-1}]$, where $a_0$ is often the batch size. In this situation, we must have $a_0 = M$ and $a_1 * \ldots * a_{n-1} = K$. Lastly, even though $b$ is a vector of length $N$, it is copied and resized to shape $(M x N)$ implicitly, then added to each vector in the batch.*
120 
121 Github Links:
122 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/fully_connected_op.h
123 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/fully_connected_op.cc
124 
125 <details>
126 
127 <summary> <b>Example</b> </summary>
128 
129 **Code**
130 
131 ```
132 
133 // In this example, our batch size is 1 (M=1), the input observation will have
134 // 6 features (K=6), and the layer will have one hidden node (N=1). The
135 // expected output is Y=7.
136 workspace.ResetWorkspace()
137 
138 op = core.CreateOperator(
139  "FC",
140  ["X", "W", "b"],
141  ["Y"]
142 )
143 
144 // Create X: MxK
145 data = np.array([1,2,3,4,5,6]).astype(np.float32)
146 data = data[np.newaxis,:]
147 
148 // Create W: NxK
149 weights = np.array(np.array([1,1/2.,1/3.,1/4.,1/5.,1/6.])).astype(np.float32)
150 weights = weights[np.newaxis,:]
151 
152 // Create b: N
153 bias = np.array([1.]).astype(np.float32)
154 
155 // Put the inputs into the workspace
156 workspace.FeedBlob("X", data)
157 workspace.FeedBlob("W", weights)
158 workspace.FeedBlob("b", bias)
159 
160 // Run the operator
161 workspace.RunOperatorOnce(op)
162 print("Y:\n", workspace.FetchBlob("Y"))
163 
164 ```
165 
166 **Result**
167 
168 ```
169 
170 Y:
171  [[7.]]
172 
173 ```
174 
175 </details>
176 
177 )DOC")
178  .Arg(
179  "axis",
180  "*(type: int; default: 1)* Describes the axis of the input data $X$. Defaults to one because in the common case when the input $X$ has shape $(M,K)$, the first axis encodes the batch size.")
181  .Arg(
182  "axis_w",
183  "*(type: int; default: 1)* Describes the axis of the input weight matrix $W$. Defaults to one because the first axis most likely describes the batch_size.")
184  .Arg(
185  "float16_compute",
186  "*(type: bool; default: False)* Whether to use float-16 compute kernel.")
187  .Input(
188  0,
189  "X",
190  "Input blob to be coerced into a 2D matrix of shape $(M,K)$, where $M$ is the batch size and $K$ is the number of features in a single observation.")
191  .Input(
192  1,
193  "W",
194  "Input blob to be coerced into a 2D matrix of shape $(N,K)$ describing a fully connected weight matrix. Here, $K$ is the number of features in a single observation and $N$ is the number of nodes in the FC layer.")
195  .Input(
196  2,
197  "b",
198  "Input blob containing vector of length $N$ which describes one bias for each node in the layer.")
199  .Output(
200  0,
201  "Y",
202  "Output blob containing a 2D output matrix of shape $(M,N)$, where $M$ is the batch size and $N$ is the number of nodes in the layer. The output is calculated as $Y=XW^T+b$.")
203  .InheritOnnxSchema("Gemm");
204 
205 GRADIENT_OPERATOR_SCHEMA(FCGradient)
206  .NumInputs(3)
207  .NumOutputs(2, 3)
208  .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2, false))
209  .CostInferenceFunction(
210  std::bind(CostInferenceForFCGradient, _1, _2, false));
211 GRADIENT_OPERATOR_SCHEMA(FCTransposedGradient)
212  .NumInputs(3)
213  .NumOutputs(2, 3)
214  .TensorInferenceFunction(std::bind(FCGradientShapeInference, _1, _2, false))
215  .CostInferenceFunction(
216  std::bind(CostInferenceForFCGradient, _1, _2, false));
217 
218 namespace {
219 
221  using GradientMakerBase::GradientMakerBase;
222 
223  std::vector<OperatorDef> GetGradientDefs() override {
224  CAFFE_ENFORCE_EQ(def_.input_size(), 3);
225  CAFFE_ENFORCE(def_.type() == "FC" || def_.type() == "FCTransposed");
226  return SingleGradientDef(
227  def_.type() + "Gradient",
228  "",
229  vector<string>{I(0), I(1), GO(0)},
230  vector<string>{GI(1), GI(2), GI(0)});
231  }
232 };
233 
234 REGISTER_GRADIENT(FC, GetFCGradient);
235 REGISTER_GRADIENT(FCTransposed, GetFCGradient);
236 
237 } // namespace
238 
239 } // namespace caffe2
/* Doxygen cross-reference residue from documentation extraction; not part of
   the original source file:
   Definition: any.cpp:108
   int64_t size_from_dim_(int k, IntArrayRef dims)
   Return product of all dimensions starting from k.
   Definition: TensorImpl.h:53
   A global dictionary that holds information about what Caffe2 modules have
   been loaded in the current ...
   Definition: blob.h:13
   Definition: OpClasses.h:566
*/