Caffe2 - C++ API
A deep learning, cross platform ML framework
conv_fusion_op.cc
1 #include <caffe2/ideep/operators/conv_pool_base_op.h>
2 
3 namespace caffe2 {
4 
5 class IDEEPConvFusionOp final : public IDEEPConvPoolOpBase {
6  public:
7  USE_IDEEP_DEF_ALIASES();
8  USE_IDEEP_CONV_POOL_BASE_FUNCTIONS();
9 
10  enum FusionType {
11  FUSION_UNKNOWN = 0,
12  FUSION_CONV_RELU = 1,
13  FUSION_CONV_SUM = 2,
14  FUSION_CONV_SUM_RELU = 3,
15  FUSION_MAX = FUSION_CONV_SUM_RELU + 1,
16  };
17 
18  IDEEPConvFusionOp(const OperatorDef& operator_def, Workspace* ws)
19  : IDEEPConvPoolOpBase(operator_def, ws),
20  fusion_type_(static_cast<FusionType>(
21  OperatorBase::GetSingleArgument<int>("fusion_type", 0))),
22  training_mode_(
23  OperatorBase::GetSingleArgument<int>("training_mode", 0)),
24  conv_algorithm_(
25  OperatorBase::GetSingleArgument<int>("conv_algorithm", CONV_ALGORITHM_AUTO)) {
26  OPERATOR_NEEDS_FEATURE(
27  pad_l() == pad_r() && pad_t() == pad_b(),
28  "Uneven padding not supported.");
29  OPERATOR_NEEDS_FEATURE(group_ == 1, "Group not supported.");
30  OPERATOR_NEEDS_FEATURE(
31  fusion_type_ > FUSION_UNKNOWN && fusion_type_ < FUSION_MAX,
32  "Undefined Conv fusion type.",
33  fusion_type_);
34 
35  // Check kernel only if we are doing conv. The reason is that a
36  // few other ops, like PadImage, are also using this base class. We really
37  // need to clean this up.
38  for (int dim = 0; dim < kernel_.size(); ++dim) {
39  CAFFE_ENFORCE_GE(pads_[dim], 0);
40  CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
41  CAFFE_ENFORCE(
42  kernel_[dim],
43  "If you are doing convolution, you will need to set "
44  "explicitly the kernel size.");
45  }
46  }
47  ~IDEEPConvFusionOp() override {}
48 
49  bool RunOnDeviceWithOrderNCHW() override {
50  const auto& X = Input(INPUT_X);
51  const auto& filter = Input(FILTER);
52  auto* Y = Output(OUTPUT);
53  auto Y_dims_conv = CalcOutputDims(X, filter.get_dim(0));
54  auto attr = [this]() {
55  return (fusion_type_ == FUSION_CONV_RELU)
56  ? iattr::fuse_relu()
57  : ((fusion_type_ == FUSION_CONV_SUM)
58  ? iattr::fuse_sum()
59  : ((fusion_type_ == FUSION_CONV_SUM_RELU) ? iattr::residual()
60  : iattr()));
61  };
62  auto last_input = [this]() {
63  return (fusion_type_ == FUSION_CONV_RELU) ? BIAS_OR_INPUT_S : INPUT_S;
64  };
65 
66  CAFFE_ENFORCE(4 == X.ndims());
67  CAFFE_ENFORCE(4 == filter.ndims());
68  CAFFE_ENFORCE(filter.get_dim(2) == kernel_h());
69  CAFFE_ENFORCE(filter.get_dim(3) == kernel_w());
70  CAFFE_ENFORCE(
71  X.get_dim(1) == filter.get_dim(1) * group_,
72  "Convolution fusion op: input channels does not match: "
73  "# of input channels ",
74  X.get_dim(1),
75  " is not equal to kernel channels * group:",
76  filter.get_dim(1),
77  "*",
78  group_);
79 
80  ideep::algorithm aalgorithm = ideep::algorithm::convolution_direct;
81  if (conv_algorithm_ == CONV_ALGORITHM_WINOGRAD) {
82  aalgorithm = ideep::algorithm::convolution_winograd;
83  }
84 
85  bool weights_changed =
86  (cached_weights_descriptor_ != filter.get_descriptor());
87  if (weights_changed && !training_mode_) {
88  cached_weights_descriptor_ = filter.get_descriptor();
89  filter_ = filter;
90  auto expected_descriptor =
91  ideep::convolution_forward::expected_weights_descriptor(
92  filter.get_dims());
93  if (filter_.get_descriptor() != expected_descriptor) {
94  filter_.init<ideep::utils::allocator, ideep::convolution_forward>(
95  expected_descriptor);
96  ideep::reorder::compute(filter, filter_);
97  }
98  }
99 
100  if (InputSize() > last_input()) {
101  ideep::convolution_forward::compute(
102  X,
103  training_mode_ ? filter : filter_,
104  Input(BIAS_OR_INPUT_S),
105  Y_dims_conv,
106  *Y,
107  stride_,
108  dilation_,
109  pad_tl(),
110  pad_br(),
111  group_,
112  attr(),
113  aalgorithm);
114  } else {
115  ideep::convolution_forward::compute(
116  X,
117  training_mode_ ? filter : filter_,
118  Y_dims_conv,
119  *Y,
120  stride_,
121  dilation_,
122  pad_tl(),
123  pad_br(),
124  group_,
125  attr(),
126  aalgorithm);
127  }
128 
129  if (fusion_type_ != FUSION_CONV_RELU) {
130  CAFFE_ENFORCE(
131  Y == &(Input(InputSize() - 1)),
132  "Convolution fusion op: InPlace is enforced for sum fusion.");
133  }
134 
135  return true;
136  }
137 
138  private:
139  FusionType fusion_type_;
140  bool training_mode_;
141  int conv_algorithm_;
142  ideep::tensor filter_;
143  ideep::tensor::descriptor cached_weights_descriptor_;
144 
145  INPUT_TAGS(INPUT_X, FILTER, BIAS_OR_INPUT_S, INPUT_S);
146  OUTPUT_TAGS(OUTPUT);
147 };
148 
// Register the fused convolution operator with the IDEEP device backend.
REGISTER_IDEEP_OPERATOR(ConvFusion, IDEEPConvFusionOp);

// Shared tail text for the ConvFusion schema documentation; substituted for
// the {conv_fusion_doc} placeholder by ConvFusionDocGenerator below.
const char* kConvFusionDoc = R"DOC(
Note that other parameters, such as the stride and
kernel size, or the pads' sizes in each direction are not necessary for input
because they are provided by the ConvPoolOpBase operator. Various dimension
checks are done implicitly, and the sizes are specified in the Input docs for
this operator. As is expected, the filter is convolved with a subset of the
image and the bias is added; this is done throughout the image data and the
output is computed. As a side note on the implementation layout:
conv_op_impl.h is the templated implementation of the conv_op.h file, which is
why they are separate files.
)DOC";
162 
163 std::function<void(OpSchema&)> ConvFusionDocGenerator(const char* dim) {
164  return [=](OpSchema& schema) {
165  string doc = R"DOC(
166 The convolution fusion operator consumes an input vector, a {dim}filter blob,
167 a bias blob and another input vector and computes the output. This operator
168 gives the chance to fuse the ReLU or element-wise Sum with a convolution
169 operator. {conv_fusion_doc})DOC";
170  c10::ReplaceAll(doc, "{dim}", dim);
171  c10::ReplaceAll(doc, "{conv_fusion_doc}", kConvFusionDoc);
172  schema.SetDoc(doc);
173  schema.Input(
174  0,
175  "X",
176  "Input data blob from previous layer; has size (N x C x H x W), "
177  "where N is the batch size, C is the number of channels, "
178  "and H and W are the height and width. Note that this is for the NCHW "
179  "usage. On the other hand, the NHWC Op has a different set of "
180  "dimension constraints. ");
181  schema.Input(
182  1,
183  "filter",
184  "The filter blob that will be used in the "
185  "convolutions; has size (M x C x kH x kW), where C is the number of "
186  "channels, and kH and kW are the height and width of the kernel.");
187  schema.Input(
188  2,
189  "bias",
190  "The 1D bias blob that is added through the "
191  "convolution; has size (M).");
192  schema.Input(
193  3,
194  "S",
195  "Input data blob for element-wise Sum fusion from previous layer; "
196  "has the same size of convolution output. Its input index should "
197  "be 2 if no bias for this convolution, and it MUST be inplace with "
198  "output Y.");
199  schema.Output(
200  0,
201  "Y",
202  "Output data blob that contains the result of the "
203  "convolution fusion. The output dimensions are functions of the kernel "
204  "size, stride size, and pad lengths."
205  "");
206  };
207 }
208 
209 OPERATOR_SCHEMA(ConvFusion)
210  .NumInputs(2, 4)
211  .NumOutputs(1)
213  .CostInferenceFunction(OpSchema::CostInferenceFunctionType(
215  .Arg("fusion_type", "Which fusion type is used")
216  .AllowInplace({{2, 0}, {3, 0}})
217  .FillUsing(ConvFusionDocGenerator(""));
218 
219 } // namespace caffe2
A class to record the schema of an op.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs, and (2) all instantiated networks. It is the owner of all these objects and deals with the scaffolding logistics.
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime environment.
Definition: blob.h:13
std::function< struct Cost(const OperatorDef &, const vector< TensorShape > &)> CostInferenceFunctionType
Registers a function that takes in an OperatorDef and a series of input shapes and returns the total ...