Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_op.cc
1 #include <caffe2/ideep/operators/conv_pool_base_op.h>
2 
3 namespace caffe2 {
4 
5 class IDEEPConvOp final : public IDEEPConvPoolOpBase {
6  public:
7  USE_IDEEP_DEF_ALIASES();
8  USE_IDEEP_CONV_POOL_BASE_FUNCTIONS();
9 
10  IDEEPConvOp(const OperatorDef& operator_def, Workspace* ws)
11  : IDEEPConvPoolOpBase(operator_def, ws),
12  training_mode_(
13  OperatorBase::GetSingleArgument<int>("training_mode", 0)),
14  conv_algorithm_(
15  OperatorBase::GetSingleArgument<int>("conv_algorithm", CONV_ALGORITHM_AUTO)) {
16  OPERATOR_NEEDS_FEATURE(
17  pad_l() == pad_r() && pad_t() == pad_b(),
18  "Uneven padding not supported.");
19  }
20  ~IDEEPConvOp() override {}
21 
22  bool RunOnDeviceWithOrderNCHW() override {
23  const auto& X = Input(INPUT);
24  const auto& filter = Input(FILTER);
25  auto* Y = Output(OUTPUT);
26  auto Y_dims = CalcOutputDims(X, filter.get_dim(0));
27 
28  CAFFE_ENFORCE(4 == X.ndims());
29  CAFFE_ENFORCE(4 == filter.ndims());
30  CAFFE_ENFORCE(filter.get_dim(2) == kernel_h());
31  CAFFE_ENFORCE(filter.get_dim(3) == kernel_w());
32  CAFFE_ENFORCE(
33  X.get_dim(1) == filter.get_dim(1) * group_,
34  "Convolution op: input channels does not match: # of input channels ",
35  X.get_dim(1),
36  " is not equal to kernel channels * group:",
37  filter.get_dim(1),
38  "*",
39  group_);
40 
41  ideep::algorithm aalgorithm = ideep::algorithm::convolution_direct;
42  if (conv_algorithm_ == CONV_ALGORITHM_WINOGRAD) {
43  aalgorithm = ideep::algorithm::convolution_winograd;
44  }
45 
46  bool weights_changed =
47  (cached_weights_descriptor_ != filter.get_descriptor());
48  if (weights_changed && !training_mode_) {
49  cached_weights_descriptor_ = filter.get_descriptor();
50  auto filter_in = filter;
51  filter_in.make_group(group_);
52  auto expected_descriptor =
53  ideep::convolution_forward::expected_weights_descriptor(
54  filter_in.get_dims(),
55  filter_in.get_data_type(),
56  stride_,
57  pad_tl(),
58  pad_br(),
59  dilation_,
60  group_,
61  aalgorithm);
62  filter_.init<ideep::utils::allocator, ideep::convolution_forward>(
63  expected_descriptor);
64  ideep::reorder::compute(filter_in, filter_);
65  }
66 
67  // NB: actually, in the case when `group_ > 1`, IDEEP will create
68  // an itermediate tensor for each run below. However, this tensor is merely
69  // a view of of the weights and there is no actual data copy, so I'll let it
70  // go now. If we encounter performance surprise when convoluting with group
71  // > 1, this is the first place to check and we need to do the same cache
72  // trick as above
73  if (InputSize() > BIAS) {
74  ideep::convolution_forward::compute(
75  X,
76  training_mode_ ? filter : filter_,
77  Input(BIAS),
78  Y_dims,
79  *Y,
80  stride_,
81  dilation_,
82  pad_tl(),
83  pad_br(),
84  group_,
85  ideep::descriptor_group::attr_t(),
86  aalgorithm);
87  } else {
88  ideep::convolution_forward::compute(
89  X,
90  training_mode_ ? filter : filter_,
91  Y_dims,
92  *Y,
93  stride_,
94  dilation_,
95  pad_tl(),
96  pad_br(),
97  group_,
98  ideep::descriptor_group::attr_t(),
99  aalgorithm);
100  }
101 
102  return true;
103  }
104 
105  private:
106  INPUT_TAGS(INPUT, FILTER, BIAS);
107  OUTPUT_TAGS(OUTPUT);
108 
109  bool training_mode_;
110  int conv_algorithm_;
111  ideep::tensor filter_;
112  ideep::tensor::descriptor cached_weights_descriptor_;
113 };
114 
116  public:
117  USE_IDEEP_DEF_ALIASES();
118  USE_IDEEP_CONV_POOL_BASE_FUNCTIONS();
119 
120  IDEEPConvGradientOp(const OperatorDef& operator_def, Workspace* ws)
121  : IDEEPConvPoolOpBase(operator_def, ws),
122  no_bias_(OperatorBase::GetSingleArgument<int>("no_bias", 0)) {
123  OPERATOR_NEEDS_FEATURE(
124  pad_l() == pad_r() && pad_t() == pad_b(),
125  "Uneven padding not supported.");
126  CAFFE_ENFORCE(
127  !(no_bias_ && OutputSize() == 3),
128  "If bias is not present, you should not have 3 grad output.");
129  CAFFE_ENFORCE(
130  OperatorBase::GetSingleArgument<int>("training_mode", 0),
131  "In order to backward propagate weights correctly, "
132  "please set training_mode=1");
133  }
134  ~IDEEPConvGradientOp() override {}
135 
136  bool RunOnDeviceWithOrderNCHW() override {
137  const auto& X = Input(INPUT);
138  const auto& filter = Input(FILTER);
139  const auto& dY = Input(OUTPUT_GRAD);
140  auto* dfilter = Output(FILTER_GRAD);
141 
142  if (no_bias_) {
143  ideep::convolution_backward_weights::compute(
144  X,
145  dY,
146  filter.get_dims(),
147  *dfilter,
148  stride_,
149  dilation_,
150  pad_tl(),
151  pad_br(),
152  group_);
153  } else {
154  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
155  ideep::convolution_backward_weights::compute(
156  X,
157  dY,
158  filter.get_dims(),
159  *dfilter,
160  *dbias,
161  stride_,
162  dilation_,
163  pad_tl(),
164  pad_br(),
165  group_);
166  }
167 
168  if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
169  auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
170  ideep::convolution_backward_data::compute(
171  dY,
172  filter,
173  X.get_dims(),
174  *dX,
175  stride_,
176  dilation_,
177  pad_tl(),
178  pad_br(),
179  group_);
180  }
181 
182  return true;
183  }
184 
185  private:
186  bool no_bias_;
187 
188  INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
189  OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
190 };
191 
192 REGISTER_IDEEP_OPERATOR(Conv, IDEEPConvOp);
193 REGISTER_IDEEP_OPERATOR(ConvGradient, IDEEPConvGradientOp);
194 
195 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: OpClasses.h:13