// NOTE(review): this span is a garbled extraction of the Caffe2 IDEEP
// ConvFusion operator source — the original file's own line numbers are
// fused into the text and many interior lines are missing. Comments below
// annotate only what is visible; do not treat them as a complete spec.
1 #include <caffe2/ideep/operators/conv_pool_base_op.h> 7 USE_IDEEP_DEF_ALIASES();
8 USE_IDEEP_CONV_POOL_BASE_FUNCTIONS();
// Fusion-type enum tail: conv+sum+relu is the last real value; FUSION_MAX
// serves as an exclusive upper bound for the range check below.
14 FUSION_CONV_SUM_RELU = 3,
15 FUSION_MAX = FUSION_CONV_SUM_RELU + 1,
// Constructor fragment — reads operator arguments:
// "fusion_type" (default 0 == FUSION_UNKNOWN, presumably) selects which
// post-op is fused into the convolution.
20 fusion_type_(static_cast<FusionType>(
21 OperatorBase::GetSingleArgument<int>(
"fusion_type", 0))),
// "training_mode" (default 0): when non-zero, the weight-reorder cache in
// RunOnDeviceWithOrderNCHW is bypassed (see that method).
23 OperatorBase::GetSingleArgument<int>(
"training_mode", 0)),
// "conv_algorithm" (default CONV_ALGORITHM_AUTO) picks the convolution
// implementation (direct vs. winograd — dispatched in the run method).
25 OperatorBase::GetSingleArgument<int>(
"conv_algorithm", CONV_ALGORITHM_AUTO)) {
// Feature gates: only symmetric padding, only group_ == 1, and the fusion
// type must lie strictly inside (FUSION_UNKNOWN, FUSION_MAX).
26 OPERATOR_NEEDS_FEATURE(
27 pad_l() == pad_r() && pad_t() == pad_b(),
28 "Uneven padding not supported.");
29 OPERATOR_NEEDS_FEATURE(group_ == 1,
"Group not supported.");
30 OPERATOR_NEEDS_FEATURE(
31 fusion_type_ > FUSION_UNKNOWN && fusion_type_ < FUSION_MAX,
32 "Undefined Conv fusion type.",
// Per-dimension sanity check: both leading and trailing pads must be >= 0.
38 for (
int dim = 0; dim < kernel_.size(); ++dim) {
39 CAFFE_ENFORCE_GE(pads_[dim], 0);
40 CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
43 "If you are doing convolution, you will need to set " 44 "explicitly the kernel size.");
// RunOnDeviceWithOrderNCHW (fragment): runs the fused convolution on NCHW
// tensors via ideep::convolution_forward. Interior lines are missing from
// this scrape, so several call-argument lists are only partially visible.
49 bool RunOnDeviceWithOrderNCHW()
override {
50 const auto& X = Input(INPUT_X);
51 const auto& filter = Input(FILTER);
52 auto* Y = Output(OUTPUT);
// Output dims computed from the input and the filter's output-channel
// count (filter dim 0).
53 auto Y_dims_conv = CalcOutputDims(X, filter.get_dim(0));
// attr(): maps fusion_type_ to the matching MKL-DNN post-op attribute
// (relu / sum / residual) — the sum-only branch's value is not visible here.
54 auto attr = [
this]() {
55 return (fusion_type_ == FUSION_CONV_RELU)
57 : ((fusion_type_ == FUSION_CONV_SUM)
59 : ((fusion_type_ == FUSION_CONV_SUM_RELU) ? iattr::residual()
// last_input(): index of the final expected input — sum fusions consume
// an extra tensor (INPUT_S) beyond the optional bias slot.
62 auto last_input = [
this]() {
63 return (fusion_type_ == FUSION_CONV_RELU) ? BIAS_OR_INPUT_S : INPUT_S;
// Shape checks: 4-D input and filter; filter spatial dims must match the
// configured kernel; input channels must equal filter channels * group.
66 CAFFE_ENFORCE(4 == X.ndims());
67 CAFFE_ENFORCE(4 == filter.ndims());
68 CAFFE_ENFORCE(filter.get_dim(2) == kernel_h());
69 CAFFE_ENFORCE(filter.get_dim(3) == kernel_w());
71 X.get_dim(1) == filter.get_dim(1) * group_,
72 "Convolution fusion op: input channels does not match: " 73 "# of input channels ",
75 " is not equal to kernel channels * group:",
// Algorithm selection: direct convolution by default, winograd on request.
80 ideep::algorithm aalgorithm = ideep::algorithm::convolution_direct;
81 if (conv_algorithm_ == CONV_ALGORITHM_WINOGRAD) {
82 aalgorithm = ideep::algorithm::convolution_winograd;
// Inference-only weight cache: when the incoming filter's descriptor
// changes and we are not training, reorder the weights into the layout
// ideep expects and keep them in filter_ for subsequent runs.
85 bool weights_changed =
86 (cached_weights_descriptor_ != filter.get_descriptor());
87 if (weights_changed && !training_mode_) {
88 cached_weights_descriptor_ = filter.get_descriptor();
90 auto expected_descriptor =
91 ideep::convolution_forward::expected_weights_descriptor(
93 if (filter_.get_descriptor() != expected_descriptor) {
94 filter_.init<ideep::utils::allocator, ideep::convolution_forward>(
96 ideep::reorder::compute(filter, filter_);
// Dispatch: with-bias path when InputSize() exceeds last_input(), else the
// bias-less path. Training mode uses the raw filter, not the cached copy.
100 if (InputSize() > last_input()) {
101 ideep::convolution_forward::compute(
103 training_mode_ ? filter : filter_,
104 Input(BIAS_OR_INPUT_S),
115 ideep::convolution_forward::compute(
117 training_mode_ ? filter : filter_,
// Sum/residual fusions must run in place: the output tensor is required
// to alias the last input blob.
129 if (fusion_type_ != FUSION_CONV_RELU) {
131 Y == &(Input(InputSize() - 1)),
132 "Convolution fusion op: InPlace is enforced for sum fusion.");
// Member state (fragment): configured fusion type, plus the cached
// pre-reordered weights and the descriptor they were derived from
// (populated only outside training mode — see the run method).
139 FusionType fusion_type_;
142 ideep::tensor filter_;
143 ideep::tensor::descriptor cached_weights_descriptor_;
// Input slots: X, filter, then bias-or-sum-input, then the sum input when
// a bias is also present.
145 INPUT_TAGS(INPUT_X, FILTER, BIAS_OR_INPUT_S, INPUT_S);
// ---- Schema / documentation section (fragment) ----
// kConvFusionDoc holds prose shared by the generated operator docs;
// ConvFusionDocGenerator splices it in via {placeholder} substitution.
// NOTE(review): the scrape splits the R"DOC(...)DOC" raw string literal
// across the next three physical lines — nothing may be inserted between
// them without corrupting the literal.
151 const char* kConvFusionDoc = R
"DOC( 152 Note that other parameters, such as the stride and 153 kernel size, or the pads' sizes in each direction are not necessary for input 154 because they are provided by the ConvPoolOpBase operator. Various dimension 155 checks are done implicitly, and the sizes are specified in the Input docs for 156 this operator. As is expected, the filter is convolved with a subset of the 157 image and the bias is added; this is done throughout the image data and the 158 output is computed. As a side note on the implementation layout: 159 conv_op_impl.h is the templated implementation of the conv_op.h file, which is 160 why they are separate files. 163 std::function<void(OpSchema&)> ConvFusionDocGenerator(const char* dim) {
166 The convolution fusion operator consumes an input vector, a {dim}filter blob, 167 a bias blob and another input vector and computes the output. This operator 168 gives the chance to fuse the ReLU or element-wise Sum with a convolution 169 operator. {conv_fusion_doc})DOC"; 170 c10::ReplaceAll(doc, "{dim}", dim);
// Substitute the shared doc body into the generated schema text.
171 c10::ReplaceAll(doc,
"{conv_fusion_doc}", kConvFusionDoc);
// Input 0 (X): NCHW activation blob (N x C x H x W).
176 "Input data blob from previous layer; has size (N x C x H x W), " 177 "where N is the batch size, C is the number of channels, " 178 "and H and W are the height and width. Note that this is for the NCHW " 179 "usage. On the other hand, the NHWC Op has a different set of " 180 "dimension constraints. ");
// Input 1 (filter): weights of size (M x C x kH x kW).
184 "The filter blob that will be used in the " 185 "convolutions; has size (M x C x kH x kW), where C is the number of " 186 "channels, and kH and kW are the height and width of the kernel.");
// Input 2 (bias): optional 1-D bias of size M.
190 "The 1D bias blob that is added through the " 191 "convolution; has size (M).");
// Input 3 / last (S): the element-wise Sum operand; this fused line also
// carries the output-blob doc and the OPERATOR_SCHEMA registration.
195 "Input data blob for element-wise Sum fusion from previous layer; " 196 "has the same size of convolution output. Its input index should " 197 "be 2 if no bias for this convolution, and it MUST be inplace with " 202 "Output data blob that contains the result of the " 203 "convolution fusion. The output dimensions are functions of the kernel " 204 "size, stride size, and pad lengths." 209 OPERATOR_SCHEMA(ConvFusion)
215 .Arg(
"fusion_type",
"Which fusion type is used")
// In-place pairs {2,0} and {3,0} mirror the in-place enforcement for sum
// fusion in the NCHW run path.
216 .AllowInplace({{2, 0}, {3, 0}})
217 .FillUsing(ConvFusionDocGenerator(
""));
A class to record the schema of an op.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
std::function< struct Cost(const OperatorDef &, const vector< TensorShape > &)> CostInferenceFunctionType
Registers a function that takes in an OperatorDef and a series of input shapes and returns the total ...