// NOTE(review): this chunk is a garbled extraction of caffe2's IDEEP
// optimization pass -- original file line numbers are fused into the text and
// some tokens (signatures, braces) are missing. Comments describe intent only;
// confirm against the canonical source before editing code.
1 #include "caffe2/opt/optimize_ideep.h" 2 #include "caffe2/opt/converter.h" 3 #include "caffe2/opt/fusion.h" 5 #ifdef CAFFE2_USE_MKLDNN 6 #include "caffe2/ideep/ideep_utils.h" 14 #ifndef CAFFE2_USE_MKLDNN 15 void OptimizeForIdeep(
// Fallback stub: when caffe2 is built WITHOUT MKLDNN, OptimizeForIdeep only
// logs a warning and performs no graph transformation.
19 LOG(WARNING) <<
"Only support optimizations for IDEEP";
// getBlob(): resolves a Tensor graph node to its Blob in the given workspace.
// NOTE(review): the enclosing function signature is missing from this
// extraction (presumably takes a graph NodeRef `node` and a Workspace* `ws`)
// -- confirm against the canonical source.
23 USE_IDEEP_DEF_ALIASES();
26 auto tensor = repr::nn::get<repr::Tensor>(node);
// Fail fast if the graph references a blob that is absent from the workspace.
27 CAFFE_ENFORCE(ws->
HasBlob(tensor->getName()),
"Blob not in workspace");
28 return ws->
GetBlob(tensor->getName());
// getTensor<T>(): extracts a mutable T payload (e.g. an ideep tensor) from a
// Blob; the visible branch returns the payload only when the Blob actually
// holds a T. NOTE(review): the `template <class T>` header, the fall-through
// return (presumably nullptr), and the closing brace are missing from this
// extraction -- confirm against the canonical source.
32 T* getTensor(Blob* blob) {
33 CAFFE_ENFORCE(blob,
"Blob is invalid");
34 if (blob && blob->template IsType<T>()) {
35 return blob->template GetMutable<T>();
// getOpDef(): returns the caffe2 OperatorDef attached to an NN operator via
// its annotation; throws when no annotation is present.
// NOTE(review): the function signature is missing from this extraction.
41 auto annotation = nnOp.getAnnotation();
42 if (annotation ==
nullptr) {
43 CAFFE_THROW(
"Cannot get Operator annotation");
// NOTE(review): the dyn_cast result is not null-checked -- a non-Caffe2
// annotation would dereference null. Confirm this matches upstream before
// changing.
45 return dyn_cast<Caffe2Annotation>(annotation)->getOperatorDef();
// getMutableOpDef(): mutable counterpart of getOpDef() -- returns a writable
// OperatorDef from the operator's mutable annotation; throws when absent.
// NOTE(review): the function signature is missing from this extraction.
49 auto annotation = nnOp.getMutableAnnotation();
50 if (annotation ==
nullptr) {
51 CAFFE_THROW(
"Cannot get Operator annotation");
// NOTE(review): dyn_cast result is not null-checked here either.
53 return dyn_cast<Caffe2Annotation>(annotation)->getMutableOperatorDef();
// isOpType(): true when nodeRef is an operator node whose OperatorDef type
// string equals typeName. Non-operator nodes take the early-exit branch
// (its body -- presumably `return false;` -- is missing from this extraction).
57 if (!repr::nn::is<repr::NeuralNetOperator>(nodeRef)) {
60 auto op = repr::nn::get<repr::NeuralNetOperator>(nodeRef);
61 auto opDef = getOpDef(*op);
62 return opDef.type() == typeName;
// isOnIdeepDevice(): checks whether the operator's device option is the
// IDEEP device type -- every fusion pass below gates on this.
67 const auto& op = getOpDef(nnOp);
68 return op.device_option().device_type() == DeviceTypeProto::PROTO_IDEEP;
// shouldFuseConv(): fusion is only attempted for convs on the IDEEP device
// and with group <= 1 (grouped convolutions are excluded).
71 bool shouldFuseConv(
const repr::Conv& conv) {
72 return isOnIdeepDevice(conv) ? (conv.getGroup() <= 1) :
false;
// isStopGradientNode(): true when node is an operator of type "StopGradient".
// Non-operator nodes take the early-exit branch (body missing from this
// extraction, presumably `return false;`).
77 if (!repr::nn::is<repr::NeuralNetOperator>(node)) {
80 auto maybeStopGrad = repr::nn::get<repr::NeuralNetOperator>(node);
81 auto maybeStopGradDef = getOpDef(*maybeStopGrad);
82 return maybeStopGradDef.type() ==
"StopGradient";
// removeStopGradientForInference(): StopGradient is a no-op at inference
// time. When a StopGradient's input and output tensors carry the same name,
// splice the op out by replacing the output node with the input node so
// consumers read the input directly.
85 auto allNodes = nn->dataFlow.getMutableNodes();
86 for (
int i = 0; i < allNodes.size(); ++i) {
87 auto node = allNodes[i];
// Skip everything that is not a StopGradient operator.
88 if (!isStopGradientNode(node)) {
92 auto stopGradInput = repr::nn::getInputs(node).front();
93 auto stopGradOutput = repr::nn::getOutputs(node).front();
94 auto inputName = repr::nn::get<repr::Tensor>(stopGradInput)->getName();
95 auto outputName = repr::nn::get<repr::Tensor>(stopGradOutput)->getName();
96 if (inputName == outputName) {
// Rewire the graph: consumers of the StopGradient output now consume its
// input node. (Deletion of the now-dangling op node is presumably done
// nearby; that code is missing from this extraction.)
97 nn->dataFlow.
replaceNode(stopGradOutput, stopGradInput);
// resetConvForFusion(): rewrites a Conv's OperatorDef into "ConvFusion" and
// records the requested fusion_type as an argument. If the op is already a
// ConvFusion (nested fusion), only fusion_type == 1 may be stacked, and only
// on an existing fusion_type == 2 -- both enforced below.
// NOTE(review): the fusion_type numeric encoding (1/2/3) is asserted here but
// its meaning (relu / sum / sum+relu, presumably) must be confirmed against
// the ConvFusion operator documentation.
108 auto conv = repr::nn::get<repr::Conv>(convNode);
109 auto annotation = conv->getMutableAnnotation();
// Bail out when there is no usable Caffe2 annotation to rewrite.
110 if (!annotation || !isa<Caffe2Annotation>(annotation)) {
114 auto* op = getMutableOpDef(*conv);
// Nested-fusion path: op was already converted to ConvFusion earlier.
119 if (op->type() ==
"ConvFusion") {
120 CAFFE_ENFORCE(fusion_type == 1,
"Invalid nest fusion");
121 for (
auto& arg : *op->mutable_arg()) {
122 if (arg.name() ==
"fusion_type") {
// Stacking is only valid on an existing fusion_type == 2; the in-place
// upgrade of arg (presumably to 3) is missing from this extraction.
124 CAFFE_ENFORCE(arg.i() == 2,
"Invalid nest fusion");
// Fresh-fusion path: convert the plain Conv into ConvFusion.
132 CAFFE_ENFORCE(fusion_type < 3,
"Invalid fusion type");
133 op->set_type(
"ConvFusion");
134 auto* arg = op->add_arg();
135 arg->set_name(
"fusion_type");
136 arg->set_i(fusion_type);
// isAffineChannelNode(): true when node is an operator of type
// "AffineChannel" (per-channel scale+bias, fusable into Conv like BN).
141 if (!repr::nn::is<repr::NeuralNetOperator>(node)) {
144 auto maybeAffCh = repr::nn::get<repr::NeuralNetOperator>(node);
145 auto maybeAffChDef = getOpDef(*maybeAffCh);
146 return maybeAffChDef.type() ==
"AffineChannel";
// fuseConvBNAndAffChHelperForIdeep(): finds one Conv whose single consumer is
// a BatchNormalization or AffineChannel op and folds that op's per-channel
// scale/bias (and, for BN, mean/variance) into the Conv's filter and bias
// in-place, then splices the BN/AffineChannel node out of the graph.
// NOTE(review): this extraction is missing several fragments (loop braces,
// `continue`s, the `isBN` assignment, parts of the EXPOSE_TENSOR_DATA macro);
// comments below describe the visible logic only.
149 for (
auto node_pair : repr::nn::dataIterator<repr::Conv>(nn->dataFlow)) {
150 bool no_bias =
false;
153 std::tie(conv, convNode) = node_pair;
// Only IDEEP convs are eligible.
155 if (!isOnIdeepDevice(*conv)) {
156 LOG(WARNING) <<
"Not a IDEEP operator";
// Already-fused convs are skipped.
160 const auto& op = getOpDef(*conv);
161 if (op.type() ==
"ConvFusion") {
// Fusion is only safe when the conv output feeds exactly one consumer.
165 auto convOutput = repr::nn::getOutputs(convNode).front();
166 auto consumers = repr::nn::getConsumers(convOutput);
168 if (consumers.size() != 1) {
173 auto consumer = consumers.front();
// The consumer must be BatchNormalization or AffineChannel; `isBN` is
// presumably set in these branches (assignment missing from extraction).
174 if (repr::nn::is<repr::BatchNormalization>(consumer)) {
176 }
else if (isAffineChannelNode(consumer)) {
181 auto bnOrAffChNode = consumer;
182 auto bn = isBN ? repr::nn::get<repr::BatchNormalization>(bnOrAffChNode) :
nullptr;
183 auto bnOrAffChOutput = repr::nn::getOutputs(bnOrAffChNode).front();
185 auto convInputs = repr::nn::getInputs(convNode);
186 if (convInputs.size() < 2) {
187 LOG(WARNING) <<
"Invalid convolution input size";
// BN needs 5 inputs (X, scale, bias, mean, var); AffineChannel needs 3.
191 auto bnOrAffChInputs = repr::nn::getInputs(bnOrAffChNode);
192 int numInputs = isBN ? 5 : 3;
193 if (bnOrAffChInputs.size() < numInputs) {
194 LOG(WARNING) <<
"Invalid input size: " 195 << bnOrAffChInputs.size()
196 <<
", expect " << numInputs;
// A bias-less Conv borrows the BN/AffCh bias blob as its own bias input.
201 if (convInputs.size() < 3) {
203 nn->dataFlow.
createEdge(bnOrAffChInputs[2], convNode);
204 convInputs = repr::nn::getInputs(convNode);
// Macro: bind an ideep tensor from the workspace and expose a public-format
// float* view (name##Data) over a reordered copy (name##Tensor).
207 #define EXPOSE_TENSOR_DATA(name, index, nodes, need_init) \ 208 itensor* name = nullptr; \ 209 itensor name##Tensor; \ 210 float* name##Data = nullptr; \ 212 name = getTensor<itensor>(getBlob(nodes[index], ws)); \ 213 if (name == nullptr) { \ 214 LOG(WARNING) << #name " not a IDEEP tensor"; \ 217 name##Tensor.resize(name->get_dims(), name->get_data_type()); \ 218 name##Tensor.reorder_from(*name); \ 220 name##Tensor.is_public_format(), #name " not with public format"); \ 221 name##Data = static_cast<float*>(name##Tensor.get_data_handle()); \ 224 EXPOSE_TENSOR_DATA(filter, 1, convInputs,
true);
225 EXPOSE_TENSOR_DATA(biasConv, 2, convInputs,
true);
227 EXPOSE_TENSOR_DATA(scale, 1, bnOrAffChInputs,
true);
228 EXPOSE_TENSOR_DATA(biasBNOrAffCh, 2, bnOrAffChInputs,
true);
// mean/variance only exist for BN (need_init == isBN).
229 EXPOSE_TENSOR_DATA(mean, 3, bnOrAffChInputs, isBN);
230 EXPOSE_TENSOR_DATA(variance, 4, bnOrAffChInputs, isBN);
// Per output channel c: coeff = scale/sqrt(var+eps); scale the filter slice
// and recompute the bias. chwDim is the filter's per-channel element count.
232 #undef EXPOSE_TENSOR_DATA 235 auto chwDim = filterTensor.get_dim(1) * filterTensor.get_dim(2) *
236 filterTensor.get_dim(3);
237 for (
auto c = 0; c < filterTensor.get_dim(0); ++c) {
// AffineChannel has no mean/variance: defaults (mean 0, variance 1) apply.
239 float variance_val = 1;
241 mean_val = meanData[c];
242 variance_val = std::sqrt(varianceData[c] + bn->getEpsilon());
244 float coeff = scaleData[c] / variance_val;
245 for (
auto i = 0; i < chwDim; ++i) {
246 filterData[c * chwDim + i] *= coeff;
// Bias update: no_bias vs existing-bias branches (branch structure and the
// lhs of the second assignment are missing from this extraction).
250 biasConvData[c] = biasBNOrAffChData[c] - mean_val * coeff;
253 biasBNOrAffChData[c] + (biasConvData[c] - mean_val) * coeff;
// Write the updated public-format buffers back into the ideep tensors.
257 filter->reorder_from(filterTensor);
258 biasConv->reorder_from(biasConvTensor);
// Splice out the BN/AffineChannel node: conv now produces its output.
259 nn->dataFlow.
replaceNode(convOutput, bnOrAffChOutput);
// fuseConvBNAndAffChForIdeep(): repeatedly applies the helper until it
// reports no more fusable Conv+BN/AffineChannel pairs (fixed point).
271 while (fuseConvBNAndAffChHelperForIdeep(nn, ws)) {
// fuseConvSumForIdeep(): fuses Sum/Add(conv_out, X) into the producing Conv
// as ConvFusion with fusion_type == 2, making the conv accumulate into X's
// buffer. NOTE(review): several fragments (continue statements, the inner
// j-loop header searching backwards for the Conv, sumInputX declaration,
// graph-rewiring calls) are missing from this extraction.
278 auto allNodes = nn->dataFlow.getMutableNodes();
279 for (
int i = 0; i < allNodes.size(); i++) {
280 auto sumNode = allNodes[i];
281 if (!repr::nn::hasInputs(sumNode)) {
// Only Sum or elementwise Add nodes are candidates.
287 if (!repr::nn::is<repr::Sum>(sumNode) && !isOpType(sumNode,
"Add")) {
291 auto sum = repr::nn::get<repr::NeuralNetOperator>(sumNode);
292 if (!isOnIdeepDevice(*sum)) {
293 LOG(WARNING) <<
"Not a IDEEP operator";
// Exactly two summands are required for the fused form.
297 auto sumInputs = repr::nn::getInputs(sumNode);
298 if (sumInputs.size() != 2) {
// Each input's last consumer must be the sum itself, otherwise fusing would
// change another consumer's view of the data.
302 bool should_fuse =
true;
303 for (
auto input : sumInputs) {
304 auto consumer = repr::nn::getConsumers(input).back();
305 if (consumer != sumNode) {
318 if (!repr::nn::hasInputs(sumNode)) {
// Scan preceding nodes for the Conv producing one of the sum inputs
// (loop header for j is missing from this extraction).
323 if (repr::nn::is<repr::NeuralNetOperator>(allNodes[j])) {
325 if (repr::nn::is<repr::Conv>(allNodes[j])) {
326 convNode = allNodes[j];
331 if (convNode ==
nullptr) {
335 auto conv = repr::nn::get<repr::Conv>(convNode);
336 if (!shouldFuseConv(*conv)) {
337 LOG(WARNING) <<
"Not a IDEEP operator";
// sumInputX = the summand that is NOT the conv output; the conv will
// accumulate into its buffer.
341 auto convOutput = repr::nn::getOutputs(convNode).front();
343 (sumInputs[0] == convOutput ? sumInputs[1] : sumInputs[0]);
344 CAFFE_ENFORCE(sumInputX !=
nullptr,
"Invalid sum inputs");
// The other summand must come from a real operator, or fusion is abandoned.
346 auto preNode = repr::nn::getProducer(sumInputX);
347 if (preNode ==
nullptr || !repr::nn::is<repr::NeuralNetOperator>(preNode)) {
348 LOG(WARNING) <<
"Can not fuse Conv Sum";
// Re-point the conv's output tensor to X's name (in-place accumulation).
352 auto newOutputName = repr::nn::get<repr::Tensor>(sumInputX)->getName();
353 auto newOutputTensor = util::make_unique<repr::Tensor>(newOutputName);
355 unique_dyn_cast<repr::NeuralNetData>(newOutputTensor));
357 auto sumOutput = repr::nn::getOutputs(sumNode).front();
// Mark the conv as ConvFusion with fusion_type == 2 (conv + sum).
361 resetConvForFusion(convNode, 2);
// fuseActivationForIdeep(): fuses Conv+Relu pairs via the generic
// fuseActivation pass; the postprocess step converts the surviving Conv to
// ConvFusion with fusion_type == 1.
373 auto should_fuse = shouldFuseConv;
374 auto postprocess = std::bind(resetConvForFusion, std::placeholders::_1, 1);
375 fuseActivation<repr::Conv, repr::Relu>(nn, should_fuse, postprocess);
// enforceFusionInplaceForIdeep(): ConvFusion ops carrying a sum (fusion_type
// 2 or 3) must write their output in-place over the summand input; this pass
// renames the conv output tensor to the input's name when they differ.
382 for (
auto node_pair : repr::nn::dataIterator<repr::Conv>(nn->dataFlow)) {
385 std::tie(conv, convNode) = node_pair;
387 if (!isOnIdeepDevice(*conv)) {
388 LOG(WARNING) <<
"Not a IDEEP operator";
// Only fused convs are relevant here.
392 const auto& op = getOpDef(*conv);
393 if (op.type() !=
"ConvFusion") {
// In-place is required only for sum-carrying fusion types (2 or 3).
397 bool enforce_inplace =
false;
398 for (
const auto& arg : op.arg()) {
400 if (arg.name() ==
"fusion_type" && (arg.i() == 2 || arg.i() == 3)) {
401 enforce_inplace =
true;
406 if (!enforce_inplace) {
// The summand is the conv's last input; if output already shares its name,
// nothing to do.
410 auto convInput = repr::nn::getInputs(convNode).back();
411 auto inputName = repr::nn::get<repr::Tensor>(convInput)->getName();
412 auto convOutput = repr::nn::getOutputs(convNode).front();
413 auto outputName = repr::nn::get<repr::Tensor>(convOutput)->getName();
414 if (inputName == outputName) {
// In-place write is only legal when this conv is the input's last consumer.
418 auto consumer = repr::nn::getConsumers(convInput).back();
419 if (consumer != convNode) {
420 LOG(ERROR) <<
"Can not enforce to inplace for fusion";
// Replace the conv output tensor with one named after the input.
424 auto newOutputTensor = util::make_unique<repr::Tensor>(inputName);
426 unique_dyn_cast<repr::NeuralNetData>(newOutputTensor));
// setPoolingInferenceMode(): marks every IDEEP MaxPool with
// training_mode = 0 so the IDEEP kernel skips producing training-only
// outputs (e.g. indices). Existing training_mode args are presumably reset
// in the loop below (the assignment line is missing from this extraction);
// missing args are appended.
434 for (
auto node_pair : repr::nn::dataIterator<repr::MaxPool>(nn->dataFlow)) {
436 repr::MaxPool *maxPool;
437 std::tie(maxPool, maxPoolNode) = node_pair;
439 if (!isOnIdeepDevice(*maxPool)) {
440 LOG(WARNING) <<
"Not a IDEEP operator";
444 auto *op = getMutableOpDef(*maxPool);
// Look for an existing training_mode argument to overwrite.
445 bool found_training_mode =
false;
446 for (
auto &arg : *op->mutable_arg()) {
447 if (arg.name() ==
"training_mode") {
449 found_training_mode =
true;
// No existing argument: append one (its value set -- presumably to 0 -- on
// a line missing from this extraction).
454 if (!found_training_mode) {
455 auto *arg = op->add_arg();
456 arg->set_name(
"training_mode");
// OptimizeForIdeep(): MKLDNN-enabled entry point. Runs the IDEEP inference
// optimization pipeline in order: strip StopGradient, fold BN/AffineChannel
// into Conv, fuse Conv+Sum, fuse Conv+Relu, enforce in-place outputs for
// sum fusions, and flip pooling into inference mode.
// NOTE(review): the training_mode parameter presumably gates these passes
// (they are inference-only); the guard is missing from this extraction.
462 void OptimizeForIdeep(
465 bool training_mode) {
471 removeStopGradientForInference(nn);
473 fuseConvBNAndAffChForIdeep(nn, ws);
475 fuseConvSumForIdeep(nn, ws);
477 fuseActivationForIdeep(nn);
479 enforceFusionInplaceForIdeep(nn);
481 setPoolingInferenceMode(nn);
484 #endif // CAFFE2_USE_MKLDNN
NodeRef createNode(T &&data)
Creates a node and retains ownership of it.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs, and (2) all instantiated networks.
const Blob * GetBlob(const string &name) const
Gets the blob with the given name as a const pointer.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime environment.
void deleteNode(NodeRef n)
Deletes a node from the graph.
bool HasBlob(const string &name) const
Checks if a blob with the given name is present in the current workspace.
void replaceNode(const NodeRef &oldNode, const NodeRef &newNode)
Replace a node in the graph with another node.
EdgeRef createEdge(NodeRef tail, NodeRef head, U...data)
Creates a directed edge and retains ownership of it.