#pragma once

#include <ATen/core/dispatch/Dispatcher.h>
#include "caffe2/core/operator.h"
#include <c10/util/ArrayRef.h>
#include <c10/util/Metaprogramming.h>
#include <ATen/core/ivalue.h>

namespace caffe2 {

// Wraps an operator that is registered with the c10 dispatcher so it can be
// executed as a regular caffe2 Operator<Context>. Instances are created via
// the REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_* macros at the end of this
// header.
template <class Context>
class C10OperatorWrapper final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  C10OperatorWrapper(
      const c10::OperatorHandle& op,
      const OperatorDef& operator_def,
      Workspace* ws)
      : Operator<Context>(operator_def, ws),
        op_(op),
        has_preallocated_outputs_(
            op_.schema().arguments().size() != 0 &&
            op_.schema().arguments().back().name() ==
                detail::PREALLOCATED_OUTPUT_ARGNAME) {
    // If the schema asks for preallocated outputs, that argument must be the
    // last one and must be an optional list of tensors.
    AT_ASSERT(
        !has_preallocated_outputs_ ||
        op_.schema().arguments().back().type()->isSubtypeOf(
            OptionalType::create(ListType::ofTensors())));

    AT_ASSERT(operator_def.output_size() == op_.schema().returns().size());
    AT_ASSERT(
        operator_def.input_size() + (has_preallocated_outputs_ ? 1 : 0) <=
        op_.schema().arguments().size());
  }
  bool RunOnDevice() override {
    // stack_ is cached as a member to avoid re-allocating it on every call,
    // so concurrent calls into the same operator instance must be serialized.
    std::lock_guard<std::mutex> lock(mutex_);

    pushInputs_();
    callKernel_();
    popOutputs_();

    return true;
  }

 private:
  // Converts the caffe2 inputs and arguments into IValues and pushes them
  // onto stack_ in schema order.
  void pushInputs_() {
    AT_ASSERT(stack_.size() == 0);
    stack_.reserve(
        op_.schema().arguments().size() + (has_preallocated_outputs_ ? 1 : 0));
    size_t input_tensor_index = 0;

    for (const auto& argument : op_.schema().arguments()) {
      if (argument.name() == detail::PREALLOCATED_OUTPUT_ARGNAME) {
        // The preallocated-outputs argument is only valid as the last schema
        // argument and must be an optional TensorList.
        AT_ASSERTM(
            has_preallocated_outputs_,
            "Error in caffe2->c10 wrapper: Operator schema has a parameter named ",
            detail::PREALLOCATED_OUTPUT_ARGNAME,
            ", but it's not at the end of the argument list");
        AT_ASSERTM(
            argument.type()->isSubtypeOf(
                OptionalType::create(ListType::ofTensors())),
            "Error in caffe2->c10 wrapper: Operator schema has a parameter named ",
            detail::PREALLOCATED_OUTPUT_ARGNAME,
            ", but it's not of type TensorList?");
        stack_.emplace_back(preallocated_outputs_());
      } else if (argument.type()->isSubtypeOf(TensorType::get())) {
        AT_ASSERTM(
            input_tensor_index < InputSize(),
            "Error in caffe2->c10 wrapper: Too few tensor arguments given (",
            InputSize(),
            "), operator schema expected more.");
        stack_.emplace_back(at::Tensor(C10Tensor(Input(input_tensor_index))));
        ++input_tensor_index;
      } else if (argument.type()->isSubtypeOf(ListType::ofTensors())) {
        AT_ASSERTM(
            input_tensor_index == 0,
            "Error in caffe2->c10 wrapper: Schema can only have either one or more Tensor inputs or one TensorList input.");
        stack_.emplace_back(ivalue::TensorList::create(array_inputs_()));
        input_tensor_index = InputSize();
      } else {
        stack_.emplace_back(get_nontensor_argument_(argument));
      }
    }
    AT_ASSERTM(
        input_tensor_index == InputSize(),
        "Error in caffe2->c10 wrapper: Number of caffe2 operator inputs (",
        InputSize(),
        ") doesn't match number of tensor arguments (",
        input_tensor_index,
        ") in the c10 operator schema.");
  }

  // Looks up the c10 kernel (once) and invokes it on stack_.
  void callKernel_() {
    AT_ASSERT(stack_.size() == op_.schema().arguments().size());
    if (!kernel_.has_value()) {
      // The kernel is resolved by dynamic dispatch on the first call and
      // cached for subsequent calls.
      kernel_ = c10::Dispatcher::singleton().lookup(op_, &stack_);
    }
    kernel_->call(&stack_);
  }
  // Moves the return values from stack_ into the caffe2 output tensors.
  void popOutputs_() {
    AT_ASSERT(stack_.size() == op_.schema().returns().size());
    for (size_t i = 0; i < op_.schema().returns().size(); ++i) {
      OperatorBase::SetOutputTensor(
          i, Tensor(C10Tensor(std::move(stack_[i]).toTensor())));
    }
    stack_.clear();
  }
  // Collects all caffe2 inputs into a vector, used when the schema takes a
  // single TensorList argument.
  std::vector<at::Tensor> array_inputs_() {
    std::vector<at::Tensor> result;
    result.reserve(InputSize());
    for (size_t i = 0; i < InputSize(); ++i) {
      result.emplace_back(Input(i));
    }
    return result;
  }
  // Collects the (possibly undefined) caffe2 output tensors so the kernel can
  // write into preallocated outputs.
  std::vector<at::Tensor> preallocated_outputs_() {
    std::vector<at::Tensor> result;
    result.reserve(OutputSize());
    for (size_t i = 0; i < OutputSize(); ++i) {
      result.emplace_back(OperatorBase::OutputTensorOrUndefined(i));
    }
    return result;
  }
  // Reads a non-tensor schema argument from the caffe2 OperatorDef arguments.
  IValue get_nontensor_argument_(const c10::Argument& argument) {
    if (argument.type()->isSubtypeOf(IntType::get())) {
      return get_nontensor_argument_<int>(
          argument.name(), argument.default_value());
    } else if (argument.type()->isSubtypeOf(FloatType::get())) {
      return get_nontensor_argument_<double>(
          argument.name(), argument.default_value());
    } else if (argument.type()->isSubtypeOf(BoolType::get())) {
      return get_nontensor_argument_<bool>(
          argument.name(), argument.default_value());
    } else {
      AT_ERROR(
          "Error in caffe2->c10 wrapper: Unsupported argument type ",
          argument.type()->str(),
          " in c10 operator schema");
    }
  }
  template <class T>
  IValue get_nontensor_argument_(
      const std::string& name,
      const c10::optional<IValue>& default_value) {
    if (default_value.has_value()) {
      return this->template GetSingleArgument<T>(name, default_value->to<T>());
    } else {
      AT_ASSERTM(
          this->template HasSingleArgumentOfType<T>(name),
          "Error in caffe2->c10 wrapper: Expected argument '",
          name,
          "' missing or wrong type.");
      return this->template GetSingleArgument<T>(name, 0);
    }
  }
  c10::OperatorHandle op_;

  // Kernel looked up from the c10 dispatcher on the first call and cached.
  c10::optional<c10::OpKernel> kernel_;

  // True iff the last schema argument is the list of preallocated caffe2
  // output tensors.
  bool has_preallocated_outputs_;

  // Kept as a member so the IValue stack doesn't have to be re-allocated on
  // every call; it is empty between calls.
  std::vector<IValue> stack_;
  std::mutex mutex_;
};
namespace detail {

// Creates the operator-creator function that the registration macros below
// install with caffe2: it instantiates a C10OperatorWrapper<Context> bound to
// the given c10 operator handle.
template <class Context>
inline std::function<
    std::unique_ptr<OperatorBase>(const OperatorDef&, Workspace*)>
createC10OperatorWrapper(const c10::OperatorHandle& op_handle) {
  return [op_handle](const OperatorDef& op_def, Workspace* ws) {
    return c10::guts::make_unique<C10OperatorWrapper<Context>>(
        op_handle, op_def, ws);
  };
}

} // namespace detail

} // namespace caffe2
// Registration macros: expose a c10 operator to caffe2 under the given name,
// routed through C10OperatorWrapper for the corresponding device context.
// Builds without the c10 dispatcher get the no-op definitions instead.
#ifndef CAFFE2_IS_XPLAT_BUILD
#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_CPU(OperatorHandle, Name) \
  REGISTER_CPU_OPERATOR_CREATOR(                                            \
      Name, detail::createC10OperatorWrapper<CPUContext>(OperatorHandle))
#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_CUDA(OperatorHandle, Name) \
  REGISTER_CUDA_OPERATOR_CREATOR(                                             \
      Name, detail::createC10OperatorWrapper<CUDAContext>(OperatorHandle))
#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_HIP(OperatorHandle, Name) \
  REGISTER_HIP_OPERATOR_CREATOR(                                            \
      Name, detail::createC10OperatorWrapper<HIPContext>(OperatorHandle))
#else
#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_CPU(OperatorHandle, Name)
#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_CUDA(OperatorHandle, Name)
#define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_HIP(OperatorHandle, Name)
#endif
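// Usage sketch (illustrative only; the operator and handle names below are
// hypothetical, not part of this header): given a c10 operator already
// registered with the c10 dispatcher and an OperatorHandle for it, exposing
// it to caffe2 under the name "MyC2Op" would look roughly like
//
//   // in some operator .cc file
//   // c10::OperatorHandle my_op_handle = ...; // obtained at c10 registration
//   REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_CPU(my_op_handle, MyC2Op)
//   REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH_CUDA(my_op_handle, MyC2Op)
//
// Afterwards, "MyC2Op" can appear in a caffe2 OperatorDef/NetDef and is
// executed through C10OperatorWrapper<Context>, which pushes the caffe2
// inputs onto an IValue stack, dispatches to the c10 kernel, and copies the
// results back into the caffe2 outputs.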