#pragma once

#include <ATen/core/dispatch/OpSchemaRegistration.h>
#include <ATen/core/dispatch/KernelRegistration.h>
#include <ATen/core/function_schema.h>
#include <ATen/core/stack.h>
#include <vector>

namespace caffe2 {
namespace detail {

// Name of the extra trailing schema argument used to pass in preallocated
// output tensors.
constexpr const char* PREALLOCATED_OUTPUT_ARGNAME =
    "_caffe2_preallocated_outputs";
// Signature of the type-erased trampoline that runs a concrete caffe2
// operator against inputs/outputs coming from the c10 dispatcher.
using _CallCaffe2OpFunc = std::vector<at::Tensor>(
    const c10::FunctionSchema& schema,
    std::vector<c10::IValue>&& inputs,
    std::vector<at::Tensor>&& outputs);
template <class Caffe2Operator>
inline std::vector<at::Tensor> _call_caffe2_op(
    const c10::FunctionSchema& schema,
    std::vector<c10::IValue>&& inputs,
    std::vector<at::Tensor>&& outputs) {
  Caffe2Operator op(schema, std::move(inputs), std::move(outputs));
  op.Run();
  return std::move(op).move_newstyle_outputs();
}
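// Illustrative only: a minimal sketch of the interface _call_caffe2_op
// requires from its Caffe2Operator template argument. Real caffe2 operators
// get this interface from their operator base class; the class name below is
// a placeholder, and only the three members used above matter.
//
//   class MyCaffe2Operator {
//    public:
//     MyCaffe2Operator(
//         const c10::FunctionSchema& schema,
//         std::vector<c10::IValue>&& inputs,
//         std::vector<at::Tensor>&& outputs);
//     bool Run();
//     std::vector<at::Tensor> move_newstyle_outputs() &&;
//   };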
// Non-templated helper that does the stack handling once, so its binary code
// can be shared between all operator instantiations of _call_caffe2_op.
inline void _call_caffe2_op_from_c10(
    c10::Stack* stack,
    const c10::FunctionSchema& schema,
    _CallCaffe2OpFunc* call_op) {
  // Precondition: one IValue per schema argument is on the stack; the last
  // argument is an optional list of preallocated output tensors (or None).
  AT_ASSERT(
      schema.arguments().size() != 0 &&
      schema.arguments().back().type()->isSubtypeOf(
          c10::OptionalType::create(c10::ListType::ofTensors())));
  c10::IValue preallocated_outputs = torch::jit::pop(*stack);

  const size_t num_outputs = schema.returns().size();
  const size_t num_inputs = schema.arguments().size() -
      1; // the trailing preallocated-output list is not a real input

  std::vector<at::Tensor> outputs;
  if (preallocated_outputs.isNone()) {
    // None passed: give the operator uninitialized output tensors.
    outputs.resize(num_outputs);
  } else {
    AT_ASSERT(preallocated_outputs.isTensorList());
    outputs =
        std::move(*std::move(preallocated_outputs).toTensorList()).elements();
  }

  std::vector<c10::IValue> inputs = torch::jit::pop(*stack, num_inputs);

  outputs = (*call_op)(schema, std::move(inputs), std::move(outputs));

  // Postcondition: all inputs are popped off the stack and one IValue per
  // output is pushed, possibly (but not necessarily) reusing the
  // preallocated tensors.
  for (auto&& output : std::move(outputs)) {
    torch::jit::push(*stack, std::move(output));
  }
}
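// Calling-convention sketch (illustrative; "MyOp" and its schema are made-up
// placeholders). For a schema
//   _caffe2::MyOp(Tensor input, int scale,
//                 Tensor[]? _caffe2_preallocated_outputs) -> Tensor
// a caller would do:
//
//   c10::Stack stack;
//   torch::jit::push(stack, some_tensor);    // "input"
//   torch::jit::push(stack, 2);              // "scale"
//   torch::jit::push(stack, c10::IValue());  // None: no preallocated outputs
//   _call_caffe2_op_from_c10(&stack, schema, &_call_caffe2_op<MyOp>);
//   at::Tensor result = torch::jit::pop(stack).toTensor();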
template <const c10::OperatorHandle& (*OpHandle)(), class Caffe2Operator>
void call_caffe2_op_from_c10(c10::Stack* stack, c10::KernelCache* /*cache*/) {
  _call_caffe2_op_from_c10(
      stack, OpHandle().schema(), &_call_caffe2_op<Caffe2Operator>);
}
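// OpHandle is expected to be a function returning the operator's registered
// c10::OperatorHandle, such as the accessor C10_DECLARE_OP_SCHEMA generates.
// Illustratively (with placeholder names), a concrete kernel instantiation
// then looks like
//   &call_caffe2_op_from_c10<::caffe2::_c10_ops::MyOp, MyOpClass>
// The KernelCache parameter is part of the c10 kernel signature but is
// unused by this wrapper.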
inline c10::FunctionSchema make_function_schema_for_c10(
    const char* OperatorName,
    std::vector<c10::Argument> inputs,
    std::vector<c10::Argument> outputs) {
  // The actual schema arguments are the real inputs plus one trailing
  // optional tensor list used to pass in preallocated output tensors.
  std::vector<c10::Argument> actual_inputs = std::move(inputs);
  actual_inputs.emplace_back(
      PREALLOCATED_OUTPUT_ARGNAME,
      c10::OptionalType::create(c10::ListType::ofTensors()),
      c10::nullopt,   // list length is not statically known
      c10::IValue()); // default value: None
  return c10::FunctionSchema(
      std::string("_caffe2::") + OperatorName,
      "", // no overload name
      std::move(actual_inputs),
      std::move(outputs));
}
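// Example of the resulting schema (illustrative, with made-up names):
//   make_function_schema_for_c10(
//       "MyOp", {c10::Argument("input")}, {c10::Argument("output")})
// produces a schema equivalent to
//   _caffe2::MyOp(Tensor input,
//                 Tensor[]? _caffe2_preallocated_outputs) -> Tensor output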
} // namespace detail
} // namespace caffe2

// Macros to export a caffe2 operator to the c10 dispatcher. They must be
// called from the top-level namespace. On mobile they expand to nothing to
// keep binary size down.
#ifndef C10_MOBILE

#define C10_DECLARE_CAFFE2_OPERATOR(OperatorName) \
  namespace caffe2 {                              \
  namespace _c10_ops {                            \
  C10_DECLARE_OP_SCHEMA(OperatorName);            \
  }                                               \
  }

#define C10_REGISTER_CAFFE2_OPERATOR_CPU(               \
    OperatorName, Inputs, Outputs, OperatorClass)       \
  /* Register the op schema with the c10 dispatcher */  \
  namespace caffe2 {                                    \
  namespace _c10_ops {                                  \
  C10_DEFINE_OP_SCHEMA(                                 \
      OperatorName,                                     \
      caffe2::detail::make_function_schema_for_c10(     \
          #OperatorName, Inputs, Outputs));             \
  }                                                     \
  }                                                     \
  /* Register the CPU kernel with the c10 dispatcher */ \
  C10_REGISTER_KERNEL(caffe2::_c10_ops::OperatorName)   \
      .kernel<&caffe2::detail::call_caffe2_op_from_c10< \
          ::caffe2::_c10_ops::OperatorName,             \
          OperatorClass>>()                             \
      .dispatchKey(CPUTensorId());

#define C10_REGISTER_CAFFE2_OPERATOR_CUDA(OperatorName, OperatorClass) \
  C10_REGISTER_KERNEL(caffe2::_c10_ops::OperatorName)                  \
      .kernel<&caffe2::detail::call_caffe2_op_from_c10<                \
          ::caffe2::_c10_ops::OperatorName,                            \
          OperatorClass>>()                                            \
      .dispatchKey(CUDATensorId());

#define C10_REGISTER_CAFFE2_OPERATOR_HIP(OperatorName, OperatorClass) \
  C10_REGISTER_KERNEL(caffe2::_c10_ops::OperatorName)                 \
      .kernel<&caffe2::detail::call_caffe2_op_from_c10<               \
          ::caffe2::_c10_ops::OperatorName,                           \
          OperatorClass>>()                                           \
      .dispatchKey(HIPTensorId());

#else

#define C10_DECLARE_CAFFE2_OPERATOR(OperatorName)
#define C10_REGISTER_CAFFE2_OPERATOR_CPU(OperatorName, Inputs, Outputs, OperatorClass)
#define C10_REGISTER_CAFFE2_OPERATOR_CUDA(OperatorName, OperatorClass)
#define C10_REGISTER_CAFFE2_OPERATOR_HIP(OperatorName, OperatorClass)

#endif
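// Usage sketch (illustrative; operator, class, and argument names below are
// placeholders). In the operator's header:
//
//   C10_DECLARE_CAFFE2_OPERATOR(C10MyOperator)
//
// In the operator's .cc file:
//
//   C10_REGISTER_CAFFE2_OPERATOR_CPU(
//       C10MyOperator,
//       (std::vector<c10::Argument>{
//           c10::Argument("input1"),
//           c10::Argument("argument2", c10::IntType::get())}),
//       (std::vector<c10::Argument>{c10::Argument("output1")}),
//       caffe2::MyOperator<caffe2::CPUContext>)
//
// and, if a GPU implementation exists, in the .cu file:
//
//   C10_REGISTER_CAFFE2_OPERATOR_CUDA(
//       C10MyOperator, caffe2::MyOperator<caffe2::CUDAContext>)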