Caffe2 - C++ API
A deep learning, cross-platform ML framework
msnpu_extension.cpp
1 #include <torch/extension.h>
2 
3 #include <ATen/ExtensionBackendRegistration.h>
4 
5 using namespace at;
6 
// Records which override ran most recently (0 = zeros, 1 = add, 2 = sum,
// 3 = kl_div, 4 = kl_div_backward); read back from Python via get_test_int().
static int test_int;
8 
9 Tensor get_dtype_tensor(caffe2::TypeMeta dtype) {
10  auto tensor_impl = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(
11  Storage(
12  dtype, 0, at::DataPtr(nullptr, Device(DeviceType::MSNPU, 0)), nullptr, false),
13  MSNPUTensorId(),
14  false);
15  return Tensor(std::move(tensor_impl));
16 }
17 
18 Tensor zeros_override(IntArrayRef size, const TensorOptions & options) {
19  test_int = 0;
20  return get_dtype_tensor(options.dtype());
21 }
22 
23 Tensor add_override(const Tensor & a, const Tensor & b , Scalar c) {
24  test_int = 1;
25  return get_dtype_tensor(a.dtype());
26 }
27 
28 Tensor sum_override(const Tensor & self) {
29  test_int = 2;
30  return get_dtype_tensor(self.dtype());
31 }
32 
33 // needed for sum backwards
34 Tensor expand_override(const Tensor & self, IntArrayRef size, bool implicit) {
35  return get_dtype_tensor(self.dtype());
36 }
37 
38 
39 Tensor kl_div_override(
40  const Tensor & self, const Tensor & target, int64_t reduction) {
41  test_int = 3;
42  return get_dtype_tensor(self.dtype());
43 }
44 
45 Tensor kl_div_backward_override(
46  const Tensor & grad_output,
47  const Tensor & self,
48  const Tensor & target,
49  int64_t reduction) {
50  test_int = 4;
51  return get_dtype_tensor(self.dtype());
52 }
53 
54 // numel and ones_like are needed for autograd backwards
55 int64_t numel_override(const Tensor & self) {
56  return 1;
57 }
58 
59 Tensor ones_like_override(const Tensor & self, const TensorOptions & options) {
60  return get_dtype_tensor(options.dtype());
61 }
62 
// Registers all stub kernels above with the extension-backend dispatcher
// under Backend::MSNPU. Exposed to Python (see PYBIND11_MODULE below) and
// must run before any MSNPU op is dispatched. Each schema string must match
// the signature the dispatcher expects for that op.
void init_msnpu_extension() {
  register_extension_backend_op(
    Backend::MSNPU,
    "zeros(IntArrayRef size, TensorOptions options) -> Tensor", &zeros_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "add(Tensor self, Tensor other, Scalar alpha) -> Tensor", &add_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "sum(Tensor self) -> Tensor", &sum_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "expand(Tensor self, IntArrayRef size, bool implicit) -> Tensor",
    &expand_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "kl_div(Tensor self, Tensor target, int64_t reduction) -> Tensor",
    &kl_div_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "kl_div_backward(Tensor grad_output, Tensor self, Tensor target, int64_t reduction) -> Tensor",
    &kl_div_backward_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "numel(Tensor self) -> int64_t", &numel_override);
  register_extension_backend_op(
    Backend::MSNPU,
    "ones_like(Tensor self, TensorOptions options) -> Tensor",
    &ones_like_override);
}
93 
94 // TODO: Extend this to exercise multi-device setting. In that case,
95 // we need to add a thread local variable to track the current device.
97  static constexpr DeviceType static_type = DeviceType::MSNPU;
98  MSNPUGuardImpl() {}
99  MSNPUGuardImpl(DeviceType t) {
100  AT_ASSERT(t == DeviceType::MSNPU);
101  }
102  DeviceType type() const override {
103  return DeviceType::MSNPU;
104  }
105  Device exchangeDevice(Device d) const override {
106  AT_ASSERT(d.type() == DeviceType::MSNPU);
107  AT_ASSERT(d.index() == 0);
108  return d;
109  }
110  Device getDevice() const override {
111  return Device(DeviceType::MSNPU, 0);
112  }
113  void setDevice(Device d) const override {
114  AT_ASSERT(d.type() == DeviceType::MSNPU);
115  AT_ASSERT(d.index() == 0);
116  }
117  void uncheckedSetDevice(Device d) const noexcept override {
118  }
119  Stream getStream(Device d) const noexcept override {
120  return Stream(Stream::DEFAULT, Device(DeviceType::MSNPU, 0));
121  }
122  Stream exchangeStream(Stream s) const noexcept override {
123  return Stream(Stream::DEFAULT, Device(DeviceType::MSNPU, 0));
124  }
125  DeviceIndex deviceCount() const override {
126  return 1;
127  }
128 };
129 
// Out-of-class definition for the static member (required for ODR-use
// under pre-C++17 rules).
constexpr DeviceType MSNPUGuardImpl::static_type;
// Hook the guard implementation into c10's per-device-type registry.
C10_REGISTER_GUARD_IMPL(MSNPU, MSNPUGuardImpl);
132 
133 int get_test_int() {
134  return test_int;
135 }
136 
// Python bindings: expose the backend-registration entry point and the
// test probe used by the Python-side unit tests.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("init_msnpu_extension", &init_msnpu_extension);
  m.def("get_test_int", &get_test_int);
}
DeviceIndex deviceCount() const override
Get the number of devices.
A stream is a software mechanism used to synchronize launched kernels without requiring explicit sync...
Definition: Stream.h:57
Scalar represents a 0-dimensional tensor which contains a single element.
Definition: Scalar.h:22
Device getDevice() const override
Get the current device.
caffe2::TypeMeta dtype() const noexcept
Returns a Tensor's dtype (TypeMeta). Defined in TensorMethods.h.
Represents a compute device on which a tensor is located.
Definition: Device.h:30
int16_t DeviceIndex
An index representing a specific device; e.g., the 1 in GPU 1.
Definition: Device.h:18
C10_NODISCARD TensorOptions dtype(c10::optional< caffe2::TypeMeta > dtype) const noexcept
Return a copy of TensorOptions with dtype set to the given one.
DeviceType type() const override
Return the type of device managed by this guard implementation.
Flush-To-Zero and Denormals-Are-Zero mode.
TypeMeta is a thin class that allows us to store the type of a container such as a blob...
Definition: typeid.h:324
DeviceIndex index() const noexcept
Returns the optional index.
Definition: Device.h:70
DeviceGuardImplInterface represents the virtual interface which provides functionality to provide an ...
DeviceType type() const noexcept
Returns the type of device this is.
Definition: Device.h:65