Caffe2 - C++ API
A deep learning, cross-platform ML framework
fused_kernel.h
#pragma once

#include <ATen/ATen.h>
#include <torch/csrc/WindowsTorchApiMacro.h>
#include <torch/csrc/jit/fuser/fused_kernel.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>

#include <cstdint>
#include <string>
#include <vector>

namespace torch {
namespace jit {
namespace fuser {
namespace cuda {

// A class holding metadata for an actual CUDA function.
// Note: CUDA functions are per device.
struct TORCH_API FusedKernelCUDA : public ::torch::jit::fuser::FusedKernel {
  FusedKernelCUDA(
      int16_t device,
      std::string name,
      std::string code,
      std::vector<TensorDesc> input_desc,
      std::vector<TensorDesc> output_desc,
      std::vector<PartitionDesc> chunk_desc,
      std::vector<PartitionDesc> concat_desc,
      bool has_random);

  ~FusedKernelCUDA() override;

  void launch_raw(const uint32_t numel, std::vector<void*>& arguments)
      const override;

  at::Backend backend() const override {
    return at::Backend::CUDA;
  }

 private:
  static constexpr auto kBlockSize = 128;

  // Note: per device to store device properties and compute launch heuristics
  // Acquiring these values at launch time would be too slow
  int16_t device_;
  int maxBlocks_;
  cudaDeviceProp* prop_;
  std::vector<char> ptx_;
  CUmodule module_;
  CUfunction function_;
};

} // namespace cuda
} // namespace fuser
} // namespace jit
} // namespace torch
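
The includes (nvrtc.h, cuda.h) and the private members (ptx_, module_, function_, prop_, maxBlocks_) outline the usual NVRTC compile-and-launch pattern: compile generated CUDA source to PTX once per device, load it into a driver module, resolve the kernel, and reuse cached device properties to size the grid at launch time. The sketch below walks through that pattern with plain NVRTC and CUDA driver calls. It is not the FusedKernelCUDA implementation; the scale kernel, the file name, and the block-count heuristic are illustrative assumptions, while the NVRTC and driver API calls themselves are real.

// fused_kernel_sketch.cpp -- illustrative only; not the PyTorch implementation.
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>

#include <vector>

// A toy kernel (hypothetical) standing in for generated fusion code.
// The grid-stride loop lets a capped block count still cover every element.
static const char* kSource = R"(
extern "C" __global__ void scale(float* data, float factor, unsigned int numel) {
  for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < numel;
       i += gridDim.x * blockDim.x) {
    data[i] *= factor;
  }
})";

int main() {
  constexpr unsigned int kBlockSize = 128;  // same constant as in the header
  constexpr unsigned int numel = 1u << 20;

  // Compile CUDA C++ to PTX with NVRTC (error checking omitted for brevity);
  // the header caches the result in ptx_.
  nvrtcProgram prog;
  nvrtcCreateProgram(&prog, kSource, "scale.cu", 0, nullptr, nullptr);
  nvrtcCompileProgram(prog, 0, nullptr);
  size_t ptxSize = 0;
  nvrtcGetPTXSize(prog, &ptxSize);
  std::vector<char> ptx(ptxSize);
  nvrtcGetPTX(prog, ptx.data());
  nvrtcDestroyProgram(&prog);

  // Load the PTX and resolve the kernel with the driver API
  // (the module_ and function_ members).
  cuInit(0);
  CUdevice dev;
  CUcontext ctx;
  cuDeviceGet(&dev, 0);
  cuCtxCreate(&ctx, 0, dev);
  CUmodule module;
  CUfunction function;
  cuModuleLoadData(&module, ptx.data());
  cuModuleGetFunction(&function, module, "scale");

  // Query device properties once up front (prop_) and derive an illustrative
  // cap on the block count (maxBlocks_); the exact heuristic is an assumption.
  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, 0);
  const unsigned int maxBlocks =
      prop.multiProcessorCount * (prop.maxThreadsPerMultiProcessor / kBlockSize);

  // Launch: one thread per element, block count capped by the heuristic.
  CUdeviceptr data;
  cuMemAlloc(&data, numel * sizeof(float));
  float factor = 2.0f;
  unsigned int n = numel;
  void* args[] = {&data, &factor, &n};
  unsigned int blocks = (numel + kBlockSize - 1) / kBlockSize;
  if (blocks > maxBlocks) blocks = maxBlocks;
  cuLaunchKernel(function, blocks, 1, 1, kBlockSize, 1, 1, 0, nullptr, args, nullptr);
  cuCtxSynchronize();

  cuMemFree(data);
  cuModuleUnload(module);
  cuCtxDestroy(ctx);
  return 0;
}

Doing the expensive steps once (compilation, module loading, device-property queries) mirrors the header's note that device properties are cached because acquiring them at launch time would be too slow; launch_raw() then only needs to compute a grid size and invoke the cached function_.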