4 #include <torch/csrc/WindowsTorchApiMacro.h> 5 #include <torch/csrc/jit/fuser/fused_kernel.h> 8 #include <cuda_runtime.h> 27 std::vector<TensorDesc> input_desc,
28 std::vector<TensorDesc> output_desc,
29 std::vector<PartitionDesc> chunk_desc,
30 std::vector<PartitionDesc> concat_desc,
// Destructor. It is marked `override`, so it must override a virtual
// destructor declared in a base class (the base class is not visible in
// this chunk).
33 ~FusedKernelCUDA()
override;
35 void launch_raw(
const uint32_t numel, std::vector<void*>& arguments)
39 return at::Backend::CUDA;
// Compile-time constant fixed at 128; `auto` deduces `int` from the literal.
// NOTE(review): presumably the number of threads per CUDA block used when the
// fused kernel is launched — confirm against the launch_raw definition.
43 static constexpr
auto kBlockSize = 128;
// Pointer to a CUDA runtime device-properties struct.
// NOTE(review): ownership and lifetime of the pointee are not visible in this
// chunk — confirm where it is assigned before relying on it.
49 cudaDeviceProp* prop_;
// Raw byte buffer.
// NOTE(review): the name suggests it holds compiled PTX for this kernel —
// confirm where it is filled.
50 std::vector<char> ptx_;
Backend
This legacy enum class defines the set of backends supported by old-school, code-generated Type-based...