Caffe2 - C++ API
A deep learning, cross-platform ML framework
python_comm.cpp
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/cuda/comm.h>
#include <torch/csrc/cuda/Stream.h>
#include <torch/csrc/cuda/THCP.h>
#include <torch/csrc/utils/auto_gil.h>
#include <ATen/core/functional.h>

#include <ATen/ATen.h>

#include <THC/THC.h>

#include <cstddef>
#include <vector>

namespace torch { namespace cuda { namespace python {

// Registers the CUDA communication primitives (_broadcast_coalesced,
// _broadcast, _scatter, _gather) as methods on the given Python module.
void initCommMethods(PyObject* module) {
  auto m = py::cast<py::module>(module);
  m.def(
       "_broadcast_coalesced",
       [](std::vector<at::Tensor>& tensors,
          std::vector<int64_t> devices,
          size_t buffer_size) {
         return broadcast_coalesced(tensors, devices, buffer_size);
       },
       py::arg("tensors"),
       py::arg("devices"),
       py::arg("buffer_size"),
       py::call_guard<py::gil_scoped_release>())
      .def(
          "_broadcast",
          [](at::Tensor& tensor, std::vector<int64_t> devices) {
            return broadcast(tensor, devices);
          },
          py::call_guard<py::gil_scoped_release>())
      .def(
          // _scatter cannot use py::call_guard: it must hold the GIL while
          // converting the Python stream sequence, then release it manually.
          "_scatter",
          [](at::Tensor& tensor,
             std::vector<int64_t>& devices,
             c10::optional<std::vector<int64_t>> chunk_sizes,
             int64_t dim,
             c10::optional<py::object> py_streams) {
            c10::optional<std::vector<c10::optional<at::cuda::CUDAStream>>>
                streams;
            if (py_streams) {
              py::handle handle = *py_streams;
              streams = THPUtils_PySequence_to_CUDAStreamList(handle.ptr());
            }
            // Note: We're holding the GIL up to here.
            AutoNoGIL no_gil;
            return scatter(tensor, devices, chunk_sizes, dim, streams);
          },
          py::arg("tensor"),
          py::arg("devices"),
          py::arg("chunk_sizes"),
          py::arg("dim"),
          py::arg("streams"))
      .def(
          "_gather",
          [](std::vector<at::Tensor>& tensors,
             int64_t dim,
             c10::optional<int32_t> destination_index) {
            return gather(tensors, dim, destination_index);
          },
          py::arg("tensors"),
          py::arg("dim"),
          py::arg("destination_index"),
          py::call_guard<py::gil_scoped_release>());
}

}}} // namespace torch::cuda::python
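For context, a minimal sketch of how initCommMethods might be wired into a pybind11 module-initialization routine; in PyTorch itself it is called while the torch._C extension module is being initialized, so the functions surface there as torch._C._broadcast, torch._C._scatter, and so on. The module name my_ext and the header path below are illustrative assumptions, not part of this file.

// Illustrative sketch only: registering the comm bindings on a
// hypothetical standalone extension module.
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/cuda/python_comm.h>  // assumed header declaring initCommMethods

PYBIND11_MODULE(my_ext, m) {
  // Adds _broadcast_coalesced, _broadcast, _scatter and _gather
  // as module-level functions on my_ext.
  torch::cuda::python::initCommMethods(m.ptr());
}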