Caffe2 - C++ API
A deep learning, cross-platform ML framework
Stream.cpp
1 #include <torch/csrc/cuda/Stream.h>
2 #include <torch/csrc/cuda/Module.h>
3 #include <torch/csrc/Device.h>
4 #include <torch/csrc/THP.h>
5 
6 #include <c10/cuda/CUDAGuard.h>
7 
8 #include <structmember.h>
9 #include <cuda_runtime_api.h>
10 
11 PyObject *THCPStreamClass = nullptr;
12 
13 static PyObject * THCPStream_pynew(
14  PyTypeObject *type, PyObject *args, PyObject *kwargs) {
15  HANDLE_TH_ERRORS
16 
17  int current_device;
18  THCudaCheck(cudaGetDevice(&current_device));
19 
20  int priority = 0;
21  uint64_t cdata = 0;
22 
23  static char *kwlist[] = {"priority", "_cdata", nullptr};
24  if (!PyArg_ParseTupleAndKeywords(
25  args, kwargs, "|iK", kwlist, &priority, &cdata)) {
26  return nullptr;
27  }
28 
29  THPObjectPtr ptr(type->tp_alloc(type, 0));
30  if (!ptr) {
31  return nullptr;
32  }
33 
34  at::cuda::CUDAStream stream =
35  cdata ?
36  at::cuda::CUDAStream::unpack(cdata) :
37  at::cuda::getStreamFromPool(
38  /* isHighPriority */ priority < 0 ? true : false);
39 
40  THCPStream* self = (THCPStream *)ptr.get();
41  self->cdata = stream.pack();
42  new (&self->cuda_stream) at::cuda::CUDAStream(stream);
43 
44  return (PyObject *)ptr.release();
45  END_HANDLE_TH_ERRORS
46 }
47 
48 static void THCPStream_dealloc(THCPStream *self) {
49  self->cuda_stream.~CUDAStream();
50  Py_TYPE(self)->tp_free((PyObject*)self);
51 }
52 
53 static PyObject * THCPStream_get_device(THCPStream *self) {
54  HANDLE_TH_ERRORS
55  return THPDevice_New(self->cuda_stream.device());
56  END_HANDLE_TH_ERRORS
57 }
58 
59 static PyObject * THCPStream_get_cuda_stream(THCPStream *self) {
60  HANDLE_TH_ERRORS
61  return PyLong_FromVoidPtr(self->cuda_stream.stream());
62  END_HANDLE_TH_ERRORS
63 }
64 
65 static PyObject * THCPStream_get_priority(THCPStream *self) {
66  HANDLE_TH_ERRORS
67  return PyLong_FromLong(self->cuda_stream.priority());
68  END_HANDLE_TH_ERRORS
69 }
70 
71 static PyObject * THCPStream_priority_range() {
72  HANDLE_TH_ERRORS
73  int least_priority, greatest_priority;
74  std::tie(least_priority, greatest_priority) =
75  at::cuda::CUDAStream::priority_range();
76  return Py_BuildValue("(ii)", least_priority, greatest_priority);
77  END_HANDLE_TH_ERRORS
78 }
79 
80 static PyObject * THCPStream_query(THCPStream *self) {
81  HANDLE_TH_ERRORS
82  return PyBool_FromLong(self->cuda_stream.query());
83  END_HANDLE_TH_ERRORS
84 }
85 
86 static PyObject * THCPStream_synchronize(THCPStream *self) {
87  HANDLE_TH_ERRORS
88  with_no_gil([&] { self->cuda_stream.synchronize(); });
89  Py_RETURN_NONE;
90  END_HANDLE_TH_ERRORS
91 }
92 
93 static PyObject * THCPStream_eq(THCPStream *self, THCPStream *other) {
94  HANDLE_TH_ERRORS
95  return PyBool_FromLong(self->cuda_stream == other->cuda_stream);
96  END_HANDLE_TH_ERRORS
97 }
98 
99 static struct PyMemberDef THCPStream_members[] = {
100  {(char*)"_cdata",
101  T_ULONGLONG, offsetof(THCPStream, cdata), READONLY, nullptr},
102  {nullptr}
103 };
104 
105 static struct PyGetSetDef THCPStream_properties[] = {
106  {"device", (getter)THCPStream_get_device, nullptr, nullptr, nullptr},
107  {"cuda_stream",
108  (getter)THCPStream_get_cuda_stream, nullptr, nullptr, nullptr},
109  {"priority", (getter)THCPStream_get_priority, nullptr, nullptr, nullptr},
110  {nullptr}
111 };
112 
113 static PyMethodDef THCPStream_methods[] = {
114  {(char*)"query", (PyCFunction)THCPStream_query, METH_NOARGS, nullptr},
115  {(char*)"synchronize",
116  (PyCFunction)THCPStream_synchronize, METH_NOARGS, nullptr},
117  {(char*)"priority_range",
118  (PyCFunction)THCPStream_priority_range, METH_STATIC | METH_NOARGS, nullptr},
119  {(char*)"__eq__", (PyCFunction)THCPStream_eq, METH_O, nullptr},
120  {nullptr}
121 };
122 
123 PyTypeObject THCPStreamType = {
124  PyVarObject_HEAD_INIT(nullptr, 0)
125  "torch._C._CudaStreamBase", /* tp_name */
126  sizeof(THCPStream), /* tp_basicsize */
127  0, /* tp_itemsize */
128  (destructor)THCPStream_dealloc, /* tp_dealloc */
129  0, /* tp_print */
130  0, /* tp_getattr */
131  0, /* tp_setattr */
132  0, /* tp_reserved */
133  0, /* tp_repr */
134  0, /* tp_as_number */
135  0, /* tp_as_sequence */
136  0, /* tp_as_mapping */
137  0, /* tp_hash */
138  0, /* tp_call */
139  0, /* tp_str */
140  0, /* tp_getattro */
141  0, /* tp_setattro */
142  0, /* tp_as_buffer */
143  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
144  nullptr, /* tp_doc */
145  0, /* tp_traverse */
146  0, /* tp_clear */
147  0, /* tp_richcompare */
148  0, /* tp_weaklistoffset */
149  0, /* tp_iter */
150  0, /* tp_iternext */
151  THCPStream_methods, /* tp_methods */
152  THCPStream_members, /* tp_members */
153  THCPStream_properties, /* tp_getset */
154  0, /* tp_base */
155  0, /* tp_dict */
156  0, /* tp_descr_get */
157  0, /* tp_descr_set */
158  0, /* tp_dictoffset */
159  0, /* tp_init */
160  0, /* tp_alloc */
161  THCPStream_pynew, /* tp_new */
162 };
163 
164 
165 void THCPStream_init(PyObject *module)
166 {
167  THCPStreamClass = (PyObject*)&THCPStreamType;
168  if (PyType_Ready(&THCPStreamType) < 0) {
169  throw python_error();
170  }
171  Py_INCREF(&THCPStreamType);
172  if (PyModule_AddObject(
173  module, "_CudaStreamBase", (PyObject *)&THCPStreamType) < 0) {
174  throw python_error();
175  }
176 }
uint64_t pack() const noexcept
Reversibly pack a CUDAStream into a uint64_t representation.
Definition: CUDAStream.h:147
Flush-To-Zero and Denormals-Are-Zero mode.