// Header includes, followed by the global THCPStreamClass pointer. It starts
// out null and is assigned the address of THCPStreamType in THCPStream_init.
// NOTE(review): the bare numbers in this text are line numbers carried over
// from the listing it was extracted from; they are not part of the program.
1 #include <torch/csrc/cuda/Stream.h> 2 #include <torch/csrc/cuda/Module.h> 3 #include <torch/csrc/Device.h> 4 #include <torch/csrc/THP.h> 6 #include <c10/cuda/CUDAGuard.h> 8 #include <structmember.h> 9 #include <cuda_runtime_api.h> 11 PyObject *THCPStreamClass =
nullptr;
// Constructor for the stream type; it has the (type, args, kwargs) signature
// of a CPython `newfunc`. It accepts two optional keyword arguments,
// `priority` and `_cdata`, selects a CUDAStream, stores the stream's packed
// form in `self->cdata`, and returns the newly created object.
// NOTE(review): the embedded line numbers skip values, so the local
// declarations, the error paths and the closing brace of this function are
// not visible in this listing.
13 static PyObject * THCPStream_pynew(
14 PyTypeObject *type, PyObject *args, PyObject *kwargs) {
// NOTE(review): `¤t_device` looks like a mis-encoded `&current_device`
// (an `&curren` HTML entity swallowed the first characters) — confirm against
// the upstream file before compiling.
18 THCudaCheck(cudaGetDevice(¤t_device));
// Keyword names accepted by the parser below, nullptr-terminated.
23 static char *kwlist[] = {
"priority",
"_cdata",
nullptr};
// "|iK": everything after `|` is optional; `i` fills an int (priority) and
// `K` fills an unsigned long long (cdata).
24 if (!PyArg_ParseTupleAndKeywords(
25 args, kwargs,
"|iK", kwlist, &priority, &cdata)) {
// The stream is either rebuilt from `cdata` with unpack() or taken from the
// stream pool; the condition choosing between the two is on a line that is
// not shown. A negative priority passes `true` to getStreamFromPool —
// presumably a high-priority request; confirm against its signature.
36 at::cuda::CUDAStream::unpack(cdata) :
37 at::cuda::getStreamFromPool(
38 priority < 0 ? true : false);
// Keep the packed representation on the Python object.
41 self->cdata = stream.
pack();
// Give up ownership held by `ptr` and hand the object to the caller.
44 return (PyObject *)ptr.release();
// Deallocator for the stream object (installed via the (destructor) cast in
// THCPStreamType). The embedded CUDAStream member is destroyed by an explicit
// destructor call, then the object's storage is released through the tp_free
// slot of its type. The explicit call is needed because tp_free only frees
// memory and does not run C++ destructors.
48 static void THCPStream_dealloc(
THCPStream *
self) {
49 self->cuda_stream.~CUDAStream();
50 Py_TYPE(
self)->tp_free((PyObject*)
self);
// Getter behind the "device" property: passes the device reported by the
// wrapped CUDAStream to THPDevice_New and returns the resulting object.
// NOTE(review): lines between the signature and the return are not shown.
53 static PyObject * THCPStream_get_device(
THCPStream *
self) {
55 return THPDevice_New(self->cuda_stream.device());
// Property getter: converts the value of cuda_stream.stream() into a Python
// integer with PyLong_FromVoidPtr, i.e. the handle is exposed as a
// pointer-sized number. The Python-visible property name is on a line of the
// property table that is not shown in this listing.
59 static PyObject * THCPStream_get_cuda_stream(
THCPStream *
self) {
61 return PyLong_FromVoidPtr(self->cuda_stream.stream());
// Getter behind the "priority" property: returns cuda_stream.priority() as a
// Python integer.
65 static PyObject * THCPStream_get_priority(
THCPStream *
self) {
67 return PyLong_FromLong(self->cuda_stream.priority());
// Static method "priority_range": queries CUDAStream::priority_range() and
// returns the pair to Python as a 2-tuple of ints, in the order
// (least_priority, greatest_priority).
71 static PyObject * THCPStream_priority_range() {
73 int least_priority, greatest_priority;
// std::tie unpacks the returned pair into the two locals.
74 std::tie(least_priority, greatest_priority) =
75 at::cuda::CUDAStream::priority_range();
// "(ii)" builds a Python tuple holding two C ints.
76 return Py_BuildValue(
"(ii)", least_priority, greatest_priority);
// Method "query": returns the result of cuda_stream.query() as a Python bool.
// Presumably True means all work submitted to the stream has completed —
// confirm against CUDAStream::query.
80 static PyObject * THCPStream_query(
THCPStream *
self) {
82 return PyBool_FromLong(self->cuda_stream.query());
// Method "synchronize": calls cuda_stream.synchronize() from inside the
// with_no_gil helper. Presumably the helper releases the Python GIL while the
// lambda runs so other Python threads can proceed during the wait — confirm
// against with_no_gil's definition. The return statement is not visible in
// this listing.
86 static PyObject * THCPStream_synchronize(
THCPStream *
self) {
88 with_no_gil([&] {
self->cuda_stream.synchronize(); });
// Fragment: the enclosing function header is not visible in this listing
// (presumably THCPStream_eq, which the method table registers as "__eq__").
// Compares the wrapped CUDAStream of `self` with that of `other` using
// operator== and returns the outcome as a Python bool.
95 return PyBool_FromLong(self->cuda_stream == other->cuda_stream);
// Member table for the stream type. The visible entry exposes the `cdata`
// field of THCPStream as a read-only unsigned long long attribute; the
// attribute's Python name and the table's terminating entry are on lines not
// shown in this listing.
99 static struct PyMemberDef THCPStream_members[] = {
101 T_ULONGLONG, offsetof(
THCPStream, cdata), READONLY,
nullptr},
// Property (getset) table for the stream type. Every visible entry supplies
// only a getter; the remaining three fields are nullptr, so no setter is
// installed for them.
// NOTE(review): the opening of the second entry (its name) and the table's
// terminating entry are on lines not shown in this listing.
105 static struct PyGetSetDef THCPStream_properties[] = {
106 {
"device", (getter)THCPStream_get_device,
nullptr,
nullptr,
nullptr},
108 (getter)THCPStream_get_cuda_stream,
nullptr,
nullptr,
nullptr},
109 {
"priority", (getter)THCPStream_get_priority,
nullptr,
nullptr,
nullptr},
// Method table for the stream type:
//   query          - no arguments
//   synchronize    - no arguments
//   priority_range - static method, no arguments
//   __eq__         - exactly one argument (METH_O); in this table it is
//                    registered as an ordinary method
// All docstring slots are nullptr. The terminating entry is on a line not
// shown in this listing.
113 static PyMethodDef THCPStream_methods[] = {
114 {(
char*)
"query", (PyCFunction)THCPStream_query, METH_NOARGS,
nullptr},
115 {(
char*)
"synchronize",
116 (PyCFunction)THCPStream_synchronize, METH_NOARGS,
nullptr},
117 {(
char*)
"priority_range",
118 (PyCFunction)THCPStream_priority_range, METH_STATIC | METH_NOARGS,
nullptr},
119 {(
char*)
"__eq__", (PyCFunction)THCPStream_eq, METH_O,
nullptr},
// Static type object for the stream class. Only some initializer slots are
// visible in this listing: the qualified type name, the deallocator, the type
// flags (Py_TPFLAGS_BASETYPE allows the type to be subclassed) and the
// property table. The other slots are on lines not shown.
123 PyTypeObject THCPStreamType = {
124 PyVarObject_HEAD_INIT(
nullptr, 0)
125 "torch._C._CudaStreamBase",
128 (destructor)THCPStream_dealloc,
143 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
153 THCPStream_properties,
// Registers the stream type with the given extension module: publishes the
// type through the THCPStreamClass global, finalizes it with PyType_Ready,
// and adds it to `module` under the name "_CudaStreamBase".
// NOTE(review): the bodies of both failure branches and the closing braces
// are on lines not shown in this listing.
165 void THCPStream_init(PyObject *module)
167 THCPStreamClass = (PyObject*)&THCPStreamType;
168 if (PyType_Ready(&THCPStreamType) < 0) {
// PyModule_AddObject steals a reference on success, so take one first.
171 Py_INCREF(&THCPStreamType);
172 if (PyModule_AddObject(
173 module,
"_CudaStreamBase", (PyObject *)&THCPStreamType) < 0) {
uint64_t pack() const noexcept
Reversibly pack a CUDAStream into a uint64_t representation.
Flush-To-Zero and Denormals-Are-Zero mode.