class Stream(torch._C._CudaStreamBase):
    r"""Wrapper around a CUDA stream.

    A CUDA stream is a linear sequence of execution that belongs to a specific
    device, independent from other streams. See :ref:`cuda-semantics` for
    details.

    Arguments:
        device(torch.device or int, optional): a device on which to allocate
            the stream. If :attr:`device` is ``None`` (default) or a negative
            integer, this will use the current device.
        priority(int, optional): priority of the stream. Lower numbers
            represent higher priorities.
    """

    def __new__(cls, device=None, priority=0, **kwargs):
        # Allocate the stream on the requested device; ``None`` / negative
        # values fall through to the current device (see class docstring).
        with torch.cuda.device(device):
            return super(Stream, cls).__new__(cls, priority=priority, **kwargs)

    def wait_event(self, event):
        r"""Makes all future work submitted to the stream wait for an event.

        Arguments:
            event (Event): an event to wait for.

        .. note:: This is a wrapper around ``cudaStreamWaitEvent()``: see `CUDA
           documentation`_ for more info.

           This function returns without waiting for :attr:`event`: only future
           operations are affected.

        .. _CUDA documentation:
           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        super(Stream, self).wait_event(event)

    def wait_stream(self, stream):
        r"""Synchronizes with another stream.

        All future work submitted to this stream will wait until all kernels
        submitted to a given stream at the time of call complete.

        Arguments:
            stream (Stream): a stream to synchronize.

        .. note:: This function returns without waiting for currently enqueued
           kernels in :attr:`stream`: only future operations are affected.
        """
        # Record an event on the other stream and make this stream wait on it.
        self.wait_event(stream.record_event())

    def record_event(self, event=None):
        r"""Records an event.

        Arguments:
            event (Event, optional): event to record. If not given, a new one
                will be allocated.

        Returns:
            Recorded event.
        """
        if event is None:
            event = Event()
        super(Stream, self).record_event(event)
        return event

    def query(self):
        r"""Checks if all the work submitted has been completed.

        Returns:
            A boolean indicating if all kernels in this stream are completed."""
        return super(Stream, self).query()

    def synchronize(self):
        r"""Wait for all the kernels in this stream to complete.

        .. note:: This is a wrapper around ``cudaStreamSynchronize()``: see
           `CUDA documentation`_ for more info.

        .. _CUDA documentation:
           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        super(Stream, self).synchronize()

    @property
    def _as_parameter_(self):
        # ctypes integration: expose the raw CUDA stream pointer.
        return ctypes.c_void_p(self.cuda_stream)

    def __eq__(self, o):
        if isinstance(o, Stream):
            return super(Stream, self).__eq__(o)
        return False

    def __hash__(self):
        # Hash on the same identity __eq__ compares (raw stream + device).
        return hash((self.cuda_stream, self.device))

    def __repr__(self):
        return (
            '<torch.cuda.Stream device={0} cuda_stream={1:#x}>'
            .format(self.device, self.cuda_stream))
class Event(torch._C._CudaEventBase):
    r"""Wrapper around a CUDA event.

    CUDA events are synchronization markers that can be used to monitor the
    device's progress, to accurately measure timing, and to synchronize CUDA
    streams.

    The underlying CUDA events are lazily initialized when the event is first
    recorded or exported to another process. After creation, only streams on the
    same device may record the event. However, streams on any device can wait on
    the event.

    Arguments:
        enable_timing (bool, optional): indicates if the event should measure time
            (default: ``False``)
        blocking (bool, optional): if ``True``, :meth:`wait` will be blocking (default: ``False``)
        interprocess (bool): if ``True``, the event can be shared between processes
            (default: ``False``)

    .. _CUDA documentation:
       https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
    """

    def __new__(cls, enable_timing=False, blocking=False, interprocess=False):
        return super(Event, cls).__new__(
            cls,
            enable_timing=enable_timing, blocking=blocking, interprocess=interprocess)

    @classmethod
    def from_ipc_handle(cls, device, handle):
        r"""Reconstruct an event from an IPC handle on the given device."""
        return super(Event, cls).from_ipc_handle(device, handle)

    def record(self, stream=None):
        r"""Records the event in a given stream.

        Uses ``torch.cuda.current_stream()`` if no stream is specified. The
        stream's device must match the event's device."""
        if stream is None:
            stream = torch.cuda.current_stream()
        super(Event, self).record(stream)

    def wait(self, stream=None):
        r"""Makes all future work submitted to the given stream wait for this
        event.

        Use ``torch.cuda.current_stream()`` if no stream is specified."""
        if stream is None:
            stream = torch.cuda.current_stream()
        super(Event, self).wait(stream)

    def query(self):
        r"""Checks if all work currently captured by event has completed.

        Returns:
            A boolean indicating if all work currently captured by event has
            completed.
        """
        return super(Event, self).query()

    def elapsed_time(self, end_event):
        r"""Returns the time elapsed in milliseconds after the event was
        recorded and before the end_event was recorded.
        """
        return super(Event, self).elapsed_time(end_event)

    def synchronize(self):
        r"""Waits for the event to complete.

        Waits until the completion of all work currently captured in this event.
        This prevents the CPU thread from proceeding until the event completes.

        .. note:: This is a wrapper around ``cudaEventSynchronize()``: see `CUDA
           documentation`_ for more info.

        .. _CUDA documentation:
           https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
        """
        super(Event, self).synchronize()

    def ipc_handle(self):
        r"""Returns an IPC handle of this event. If not recorded yet, the event
        will use the current device. """
        return super(Event, self).ipc_handle()

    @property
    def _as_parameter_(self):
        # ctypes integration: expose the raw CUDA event pointer.
        return ctypes.c_void_p(self.cuda_event)

    def __repr__(self):
        # cuda_event is 0 (falsy) until the lazily-created event exists.
        if self.cuda_event:
            return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
        else:
            return '<torch.cuda.Event uninitialized>'
def wait_event(self, event)
def current_stream(device=None)