Caffe2 - Python API
A deep learning, cross platform ML framework
torch/cuda/streams.py — Python wrappers around CUDA streams and events
1 import ctypes
2 import torch
3 
4 
class Stream(torch._C._CudaStreamBase):
    r"""Wrapper around a CUDA stream.

    A CUDA stream is a linear sequence of execution that belongs to a specific
    device, independent from other streams. See :ref:`cuda-semantics` for
    details.

    Arguments:
        device(torch.device or int, optional): a device on which to allocate
            the stream. If :attr:`device` is ``None`` (default) or a negative
            integer, this will use the current device.
        priority(int, optional): priority of the stream. Lower numbers
            represent higher priorities.
    """

    def __new__(cls, device=None, priority=0, **kwargs):
        # The C-level constructor allocates the stream on the *current*
        # device, so temporarily switch to the requested device (a no-op when
        # ``device`` is None or negative).
        with torch.cuda.device(device):
            return super(Stream, cls).__new__(cls, priority=priority, **kwargs)

    def wait_event(self, event):
        r"""Makes all future work submitted to the stream wait for an event.

        Arguments:
            event (Event): an event to wait for.

        .. note:: This is a wrapper around ``cudaStreamWaitEvent()``: see
           `CUDA documentation`_ for more info.

           This function returns without waiting for :attr:`event`: only
           future operations are affected.

        .. _CUDA documentation:
           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        event.wait(self)

    def wait_stream(self, stream):
        r"""Synchronizes with another stream.

        All future work submitted to this stream will wait until all kernels
        submitted to a given stream at the time of call complete.

        Arguments:
            stream (Stream): a stream to synchronize.

        .. note:: This function returns without waiting for currently enqueued
           kernels in :attr:`stream`: only future operations are affected.
        """
        # Record a marker event on the other stream and make this stream wait
        # for it; neither call blocks the host.
        self.wait_event(stream.record_event())

    def record_event(self, event=None):
        r"""Records an event.

        Arguments:
            event (Event, optional): event to record. If not given, a new one
                will be allocated.

        Returns:
            Recorded event.
        """
        if event is None:
            event = Event()
        event.record(self)
        return event

    def query(self):
        r"""Checks if all the work submitted has been completed.

        Returns:
            A boolean indicating if all kernels in this stream are completed.
        """
        return super(Stream, self).query()

    def synchronize(self):
        r"""Wait for all the kernels in this stream to complete.

        .. note:: This is a wrapper around ``cudaStreamSynchronize()``: see
           `CUDA documentation`_ for more info.

        .. _CUDA documentation:
           http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        super(Stream, self).synchronize()

    @property
    def _as_parameter_(self):
        # Lets a Stream be passed directly to ctypes foreign functions as the
        # raw cudaStream_t pointer.
        return ctypes.c_void_p(self.cuda_stream)

    def __eq__(self, o):
        if isinstance(o, Stream):
            return super(Stream, self).__eq__(o)
        # Return NotImplemented (rather than False) so Python can try the
        # reflected comparison on the other operand, per the data-model
        # contract; the interpreter's identity fallback still makes
        # ``stream == non_stream`` evaluate to False.
        return NotImplemented

    def __hash__(self):
        # Consistent with __eq__: equal streams share (cuda_stream, device).
        return hash((self.cuda_stream, self.device))

    def __repr__(self):
        return ('<torch.cuda.Stream device={0} cuda_stream={1:#x}>'
                .format(self.device, self.cuda_stream))
103 
104 
class Event(torch._C._CudaEventBase):
    r"""Wrapper around a CUDA event.

    CUDA events are synchronization markers that can be used to monitor the
    device's progress, to accurately measure timing, and to synchronize CUDA
    streams.

    The underlying CUDA events are lazily initialized when the event is first
    recorded or exported to another process. After creation, only streams on
    the same device may record the event. However, streams on any device can
    wait on the event.

    Arguments:
        enable_timing (bool, optional): indicates if the event should measure
            time (default: ``False``)
        blocking (bool, optional): if ``True``, :meth:`wait` will be blocking
            (default: ``False``)
        interprocess (bool): if ``True``, the event can be shared between
            processes (default: ``False``)

    .. _CUDA documentation:
       https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
    """

    def __new__(cls, enable_timing=False, blocking=False, interprocess=False):
        return super().__new__(
            cls,
            enable_timing=enable_timing,
            blocking=blocking,
            interprocess=interprocess)

    @classmethod
    def from_ipc_handle(cls, device, handle):
        r"""Reconstruct an event from an IPC handle on the given device."""
        return super().from_ipc_handle(device, handle)

    def record(self, stream=None):
        r"""Records the event in a given stream.

        Uses ``torch.cuda.current_stream()`` if no stream is specified. The
        stream's device must match the event's device."""
        target = torch.cuda.current_stream() if stream is None else stream
        super().record(target)

    def wait(self, stream=None):
        r"""Makes all future work submitted to the given stream wait for this
        event.

        Use ``torch.cuda.current_stream()`` if no stream is specified."""
        target = torch.cuda.current_stream() if stream is None else stream
        super().wait(target)

    def query(self):
        r"""Checks if all work currently captured by event has completed.

        Returns:
            A boolean indicating if all work currently captured by event has
            completed.
        """
        return super().query()

    def elapsed_time(self, end_event):
        r"""Returns the time elapsed in milliseconds after the event was
        recorded and before the end_event was recorded.
        """
        return super().elapsed_time(end_event)

    def synchronize(self):
        r"""Waits for the event to complete.

        Waits until the completion of all work currently captured in this
        event.  This prevents the CPU thread from proceeding until the event
        completes.

        .. note:: This is a wrapper around ``cudaEventSynchronize()``: see
           `CUDA documentation`_ for more info.

        .. _CUDA documentation:
           https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html
        """
        super().synchronize()

    def ipc_handle(self):
        r"""Returns an IPC handle of this event. If not recorded yet, the event
        will use the current device. """
        return super().ipc_handle()

    @property
    def _as_parameter_(self):
        # Lets an Event be passed directly to ctypes foreign functions as the
        # raw cudaEvent_t pointer.
        return ctypes.c_void_p(self.cuda_event)

    def __repr__(self):
        # A falsy cuda_event means the lazy C-level event was never created.
        if not self.cuda_event:
            return '<torch.cuda.Event uninitialized>'
        return '<torch.cuda.Event {0:#x}>'.format(self._as_parameter_.value)
Cross-references:
`def wait_event(self, event)` — defined in streams.py:24
`def current_stream(device=None)` — defined in __init__.py:361