Caffe2 - Python API
A deep learning, cross-platform ML framework
__init__.py
1 r"""
2 This package adds support for CUDA tensor types, that implement the same
3 function as CPU tensors, but they utilize GPUs for computation.
4 
5 It is lazily initialized, so you can always import it, and use
6 :func:`is_available()` to determine if your system supports CUDA.
7 
8 :ref:`cuda-semantics` has more details about working with CUDA.
9 """

import contextlib
import platform
import ctypes
import os
import torch
import traceback
import warnings
from torch._six import raise_from
from subprocess import Popen, PIPE
from multiprocessing.util import register_after_fork as _register_after_fork
from ._utils import _get_device_index

_initialized = False
_queued_calls = []  # don't invoke these until initialization occurs
_in_bad_fork = False  # this global is also used in torch.manual_seed
_original_pid = False
_cudart = None


def find_cuda_windows_lib():
    proc = Popen(['where', 'cudart64*.dll'], stdout=PIPE, stderr=PIPE, stdin=PIPE)
    out, err = proc.communicate()
    out = out.decode().strip()
    if len(out) > 0:
        if out.find('\r\n') != -1:
            out = out.split('\r\n')[0]
        cuda_lib_name = os.path.basename(out)
        cuda_lib = os.path.splitext(cuda_lib_name)[0]
        cuda_lib = str(cuda_lib)
        return ctypes.cdll.LoadLibrary(cuda_lib)
    else:
        return None


def is_available():
    r"""Returns a bool indicating if CUDA is currently available."""
    if (not hasattr(torch._C, '_cuda_isDriverSufficient') or
            not torch._C._cuda_isDriverSufficient()):
        return False
    return torch._C._cuda_getDeviceCount() > 0

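# Example (illustrative sketch, not part of the original source): the usual
# availability-guarded device selection; the tensor shape is arbitrary.
#
#     >>> device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#     >>> x = torch.randn(2, 3).to(device)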

def _sleep(cycles):
    torch._C._cuda_sleep(cycles)


def _load_cudart():
    # First check the main program for CUDA symbols
    if platform.system() == 'Windows':
        lib = find_cuda_windows_lib()
    else:
        lib = ctypes.cdll.LoadLibrary(None)
    if hasattr(lib, 'cudaGetErrorName'):
        return lib

    raise RuntimeError(
        "couldn't find libcudart. Make sure CUDA libraries are installed in a "
        "default location, or that they're in {}."
        .format('DYLD_LIBRARY_PATH' if platform.system() == 'Darwin' else
                'LD_LIBRARY_PATH'))


def _check_driver():
    if not hasattr(torch._C, '_cuda_isDriverSufficient'):
        raise AssertionError("Torch not compiled with CUDA enabled")
    if not torch._C._cuda_isDriverSufficient():
        if torch._C._cuda_getDriverVersion() == 0:
            # found no NVIDIA driver on the system
            raise AssertionError("""
Found no NVIDIA driver on your system. Please check that you
have an NVIDIA GPU and installed a driver from
http://www.nvidia.com/Download/index.aspx""")
        else:
            # TODO: directly link to the alternative bin that needs install
            raise AssertionError("""
The NVIDIA driver on your system is too old (found version {}).
Please update your GPU driver by downloading and installing a new
version from the URL: http://www.nvidia.com/Download/index.aspx
Alternatively, go to: https://pytorch.org to install
a PyTorch version that has been compiled with your version
of the CUDA driver.""".format(str(torch._C._cuda_getDriverVersion())))


def _check_capability():
    incorrect_binary_warn = """
    Found GPU%d %s which requires CUDA_VERSION >= %d for
    optimal performance and fast startup time, but your PyTorch was compiled
    with CUDA_VERSION %d. Please install the correct PyTorch binary
    using instructions from https://pytorch.org
    """

    old_gpu_warn = """
    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability that we support is 3.5.
    """

    CUDA_VERSION = torch._C._cuda_getCompiledVersion()
    for d in range(device_count()):
        capability = get_device_capability(d)
        major = capability[0]
        name = get_device_name(d)
        if CUDA_VERSION < 8000 and major >= 6:
            warnings.warn(incorrect_binary_warn % (d, name, 8000, CUDA_VERSION))
        elif CUDA_VERSION < 9000 and major >= 7:
            warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION))
        elif capability == (3, 0) or major < 3:
            warnings.warn(old_gpu_warn % (d, name, major, capability[1]))


def _lazy_call(callable):
    if _initialized:
        callable()
    else:
        # Don't store the actual traceback to avoid memory cycle
        _queued_calls.append((callable, traceback.format_stack()))

_lazy_call(_check_capability)

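# Note (added commentary, not part of the original source): callables queued
# through _lazy_call before initialization are replayed, in order, inside
# _lazy_init(). The seeding helpers in torch/cuda/random.py rely on this so
# that, e.g., torch.cuda.manual_seed(42) can be called before any CUDA state
# exists and takes effect once initialization happens.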

class DeferredCudaCallError(Exception):
    pass


def init():
    r"""Initialize PyTorch's CUDA state. You may need to call
    this explicitly if you are interacting with PyTorch via
    its C API, as Python bindings for CUDA functionality will not
    be available until this initialization takes place. Ordinary users
    should not need this, as all of PyTorch's CUDA methods
    automatically initialize CUDA state on-demand.

    Does nothing if the CUDA state is already initialized.
    """
    _lazy_init()


def _lazy_init():
    global _initialized, _cudart, _original_pid, _queued_calls
    if _initialized:
        return
    if _in_bad_fork:
        from sys import version_info
        if version_info < (3, 4):
            msg = ("To use CUDA with multiprocessing, you must use Python "
                   "3.4+ and the 'spawn' start method")
        else:
            msg = ("To use CUDA with multiprocessing, you must use the "
                   "'spawn' start method")
        raise RuntimeError(
            "Cannot re-initialize CUDA in forked subprocess. " + msg)
    _check_driver()
    torch._C._cuda_init()
    _cudart = _load_cudart()
    _cudart.cudaGetErrorName.restype = ctypes.c_char_p
    _cudart.cudaGetErrorString.restype = ctypes.c_char_p
    _original_pid = os.getpid()
    _initialized = True
    # Important to do this after _initialized, since some queued calls
    # may themselves call _lazy_init()
    for queued_call, orig_traceback in _queued_calls:
        try:
            queued_call()
        except Exception as e:
            msg = ("CUDA call failed lazily at initialization with error: {}\n\n"
                   "CUDA call was originally invoked at:\n\n{}").format(str(e), orig_traceback)
            raise_from(DeferredCudaCallError(msg), e)


def _after_fork(arg):
    global _initialized, _in_bad_fork
    if _initialized and _original_pid != os.getpid():
        _initialized = False
        _in_bad_fork = True
        _CudaBase.__new__ = _lazy_new


_register_after_fork(_after_fork, _after_fork)


def cudart():
    _lazy_init()
    return _cudart


class cudaStatus(object):
    SUCCESS = 0
    ERROR_NOT_READY = 34


class CudaError(RuntimeError):
    def __init__(self, code):
        msg = cudart().cudaGetErrorString(code).decode('utf-8')
        super(CudaError, self).__init__('{0} ({1})'.format(msg, code))


def check_error(res):
    if res != cudaStatus.SUCCESS:
        raise CudaError(res)

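# Example (illustrative sketch, not part of the original source): routing a
# raw CUDA runtime status through check_error so a non-zero code raises
# CudaError. cudaDeviceSynchronize is a standard CUDA runtime symbol reachable
# through the ctypes handle that cudart() returns.
#
#     >>> torch.cuda.check_error(torch.cuda.cudart().cudaDeviceSynchronize())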

class device(object):
    r"""Context-manager that changes the selected device.

    Arguments:
        device (torch.device or int): device index to select. It's a no-op if
            this argument is a negative integer or ``None``.
    """

    def __init__(self, device):
        self.idx = _get_device_index(device, optional=True)
        self.prev_idx = -1

    def __enter__(self):
        if self.idx == -1:
            return
        self.prev_idx = torch._C._cuda_getDevice()
        if self.prev_idx != self.idx:
            torch._C._cuda_setDevice(self.idx)
        _lazy_init()

    def __exit__(self, *args):
        if self.prev_idx != self.idx:
            torch._C._cuda_setDevice(self.prev_idx)
        return False

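# Example (illustrative sketch, not part of the original source): temporarily
# selecting device 1; assumes a machine with at least two GPUs.
#
#     >>> with torch.cuda.device(1):
#     ...     y = torch.randn(2).cuda()      # allocated on GPU 1
#     >>> torch.cuda.current_device()        # previous device is restored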

class device_of(device):
    r"""Context-manager that changes the current device to that of given object.

    You can use both tensors and storages as arguments. If a given object is
    not allocated on a GPU, this is a no-op.

    Arguments:
        obj (Tensor or Storage): object allocated on the selected device.
    """

    def __init__(self, obj):
        idx = obj.get_device() if obj.is_cuda else -1
        super(device_of, self).__init__(idx)

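# Example (illustrative sketch, not part of the original source): running an
# op on whichever GPU already holds a tensor t (a hypothetical CUDA tensor).
#
#     >>> with torch.cuda.device_of(t):
#     ...     u = t * 2                      # executes on t's device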

def set_device(device):
    r"""Sets the current device.

    Usage of this function is discouraged in favor of :any:`device`. In most
    cases it's better to use the ``CUDA_VISIBLE_DEVICES`` environment variable.

    Arguments:
        device (torch.device or int): selected device. This function is a no-op
            if this argument is negative.
    """
    device = _get_device_index(device)
    if device >= 0:
        torch._C._cuda_setDevice(device)

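# Example (illustrative sketch, not part of the original source): the
# recommended alternative is to pick the GPU from the shell, so the process
# only ever sees one device; the script name here is hypothetical.
#
#     $ CUDA_VISIBLE_DEVICES=1 python train.py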

def get_device_name(device=None):
    r"""Gets the name of a device.

    Arguments:
        device (torch.device or int, optional): device for which to return the
            name. This function is a no-op if this argument is a negative
            integer. Uses the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).
    """
    return get_device_properties(device).name


def get_device_capability(device=None):
    r"""Gets the cuda capability of a device.

    Arguments:
        device (torch.device or int, optional): device for which to return the
            device capability. This function is a no-op if this argument is
            a negative integer. Uses the current device, given by
            :meth:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).

    Returns:
        tuple(int, int): the major and minor cuda capability of the device
    """
    prop = get_device_properties(device)
    return prop.major, prop.minor

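# Example (illustrative sketch, not part of the original source): listing each
# visible device with its name and compute capability.
#
#     >>> for d in range(torch.cuda.device_count()):
#     ...     print(d, torch.cuda.get_device_name(d),
#     ...           torch.cuda.get_device_capability(d))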

def get_device_properties(device):
    if not _initialized:
        init()  # will define _get_device_properties and _CudaDeviceProperties
    device = _get_device_index(device, optional=True)
    if device < 0 or device >= device_count():
        raise AssertionError("Invalid device id")
    return _get_device_properties(device)


@contextlib.contextmanager
def stream(stream):
    r"""Context-manager that selects a given stream.

    All CUDA kernels queued within its context will be enqueued on a selected
    stream.

    Arguments:
        stream (Stream): selected stream. This manager is a no-op if it's
            ``None``.

    .. note:: Streams are per-device. If the selected stream is not on the
        current device, this function will also change the current device to
        match the stream.
    """
    if stream is None:
        yield
        return
    src_prev_stream = current_stream()

    if src_prev_stream.device != stream.device:
        # The given stream is on a different device; have to restore the
        # current_stream on that device on exit as well
        with device(stream.device):
            dst_prev_stream = current_stream()

    torch._C._cuda_setStream(stream._cdata)
    try:
        yield
    finally:
        if src_prev_stream.device != stream.device:
            torch._C._cuda_setStream(dst_prev_stream._cdata)
        torch._C._cuda_setStream(src_prev_stream._cdata)

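# Example (illustrative sketch, not part of the original source): issuing work
# on a side stream, then making the current stream wait on it; x and y are
# hypothetical CUDA tensors.
#
#     >>> s = torch.cuda.Stream()
#     >>> with torch.cuda.stream(s):
#     ...     z = x @ y                      # kernel enqueued on s
#     >>> torch.cuda.current_stream().wait_stream(s)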

def device_count():
    """Returns the number of GPUs available."""
    if is_available():
        return torch._C._cuda_getDeviceCount()
    else:
        return 0


def current_device():
    r"""Returns the index of a currently selected device."""
    _lazy_init()
    return torch._C._cuda_getDevice()


def synchronize():
    r"""Waits for all kernels in all streams on current device to complete."""
    _lazy_init()
    return torch._C._cuda_synchronize()

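# Example (illustrative sketch, not part of the original source): kernels
# launch asynchronously, so wall-clock timing needs a synchronization barrier;
# x and y are hypothetical CUDA tensors.
#
#     >>> import time
#     >>> start = time.time()
#     >>> z = x @ y
#     >>> torch.cuda.synchronize()           # wait for the kernel to finish
#     >>> elapsed = time.time() - start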

def current_stream(device=None):
    r"""Returns the currently selected :class:`Stream` for a given device.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            the currently selected :class:`Stream` for the current device, given
            by :meth:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    return torch.cuda.Stream(_cdata=torch._C._cuda_getCurrentStream(
        _get_device_index(device, optional=True)))


def default_stream(device=None):
    r"""Returns the default :class:`Stream` for a given device.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            the default :class:`Stream` for the current device, given by
            :meth:`~torch.cuda.current_device`, if :attr:`device` is ``None``
            (default).
    """
    _lazy_init()
    return torch.cuda.Stream(_cdata=torch._C._cuda_getDefaultStream(
        _get_device_index(device, optional=True)))


def current_blas_handle():
    r"""Returns cublasHandle_t pointer to current cuBLAS handle"""
    _lazy_init()
    return torch._C._cuda_getCurrentBlasHandle()


def empty_cache():
    r"""Releases all unoccupied cached memory currently held by the caching
    allocator so that it can be used by other GPU applications and is visible
    in `nvidia-smi`.

    .. note::
        :meth:`~torch.cuda.empty_cache` doesn't increase the amount of GPU
        memory available for PyTorch. See :ref:`cuda-memory-management` for
        more details about GPU memory management.
    """
    if _initialized:
        torch._C._cuda_emptyCache()

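# Example (illustrative sketch, not part of the original source): freed tensor
# memory goes back to the caching allocator, not the driver; empty_cache()
# hands the cached blocks back so other applications can use them.
#
#     >>> x = torch.empty(1024, 1024, device='cuda')
#     >>> del x                              # returns the block to the cache
#     >>> torch.cuda.empty_cache()           # returns the cache to the driver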

def memory_allocated(device=None):
    r"""Returns the current GPU memory occupied by tensors in bytes for a given
    device.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        This is likely less than the amount shown in `nvidia-smi` since some
        unused memory can be held by the caching allocator and some context
        needs to be created on GPU. See :ref:`cuda-memory-management` for more
        details about GPU memory management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_memoryAllocated(device)


def max_memory_allocated(device=None):
    r"""Returns the maximum GPU memory occupied by tensors in bytes for a given
    device.

    By default, this returns the peak allocated memory since the beginning of
    this program. :func:`~torch.cuda.reset_max_memory_allocated` can be used to
    reset the starting point in tracking this metric. For example, these two
    functions can measure the peak allocated memory usage of each iteration in a
    training loop.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_maxMemoryAllocated(device)


def reset_max_memory_allocated(device=None):
    r"""Resets the starting point in tracking maximum GPU memory occupied by
    tensors for a given device.

    See :func:`~torch.cuda.max_memory_allocated` for details.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_resetMaxMemoryAllocated(device)

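# Example (illustrative sketch, not part of the original source): the pattern
# the max_memory_allocated docstring describes, measuring per-iteration peak
# usage; loader and step are hypothetical.
#
#     >>> for batch in loader:
#     ...     torch.cuda.reset_max_memory_allocated()
#     ...     step(batch)
#     ...     print(torch.cuda.max_memory_allocated())   # peak for this step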

def memory_cached(device=None):
    r"""Returns the current GPU memory managed by the caching allocator in bytes
    for a given device.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_memoryCached(device)


def max_memory_cached(device=None):
    r"""Returns the maximum GPU memory managed by the caching allocator in bytes
    for a given device.

    By default, this returns the peak cached memory since the beginning of this
    program. :func:`~torch.cuda.reset_max_memory_cached` can be used to reset
    the starting point in tracking this metric. For example, these two functions
    can measure the peak cached memory amount of each iteration in a training
    loop.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_maxMemoryCached(device)


def reset_max_memory_cached(device=None):
    r"""Resets the starting point in tracking maximum GPU memory managed by the
    caching allocator for a given device.

    See :func:`~torch.cuda.max_memory_cached` for details.

    Arguments:
        device (torch.device or int, optional): selected device. Returns
            statistic for the current device, given by :meth:`~torch.cuda.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    device = _get_device_index(device, optional=True)
    return torch._C._cuda_resetMaxMemoryCached(device)

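# Note (added commentary, not part of the original source): memory_cached() is
# always at least memory_allocated(); the difference is memory the caching
# allocator can hand out to new tensors without another cudaMalloc.
#
#     >>> reusable = torch.cuda.memory_cached() - torch.cuda.memory_allocated()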

def _host_allocator():
    _lazy_init()
    return torch._C._cuda_cudaHostAllocator()


@contextlib.contextmanager
def _free_mutex():
    torch._C._cuda_lock_mutex()
    try:
        yield
    finally:
        torch._C._cuda_unlock_mutex()


from .random import *

################################################################################
# Define Storage and Tensor classes
################################################################################


from ..storage import _StorageBase


def _dummy_type(name):
    def init_err(self):
        class_name = self.__class__.__name__
        raise RuntimeError(
            "Tried to instantiate dummy base class {}".format(class_name))
    return type(name, (object,), {"__init__": init_err})


if not hasattr(torch._C, 'CudaDoubleStorageBase'):
    # Define dummy base classes
    for t in ['Double', 'Float', 'Long', 'Int', 'Short', 'Char', 'Byte', 'Half', 'Bool']:
        storage_name = 'Cuda{0}StorageBase'.format(t)
        tensor_name = 'Cuda{0}TensorBase'.format(t)

        torch._C.__dict__[storage_name] = _dummy_type(storage_name)
        torch._C.__dict__[tensor_name] = _dummy_type(tensor_name)

    torch._C.__dict__['_CudaStreamBase'] = _dummy_type('CudaStreamBase')
    torch._C.__dict__['_CudaEventBase'] = _dummy_type('CudaEventBase')


@staticmethod
def _lazy_new(cls, *args, **kwargs):
    _lazy_init()
    # We need this method only for lazy init, so we can remove it
    del _CudaBase.__new__
    return super(_CudaBase, cls).__new__(cls, *args, **kwargs)


class _CudaBase(object):
    is_cuda = True
    is_sparse = False

    def type(self, *args, **kwargs):
        with device(self.get_device()):
            return super(_CudaBase, self).type(*args, **kwargs)

    __new__ = _lazy_new


class DoubleStorage(_CudaBase, torch._C.CudaDoubleStorageBase, _StorageBase):
    pass


class FloatStorage(_CudaBase, torch._C.CudaFloatStorageBase, _StorageBase):
    pass


class LongStorage(_CudaBase, torch._C.CudaLongStorageBase, _StorageBase):
    pass


class IntStorage(_CudaBase, torch._C.CudaIntStorageBase, _StorageBase):
    pass


class ShortStorage(_CudaBase, torch._C.CudaShortStorageBase, _StorageBase):
    pass


class CharStorage(_CudaBase, torch._C.CudaCharStorageBase, _StorageBase):
    pass


class ByteStorage(_CudaBase, torch._C.CudaByteStorageBase, _StorageBase):
    pass


class HalfStorage(_CudaBase, torch._C.CudaHalfStorageBase, _StorageBase):
    pass


class BoolStorage(_CudaBase, torch._C.CudaBoolStorageBase, _StorageBase):
    pass


torch._storage_classes.add(DoubleStorage)
torch._storage_classes.add(FloatStorage)
torch._storage_classes.add(LongStorage)
torch._storage_classes.add(IntStorage)
torch._storage_classes.add(ShortStorage)
torch._storage_classes.add(CharStorage)
torch._storage_classes.add(ByteStorage)
torch._storage_classes.add(HalfStorage)
torch._storage_classes.add(BoolStorage)

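# Example (illustrative sketch, not part of the original source): instantiating
# any of these classes goes through _lazy_new, which runs _lazy_init() on the
# first CUDA touch before deferring to the real base-class constructor.
#
#     >>> s = torch.cuda.FloatStorage(16)    # triggers lazy CUDA initialization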

from . import sparse
from . import profiler
from . import nvtx
from .streams import Stream, Event