Caffe2 - Python API
A deep learning, cross platform ML framework
1 from __future__ import absolute_import, division, print_function, unicode_literals
2 import copy
3 import glob
4 import imp
5 import os
6 import re
7 import setuptools
8 import subprocess
9 import sys
10 import sysconfig
11 import tempfile
12 import warnings
14 import torch
15 from .file_baton import FileBaton
16 from ._cpp_extension_versioner import ExtensionVersioner
18 from setuptools.command.build_ext import build_ext
# True when running on Windows. Used throughout this module to select
# Windows-specific tool names, library names and linker flag syntax.
IS_WINDOWS = sys.platform == 'win32'
24 def _find_cuda_home():
25  '''Finds the CUDA install path.'''
26  # Guess #1
27  cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
28  if cuda_home is None:
29  # Guess #2
30  if IS_WINDOWS:
31  cuda_homes = glob.glob(
32  'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
33  if len(cuda_homes) == 0:
34  cuda_home = ''
35  else:
36  cuda_home = cuda_homes[0]
37  else:
38  cuda_home = '/usr/local/cuda'
39  if not os.path.exists(cuda_home):
40  # Guess #3
41  try:
42  which = 'where' if IS_WINDOWS else 'which'
43  nvcc = subprocess.check_output(
44  [which, 'nvcc']).decode().rstrip('\r\n')
45  cuda_home = os.path.dirname(os.path.dirname(nvcc))
46  except Exception:
47  cuda_home = None
48  if cuda_home and not torch.cuda.is_available():
49  print("No CUDA runtime is found, using CUDA_HOME='{}'".format(cuda_home))
50  return cuda_home
# Oldest compiler versions accepted by check_compiler_abi_compatibility() when
# building against a binary PyTorch distribution.
MINIMUM_GCC_VERSION = (4, 9, 0)
MINIMUM_MSVC_VERSION = (19, 0, 24215)

# Warning shown when the compiler is of the right family but too old.
# Formatted with a single positional argument: the compiler name and version.
ABI_INCOMPATIBILITY_WARNING = '''

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({}) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!
'''

# Warning shown when the compiler family does not match the platform default.
# Formatted with the keyword arguments ``user_compiler``, ``pytorch_compiler``
# and ``platform``.
WRONG_COMPILER_WARNING = '''

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({user_compiler}) is not compatible with the compiler Pytorch was
built with for this platform, which is {pytorch_compiler} on {platform}. Please
use {pytorch_compiler} to compile your extension. Alternatively, you may
compile PyTorch from source using {user_compiler}, and then you can also use
{user_compiler} to compile your extension.

See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help
with compiling PyTorch from source.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!
'''

CUDA_HOME = _find_cuda_home()
CUDNN_HOME = os.environ.get('CUDNN_HOME') or os.environ.get('CUDNN_PATH')
# PyTorch releases have the version pattern major.minor.patch, whereas when
# PyTorch is built from source, we append the git commit hash, which gives
# it the below pattern.
BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')

# Flags prepended to every nvcc invocation issued by BuildExtension.
COMMON_NVCC_FLAGS = [
    '-D__CUDA_NO_HALF_OPERATORS__',
    '-D__CUDA_NO_HALF_CONVERSIONS__',
    '-D__CUDA_NO_HALF2_OPERATORS__',
]

# Tracks a version number per JIT-compiled extension; see _jit_compile().
JIT_EXTENSION_VERSIONER = ExtensionVersioner()
def _is_binary_build():
    '''
    Returns ``True`` when the version string of the running PyTorch does not
    look like that of a from-source build (see
    ``BUILT_FROM_SOURCE_VERSION_PATTERN``).
    '''
    source_build = BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__)
    return source_build is None
108 def _accepted_compilers_for_platform():
109  return ['clang++', 'clang'] if sys.platform.startswith('darwin') else ['g++', 'gcc']
def get_default_build_root():
    '''
    Returns the path to the root folder under which extensions will be built.

    Every extension module gets its own folder below this root. That is, if
    ``p`` is the path returned by this function and ``ext`` is the name of an
    extension, the extension is built in ``p/ext``.
    '''
    # tempfile.gettempdir() will be /tmp on UNIX and \TEMP on Windows.
    temp_root = tempfile.gettempdir()
    extensions_root = os.path.join(temp_root, 'torch_extensions')
    return os.path.realpath(extensions_root)
def check_compiler_ok_for_platform(compiler):
    '''
    Checks that ``compiler`` is the kind of compiler expected on this platform.

    Arguments:
        compiler (str): The compiler executable to check.

    Returns:
        True if the compiler is gcc/g++ on Linux or clang/clang++ on macOS,
        and always True for Windows.
    '''
    if IS_WINDOWS:
        return True
    located = subprocess.check_output(['which', compiler], stderr=subprocess.STDOUT)
    # Resolve symlinks so that a generic alias such as 'c++' is judged by the
    # executable it actually points to (e.g. 'g++').
    resolved_path = os.path.realpath(located.decode().strip())
    for accepted_name in _accepted_compilers_for_platform():
        if accepted_name in resolved_path:
            return True
    return False
def check_compiler_abi_compatibility(compiler):
    '''
    Verifies that the given compiler is ABI-compatible with PyTorch.

    The check is skipped (and ``True`` returned) when PyTorch was built from
    source, or when the ``TORCH_DONT_CHECK_COMPILER_ABI`` environment variable
    is set to one of ``ON``, ``1``, ``YES``, ``TRUE`` or ``Y``.

    Arguments:
        compiler (str): The compiler executable name to check (e.g. ``g++``).
            Must be executable in a shell process.

    Returns:
        False if the compiler is (likely) ABI-incompatible with PyTorch,
        else True. A warning is emitted whenever False is returned.
    '''
    if not _is_binary_build():
        return True
    if os.environ.get('TORCH_DONT_CHECK_COMPILER_ABI') in ['ON', '1', 'YES', 'TRUE', 'Y']:
        return True

    # First check if the compiler is one of the expected ones for the particular platform.
    if not check_compiler_ok_for_platform(compiler):
        warnings.warn(WRONG_COMPILER_WARNING.format(
            user_compiler=compiler,
            pytorch_compiler=_accepted_compilers_for_platform()[0],
            platform=sys.platform))
        return False

    if sys.platform.startswith('darwin'):
        # There is no particular minimum version we need for clang, so we're good here.
        return True
    try:
        if sys.platform.startswith('linux'):
            minimum_required_version = MINIMUM_GCC_VERSION
            version = subprocess.check_output([compiler, '-dumpfullversion', '-dumpversion'])
            version = version.decode().strip().split('.')
        else:
            minimum_required_version = MINIMUM_MSVC_VERSION
            compiler_info = subprocess.check_output(compiler, stderr=subprocess.STDOUT)
            match = re.search(r'(\d+)\.(\d+)\.(\d+)', compiler_info.decode().strip())
            # Keep the components as strings in both cases so that they can
            # be joined for the warning message below.
            version = ('0', '0', '0') if match is None else match.groups()
        # Parse inside the try block so that unexpected compiler output is
        # reported as a warning rather than an uncaught ValueError.
        numeric_version = tuple(map(int, version))
    except Exception:
        _, error, _ = sys.exc_info()
        warnings.warn('Error checking compiler version for {}: {}'.format(compiler, error))
        return False

    if numeric_version >= minimum_required_version:
        return True

    compiler = '{} {}'.format(compiler, ".".join(version))
    warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler))

    return False
# ``object`` is listed as an explicit base class so that ``super()`` works on
# Python 2, where the distutils command classes are old-style classes.
class BuildExtension(build_ext, object):
    '''
    A custom :mod:`setuptools` build extension.

    This :class:`setuptools.build_ext` subclass takes care of passing the
    minimum required compiler flags (e.g. ``-std=c++11``) as well as mixed
    C++/CUDA compilation (and support for CUDA files in general).

    When using :class:`BuildExtension`, it is allowed to supply a dictionary
    for ``extra_compile_args`` (rather than the usual list) that maps from
    languages (``cxx`` or ``nvcc``) to a list of additional compiler flags to
    supply to the compiler. This makes it possible to supply different flags to
    the C++ and CUDA compiler during mixed compilation.
    '''

    @classmethod
    def with_options(cls, **options):
        '''
        Returns an alternative constructor that extends any original keyword
        arguments to the original constructor with the given options.
        '''
        def init_with_options(*args, **kwargs):
            kwargs = kwargs.copy()
            kwargs.update(options)
            return cls(*args, **kwargs)
        return init_with_options

    def __init__(self, *args, **kwargs):
        super(BuildExtension, self).__init__(*args, **kwargs)
        # When true, get_ext_filename() drops the Python ABI tag from the
        # name of the produced library.
        self.no_python_abi_suffix = kwargs.get("no_python_abi_suffix", False)

    def build_extensions(self):
        '''
        Builds all extensions, routing CUDA sources to nvcc.

        Adds the flags every PyTorch extension needs, then monkey-patches the
        compiler object so that ``.cu``/``.cuh`` files are compiled with nvcc
        and per-language ``extra_compile_args`` dictionaries are honoured,
        and finally defers to the stock ``build_ext`` implementation.
        '''
        self._check_abi()
        for extension in self.extensions:
            self._add_compile_flag(extension, '-DTORCH_API_INCLUDE_EXTENSION_H')
            self._define_torch_extension_name(extension)
            self._add_gnu_abi_flag_if_binary(extension)

        # Register .cu and .cuh as valid source extensions.
        self.compiler.src_extensions += ['.cu', '.cuh']
        # Save the original compile entry points for later.
        if self.compiler.compiler_type == 'msvc':
            self.compiler._cpp_extensions += ['.cu', '.cuh']
            original_compile = self.compiler.compile
            original_spawn = self.compiler.spawn
        else:
            original_compile = self.compiler._compile

        def unix_wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
            # Copy before we make any modifications.
            cflags = copy.deepcopy(extra_postargs)
            try:
                original_compiler = self.compiler.compiler_so
                if _is_cuda_file(src):
                    nvcc = _join_cuda_home('bin', 'nvcc')
                    if not isinstance(nvcc, list):
                        nvcc = [nvcc]
                    self.compiler.set_executable('compiler_so', nvcc)
                    if isinstance(cflags, dict):
                        cflags = cflags['nvcc']
                    cflags = COMMON_NVCC_FLAGS + ['--compiler-options', "'-fPIC'"] + cflags
                elif isinstance(cflags, dict):
                    cflags = cflags['cxx']
                # NVCC does not allow multiple -std to be passed, so we avoid
                # overriding the option if the user explicitly passed it.
                if not any(flag.startswith('-std=') for flag in cflags):
                    cflags.append('-std=c++11')

                original_compile(obj, src, ext, cc_args, cflags, pp_opts)
            finally:
                # Put the original compiler back in place.
                self.compiler.set_executable('compiler_so', original_compiler)

        def win_wrap_compile(sources,
                             output_dir=None,
                             macros=None,
                             include_dirs=None,
                             debug=0,
                             extra_preargs=None,
                             extra_postargs=None,
                             depends=None):

            # The flags are applied per command inside spawn() below, so they
            # are withheld from the wrapped compile() call itself.
            self.cflags = copy.deepcopy(extra_postargs)
            extra_postargs = None

            def spawn(cmd):
                # Using regex to match src, obj and include files
                src_regex = re.compile('/T(p|c)(.*)')
                src_list = [
                    m.group(2) for m in (src_regex.match(elem) for elem in cmd)
                    if m
                ]

                obj_regex = re.compile('/Fo(.*)')
                obj_list = [
                    m.group(1) for m in (obj_regex.match(elem) for elem in cmd)
                    if m
                ]

                include_regex = re.compile(r'((\-|\/)I.*)')
                include_list = [
                    m.group(1)
                    for m in (include_regex.match(elem) for elem in cmd) if m
                ]

                if len(src_list) >= 1 and len(obj_list) >= 1:
                    src = src_list[0]
                    obj = obj_list[0]
                    if _is_cuda_file(src):
                        # Replace the whole MSVC command line with an nvcc one.
                        nvcc = _join_cuda_home('bin', 'nvcc')
                        if isinstance(self.cflags, dict):
                            cflags = self.cflags['nvcc']
                        elif isinstance(self.cflags, list):
                            cflags = self.cflags
                        else:
                            cflags = []
                        cmd = [
                            nvcc, '-c', src, '-o', obj, '-Xcompiler',
                            '/wd4819', '-Xcompiler', '/MD'
                        ] + include_list + cflags
                    elif isinstance(self.cflags, dict):
                        cflags = self.cflags['cxx'] + ['/MD']
                        cmd += cflags
                    elif isinstance(self.cflags, list):
                        cflags = self.cflags + ['/MD']
                        cmd += cflags

                return original_spawn(cmd)

            try:
                self.compiler.spawn = spawn
                return original_compile(sources, output_dir, macros,
                                        include_dirs, debug, extra_preargs,
                                        extra_postargs, depends)
            finally:
                self.compiler.spawn = original_spawn

        # Monkey-patch the _compile method.
        if self.compiler.compiler_type == 'msvc':
            self.compiler.compile = win_wrap_compile
        else:
            self.compiler._compile = unix_wrap_compile

        build_ext.build_extensions(self)

    def get_ext_filename(self, ext_name):
        # Get the original shared library name. For Python 3, this name will be
        # suffixed with "<SOABI>.so", where <SOABI> will be something like
        # cpython-37m-x86_64-linux-gnu. On Python 2, there is no such ABI name.
        # The final extension, .so, would be .lib/.dll on Windows of course.
        ext_filename = super(BuildExtension, self).get_ext_filename(ext_name)
        # If `no_python_abi_suffix` is `True`, we omit the Python 3 ABI
        # component. This makes building shared libraries with setuptools that
        # aren't Python modules nicer.
        if self.no_python_abi_suffix and sys.version_info >= (3, 0):
            # The parts will be e.g. ["my_extension", "cpython-37m-x86_64-linux-gnu", "so"].
            ext_filename_parts = ext_filename.split('.')
            # Omit the second to last element.
            without_abi = ext_filename_parts[:-2] + ext_filename_parts[-1:]
            ext_filename = '.'.join(without_abi)
        return ext_filename

    def _check_abi(self):
        # On some platforms, like Windows, compiler_cxx is not available.
        if hasattr(self.compiler, 'compiler_cxx'):
            compiler = self.compiler.compiler_cxx[0]
        elif IS_WINDOWS:
            compiler = os.environ.get('CXX', 'cl')
        else:
            compiler = os.environ.get('CXX', 'c++')
        check_compiler_abi_compatibility(compiler)

    def _add_compile_flag(self, extension, flag):
        # Deep copy: with a {'cxx': [...], 'nvcc': [...]} dictionary a shallow
        # copy would still share the inner lists with the caller, and the
        # appends below would leak into the user's own flag lists.
        extension.extra_compile_args = copy.deepcopy(extension.extra_compile_args)
        if isinstance(extension.extra_compile_args, dict):
            for args in extension.extra_compile_args.values():
                args.append(flag)
        else:
            extension.extra_compile_args.append(flag)

    def _define_torch_extension_name(self, extension):
        # pybind11 doesn't support dots in the names
        # so in order to support extensions in the packages
        # like torch._C, we take the last part of the string
        # as the library name
        names = extension.name.split('.')
        name = names[-1]
        define = '-DTORCH_EXTENSION_NAME={}'.format(name)
        self._add_compile_flag(extension, define)

    def _add_gnu_abi_flag_if_binary(self, extension):
        # If the version string looks like a binary build,
        # we know that PyTorch was compiled with gcc 4.9.2.
        # if the extension is compiled with gcc >= 5.1,
        # then we have to define _GLIBCXX_USE_CXX11_ABI=0
        # so that the std::string in the API is resolved to
        # non-C++11 symbols
        if _is_binary_build():
            self._add_compile_flag(extension, '-D_GLIBCXX_USE_CXX11_ABI=0')
def CppExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a C++ extension.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor. The PyTorch include paths (and, on Windows, the PyTorch
    library path and import libraries) are added to any ``include_dirs``,
    ``library_dirs`` and ``libraries`` supplied by the caller; the caller's
    lists themselves are left unmodified. ``language`` is always set to
    ``'c++'``.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CppExtension
        >>> setup(
                name='extension',
                ext_modules=[
                    CppExtension(
                        name='extension',
                        sources=['extension.cpp'],
                        extra_compile_args=['-g']),
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    # Copy every caller-supplied list before extending it, so that a list
    # shared between several extensions does not accumulate entries.
    include_dirs = list(kwargs.get('include_dirs') or [])
    include_dirs += include_paths()
    kwargs['include_dirs'] = include_dirs

    if IS_WINDOWS:
        library_dirs = list(kwargs.get('library_dirs') or [])
        library_dirs += library_paths()
        kwargs['library_dirs'] = library_dirs

        libraries = list(kwargs.get('libraries') or [])
        libraries += ['c10', 'caffe2', 'torch', 'torch_python', '_C']
        kwargs['libraries'] = libraries

    kwargs['language'] = 'c++'
    return setuptools.Extension(name, sources, *args, **kwargs)
def CUDAExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for CUDA/C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a CUDA/C++
    extension. This includes the CUDA include path, library path and runtime
    library.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor. Entries are added to copies of any ``include_dirs``,
    ``library_dirs`` and ``libraries`` supplied by the caller; the caller's
    lists themselves are left unmodified. ``language`` is always set to
    ``'c++'``.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
        >>> setup(
                name='cuda_extension',
                ext_modules=[
                    CUDAExtension(
                            name='cuda_extension',
                            sources=['extension.cpp', 'extension_kernel.cu'],
                            extra_compile_args={'cxx': ['-g'],
                                                'nvcc': ['-O2']})
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    # Copy every caller-supplied list before extending it, so that a list
    # shared between several extensions does not accumulate entries.
    library_dirs = list(kwargs.get('library_dirs') or [])
    library_dirs += library_paths(cuda=True)
    kwargs['library_dirs'] = library_dirs

    libraries = list(kwargs.get('libraries') or [])
    libraries.append('cudart')
    if IS_WINDOWS:
        libraries += ['c10', 'caffe2', 'torch', 'torch_python', 'caffe2_gpu', '_C']
    kwargs['libraries'] = libraries

    include_dirs = list(kwargs.get('include_dirs') or [])
    include_dirs += include_paths(cuda=True)
    kwargs['include_dirs'] = include_dirs

    kwargs['language'] = 'c++'

    return setuptools.Extension(name, sources, *args, **kwargs)
def include_paths(cuda=False):
    '''
    Collects the include directories needed to compile a C++ or CUDA
    extension against PyTorch.

    Args:
        cuda: If `True`, includes CUDA-specific include paths.

    Returns:
        A list of include path strings.
    '''
    # The package root is two directory levels above this file.
    package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    include_root = os.path.join(package_root, 'include')
    paths = [include_root]
    # Remove this once torch/torch.h is officially no longer supported for C++ extensions.
    paths.append(os.path.join(include_root, 'torch', 'csrc', 'api', 'include'))
    # Some internal (old) Torch headers don't properly prefix their includes,
    # so we need to pass -Itorch/lib/include/TH as well.
    for legacy_dir in ('TH', 'THC'):
        paths.append(os.path.join(include_root, legacy_dir))
    if not cuda:
        return paths
    cuda_include = _join_cuda_home('include')
    # With the Debian/Ubuntu CUDA packages the CUDA home is /usr, but gcc
    # doesn't like having /usr/include passed explicitly.
    if cuda_include != '/usr/include':
        paths.append(cuda_include)
    if CUDNN_HOME is not None:
        paths.append(os.path.join(CUDNN_HOME, 'include'))
    return paths
def library_paths(cuda=False):
    '''
    Collects the library directories needed to link a C++ or CUDA extension.

    Args:
        cuda: If `True`, includes CUDA-specific library paths.

    Returns:
        A list of library path strings.
    '''
    paths = []

    if IS_WINDOWS:
        # Only on Windows is the ``lib`` folder of the torch package added.
        package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        paths.append(os.path.join(package_root, 'lib'))

    if not cuda:
        return paths

    lib_dir = 'lib/x64' if IS_WINDOWS else 'lib64'
    paths.append(_join_cuda_home(lib_dir))
    if CUDNN_HOME is not None:
        paths.append(os.path.join(CUDNN_HOME, lib_dir))
    return paths
def load(name,
         sources,
         extra_cflags=None,
         extra_cuda_cflags=None,
         extra_ldflags=None,
         extra_include_paths=None,
         build_directory=None,
         verbose=False,
         with_cuda=None,
         is_python_module=True):
    '''
    Loads a PyTorch C++ extension just-in-time (JIT).

    A Ninja build file is emitted and used to compile the given sources into
    a dynamic library, which is then loaded into the current Python process
    as a module and returned from this function, ready for use.

    Unless told otherwise, the build file and the resulting library are
    placed in ``<tmp>/torch_extensions/<name>``, where ``<tmp>`` is the
    temporary folder of the current platform and ``<name>`` is the name of
    the extension. There are two ways to change this location. Setting the
    ``TORCH_EXTENSIONS_DIR`` environment variable replaces
    ``<tmp>/torch_extensions``, so that every extension is compiled into a
    subfolder of that directory. Passing ``build_directory`` overrides the
    entire path, i.e. the library is compiled into that folder directly.

    Sources are compiled with the default system compiler (``c++``) unless
    the ``CXX`` environment variable names another one. Additional arguments
    for the compilation process go into ``extra_cflags`` or
    ``extra_ldflags``; for example, ``extra_cflags=['-O3']`` compiles the
    extension with optimizations. ``extra_cflags`` may also be used to pass
    further include directories.

    Mixed C++/CUDA compilation is supported: CUDA source files (``.cu`` or
    ``.cuh``) passed along with the other sources are detected and compiled
    with nvcc rather than the C++ compiler. In that case the CUDA lib64
    directory is passed as a library directory and ``cudart`` is linked.
    Flags for nvcc go into ``extra_cuda_cflags``, analogous to
    ``extra_cflags`` for C++. The CUDA install directory is located with
    several heuristics that usually work fine; if they do not, setting the
    ``CUDA_HOME`` environment variable is the safest option.

    Args:
        name: The name of the extension to build. This MUST be the same as the
            name of the pybind11 module!
        sources: A list of relative or absolute paths to C++ source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        build_directory: optional path to use as build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.cu`` or
            ``.cuh`` in ``sources``. Set it to ``True`` to force CUDA headers
            and libraries to be included.
        is_python_module: If ``True`` (default), imports the produced shared
            library as a Python module. If ``False``, loads it into the process
            as a plain dynamic library.

    Returns:
        If ``is_python_module`` is ``True``, returns the loaded PyTorch
        extension as a Python module. If ``is_python_module`` is ``False``
        returns nothing (the shared library is loaded into the process as a side
        effect).

    Example:
        >>> from torch.utils.cpp_extension import load
        >>> module = load(
                name='extension',
                sources=['extension.cpp', 'extension_kernel.cu'],
                extra_cflags=['-O2'],
                verbose=True)
    '''
    # A single path given as a string is treated as a one-element list.
    source_list = [sources] if isinstance(sources, str) else sources
    workspace = build_directory or _get_build_directory(name, verbose)
    return _jit_compile(
        name,
        source_list,
        extra_cflags,
        extra_cuda_cflags,
        extra_ldflags,
        extra_include_paths,
        workspace,
        verbose,
        with_cuda,
        is_python_module)
def load_inline(name,
                cpp_sources,
                cuda_sources=None,
                functions=None,
                extra_cflags=None,
                extra_cuda_cflags=None,
                extra_ldflags=None,
                extra_include_paths=None,
                build_directory=None,
                verbose=False,
                with_cuda=None,
                is_python_module=True):
    '''
    Loads a PyTorch C++ extension just-in-time (JIT) from string sources.

    This function behaves exactly like :func:`load`, but takes its sources as
    strings rather than filenames. These strings are stored to files in the
    build directory, after which the behavior of :func:`load_inline` is
    identical to :func:`load`.

    Sources may omit two required parts of a typical non-inline C++ extension:
    the necessary header includes, as well as the (pybind11) binding code. More
    precisely, strings passed to ``cpp_sources`` are first concatenated into a
    single ``.cpp`` file. This file is then prepended with ``#include
    <torch/extension.h>``.

    Furthermore, if the ``functions`` argument is supplied, bindings will be
    automatically generated for each function specified. ``functions`` can
    either be a list of function names, or a dictionary mapping from function
    names to docstrings. If a list is given, the name of each function is used
    as its docstring.

    The sources in ``cuda_sources`` are concatenated into a separate ``.cu``
    file and prepended with ``torch/types.h``, ``cuda.h`` and
    ``cuda_runtime.h`` includes. The ``.cpp`` and ``.cu`` files are compiled
    separately, but ultimately linked into a single library. Note that no
    bindings are generated for functions in ``cuda_sources`` per se. To bind
    to a CUDA kernel, you must create a C++ function that calls it, and either
    declare or define this C++ function in one of the ``cpp_sources`` (and
    include its name in ``functions``).

    The lists passed as ``cpp_sources`` and ``cuda_sources`` are not modified,
    so the same list object can safely be passed to several calls.

    See :func:`load` for a description of arguments omitted below.

    Args:
        cpp_sources: A string, or list of strings, containing C++ source code.
        cuda_sources: A string, or list of strings, containing CUDA source code.
        functions: A list of function names for which to generate function
            bindings. If a dictionary is given, it should map function names to
            docstrings (which are otherwise just the function names).
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on whether ``cuda_sources`` is
            provided. Set it to ``True`` to force CUDA headers
            and libraries to be included.

    Raises:
        ValueError: If ``functions`` is neither a string, a list/tuple nor a
            dictionary.

    Example:
        >>> from torch.utils.cpp_extension import load_inline
        >>> source = \'\'\'
        at::Tensor sin_add(at::Tensor x, at::Tensor y) {
          return x.sin() + y.sin();
        }
        \'\'\'
        >>> module = load_inline(name='inline_extension',
                                 cpp_sources=[source],
                                 functions=['sin_add'])
    '''
    build_directory = build_directory or _get_build_directory(name, verbose)

    # Always work on private copies: the lists are extended below, and doing
    # that on the caller's objects would duplicate the generated includes and
    # bindings whenever the same list is passed in a second time.
    if isinstance(cpp_sources, str):
        cpp_sources = [cpp_sources]
    else:
        cpp_sources = list(cpp_sources)
    cuda_sources = cuda_sources or []
    if isinstance(cuda_sources, str):
        cuda_sources = [cuda_sources]
    else:
        cuda_sources = list(cuda_sources)

    cpp_sources.insert(0, '#include <torch/extension.h>')

    # If `functions` is supplied, we create the pybind11 bindings for the user.
    # Here, `functions` is (or becomes, after some processing) a map from
    # function names to function docstrings.
    if functions is not None:
        if isinstance(functions, str):
            functions = [functions]
        if isinstance(functions, (list, tuple)):
            # Make the function docstring the same as the function name.
            functions = dict((f, f) for f in functions)
        elif not isinstance(functions, dict):
            raise ValueError(
                "Expected 'functions' to be a list or dict, but was {}".format(
                    type(functions)))
        cpp_sources.append('PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {')
        for function_name, docstring in functions.items():
            cpp_sources.append('m.def("{0}", &{0}, "{1}");'.format(
                function_name, docstring))
        cpp_sources.append('}')

    cpp_source_path = os.path.join(build_directory, 'main.cpp')
    with open(cpp_source_path, 'w') as cpp_source_file:
        cpp_source_file.write('\n'.join(cpp_sources))

    sources = [cpp_source_path]

    if cuda_sources:
        cuda_sources = [
            '#include <torch/types.h>',
            '#include <cuda.h>',
            '#include <cuda_runtime.h>',
        ] + cuda_sources

        # The .cu suffix is what routes this file to nvcc.
        cuda_source_path = os.path.join(build_directory, 'cuda.cu')
        with open(cuda_source_path, 'w') as cuda_source_file:
            cuda_source_file.write('\n'.join(cuda_sources))

        sources.append(cuda_source_path)

    return _jit_compile(
        name,
        sources,
        extra_cflags,
        extra_cuda_cflags,
        extra_ldflags,
        extra_include_paths,
        build_directory,
        verbose,
        with_cuda,
        is_python_module)
def _jit_compile(name,
                 sources,
                 extra_cflags,
                 extra_cuda_cflags,
                 extra_ldflags,
                 extra_include_paths,
                 build_directory,
                 verbose,
                 with_cuda,
                 is_python_module):
    '''
    Builds the extension if its inputs changed since the last build in this
    process, then imports it from ``build_directory``.

    The version registered for ``name`` is bumped whenever the sources or
    build arguments differ from the previous call. Versions above zero are
    built and loaded under the name ``<name>_v<version>``. The build is
    guarded by a file baton in the build directory: the caller that acquires
    it builds, every other caller waits.
    '''
    previous_version = JIT_EXTENSION_VERSIONER.get_version(name)
    version = JIT_EXTENSION_VERSIONER.bump_version_if_changed(
        name,
        sources,
        build_arguments=[extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths],
        build_directory=build_directory,
        with_cuda=with_cuda
    )
    needs_build = version != previous_version

    if version > 0:
        if needs_build and verbose:
            changed_notice = 'The input conditions for extension module {} have changed. '.format(name)
            rebuild_notice = 'Bumping to version {0} and re-building as {1}_v{0}...'.format(version, name)
            print(changed_notice + rebuild_notice)
        name = '{}_v{}'.format(name, version)

    if needs_build:
        baton = FileBaton(os.path.join(build_directory, 'lock'))
        if not baton.try_acquire():
            # Somebody else holds the baton; wait until they release it.
            baton.wait()
        else:
            try:
                _write_ninja_file_and_build(
                    name=name,
                    sources=sources,
                    extra_cflags=extra_cflags or [],
                    extra_cuda_cflags=extra_cuda_cflags or [],
                    extra_ldflags=extra_ldflags or [],
                    extra_include_paths=extra_include_paths or [],
                    build_directory=build_directory,
                    verbose=verbose,
                    with_cuda=with_cuda)
            finally:
                baton.release()
    elif verbose:
        print('No modifications detected for re-loaded extension '
              'module {}, skipping build step...'.format(name))

    if verbose:
        print('Loading extension module {}...'.format(name))
    return _import_module_from_library(name, build_directory, is_python_module)
def _write_ninja_file_and_build(name,
                                sources,
                                extra_cflags,
                                extra_cuda_cflags,
                                extra_ldflags,
                                extra_include_paths,
                                build_directory,
                                verbose,
                                with_cuda):
    '''
    Emits ``build.ninja`` into ``build_directory`` and runs the build.

    Verifies that ninja is available and that the compiler named by the
    ``CXX`` environment variable (``c++`` by default) passes the ABI check,
    which only warns. When ``with_cuda`` is ``None`` it is derived from
    whether any of ``sources`` is a CUDA file.
    '''
    verify_ninja_availability()
    check_compiler_abi_compatibility(os.environ.get('CXX', 'c++'))
    if with_cuda is None:
        with_cuda = any(map(_is_cuda_file, sources))
    extra_ldflags = _prepare_ldflags(
        extra_ldflags or [],
        with_cuda,
        verbose)
    # Ninja looks for a file of exactly this name in its working directory,
    # which _build_extension_module() sets to the build directory.
    build_file_path = os.path.join(build_directory, 'build.ninja')
    if verbose:
        print(
            'Emitting ninja build file {}...'.format(build_file_path))
    # NOTE: Emitting a new ninja build file does not cause re-compilation if
    # the sources did not change, so it's ok to re-emit (and it's fast).
    _write_ninja_file(
        path=build_file_path,
        name=name,
        sources=sources,
        extra_cflags=extra_cflags or [],
        extra_cuda_cflags=extra_cuda_cflags or [],
        extra_ldflags=extra_ldflags or [],
        extra_include_paths=extra_include_paths or [],
        with_cuda=with_cuda)

    if verbose:
        print('Building extension module {}...'.format(name))
    _build_extension_module(name, build_directory, verbose)
def verify_ninja_availability():
    '''
    Returns ``True`` if the `ninja` build system is available on the system.

    Raises:
        RuntimeError: If the ``ninja`` executable cannot be launched.
    '''
    with open(os.devnull, 'wb') as devnull:
        try:
            # Only the ability to launch ninja matters; its output is discarded.
            subprocess.check_call(['ninja', '--version'], stdout=devnull)
        except OSError:
            raise RuntimeError("Ninja is required to load C++ extensions")
    return True
def _prepare_ldflags(extra_ldflags, with_cuda, verbose):
    '''
    Returns the linker flags for a JIT build.

    Starts from ``extra_ldflags`` and adds the PyTorch import libraries and
    library directories on Windows and, if ``with_cuda`` is true, the CUDA
    (and, when ``CUDNN_HOME`` is set, cuDNN) library directories plus the
    CUDA runtime library.

    The input list is not modified; a new list is returned.
    '''
    # Copy first: the list may be the very object the user passed to load(),
    # and must not grow as a side effect of building.
    ldflags = list(extra_ldflags)

    if IS_WINDOWS:
        python_path = os.path.dirname(sys.executable)
        python_lib_path = os.path.join(python_path, 'libs')

        here = os.path.abspath(__file__)
        torch_path = os.path.dirname(os.path.dirname(here))
        lib_path = os.path.join(torch_path, 'lib')

        ldflags.append('c10.lib')
        ldflags.append('caffe2.lib')
        ldflags.append('torch.lib')
        ldflags.append('torch_python.lib')
        if with_cuda:
            ldflags.append('caffe2_gpu.lib')
        ldflags.append('_C.lib')
        ldflags.append('/LIBPATH:{}'.format(python_lib_path))
        ldflags.append('/LIBPATH:{}'.format(lib_path))

    if with_cuda:
        if verbose:
            print('Detected CUDA files, patching ldflags')
        if IS_WINDOWS:
            ldflags.append('/LIBPATH:{}'.format(
                _join_cuda_home('lib/x64')))
            ldflags.append('cudart.lib')
            if CUDNN_HOME is not None:
                # A bare directory would be taken for an input file by the
                # linker; it has to be passed as a library search path.
                ldflags.append('/LIBPATH:{}'.format(
                    os.path.join(CUDNN_HOME, 'lib/x64')))
        else:
            ldflags.append('-L{}'.format(_join_cuda_home('lib64')))
            ldflags.append('-lcudart')
            if CUDNN_HOME is not None:
                ldflags.append('-L{}'.format(os.path.join(CUDNN_HOME, 'lib64')))

    return ldflags
921 def _get_build_directory(name, verbose):
922  root_extensions_directory = os.environ.get('TORCH_EXTENSIONS_DIR')
923  if root_extensions_directory is None:
924  root_extensions_directory = get_default_build_root()
926  if verbose:
927  print('Using {} as PyTorch extensions root...'.format(
928  root_extensions_directory))
930  build_directory = os.path.join(root_extensions_directory, name)
931  if not os.path.exists(build_directory):
932  if verbose:
933  print('Creating extension directory {}...'.format(build_directory))
934  # This is like mkdir -p, i.e. will also create parent directories.
935  os.makedirs(build_directory)
937  return build_directory
940 def _build_extension_module(name, build_directory, verbose):
941  try:
942  sys.stdout.flush()
943  sys.stderr.flush()
944  if sys.version_info >= (3, 5):
946  ['ninja', '-v'],
947  stdout=None if verbose else subprocess.PIPE,
948  stderr=subprocess.STDOUT,
949  cwd=build_directory,
950  check=True)
951  else:
952  subprocess.check_output(
953  ['ninja', '-v'],
954  stderr=subprocess.STDOUT,
955  cwd=build_directory)
956  except subprocess.CalledProcessError:
957  # Python 2 and 3 compatible way of getting the error object.
958  _, error, _ = sys.exc_info()
959  # error.output contains the stdout and stderr of the build attempt.
960  message = "Error building extension '{}'".format(name)
961  if hasattr(error, 'output') and error.output:
962  message += ": {}".format(str(error.output))
963  raise RuntimeError(message)
966 def _import_module_from_library(module_name, path, is_python_module):
967  #
968  file, path, description = imp.find_module(module_name, [path])
969  # Close the .so file after load.
970  with file:
971  if is_python_module:
972  return imp.load_module(module_name, file, path, description)
973  else:
974  torch.ops.load_library(path)
def _write_ninja_file(path,
                      name,
                      sources,
                      extra_cflags,
                      extra_cuda_cflags,
                      extra_ldflags,
                      extra_include_paths,
                      with_cuda):
    '''
    Writes the ninja build file that compiles and links an extension.

    The file contains the tool configuration, the flag variables, the
    compile/link rules, one build statement per source file, the link
    statement and the default target.

    Arguments:
        path: path of the ninja build file to write.
        name: name of the extension; used for ``TORCH_EXTENSION_NAME`` and
            as the base name of the linked library.
        sources: list of source file paths.
        extra_cflags: list of extra flags for the C++ compiler.
        extra_cuda_cflags: list of extra flags for nvcc.
        extra_ldflags: list of extra flags for the linker.
        extra_include_paths: list of extra include directories.
        with_cuda: whether to emit the nvcc configuration and CUDA rule.
    '''
    if IS_WINDOWS:
        # Only used on the Windows code paths below.
        from distutils.spawn import _nt_quote_args

    extra_cflags = [flag.strip() for flag in extra_cflags]
    extra_cuda_cflags = [flag.strip() for flag in extra_cuda_cflags]
    extra_ldflags = [flag.strip() for flag in extra_ldflags]
    extra_include_paths = [flag.strip() for flag in extra_include_paths]

    # Version 1.3 is required for the `deps` directive.
    config = ['ninja_required_version = 1.3']
    config.append('cxx = {}'.format(os.environ.get('CXX', 'c++')))
    if with_cuda:
        config.append('nvcc = {}'.format(_join_cuda_home('bin', 'nvcc')))

    # Turn into absolute paths so we can emit them into the ninja build
    # file wherever it is.
    sources = [os.path.abspath(source) for source in sources]
    user_includes = [
        os.path.abspath(include) for include in extra_include_paths
    ]

    # include_paths() gives us the location of torch/extension.h
    system_includes = include_paths(with_cuda)
    # sysconfig.get_paths()['include'] gives us the location of Python.h
    system_includes.append(sysconfig.get_paths()['include'])

    # Windows does not understand `-isystem`, so pass everything via `-I`.
    if IS_WINDOWS:
        user_includes += system_includes
        # Rebind instead of calling list.clear(), which Python 2 lacks.
        system_includes = []

    common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
    common_cflags.append('-DTORCH_API_INCLUDE_EXTENSION_H')
    common_cflags += ['-I{}'.format(include) for include in user_includes]
    common_cflags += [
        '-isystem {}'.format(include) for include in system_includes
    ]

    if _is_binary_build():
        common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0']

    cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
    if IS_WINDOWS:
        cflags = _nt_quote_args(cflags)
    flags = ['cflags = {}'.format(' '.join(cflags))]

    if with_cuda:
        cuda_flags = common_cflags + COMMON_NVCC_FLAGS
        if IS_WINDOWS:
            cuda_flags = _nt_quote_args(cuda_flags)
            # Honor the user's nvcc flags on Windows as well.
            cuda_flags += _nt_quote_args(extra_cuda_cflags)
        else:
            cuda_flags += ['--compiler-options', "'-fPIC'"]
            cuda_flags += extra_cuda_cflags
            # Only default the language standard if the user did not pick one.
            if not any(flag.startswith('-std=') for flag in cuda_flags):
                cuda_flags.append('-std=c++11')

        flags.append('cuda_flags = {}'.format(' '.join(cuda_flags)))

    if IS_WINDOWS:
        ldflags = ['/DLL'] + extra_ldflags
    else:
        ldflags = ['-shared'] + extra_ldflags
    # The darwin linker needs explicit consent to ignore unresolved symbols.
    if sys.platform.startswith('darwin'):
        ldflags.append('-undefined dynamic_lookup')
    elif IS_WINDOWS:
        ldflags = _nt_quote_args(ldflags)
    flags.append('ldflags = {}'.format(' '.join(ldflags)))

    # See the ninja manual for the build file syntax used below.
    compile_rule = ['rule compile']
    if IS_WINDOWS:
        compile_rule.append(
            ' command = cl /showIncludes $cflags -c $in /Fo$out')
        compile_rule.append(' deps = msvc')
    else:
        compile_rule.append(
            ' command = $cxx -MMD -MF $out.d $cflags -c $in -o $out')
        compile_rule.append(' depfile = $out.d')
        compile_rule.append(' deps = gcc')

    if with_cuda:
        cuda_compile_rule = ['rule cuda_compile']
        cuda_compile_rule.append(
            ' command = $nvcc $cuda_flags -c $in -o $out')

    link_rule = ['rule link']
    if IS_WINDOWS:
        # Use the link.exe that sits next to the first `cl` found by `where`.
        cl_paths = subprocess.check_output(['where',
                                            'cl']).decode().split('\r\n')
        if len(cl_paths) >= 1:
            # ':' must be escaped as '$:' inside a ninja file.
            cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:')
        else:
            raise RuntimeError("MSVC is required to load C++ extensions")
        link_rule.append(
            ' command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format(
                cl_path))
    else:
        link_rule.append(' command = $cxx $in $ldflags -o $out')

    # Emit one build rule per source to enable incremental build.
    object_files = []
    build = []
    for source_file in sources:
        # '/path/to/file.cpp' -> 'file'
        file_name = os.path.splitext(os.path.basename(source_file))[0]
        if _is_cuda_file(source_file) and with_cuda:
            rule = 'cuda_compile'
            # Use a different object filename in case a C++ and CUDA file have
            # the same filename but different extension (.cpp vs. .cu).
            target = '{}.cuda.o'.format(file_name)
        else:
            rule = 'compile'
            target = '{}.o'.format(file_name)
        object_files.append(target)
        if IS_WINDOWS:
            source_file = source_file.replace(':', '$:')
        # Spaces in paths must be escaped as '$ ' inside a ninja file.
        source_file = source_file.replace(" ", "$ ")
        build.append('build {}: {} {}'.format(target, rule, source_file))

    ext = 'pyd' if IS_WINDOWS else 'so'
    library_target = '{}.{}'.format(name, ext)

    link = ['build {}: link {}'.format(library_target, ' '.join(object_files))]

    default = ['default {}'.format(library_target)]

    # 'Blocks' should be separated by newlines, for visual benefit.
    blocks = [config, flags, compile_rule]
    if with_cuda:
        blocks.append(cuda_compile_rule)
    blocks += [link_rule, build, link, default]
    with open(path, 'w') as build_file:
        for block in blocks:
            lines = '\n'.join(block)
            build_file.write('{}\n\n'.format(lines))
def _join_cuda_home(*paths):
    '''
    Joins ``paths`` onto ``CUDA_HOME``.

    Raising here defers the "missing CUDA_HOME" error until a CUDA-specific
    path is actually requested.

    Arguments:
        *paths: path components to append to ``CUDA_HOME``.

    Returns:
        The joined path.

    Raises:
        EnvironmentError: if ``CUDA_HOME`` is ``None``.
    '''
    if CUDA_HOME is not None:
        return os.path.join(CUDA_HOME, *paths)
    raise EnvironmentError('CUDA_HOME environment variable is not set. '
                           'Please set it to your CUDA install root.')
1130 def _is_cuda_file(path):
1131  return os.path.splitext(path)[1] in ['.cu', '.cuh']
Module caffe2.python.layers.split.
def _add_gnu_abi_flag_if_binary(self, extension)
def is_available()
def _add_compile_flag(self, extension, flag)
def _define_torch_extension_name(self, extension)