Caffe2 - Python API
A deep learning, cross platform ML framework
cpp_extension.py
1 from __future__ import absolute_import, division, print_function, unicode_literals
2 import copy
3 import glob
4 import imp
5 import os
6 import re
7 import setuptools
8 import subprocess
9 import sys
10 import sysconfig
11 import tempfile
12 import warnings
13 
14 import torch
15 from .file_baton import FileBaton
16 from ._cpp_extension_versioner import ExtensionVersioner
17 
18 from setuptools.command.build_ext import build_ext
19 
20 
# True when running under native Windows; selects MSVC-specific behavior below.
IS_WINDOWS = sys.platform == 'win32'
22 
23 
def _find_cuda_home():
    '''Finds the CUDA install path, or returns ``None`` if it cannot be found.'''
    # Guess #1: explicit environment override.
    home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
    if home is None:
        # Guess #2: the conventional install location for the platform.
        if IS_WINDOWS:
            candidates = glob.glob(
                'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
            home = candidates[0] if candidates else ''
        else:
            home = '/usr/local/cuda'
        if not os.path.exists(home):
            # Guess #3: derive it from the location of nvcc on the PATH.
            try:
                finder = 'where' if IS_WINDOWS else 'which'
                nvcc_path = subprocess.check_output(
                    [finder, 'nvcc']).decode().rstrip('\r\n')
                home = os.path.dirname(os.path.dirname(nvcc_path))
            except Exception:
                home = None
    if home and not torch.cuda.is_available():
        print("No CUDA runtime is found, using CUDA_HOME='{}'".format(home))
    return home
51 
52 
# Oldest toolchains whose C++ ABI is compatible with the shipped PyTorch binaries.
MINIMUM_GCC_VERSION = (4, 9, 0)
MINIMUM_MSVC_VERSION = (19, 0, 24215)
# Emitted (via warnings.warn) when the user's compiler version is below the
# minimum; the single {} placeholder receives "<compiler> <version>".
ABI_INCOMPATIBILITY_WARNING = '''

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({}) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 4.9 or higher.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!
'''
# Emitted when the user's compiler family does not match the one PyTorch was
# built with on this platform. (Fixed a duplicated word: "to to compile".)
WRONG_COMPILER_WARNING = '''

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({user_compiler}) is not compatible with the compiler Pytorch was
built with for this platform, which is {pytorch_compiler} on {platform}. Please
use {pytorch_compiler} to compile your extension. Alternatively, you may
compile PyTorch from source using {user_compiler}, and then you can also use
{user_compiler} to compile your extension.

See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help
with compiling PyTorch from source.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!
'''
# Resolved CUDA toolkit root (env var, conventional path, or nvcc location);
# None when no toolkit could be found.
CUDA_HOME = _find_cuda_home()
# cuDNN root, taken only from the environment; None when unset.
CUDNN_HOME = os.environ.get('CUDNN_HOME') or os.environ.get('CUDNN_PATH')
# PyTorch releases have the version pattern major.minor.patch, whereas when
# PyTorch is built from source, we append the git commit hash, which gives
# it the below pattern.
BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')

# Flags always passed to nvcc; they disable half-precision operator overloads
# that clash with PyTorch's own Half type.
COMMON_NVCC_FLAGS = [
    '-D__CUDA_NO_HALF_OPERATORS__',
    '-D__CUDA_NO_HALF_CONVERSIONS__',
    '-D__CUDA_NO_HALF2_OPERATORS__',
]


# Process-wide tracker used by _jit_compile to bump an extension's version
# (and trigger a rebuild) when its sources or build arguments change.
JIT_EXTENSION_VERSIONER = ExtensionVersioner()
102 
103 
def _is_binary_build():
    # A source build's version string ends with "+<git sha>", which the
    # pattern matches; a binary release ("major.minor.patch") does not.
    matched = BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__)
    return matched is None
106 
107 
108 def _accepted_compilers_for_platform():
109  return ['clang++', 'clang'] if sys.platform.startswith('darwin') else ['g++', 'gcc']
110 
111 
def get_default_build_root():
    '''
    Returns the path to the root folder under which extensions will built.

    Each extension module gets its own subfolder beneath this root: for a
    root ``p`` and an extension named ``ext``, the build folder is ``p/ext``.
    '''
    # tempfile.gettempdir() is /tmp on UNIX and \TEMP on Windows.
    root = os.path.join(tempfile.gettempdir(), 'torch_extensions')
    return os.path.realpath(root)
123 
124 
def check_compiler_ok_for_platform(compiler):
    '''
    Verifies that the compiler is the expected one for the current platform.

    Arguments:
        compiler (str): The compiler executable to check.

    Returns:
        True if the compiler is gcc/g++ on Linux or clang/clang++ on macOS,
        and always True for Windows.
    '''
    if IS_WINDOWS:
        return True
    located = subprocess.check_output(['which', compiler], stderr=subprocess.STDOUT)
    # Resolve symlinks with os.path.realpath, in particular 'c++' -> e.g. 'g++'.
    resolved_path = os.path.realpath(located.decode().strip())
    accepted = _accepted_compilers_for_platform()
    return any(name in resolved_path for name in accepted)
142 
143 
def check_compiler_abi_compatibility(compiler):
    '''
    Verifies that the given compiler is ABI-compatible with PyTorch.

    Arguments:
        compiler (str): The compiler executable name to check (e.g. ``g++``).
            Must be executable in a shell process.

    Returns:
        False if the compiler is (likely) ABI-incompatible with PyTorch,
        else True.
    '''
    # Source builds use the user's own toolchain for PyTorch itself, so the
    # ABI check only matters for binary releases.
    if not _is_binary_build():
        return True
    if os.environ.get('TORCH_DONT_CHECK_COMPILER_ABI') in ['ON', '1', 'YES', 'TRUE', 'Y']:
        return True

    # First check if the compiler is one of the expected ones for the particular platform.
    if not check_compiler_ok_for_platform(compiler):
        warnings.warn(WRONG_COMPILER_WARNING.format(
            user_compiler=compiler,
            pytorch_compiler=_accepted_compilers_for_platform()[0],
            platform=sys.platform))
        return False

    if sys.platform.startswith('darwin'):
        # There is no particular minimum version we need for clang, so we're good here.
        return True
    try:
        if sys.platform.startswith('linux'):
            minimum_required_version = MINIMUM_GCC_VERSION
            # -dumpfullversion for gcc >= 7, -dumpversion as a fallback on older gcc.
            version = subprocess.check_output([compiler, '-dumpfullversion', '-dumpversion'])
            version = version.decode().strip().split('.')
        else:
            minimum_required_version = MINIMUM_MSVC_VERSION
            # MSVC's cl.exe prints its version in the banner on stderr.
            compiler_info = subprocess.check_output(compiler, stderr=subprocess.STDOUT)
            match = re.search(r'(\d+)\.(\d+)\.(\d+)', compiler_info.decode().strip())
            # BUGFIX: use string components here; the original used (0, 0, 0),
            # which made `".".join(version)` below raise a TypeError whenever
            # the version could not be parsed.
            version = ('0', '0', '0') if match is None else match.groups()
    except Exception:
        _, error, _ = sys.exc_info()
        warnings.warn('Error checking compiler version for {}: {}'.format(compiler, error))
        return False

    if tuple(map(int, version)) >= minimum_required_version:
        return True

    compiler = '{} {}'.format(compiler, ".".join(version))
    warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler))

    return False
194 
195 
196 # See below for why we inherit BuildExtension from object.
197 # https://stackoverflow.com/questions/1713038/super-fails-with-error-typeerror-argument-1-must-be-type-not-classobj-when
198 
199 
class BuildExtension(build_ext, object):
    '''
    A custom :mod:`setuptools` build extension .

    This :class:`setuptools.build_ext` subclass takes care of passing the
    minimum required compiler flags (e.g. ``-std=c++11``) as well as mixed
    C++/CUDA compilation (and support for CUDA files in general).

    When using :class:`BuildExtension`, it is allowed to supply a dictionary
    for ``extra_compile_args`` (rather than the usual list) that maps from
    languages (``cxx`` or ``cuda``) to a list of additional compiler flags to
    supply to the compiler. This makes it possible to supply different flags to
    the C++ and CUDA compiler during mixed compilation.
    '''

    @classmethod
    def with_options(cls, **options):
        '''
        Returns an alternative constructor that extends any original keyword
        arguments to the original constructor with the given options.
        '''
        def init_with_options(*args, **kwargs):
            kwargs = kwargs.copy()
            kwargs.update(options)
            return cls(*args, **kwargs)
        return init_with_options

    def __init__(self, *args, **kwargs):
        super(BuildExtension, self).__init__(*args, **kwargs)
        # When True, get_ext_filename() drops the Python ABI tag
        # (e.g. "cpython-37m-x86_64-linux-gnu") from the output file name.
        self.no_python_abi_suffix = kwargs.get("no_python_abi_suffix", False)

    def build_extensions(self):
        # Warns (does not raise) if the configured compiler looks
        # ABI-incompatible with the PyTorch binaries.
        self._check_abi()
        for extension in self.extensions:
            self._add_compile_flag(extension, '-DTORCH_API_INCLUDE_EXTENSION_H')
            self._define_torch_extension_name(extension)
            self._add_gnu_abi_flag_if_binary(extension)

        # Register .cu and .cuh as valid source extensions.
        self.compiler.src_extensions += ['.cu', '.cuh']
        # Save the original _compile method for later.
        if self.compiler.compiler_type == 'msvc':
            self.compiler._cpp_extensions += ['.cu', '.cuh']
            original_compile = self.compiler.compile
            original_spawn = self.compiler.spawn
        else:
            original_compile = self.compiler._compile

        def unix_wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
            # Wrapper around distutils' per-file _compile: routes CUDA
            # sources to nvcc and everything else to the original compiler.
            # Copy before we make any modifications.
            cflags = copy.deepcopy(extra_postargs)
            try:
                original_compiler = self.compiler.compiler_so
                if _is_cuda_file(src):
                    # Temporarily swap the compiler executable for nvcc.
                    nvcc = _join_cuda_home('bin', 'nvcc')
                    if not isinstance(nvcc, list):
                        nvcc = [nvcc]
                    self.compiler.set_executable('compiler_so', nvcc)
                    if isinstance(cflags, dict):
                        cflags = cflags['nvcc']
                    cflags = COMMON_NVCC_FLAGS + ['--compiler-options', "'-fPIC'"] + cflags
                elif isinstance(cflags, dict):
                    cflags = cflags['cxx']
                # NVCC does not allow multiple -std to be passed, so we avoid
                # overriding the option if the user explicitly passed it.
                if not any(flag.startswith('-std=') for flag in cflags):
                    cflags.append('-std=c++11')

                original_compile(obj, src, ext, cc_args, cflags, pp_opts)
            finally:
                # Put the original compiler back in place.
                self.compiler.set_executable('compiler_so', original_compiler)

        def win_wrap_compile(sources,
                             output_dir=None,
                             macros=None,
                             include_dirs=None,
                             debug=0,
                             extra_preargs=None,
                             extra_postargs=None,
                             depends=None):
            # MSVC compiles in one batch, so instead of wrapping a per-file
            # compile hook we intercept the spawned command line and rewrite
            # it for nvcc when the source is a CUDA file.
            self.cflags = copy.deepcopy(extra_postargs)
            extra_postargs = None

            def spawn(cmd):
                # Using regex to match src, obj and include files
                src_regex = re.compile('/T(p|c)(.*)')
                src_list = [
                    m.group(2) for m in (src_regex.match(elem) for elem in cmd)
                    if m
                ]

                obj_regex = re.compile('/Fo(.*)')
                obj_list = [
                    m.group(1) for m in (obj_regex.match(elem) for elem in cmd)
                    if m
                ]

                include_regex = re.compile(r'((\-|\/)I.*)')
                include_list = [
                    m.group(1)
                    for m in (include_regex.match(elem) for elem in cmd) if m
                ]

                if len(src_list) >= 1 and len(obj_list) >= 1:
                    src = src_list[0]
                    obj = obj_list[0]
                    if _is_cuda_file(src):
                        # Replace the whole cl.exe command with an nvcc one.
                        nvcc = _join_cuda_home('bin', 'nvcc')
                        if isinstance(self.cflags, dict):
                            cflags = self.cflags['nvcc']
                        elif isinstance(self.cflags, list):
                            cflags = self.cflags
                        else:
                            cflags = []
                        cmd = [
                            nvcc, '-c', src, '-o', obj, '-Xcompiler',
                            '/wd4819', '-Xcompiler', '/MD'
                        ] + include_list + cflags
                    elif isinstance(self.cflags, dict):
                        cflags = self.cflags['cxx'] + ['/MD']
                        cmd += cflags
                    elif isinstance(self.cflags, list):
                        cflags = self.cflags + ['/MD']
                        cmd += cflags

                return original_spawn(cmd)

            try:
                self.compiler.spawn = spawn
                return original_compile(sources, output_dir, macros,
                                        include_dirs, debug, extra_preargs,
                                        extra_postargs, depends)
            finally:
                self.compiler.spawn = original_spawn

        # Monkey-patch the _compile method.
        if self.compiler.compiler_type == 'msvc':
            self.compiler.compile = win_wrap_compile
        else:
            self.compiler._compile = unix_wrap_compile

        build_ext.build_extensions(self)

    def get_ext_filename(self, ext_name):
        # Get the original shared library name. For Python 3, this name will be
        # suffixed with "<SOABI>.so", where <SOABI> will be something like
        # cpython-37m-x86_64-linux-gnu. On Python 2, there is no such ABI name.
        # The final extension, .so, would be .lib/.dll on Windows of course.
        ext_filename = super(BuildExtension, self).get_ext_filename(ext_name)
        # If `no_python_abi_suffix` is `True`, we omit the Python 3 ABI
        # component. This makes building shared libraries with setuptools that
        # aren't Python modules nicer.
        if self.no_python_abi_suffix and sys.version_info >= (3, 0):
            # The parts will be e.g. ["my_extension", "cpython-37m-x86_64-linux-gnu", "so"].
            ext_filename_parts = ext_filename.split('.')
            # Omit the second to last element.
            without_abi = ext_filename_parts[:-2] + ext_filename_parts[-1:]
            ext_filename = '.'.join(without_abi)
        return ext_filename

    def _check_abi(self):
        # On some platforms, like Windows, compiler_cxx is not available.
        if hasattr(self.compiler, 'compiler_cxx'):
            compiler = self.compiler.compiler_cxx[0]
        elif IS_WINDOWS:
            compiler = os.environ.get('CXX', 'cl')
        else:
            compiler = os.environ.get('CXX', 'c++')
        # Only warns on mismatch; never aborts the build.
        check_compiler_abi_compatibility(compiler)

    def _add_compile_flag(self, extension, flag):
        # Shallow-copy the args container so that appending here does not
        # mutate an object shared with other extensions. NOTE(review): for
        # the dict form, the inner per-language lists are still shared.
        extension.extra_compile_args = copy.copy(extension.extra_compile_args)
        if isinstance(extension.extra_compile_args, dict):
            for args in extension.extra_compile_args.values():
                args.append(flag)
        else:
            extension.extra_compile_args.append(flag)

    def _define_torch_extension_name(self, extension):
        # pybind11 doesn't support dots in the names
        # so in order to support extensions in the packages
        # like torch._C, we take the last part of the string
        # as the library name
        names = extension.name.split('.')
        name = names[-1]
        define = '-DTORCH_EXTENSION_NAME={}'.format(name)
        self._add_compile_flag(extension, define)

    def _add_gnu_abi_flag_if_binary(self, extension):
        # If the version string looks like a binary build,
        # we know that PyTorch was compiled with gcc 4.9.2.
        # if the extension is compiled with gcc >= 5.1,
        # then we have to define _GLIBCXX_USE_CXX11_ABI=0
        # so that the std::string in the API is resolved to
        # non-C++11 symbols
        if _is_binary_build():
            self._add_compile_flag(extension, '-D_GLIBCXX_USE_CXX11_ABI=0')
399 
400 
def CppExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for C++.

    A convenience wrapper that fills in the minimal arguments (include paths,
    and on Windows the Torch libraries/library dirs) needed to build a C++
    extension, then forwards everything to the :class:`setuptools.Extension`
    constructor.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CppExtension
        >>> setup(
                name='extension',
                ext_modules=[
                    CppExtension(
                        name='extension',
                        sources=['extension.cpp'],
                        extra_compile_args=['-g']),
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    include_dirs = kwargs.get('include_dirs', [])
    include_dirs += include_paths()
    kwargs['include_dirs'] = include_dirs

    if IS_WINDOWS:
        library_dirs = kwargs.get('library_dirs', [])
        library_dirs += library_paths()
        kwargs['library_dirs'] = library_dirs

        # Import libraries the extension must link against on Windows.
        libraries = kwargs.get('libraries', [])
        libraries.extend(['c10', 'caffe2', 'torch', 'torch_python', '_C'])
        kwargs['libraries'] = libraries

    kwargs['language'] = 'c++'
    return setuptools.Extension(name, sources, *args, **kwargs)
445 
446 
def CUDAExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for CUDA/C++.

    A convenience wrapper that fills in the minimal arguments needed to build
    a mixed CUDA/C++ extension: the CUDA include path, library path and
    runtime library (plus the Torch import libraries on Windows). All
    arguments are forwarded to the :class:`setuptools.Extension` constructor.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
        >>> setup(
                name='cuda_extension',
                ext_modules=[
                    CUDAExtension(
                        name='cuda_extension',
                        sources=['extension.cpp', 'extension_kernel.cu'],
                        extra_compile_args={'cxx': ['-g'],
                                            'nvcc': ['-O2']})
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    library_dirs = kwargs.get('library_dirs', [])
    library_dirs += library_paths(cuda=True)
    kwargs['library_dirs'] = library_dirs

    libraries = kwargs.get('libraries', [])
    libraries.append('cudart')
    if IS_WINDOWS:
        # Import libraries the extension must link against on Windows.
        libraries.extend(
            ['c10', 'caffe2', 'torch', 'torch_python', 'caffe2_gpu', '_C'])
    kwargs['libraries'] = libraries

    include_dirs = kwargs.get('include_dirs', [])
    include_dirs += include_paths(cuda=True)
    kwargs['include_dirs'] = include_dirs

    kwargs['language'] = 'c++'

    return setuptools.Extension(name, sources, *args, **kwargs)
497 
498 
def include_paths(cuda=False):
    '''
    Get the include paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, includes CUDA-specific include paths.

    Returns:
        A list of include path strings.
    '''
    torch_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    lib_include = os.path.join(torch_dir, 'include')
    paths = [
        lib_include,
        # Remove this once torch/torch.h is officially no longer supported for C++ extensions.
        os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'),
        # Some internal (old) Torch headers don't properly prefix their includes,
        # so we need to pass -Itorch/lib/include/TH as well.
        os.path.join(lib_include, 'TH'),
        os.path.join(lib_include, 'THC'),
    ]
    if cuda:
        cuda_home_include = _join_cuda_home('include')
        # if we have the Debian/Ubuntu packages for cuda, we get /usr as cuda home.
        # but gcc dosn't like having /usr/include passed explicitly
        if cuda_home_include != '/usr/include':
            paths.append(cuda_home_include)
        if CUDNN_HOME is not None:
            paths.append(os.path.join(CUDNN_HOME, 'include'))
    return paths
530 
531 
def library_paths(cuda=False):
    '''
    Get the library paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, includes CUDA-specific library paths.

    Returns:
        A list of library path strings.
    '''
    paths = []

    if IS_WINDOWS:
        # On Windows the Torch import libraries live in <torch>/lib.
        torch_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        paths.append(os.path.join(torch_dir, 'lib'))

    if cuda:
        lib_dir = 'lib/x64' if IS_WINDOWS else 'lib64'
        paths.append(_join_cuda_home(lib_dir))
        if CUDNN_HOME is not None:
            paths.append(os.path.join(CUDNN_HOME, lib_dir))
    return paths
557 
558 
def load(name,
         sources,
         extra_cflags=None,
         extra_cuda_cflags=None,
         extra_ldflags=None,
         extra_include_paths=None,
         build_directory=None,
         verbose=False,
         with_cuda=None,
         is_python_module=True):
    '''
    Loads a PyTorch C++ extension just-in-time (JIT).

    A Ninja build file is emitted into the build directory and used to
    compile ``sources`` into a dynamic library, which is then imported into
    the current process as a Python module and returned, ready for use.

    By default the build happens in ``<tmp>/torch_extensions/<name>``, where
    ``<tmp>`` is the platform's temporary folder. That location can be
    overridden globally via the ``TORCH_EXTENSIONS_DIR`` environment variable
    (which replaces ``<tmp>/torch_extensions``), or per call via the
    ``build_directory`` argument (which replaces the entire path).

    Compilation uses the system compiler (``c++``), overridable through the
    ``CXX`` environment variable. CUDA sources (``.cu``/``.cuh``) are detected
    and compiled with nvcc instead; the CUDA lib64 directory and ``cudart``
    are linked automatically. The heuristics for locating the CUDA install
    usually work; when they don't, set the ``CUDA_HOME`` environment variable.

    Args:
        name: The name of the extension to build. This MUST be the same as the
            name of the pybind11 module!
        sources: A list of relative or absolute paths to C++ source files
            (a single path string is also accepted).
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        build_directory: optional path to use as build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.cu`` or
            ``.cuh`` in ``sources``. Set it to ``True`` to force CUDA headers
            and libraries to be included.
        is_python_module: If ``True`` (default), imports the produced shared
            library as a Python module. If ``False``, loads it into the
            process as a plain dynamic library.

    Returns:
        If ``is_python_module`` is ``True``, returns the loaded PyTorch
        extension as a Python module. If ``is_python_module`` is ``False``
        returns nothing (the shared library is loaded into the process as a
        side effect).

    Example:
        >>> from torch.utils.cpp_extension import load
        >>> module = load(
                name='extension',
                sources=['extension.cpp', 'extension_kernel.cu'],
                extra_cflags=['-O2'],
                verbose=True)
    '''
    # Accept a bare string as a single-source convenience.
    if isinstance(sources, str):
        sources = [sources]
    return _jit_compile(
        name,
        sources,
        extra_cflags,
        extra_cuda_cflags,
        extra_ldflags,
        extra_include_paths,
        build_directory or _get_build_directory(name, verbose),
        verbose,
        with_cuda,
        is_python_module)
650 
651 
def load_inline(name,
                cpp_sources,
                cuda_sources=None,
                functions=None,
                extra_cflags=None,
                extra_cuda_cflags=None,
                extra_ldflags=None,
                extra_include_paths=None,
                build_directory=None,
                verbose=False,
                with_cuda=None,
                is_python_module=True):
    '''
    Loads a PyTorch C++ extension just-in-time (JIT) from string sources.

    This function behaves exactly like :func:`load`, but takes its sources as
    strings rather than filenames. These strings are stored to files in the
    build directory, after which the behavior of :func:`load_inline` is
    identical to :func:`load`.

    See `the
    tests <https://github.com/pytorch/pytorch/blob/master/test/test_cpp_extensions.py>`_
    for good examples of using this function.

    Sources may omit two required parts of a typical non-inline C++ extension:
    the necessary header includes, as well as the (pybind11) binding code. More
    precisely, strings passed to ``cpp_sources`` are first concatenated into a
    single ``.cpp`` file. This file is then prepended with ``#include
    <torch/extension.h>``.

    Furthermore, if the ``functions`` argument is supplied, bindings will be
    automatically generated for each function specified. ``functions`` can
    either be a list of function names, or a dictionary mapping from function
    names to docstrings. If a list is given, the name of each function is used
    as its docstring.

    The sources in ``cuda_sources`` are concatenated into a separate ``.cu``
    file and prepended with ``torch/types.h``, ``cuda.h`` and
    ``cuda_runtime.h`` includes. The ``.cpp`` and ``.cu`` files are compiled
    separately, but ultimately linked into a single library. Note that no
    bindings are generated for functions in ``cuda_sources`` per se. To bind
    to a CUDA kernel, you must create a C++ function that calls it, and either
    declare or define this C++ function in one of the ``cpp_sources`` (and
    include its name in ``functions``).

    See :func:`load` for a description of arguments omitted below.

    Args:
        cpp_sources: A string, or list of strings, containing C++ source code.
        cuda_sources: A string, or list of strings, containing CUDA source code.
        functions: A list of function names for which to generate function
            bindings. If a dictionary is given, it should map function names to
            docstrings (which are otherwise just the function names).
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on whether ``cuda_sources`` is
            provided. Set it to `True`` to force CUDA headers
            and libraries to be included.

    Example:
        >>> from torch.utils.cpp_extension import load_inline
        >>> source = \'\'\'
        at::Tensor sin_add(at::Tensor x, at::Tensor y) {
          return x.sin() + y.sin();
        }
        \'\'\'
        >>> module = load_inline(name='inline_extension',
                                 cpp_sources=[source],
                                 functions=['sin_add'])
    '''
    build_directory = build_directory or _get_build_directory(name, verbose)

    # Normalize both source arguments to lists of strings.
    # NOTE(review): when the caller passes lists, the insert/append calls
    # below mutate those lists in place.
    if isinstance(cpp_sources, str):
        cpp_sources = [cpp_sources]
    cuda_sources = cuda_sources or []
    if isinstance(cuda_sources, str):
        cuda_sources = [cuda_sources]

    # Every inline extension gets the full PyTorch extension header.
    cpp_sources.insert(0, '#include <torch/extension.h>')

    # If `functions` is supplied, we create the pybind11 bindings for the user.
    # Here, `functions` is (or becomes, after some processing) a map from
    # function names to function docstrings.
    if functions is not None:
        cpp_sources.append('PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {')
        if isinstance(functions, str):
            functions = [functions]
        if isinstance(functions, list):
            # Make the function docstring the same as the function name.
            functions = dict((f, f) for f in functions)
        elif not isinstance(functions, dict):
            raise ValueError(
                "Expected 'functions' to be a list or dict, but was {}".format(
                    type(functions)))
        for function_name, docstring in functions.items():
            cpp_sources.append('m.def("{0}", &{0}, "{1}");'.format(
                function_name, docstring))
        cpp_sources.append('}')

    # Write the concatenated C++ sources to main.cpp in the build directory.
    cpp_source_path = os.path.join(build_directory, 'main.cpp')
    with open(cpp_source_path, 'w') as cpp_source_file:
        cpp_source_file.write('\n'.join(cpp_sources))

    sources = [cpp_source_path]

    if cuda_sources:
        # CUDA sources get the core CUDA headers and go into a separate .cu
        # file, compiled by nvcc and linked with the C++ object.
        cuda_sources.insert(0, '#include <torch/types.h>')
        cuda_sources.insert(1, '#include <cuda.h>')
        cuda_sources.insert(2, '#include <cuda_runtime.h>')

        cuda_source_path = os.path.join(build_directory, 'cuda.cu')
        with open(cuda_source_path, 'w') as cuda_source_file:
            cuda_source_file.write('\n'.join(cuda_sources))

        sources.append(cuda_source_path)

    return _jit_compile(
        name,
        sources,
        extra_cflags,
        extra_cuda_cflags,
        extra_ldflags,
        extra_include_paths,
        build_directory,
        verbose,
        with_cuda,
        is_python_module)
779 
780 
def _jit_compile(name,
                 sources,
                 extra_cflags,
                 extra_cuda_cflags,
                 extra_ldflags,
                 extra_include_paths,
                 build_directory,
                 verbose,
                 with_cuda,
                 is_python_module):
    # Core of load()/load_inline(): version the extension, rebuild it when its
    # inputs changed (guarded by a file-based baton so concurrent processes
    # don't race on the same build directory), then import the library.
    old_version = JIT_EXTENSION_VERSIONER.get_version(name)
    # Bumps the stored version when sources or build arguments differ from
    # what was last built for this name.
    version = JIT_EXTENSION_VERSIONER.bump_version_if_changed(
        name,
        sources,
        build_arguments=[extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths],
        build_directory=build_directory,
        with_cuda=with_cuda
    )
    if version > 0:
        if version != old_version and verbose:
            print('The input conditions for extension module {} have changed. '.format(name) +
                  'Bumping to version {0} and re-building as {1}_v{0}...'.format(version, name))
        # Versioned modules load under a distinct name so the already-imported
        # older version in this process is not shadowed.
        name = '{}_v{}'.format(name, version)

    if version != old_version:
        baton = FileBaton(os.path.join(build_directory, 'lock'))
        # Only the process that acquires the baton builds; the others wait for
        # it to finish.
        if baton.try_acquire():
            try:
                _write_ninja_file_and_build(
                    name=name,
                    sources=sources,
                    extra_cflags=extra_cflags or [],
                    extra_cuda_cflags=extra_cuda_cflags or [],
                    extra_ldflags=extra_ldflags or [],
                    extra_include_paths=extra_include_paths or [],
                    build_directory=build_directory,
                    verbose=verbose,
                    with_cuda=with_cuda)
            finally:
                baton.release()
        else:
            baton.wait()
    elif verbose:
        print('No modifications detected for re-loaded extension '
              'module {}, skipping build step...'.format(name))

    if verbose:
        print('Loading extension module {}...'.format(name))
    return _import_module_from_library(name, build_directory, is_python_module)
830 
831 
def _write_ninja_file_and_build(name,
                                sources,
                                extra_cflags,
                                extra_cuda_cflags,
                                extra_ldflags,
                                extra_include_paths,
                                build_directory,
                                verbose,
                                with_cuda):
    # Emit (or refresh) the ninja build file for this extension and run the
    # build. Ninja itself skips recompilation when inputs are unchanged, so
    # re-emitting the build file on every call is cheap and safe.
    verify_ninja_availability()
    check_compiler_abi_compatibility(os.environ.get('CXX', 'c++'))

    if with_cuda is None:
        # Infer CUDA involvement from the source file extensions.
        with_cuda = any(_is_cuda_file(source) for source in sources)
    extra_ldflags = _prepare_ldflags(
        extra_ldflags or [],
        with_cuda,
        verbose)

    build_file_path = os.path.join(build_directory, 'build.ninja')
    if verbose:
        print(
            'Emitting ninja build file {}...'.format(build_file_path))
    _write_ninja_file(
        path=build_file_path,
        name=name,
        sources=sources,
        extra_cflags=extra_cflags or [],
        extra_cuda_cflags=extra_cuda_cflags or [],
        extra_ldflags=extra_ldflags or [],
        extra_include_paths=extra_include_paths or [],
        with_cuda=with_cuda)

    if verbose:
        print('Building extension module {}...'.format(name))
    _build_extension_module(name, build_directory, verbose)
868 
869 
def verify_ninja_availability():
    '''
    Returns ``True`` if the `ninja <https://ninja-build.org/>`_ build system is
    available on the system.
    '''
    devnull = open(os.devnull, 'wb')
    try:
        # Running `ninja --version` is the cheapest way to probe for ninja;
        # if the binary is missing the OS raises OSError.
        subprocess.check_call(['ninja', '--version'], stdout=devnull)
    except OSError:
        raise RuntimeError("Ninja is required to load C++ extensions")
    finally:
        devnull.close()
    return True
882 
883 
def _prepare_ldflags(extra_ldflags, with_cuda, verbose):
    '''
    Appends the platform-appropriate PyTorch (and optionally CUDA/cuDNN)
    linker flags to ``extra_ldflags`` and returns the resulting list.

    Arguments:
        extra_ldflags: list of user-supplied linker flags (mutated in place).
        with_cuda: if True, also links against the CUDA runtime.
        verbose: if True, prints a note when CUDA link flags are patched in.
    '''
    if IS_WINDOWS:
        python_path = os.path.dirname(sys.executable)
        python_lib_path = os.path.join(python_path, 'libs')

        here = os.path.abspath(__file__)
        torch_path = os.path.dirname(os.path.dirname(here))
        lib_path = os.path.join(torch_path, 'lib')

        extra_ldflags.append('c10.lib')
        extra_ldflags.append('caffe2.lib')
        extra_ldflags.append('torch.lib')
        extra_ldflags.append('torch_python.lib')
        if with_cuda:
            extra_ldflags.append('caffe2_gpu.lib')
        extra_ldflags.append('_C.lib')
        extra_ldflags.append('/LIBPATH:{}'.format(python_lib_path))
        extra_ldflags.append('/LIBPATH:{}'.format(lib_path))

    if with_cuda:
        if verbose:
            print('Detected CUDA files, patching ldflags')
        if IS_WINDOWS:
            extra_ldflags.append('/LIBPATH:{}'.format(
                _join_cuda_home('lib/x64')))
            extra_ldflags.append('cudart.lib')
            if CUDNN_HOME is not None:
                # FIX: the cuDNN directory is a library *search path*, so it
                # must be passed with the /LIBPATH: prefix (previously the
                # bare directory was appended, which the MSVC linker would
                # treat as an input file). Mirrors the -L usage below.
                extra_ldflags.append('/LIBPATH:{}'.format(
                    os.path.join(CUDNN_HOME, 'lib/x64')))
        else:
            extra_ldflags.append('-L{}'.format(_join_cuda_home('lib64')))
            extra_ldflags.append('-lcudart')
            if CUDNN_HOME is not None:
                extra_ldflags.append('-L{}'.format(os.path.join(CUDNN_HOME, 'lib64')))

    return extra_ldflags
919 
920 
921 def _get_build_directory(name, verbose):
922  root_extensions_directory = os.environ.get('TORCH_EXTENSIONS_DIR')
923  if root_extensions_directory is None:
924  root_extensions_directory = get_default_build_root()
925 
926  if verbose:
927  print('Using {} as PyTorch extensions root...'.format(
928  root_extensions_directory))
929 
930  build_directory = os.path.join(root_extensions_directory, name)
931  if not os.path.exists(build_directory):
932  if verbose:
933  print('Creating extension directory {}...'.format(build_directory))
934  # This is like mkdir -p, i.e. will also create parent directories.
935  os.makedirs(build_directory)
936 
937  return build_directory
938 
939 
def _build_extension_module(name, build_directory, verbose):
    '''
    Runs ninja inside ``build_directory`` to build the extension ``name``,
    raising a RuntimeError (including the captured build log) on failure.
    '''
    try:
        # Flush our own streams first so the subprocess output does not get
        # interleaved out of order with anything printed earlier.
        sys.stdout.flush()
        sys.stderr.flush()
        command = ['ninja', '-v']
        if sys.version_info >= (3, 5):
            # Capture output unless verbose; `check=True` raises on failure.
            subprocess.run(
                command,
                stdout=None if verbose else subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=build_directory,
                check=True)
        else:
            subprocess.check_output(
                command,
                stderr=subprocess.STDOUT,
                cwd=build_directory)
    except subprocess.CalledProcessError:
        # Python 2 and 3 compatible way of getting the error object.
        _, error, _ = sys.exc_info()
        # error.output contains the stdout and stderr of the build attempt.
        message = "Error building extension '{}'".format(name)
        if hasattr(error, 'output') and error.output:
            message += ": {}".format(str(error.output))
        raise RuntimeError(message)
964 
965 
966 def _import_module_from_library(module_name, path, is_python_module):
967  # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
968  file, path, description = imp.find_module(module_name, [path])
969  # Close the .so file after load.
970  with file:
971  if is_python_module:
972  return imp.load_module(module_name, file, path, description)
973  else:
974  torch.ops.load_library(path)
975 
976 
def _write_ninja_file(path,
                      name,
                      sources,
                      extra_cflags,
                      extra_cuda_cflags,
                      extra_ldflags,
                      extra_include_paths,
                      with_cuda):
    '''
    Writes a ninja build file to ``path`` that compiles ``sources`` into a
    shared library named ``<name>.so`` (``<name>.pyd`` on Windows).

    Arguments:
        path: destination of the generated ``build.ninja`` file.
        name: extension module name; also the library base name and the value
            of the TORCH_EXTENSION_NAME macro.
        sources: list of C++ (and optionally CUDA) source files.
        extra_cflags: additional C++ compiler flags.
        extra_cuda_cflags: additional nvcc flags.
        extra_ldflags: additional linker flags.
        extra_include_paths: additional user include directories (-I).
        with_cuda: if True, emits an nvcc rule and compiles .cu files with it.
    '''
    extra_cflags = [flag.strip() for flag in extra_cflags]
    extra_cuda_cflags = [flag.strip() for flag in extra_cuda_cflags]
    extra_ldflags = [flag.strip() for flag in extra_ldflags]
    extra_include_paths = [flag.strip() for flag in extra_include_paths]

    # Version 1.3 is required for the `deps` directive.
    config = ['ninja_required_version = 1.3']
    config.append('cxx = {}'.format(os.environ.get('CXX', 'c++')))
    if with_cuda:
        config.append('nvcc = {}'.format(_join_cuda_home('bin', 'nvcc')))

    # Turn into absolute paths so we can emit them into the ninja build
    # file wherever it is.
    sources = [os.path.abspath(file) for file in sources]
    user_includes = [os.path.abspath(file) for file in extra_include_paths]

    # include_paths() gives us the location of torch/extension.h
    system_includes = include_paths(with_cuda)
    # sysconfig.get_paths()['include'] gives us the location of Python.h
    system_includes.append(sysconfig.get_paths()['include'])

    # Windoze does not understand `-isystem`.
    if IS_WINDOWS:
        user_includes += system_includes
        # FIX: was `system_includes.clear()`, which is Python 3 only; this
        # file still supports Python 2 (see the sys.version_info branch in
        # _build_extension_module), where lists have no .clear() method.
        del system_includes[:]

    common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
    common_cflags.append('-DTORCH_API_INCLUDE_EXTENSION_H')
    common_cflags += ['-I{}'.format(include) for include in user_includes]
    common_cflags += ['-isystem {}'.format(include) for include in system_includes]

    if _is_binary_build():
        common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0']

    cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
    if IS_WINDOWS:
        from distutils.spawn import _nt_quote_args
        cflags = _nt_quote_args(cflags)
    flags = ['cflags = {}'.format(' '.join(cflags))]

    if with_cuda:
        cuda_flags = common_cflags + COMMON_NVCC_FLAGS
        if IS_WINDOWS:
            cuda_flags = _nt_quote_args(cuda_flags)
        else:
            cuda_flags += ['--compiler-options', "'-fPIC'"]
        cuda_flags += extra_cuda_cflags
        if not any(flag.startswith('-std=') for flag in cuda_flags):
            cuda_flags.append('-std=c++11')

        flags.append('cuda_flags = {}'.format(' '.join(cuda_flags)))

    if IS_WINDOWS:
        ldflags = ['/DLL'] + extra_ldflags
    else:
        ldflags = ['-shared'] + extra_ldflags
    # The darwin linker needs explicit consent to ignore unresolved symbols.
    if sys.platform.startswith('darwin'):
        ldflags.append('-undefined dynamic_lookup')
    elif IS_WINDOWS:
        ldflags = _nt_quote_args(ldflags)
    flags.append('ldflags = {}'.format(' '.join(ldflags)))

    # See https://ninja-build.org/build.ninja.html for reference.
    compile_rule = ['rule compile']
    if IS_WINDOWS:
        compile_rule.append(
            '  command = cl /showIncludes $cflags -c $in /Fo$out')
        compile_rule.append('  deps = msvc')
    else:
        compile_rule.append(
            '  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out')
        compile_rule.append('  depfile = $out.d')
        compile_rule.append('  deps = gcc')

    if with_cuda:
        cuda_compile_rule = ['rule cuda_compile']
        cuda_compile_rule.append(
            '  command = $nvcc $cuda_flags -c $in -o $out')

    link_rule = ['rule link']
    if IS_WINDOWS:
        cl_paths = subprocess.check_output(['where',
                                            'cl']).decode().split('\r\n')
        if len(cl_paths) >= 1:
            # Ninja requires escaping colons in paths (e.g. C$:\...).
            cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:')
        else:
            raise RuntimeError("MSVC is required to load C++ extensions")
        link_rule.append(
            '  command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format(
                cl_path))
    else:
        link_rule.append('  command = $cxx $in $ldflags -o $out')

    # Emit one build rule per source to enable incremental build.
    object_files = []
    build = []
    for source_file in sources:
        # '/path/to/file.cpp' -> 'file'
        file_name = os.path.splitext(os.path.basename(source_file))[0]
        if _is_cuda_file(source_file) and with_cuda:
            rule = 'cuda_compile'
            # Use a different object filename in case a C++ and CUDA file have
            # the same filename but different extension (.cpp vs. .cu).
            target = '{}.cuda.o'.format(file_name)
        else:
            rule = 'compile'
            target = '{}.o'.format(file_name)
        object_files.append(target)
        if IS_WINDOWS:
            source_file = source_file.replace(':', '$:')
        source_file = source_file.replace(" ", "$ ")
        build.append('build {}: {} {}'.format(target, rule, source_file))

    ext = 'pyd' if IS_WINDOWS else 'so'
    library_target = '{}.{}'.format(name, ext)

    link = ['build {}: link {}'.format(library_target, ' '.join(object_files))]

    default = ['default {}'.format(library_target)]

    # 'Blocks' should be separated by newlines, for visual benefit.
    blocks = [config, flags, compile_rule]
    if with_cuda:
        blocks.append(cuda_compile_rule)
    blocks += [link_rule, build, link, default]
    with open(path, 'w') as build_file:
        for block in blocks:
            lines = '\n'.join(block)
            build_file.write('{}\n\n'.format(lines))
1115 
1116 
def _join_cuda_home(*paths):
    '''
    Joins the given path components onto CUDA_HOME.

    This is basically a lazy way of raising an error for a missing $CUDA_HOME:
    it only fires once some CUDA-specific path is actually needed, rather than
    at import time.
    '''
    if CUDA_HOME is not None:
        return os.path.join(CUDA_HOME, *paths)
    raise EnvironmentError('CUDA_HOME environment variable is not set. '
                           'Please set it to your CUDA install root.')
1128 
1129 
1130 def _is_cuda_file(path):
1131  return os.path.splitext(path)[1] in ['.cu', '.cuh']
Module caffe2.python.layers.split.
def _add_gnu_abi_flag_if_binary(self, extension)
def is_available()
Definition: __init__.py:45
def _add_compile_flag(self, extension, flag)
def _define_torch_extension_name(self, extension)