Caffe2 - Python API
A deep learning, cross-platform ML framework
nccl.py
1 import warnings
2 import torch.cuda
3 
# Public API of this module.
__all__ = ['all_reduce', 'reduce', 'broadcast', 'all_gather', 'reduce_scatter']

# Mirrors the ncclRedOp_t enum on the C side; only the SUM op (value 0) is
# exposed here and used as the default reduction for the wrappers below.
SUM = 0  # ncclRedOp_t
7 
8 
def is_available(tensors):
    """Check whether the NCCL collectives in this module can operate on *tensors*.

    Returns False when any tensor is sparse, non-contiguous, or not on a CUDA
    device, when two tensors share the same CUDA device, or when this build of
    PyTorch lacks the NCCL bindings (a warning is emitted in that last case).
    """
    seen_devices = set()
    for t in tensors:
        # NCCL requires dense, contiguous CUDA tensors.
        if t.is_sparse or not t.is_contiguous() or not t.is_cuda:
            return False
        dev = t.get_device()
        # Each tensor must live on a distinct device.
        if dev in seen_devices:
            return False
        seen_devices.add(dev)

    if not hasattr(torch._C, '_nccl_all_reduce'):
        warnings.warn('PyTorch is not compiled with NCCL support')
        return False

    return True
28 
29 
def version():
    """Return the NCCL version reported by the torch._C binding."""
    query = torch._C._nccl_version
    return query()
32 
33 
def unique_id():
    """Return a fresh NCCL unique id from the torch._C binding (used to rendezvous communicators)."""
    make_id = torch._C._nccl_unique_id
    return make_id()
36 
37 
def init_rank(num_ranks, uid, rank):
    """Initialize and return a communicator for *rank* of *num_ranks* using unique id *uid*."""
    init = torch._C._nccl_init_rank
    return init(num_ranks, uid, rank)
40 
41 
def all_reduce(inputs, outputs=None, op=SUM, streams=None, comms=None):
    """All-reduce *inputs* across devices with reduction *op* (default SUM).

    When *outputs* is None the operation is performed in place on *inputs*.
    *streams* and *comms* are forwarded unchanged to the C binding.
    """
    dests = inputs if outputs is None else outputs
    torch._C._nccl_all_reduce(inputs, dests, op, streams, comms)
46 
47 
def reduce(inputs, outputs=None, root=0, op=SUM, streams=None, comms=None):
    """Reduce *inputs* onto device *root* with reduction *op* (default SUM).

    When *outputs* is None the result is written back into *inputs*.
    *streams* and *comms* are forwarded unchanged to the C binding.
    """
    dests = inputs if outputs is None else outputs
    torch._C._nccl_reduce(inputs, dests, root, op, streams, comms)
52 
53 
def broadcast(inputs, root=0, streams=None, comms=None):
    """Broadcast the tensor on device *root* to every tensor in *inputs*."""
    bcast = torch._C._nccl_broadcast
    bcast(inputs, root, streams, comms)
56 
57 
def all_gather(inputs, outputs, streams=None, comms=None):
    """Gather every tensor in *inputs* into each tensor of *outputs* on all devices."""
    gather = torch._C._nccl_all_gather
    gather(inputs, outputs, streams, comms)
60 
61 
def reduce_scatter(inputs, outputs, op=SUM, streams=None, comms=None):
    """Reduce *inputs* with *op* (default SUM) and scatter the result slices into *outputs*."""
    scatter = torch._C._nccl_reduce_scatter
    scatter(inputs, outputs, op, streams, comms)