import unittest

import torch
import torch.cuda
import torch.cuda.nccl as nccl

from common_utils import TestCase, run_tests, IS_WINDOWS, load_tests
from common_cuda import TEST_CUDA, TEST_MULTIGPU

# load_tests from common_utils is used to automatically filter tests for
# sharding. This line silences flake warnings about an unused import.
load_tests = load_tests

nGPUs = torch.cuda.device_count()

if not TEST_CUDA:
    print('CUDA not available, skipping tests')
    TestCase = object  # noqa: F811


class TestNCCL(TestCase):
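    """Exercises the raw torch.cuda.nccl bindings: each collective gets one
    tensor per GPU, and the result is checked against a reference computed
    on the CPU with ordinary tensor ops."""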

    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    def test_unique_id(self):
        uid = nccl.unique_id()
        self.assertIsInstance(uid, bytes)
        self.assertGreater(len(uid), 1)
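
    # broadcast: the tensor on the root device (index 0 by default) is
    # copied into every other tensor in the list, one tensor per device.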
    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_broadcast(self):
        expected = torch.FloatTensor(128).uniform_()
        tensors = [expected.cuda()]
        # Allocate an uninitialized tensor on each remaining device.
        for device in range(1, torch.cuda.device_count()):
            with torch.cuda.device(device):
                tensors.append(torch.cuda.FloatTensor(128))

        nccl.broadcast(tensors)

        for i in range(torch.cuda.device_count()):
            self.assertEqual(tensors[i], expected)
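
    # reduce: the elementwise sum of all the tensors is written to the
    # tensor on the root device (index 0 by default).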
    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_reduce(self):
        tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
        expected = torch.FloatTensor(128).zero_()
        for t in tensors:
            expected.add_(t)

        tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
        nccl.reduce(tensors)

        self.assertEqual(tensors[0], expected)
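
    # all_reduce: like reduce, but every participating tensor ends up
    # holding the elementwise sum.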
    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_all_reduce(self):
        tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
        expected = torch.FloatTensor(128).zero_()
        for t in tensors:
            expected.add_(t)

        tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
        nccl.all_reduce(tensors)

        for tensor in tensors:
            self.assertEqual(tensor, expected)
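
    # all_gather: every device receives the concatenation of all the
    # input tensors, in device order.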
    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_all_gather(self):
        inputs = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
        expected = torch.cat(inputs, 0)

        inputs = [inputs[i].cuda(i) for i in range(nGPUs)]
        outputs = [torch.cuda.FloatTensor(128 * nGPUs, device=i)
                   for i in range(nGPUs)]
        nccl.all_gather(inputs, outputs)

        for tensor in outputs:
            self.assertEqual(tensor, expected)
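
    # reduce_scatter: the elementwise sum of the inputs is computed and
    # split into equal chunks; device i receives chunk i.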
    @unittest.skipIf(IS_WINDOWS, "NCCL doesn't support Windows")
    @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
    def test_reduce_scatter(self):
        in_size = 32 * nGPUs
        out_size = 32

        inputs = [torch.FloatTensor(in_size).uniform_() for i in range(nGPUs)]
        expected = torch.FloatTensor(in_size).zero_()
        for t in inputs:
            expected.add_(t)
        expected = expected.view(nGPUs, 32)

        inputs = [inputs[i].cuda(i) for i in range(nGPUs)]
        outputs = [torch.cuda.FloatTensor(out_size, device=i)
                   for i in range(nGPUs)]
        nccl.reduce_scatter(inputs, outputs)

        for i in range(nGPUs):
            self.assertEqual(outputs[i], expected[i])


if __name__ == '__main__':
    run_tests()