from copy import deepcopy
from itertools import repeat, product
from functools import wraps, reduce
from operator import mul
from collections import OrderedDict

from torch.nn.utils import parameters_to_vector, vector_to_parameters

from common_utils import freeze_rng_state, run_tests, TestCase, skipIfNoLapack, skipIfRocm, TEST_WITH_ROCM, \
    TEST_NUMPY, TEST_SCIPY, IS_WINDOWS, download_file, PY3, PY34, to_gpu, \
    get_function_arglist, skipCUDAMemoryLeakCheckIf, load_tests
from common_cuda import TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN
from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \
    module_tests, criterion_tests, loss_reference_fns, get_reduction, \
    get_weight, smoothl1loss_reference, kldivloss_reference, \
    ctcloss_reference, new_module_tests

load_tests = load_tests
if TEST_SCIPY:
    from scipy import stats
ALL_TENSORTYPES = [torch.float,
                   torch.double,
                   torch.half]

NO_HALF_TENSORTYPES = [torch.float,
                       torch.double]

DOUBLE_TENSORTYPES = [torch.double]

dtype2prec = {torch.float: 1e-5,
              torch.double: 1e-5,
              torch.half: 1e-2}
def repeat_test_for_types(dtypes):
    def call_helper(self, *args):
        for dtype in dtypes:
            if PY34:
                with TestCase.subTest(self, dtype=dtype):
                    f(self, *args, dtype=dtype)
            else:
                f(self, *args, dtype=dtype)
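
# Illustrative sketch (not from the original file): a complete version of a
# dtype-parameterizing decorator like the fragment above wraps the test body
# and re-runs it once per dtype. The name below is hypothetical.
def _repeat_for_dtypes_sketch(dtypes):
    def decorator(fn):
        @wraps(fn)
        def wrapper(self, *args):
            for dtype in dtypes:
                fn(self, *args, dtype=dtype)
        return wrapper
    return decorator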
    _type_by_name = {
        'torch.DoubleTensor': (torch.DoubleTensor, 'double'),
        'torch.FloatTensor': (torch.FloatTensor, 'float'),
        'torch.LongTensor': (torch.LongTensor, 'long'),
        'torch.IntTensor': (torch.IntTensor, 'int'),
        'torch.ShortTensor': (torch.ShortTensor, 'short'),
        'torch.CharTensor': (torch.CharTensor, 'char'),
        'torch.ByteTensor': (torch.ByteTensor, 'byte'),
    }

    def __init__(self, *args, **kwargs):
        super(PackedSequenceTest, self).__init__(*args, **kwargs)
        self.batch_size = 5
        self.max_length = 6

    def _ordered_sequence(self, tensor_type):
        """Create ordered list of random sequences"""
        seqs = [tensor_type(random.randint(1, self.max_length))
                for _ in range(self.batch_size)]
        seqs = [s.random_(-128, 128) for s in seqs]
        ordered = sorted(seqs, key=len, reverse=True)
        return ordered

    def _padded_sequence(self, tensor_type):
        """Create Tensor of random padded sequences"""
        ordered = self._ordered_sequence(tensor_type)
        lengths = list(map(len, ordered))
        padded_tensor = rnn_utils.pad_sequence(ordered)
        return padded_tensor, lengths
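
    # Illustrative sketch (not part of the original test suite), assuming the
    # helpers above: a pack/unpack round trip preserves the sequence lengths.
    def _pack_round_trip_sketch(self):
        padded, lengths = self._padded_sequence(torch.FloatTensor)
        packed = rnn_utils.pack_padded_sequence(padded, lengths)
        unpacked, lengths_out = rnn_utils.pad_packed_sequence(packed)
        self.assertEqual(lengths_out.tolist(), list(lengths))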
124 """Test type casting of `PackedSequence` against type casting of tensor""" 125 for _, (input_type, _)
in self._type_by_name.items():
126 for expected_type_str, (_, cast_str)
in self._type_by_name.items():
127 for enforce_sorted
in [
True,
False]:
129 packed = rnn_utils.pack_padded_sequence(
130 padded, lengths, enforce_sorted=enforce_sorted)
132 masked = getattr(packed, cast_str)()
133 unpacked, lengths_out = rnn_utils.pad_packed_sequence(masked)
134 self.
assertEqual(unpacked.type(), expected_type_str)
136 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
137 def test_cuda_mask(self):
138 for enforce_sorted
in [
True,
False]:
139 tensor_type = torch.FloatTensor
140 cuda_type_str =
'torch.cuda.FloatTensor' 142 packed = rnn_utils.pack_padded_sequence(
143 padded, lengths, enforce_sorted=enforce_sorted)
144 self.assertFalse(packed.is_cuda)
145 packed = packed.cuda()
146 self.assertTrue(packed.is_cuda)
147 unpacked, _ = rnn_utils.pad_packed_sequence(packed)
150 def test_wrong_order(self):
151 a = torch.ones(25, 300)
152 b = torch.ones(22, 300)
153 b_a = rnn_utils.pad_sequence([b, a])
156 lambda: rnn_utils.pack_padded_sequence(b_a, [22, 25], enforce_sorted=
True))
    def test_total_length(self):
        padded, lengths = self._padded_sequence(torch.FloatTensor)
        max_length = max(lengths)
        packed = rnn_utils.pack_padded_sequence(padded, lengths)
        # a total_length smaller than the longest sequence is an error
        for total_length in (-1, 0, max_length - 1):
            for batch_first in (True, False):
                def err_fn():
                    rnn_utils.pad_packed_sequence(packed, batch_first=batch_first,
                                                  total_length=total_length)
                self.assertRaisesRegex(ValueError,
                                       r'Expected total_length to be at least the '
                                       r'length of the longest sequence in input',
                                       err_fn)
        # pad_packed_sequence returns results of the requested total_length
        for batch_first in (True, False):
            no_extra_pad, _ = rnn_utils.pad_packed_sequence(packed, batch_first=batch_first)
            for total_length_delta in (0, 1, 8):
                total_length = max_length + total_length_delta
                unpacked, lengths_out = rnn_utils.pad_packed_sequence(packed, batch_first=batch_first,
                                                                      total_length=total_length)
                self.assertEqual(unpacked.size(1 if batch_first else 0), total_length)
                if total_length_delta == 0:
                    ref_output = no_extra_pad
                elif batch_first:
                    extra_pad = no_extra_pad.new_zeros(self.batch_size, total_length_delta)
                    ref_output = torch.cat([no_extra_pad, extra_pad], 1)
                else:
                    extra_pad = no_extra_pad.new_zeros(total_length_delta, self.batch_size)
                    ref_output = torch.cat([no_extra_pad, extra_pad], 0)
                self.assertEqual(unpacked, ref_output)
    def test_to(self):
        for enforce_sorted in (True, False):
            padded, lengths = self._padded_sequence(torch.IntTensor)
            a = rnn_utils.pack_padded_sequence(
                padded, lengths, enforce_sorted=enforce_sorted).cpu()

            self.assertIs(a, a.to('cpu'))
            self.assertIs(a, a.to('cpu', dtype=torch.int32))

            if torch.cuda.is_available():
                cuda = torch.device('cuda')
                b = a.cuda(device=cuda)
                self.assertIs(b, b.to(cuda))
                self.assertEqual(a, b.to('cpu', dtype=torch.int32))
                self.assertIs(b, b.to(dtype=torch.int32))
                self.assertEqual(b.long(), b.to(dtype=torch.int64))
def default_tensor_type(type):
    def wrapper(*args, **kwargs):
        old_type = torch.Tensor().type()
        torch.set_default_tensor_type(type)
        try:
            return fn(*args, **kwargs)
        finally:
            torch.set_default_tensor_type(old_type)
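
# Illustrative sketch (not from the original file): a complete version of the
# default_tensor_type decorator restores the previous default type even if the
# wrapped function raises. The name below is hypothetical.
def _default_tensor_type_sketch(type_name):
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            old_type = torch.Tensor().type()
            torch.set_default_tensor_type(type_name)
            try:
                return fn(*args, **kwargs)
            finally:
                torch.set_default_tensor_type(old_type)
        return wrapper
    return decorator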
def _assertGradAndGradgradChecks(test_case, apply_fn, inputs):
    test_case.assertTrue(gradcheck(apply_fn, inputs))
    test_case.assertTrue(gradgradcheck(apply_fn, inputs))
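
# Illustrative sketch (not part of the original test suite): gradcheck compares
# analytical gradients against finite differences, so the helper above is
# normally fed double-precision inputs with requires_grad=True.
def _gradcheck_example_sketch():
    lin = torch.nn.Linear(3, 2).double()
    x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
    assert torch.autograd.gradcheck(lambda inp: lin(inp), (x,))
    assert torch.autograd.gradgradcheck(lambda inp: lin(inp), (x,))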
    def _get_input(self):
        input = TestBase._get_input(self, False)

        def map_variables(i):
            if isinstance(i, torch.Tensor):
                if i.is_floating_point():
                    i.requires_grad = True
                return i
            else:
                return type(i)(map_variables(elem) for elem in i)

        return map_variables(input)
    def __init__(self, *args, **kwargs):
        super(NewModuleTest, self).__init__(*args, **kwargs)
        self.cudnn = kwargs.get('cudnn', False)
        self.skip_double = kwargs.get('skip_double', False)
    def _do_test(self, test_case, module, input):
        params = tuple(x for x in module.parameters())
        _assertGradAndGradgradChecks(test_case,
                                     lambda x, *args, **kw: test_case._forward(module, x), (input,) + params)

        if self.check_inplace:
            # the in-place variant must give the same result as the out-of-place one
            module_ip = self.constructor(*self.constructor_args, inplace=True)

            input_version = input._version
            with freeze_rng_state():
                output = module(input)
            test_case.assertEqual(input._version, input_version)

            input_ip = deepcopy(input)
            input_ip_clone = input_ip.clone()
            with freeze_rng_state():
                output_ip = module_ip(input_ip_clone)
            test_case.assertNotEqual(input_ip_clone._version, input_version)
            test_case.assertEqual(output, output_ip)
            grad = output.data.clone().normal_()
            input.grad.data.zero_()
            output.backward(grad)
            output_ip.backward(grad)
            test_case.assertEqual(input.grad, input_ip.grad)
        if isinstance(input, torch.LongTensor) and TEST_CUDA:
            # cuda() should move parameters to GPU 0 and float() should cast them
            module.float().cuda()
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.FloatTensor)
                test_case.assertEqual(p.get_device(), 0)

            # move everything to the second GPU
            input = input.cuda(1)
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.FloatTensor)
                test_case.assertEqual(p.get_device(), 1)
        else:
            # float() / double() casts on CPU
            if not isinstance(input, torch.LongTensor):
                input = input.float()
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.FloatTensor)

            if not isinstance(input, torch.LongTensor):
                input = input.double()
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.DoubleTensor)

            # float on GPU 0
            input = input.float().cuda()
            module.float().cuda()
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.FloatTensor)
                test_case.assertEqual(p.get_device(), 0)

            # back to CPU
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.FloatTensor)

            # and back to GPU 0
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.FloatTensor)
                test_case.assertEqual(p.get_device(), 0)

            # forward should also run with cuDNN disabled
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.FloatTensor)
                test_case.assertEqual(p.get_device(), 0)

            # cross-GPU transfer to GPU 1
            input = input.cuda(1)
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.FloatTensor)
                test_case.assertEqual(p.get_device(), 1)

            # double on GPU
            input = input.double().cuda()
            module.double().cuda()
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.DoubleTensor)
                test_case.assertEqual(p.get_device(), 0)

            # half on GPU
            input = input.half().cuda()
            for p in module.parameters():
                test_case.assertIsInstance(p, torch.cuda.HalfTensor)
                test_case.assertEqual(p.get_device(), 0)
    def _get_target(self):
        return self._get_arg('target', False)

    @property
    def constructor_args(self):
        return self._get_arg('constructor_args', False)

    def __init__(self, *args, **kwargs):
        super(NewCriterionTest, self).__init__(*args, **kwargs)
        self.check_half = kwargs.get('check_half', True)
    def _do_extra_tests(self, test_case, module, input, target):
        test_case.assertFalse(target.requires_grad)

        params = tuple(x for x in module.parameters())
        if not isinstance(input, tuple):
            inputs = (input,) + params

            def apply_fn(input, *params):
                return module(input, target)
        else:
            inputs = input + params

            def apply_fn(input1, input2, *params):
                return module(input1, input2, target)

        gradcheck(apply_fn, inputs)
        gradgradcheck(apply_fn, inputs)
    def test_cuda(self, test_case, dtype=None, extra_args=None):
        def convert_dtype(obj, dtype, requires_grad=False):
            if isinstance(obj, torch.Tensor):
                return obj.detach().to(dtype=dtype).requires_grad_(requires_grad)
            elif isinstance(obj, tuple):
                return tuple(convert_dtype(o, dtype, requires_grad) for o in obj)
            else:
                return obj

        if not TEST_CUDA:
            raise unittest.SkipTest('Excluded from CUDA tests')

        try:
            cpu_input = self._get_input()
            cpu_target = self._get_target()
            cpu_module = self.constructor(*self.constructor_args)
            gpu_module = self.constructor(*self.constructor_args)

            # convert input, target and module parameters to dtype
            if dtype is not None:
                cpu_input = convert_dtype(cpu_input, dtype, True)
                # NLLLoss requires target to be LongTensor
                if not isinstance(cpu_target, torch.LongTensor) and self.convert_target:
                    cpu_target = convert_dtype(cpu_target, dtype)
                cpu_module.type(dtype)
                gpu_module.type(dtype)

            # GPU setup
            gpu_input = to_gpu(cpu_input)
            gpu_target = to_gpu(cpu_target)
            gpu_module.cuda()

            # torch.HalfTensor doesn't support most CPU operations; keep the CPU reference in default precision
            if dtype == torch.half:
                cpu_input = self._get_input()
                cpu_target = self._get_target()
                cpu_module = self.constructor(*self.constructor_args)

            cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target, extra_args=extra_args)
            gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target, extra_args=extra_args)
            test_case.assertEqual(cpu_output, gpu_output, 1e-1 if dtype == torch.half else 4e-4)

            cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target, extra_args=extra_args)
            gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target, extra_args=extra_args)
            test_case.assertEqual(cpu_gradInput, gpu_gradInput, 1e-1 if dtype == torch.half else 4e-4)
        except NotImplementedError:
            pass
    def _get_target(self):
        return self._get_arg('target', False)

    @property
    def constructor_args(self):
        return self._get_arg('constructor_args', False)

    @property
    def extra_args(self):
        return self._get_arg('extra_args', False)
    _do_cuda_memory_leak_check = True

    def _forward(self, module, input):
        with freeze_rng_state():
            return module(input)

    def _backward(self, module, input, output, grad_output, create_graph=False):
        output.backward(grad_output, retain_graph=True, create_graph=create_graph)
        if input.grad is None:
            return None
        return input.grad.data

    def _forward_criterion(self, criterion, input, target, extra_args=None):
        if extra_args is None:
            extra_args = tuple()
        if isinstance(input, tuple):
            args = input + (target,) + extra_args
            output = criterion(*args)
        else:
            output = criterion(input, target, *extra_args)
        return output
    def _backward_criterion(self, criterion, input, target, gradOutput=None, extra_args=None):
        if extra_args is None:
            extra_args = tuple()
        input_tuple = input if isinstance(input, tuple) else (input,)
        for i in input_tuple:
            if i.grad is not None:
                i.grad.data.zero_()
        args = input_tuple + (target,) + extra_args
        if gradOutput is None:
            gradOutput = torch.ones(())
        criterion(*args).backward(gradOutput.type_as(input_tuple[0]))
        if isinstance(input, tuple):
            return tuple(map(lambda i: i.grad.data, input))
        else:
            return input.grad.data
    def _zero_grad_parameters(self, module):
        for p in module.parameters():
            if p.grad is not None:
                with torch.no_grad():
                    p.grad.zero_()

    def _get_parameters(self, module):
        params = []
        d_params = []
        for p in module.parameters():
            params.append(p)
            d_params.append(p.grad)
        return params, d_params

    def _create_basic_net(self):
        class Layer(nn.Module):
            def __init__(self):
                super(Layer, self).__init__()
                self.layer_dummy_param = Parameter(torch.Tensor(3, 5))
                self.register_buffer('layer_dummy_buf', torch.zeros(1, 3, 3, 7))

        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l1 = Layer()
                self.dummy_param = Parameter(torch.Tensor(3, 5))
                self.register_buffer('dummy_buf', torch.zeros(7, 3, 3, 1))

        l = Layer()
        n = Net()
        s = nn.Sequential(n, n)
        return l, n, s
    def test_module_backcompat(self):
        from torch.serialization import SourceChangeWarning
        path = download_file('https://download.pytorch.org/test_data/linear.pt')
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', SourceChangeWarning)
            m = torch.load(path)
        input = torch.randn(2, 3, dtype=torch.float)
        self.assertEqual(m(input).size(), (2, 5))
    def test_share_memory(self):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.p = nn.Parameter(torch.eye(5))
                self.par = nn.ParameterList()
                self.par.append(nn.Parameter(torch.randn(10)))

        net = Net()
        for p in net.parameters():
            self.assertFalse(p.storage().is_shared())
        for b in net.buffers():
            self.assertFalse(b.storage().is_shared())
        net.share_memory()
        for p in net.parameters():
            self.assertTrue(p.storage().is_shared())
        for b in net.buffers():
            self.assertTrue(b.storage().is_shared())
    def test_hooks(self):
        module = nn.Sigmoid()
        input = torch.ones(5, 5, requires_grad=True)

        counter = {'forwards': 0, 'backwards': 0}

        def fw_hook(inc, h_module, input, output):
            self.assertIsInstance(input, tuple)
            self.assertTrue(isinstance(output, torch.Tensor))
            self.assertTrue(h_module is module)
            self.assertEqual(output.data, torch.Tensor(5, 5).fill_(1 / (1 + 1 / math.e)))
            counter['forwards'] += inc

        def bw_hook(inc, h_module, grad_input, grad_output):
            self.assertIsInstance(grad_input, tuple)
            self.assertIsInstance(grad_output, tuple)
            self.assertTrue(h_module is module)
            self.assertEqual(grad_output[0].data, torch.ones(5, 5) * 2)
            counter['backwards'] += inc

        test_fwd = module.register_forward_hook(lambda *args: fw_hook(1, *args))

        test_bwd = module.register_backward_hook(
            lambda *args: bw_hook(1, *args))

        output = module(input)
        output.backward(torch.ones(5, 5) * 2, retain_graph=True)
        output.backward(torch.ones(5, 5) * 2, retain_graph=True)

        test2_fwd = module.register_forward_hook(lambda *args: fw_hook(2, *args))
        output = module(input)

        test2_bwd = module.register_backward_hook(lambda *args: bw_hook(2, *args))
        module(input).backward(torch.ones(5, 5) * 2)
        module(input).backward(torch.ones(5, 5) * 2)
        module(input).backward(torch.ones(5, 5) * 2)
    def test_hook_cpp(self):
        bn = nn.BatchNorm1d(5)

        def hook(module, grad_inputs, grad_outputs):
            self.assertEqual(module, bn)

        bn.register_backward_hook(hook)
        output = bn(torch.randn(5, 5, requires_grad=True))
        output.sum().backward()
    def test_hook_fail(self):
        module = nn.Sigmoid()
        input = torch.randn(5, 5, requires_grad=True)

        def fw_fail1(self, input, output):
            return output

        def fw_fail2(self, input, output):
            return input

        def bw_fail1(self, grad_input, grad_output):
            return grad_input[:-1]

        def bw_fail2(self, grad_input, grad_output):
            return grad_input + (torch.randn(2, 2),)

        with module.register_forward_hook(fw_fail1):
            with self.assertRaises(RuntimeError) as err:
                module(input)
            self.assertIn("fw_fail", err.exception.args[0])
            self.assertIn("didn't return None", err.exception.args[0])

        with module.register_forward_hook(fw_fail2):
            with self.assertRaises(RuntimeError) as err:
                module(input)
            self.assertIn("fw_fail2", err.exception.args[0])
            self.assertIn("didn't return None", err.exception.args[0])

        with module.register_backward_hook(bw_fail1):
            with self.assertRaises(RuntimeError) as err:
                module(input).sum().backward()
            self.assertIn("bw_fail", err.exception.args[0])
            self.assertIn("got 0, but expected 1", err.exception.args[0])

        with module.register_backward_hook(bw_fail2):
            with self.assertRaises(RuntimeError) as err:
                module(input).sum().backward()
            self.assertIn("bw_fail2", err.exception.args[0])
            self.assertIn("got 2, but expected 1", err.exception.args[0])
    def test_hook_writeable(self):
        module = nn.Linear(5, 5)
        input = torch.randn(5, 5, requires_grad=True)

        def bw_hook(module, grad_input, grad_output):
            for grad in grad_input:
                self.assertTrue(isinstance(grad, torch.Tensor))
            for grad in grad_output:
                self.assertTrue(isinstance(grad, torch.Tensor))
            return tuple(gi * 2 for gi in grad_input)

        module.register_backward_hook(bw_hook)
        module(input).backward(torch.ones(5, 5))
        expected_grad = torch.ones(5, 5).mm(module.weight.data) * 2
        self.assertEqual(input.grad.data, expected_grad)
    def test_to(self):
        m = nn.Linear(3, 5)
        self.assertIs(m, m.to('cpu'))
        self.assertIs(m, m.to('cpu', dtype=torch.float32))
        self.assertRaises(RuntimeError, lambda: m.to('cpu', copy=True))

        if torch.cuda.is_available():
            cuda = torch.device('cuda')
            m2 = m.cuda(device=cuda)
            self.assertIs(m2, m2.to(cuda))
            self.assertIs(m2, m2.to(dtype=torch.float32))
            self.assertEqual(m2.double(), m2.to(dtype=torch.float64))
    def test_zero_grad(self):
        i = torch.randn(2, 5, requires_grad=True)
        module = nn.Linear(5, 5)
        for p in module.parameters():
            p.requires_grad = False
        module.zero_grad()

        module.weight.requires_grad = True
        module.zero_grad()
        self.assertIsNone(module.weight.grad)  # uninitialized grad

        module(i).sum().backward()
        self.assertIsNotNone(module.weight.grad)
        self.assertGreater(module.weight.grad.data.abs().sum(), 0)
        module.zero_grad()
        self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())

        module.bias.requires_grad = True
        module.zero_grad()
        self.assertIsNotNone(module.weight.grad)
        self.assertIsNone(module.bias.grad)
        module(i).sum().backward()
        self.assertIsNotNone(module.weight.grad)
        self.assertIsNotNone(module.bias.grad)
        self.assertGreater(module.weight.grad.data.abs().sum(), 0)
        self.assertGreater(module.bias.grad.data.abs().sum(), 0)
        module.zero_grad()
        self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
        self.assertEqual(module.bias.grad.data, module.bias.data.clone().zero_())
    def test_no_grad(self):
        module = nn.Conv2d(2, 5, kernel_size=3, padding=1)
        input = torch.randn(1, 2, 10, 10)

        output = module(input)
        self.assertTrue(output.requires_grad)
        output.backward(torch.ones(1, 5, 10, 10))

        with torch.no_grad():
            output2 = module(input)
            self.assertFalse(output2.requires_grad)
            self.assertRaises(RuntimeError, lambda: output2.backward(torch.ones(1, 5, 10, 10)))
    def test_invalid_conv1d(self):
        module = nn.Conv1d(in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True)
        input = torch.randn(1, 3, 4)
        with self.assertRaisesRegex(RuntimeError,
                                    r'Calculated padded input size per channel: \(4\). ' +
                                    r'Kernel size: \(10\). Kernel size can\'t be greater than actual input size'):
            module(input)

        # negative stride check
        module = nn.Conv1d(in_channels=3, out_channels=6, kernel_size=3, stride=-1, bias=True)
        input = torch.randn(1, 3, 4)
        with self.assertRaises(RuntimeError):
            module(input)

    def test_invalid_conv2d(self):
        module = torch.nn.Conv2d(1, 1, kernel_size=3, dilation=2, stride=2)
        input = torch.empty(1, 1, 4, 4)
        self.assertRaises(RuntimeError, lambda: module(input))

        module = nn.Conv2d(in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True)
        input = torch.randn(1, 3, 1, 1)
        with self.assertRaisesRegex(RuntimeError,
                                    r'Calculated padded input size per channel: \(1 x 1\). ' +
                                    r'Kernel size: \(10 x 10\). Kernel size can\'t be greater than actual input size'):
            module(input)

        # negative stride check
        module = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=4, stride=-1, bias=True)
        input = torch.randn(1, 3, 4, 4)
        with self.assertRaises(RuntimeError):
            module(input)

    def test_invalid_conv3d(self):
        module = torch.nn.Conv3d(1, 1, kernel_size=3, dilation=2, stride=2)
        input = torch.empty(1, 1, 4, 4, 4)
        self.assertRaises(RuntimeError, lambda: module(input))

        # negative stride check
        module = torch.nn.Conv3d(1, 1, kernel_size=3, stride=-2)
        input = torch.empty(1, 1, 4, 4, 4)
        with self.assertRaises(RuntimeError):
            module(input)
    def _test_dropout(self, cls, cuda, input):
        p = 0.2
        device = torch.device("cuda") if cuda else torch.device("cpu")
        input = input.to(device).fill_(1 - p)

        module = cls(p)
        input_var = input.clone().requires_grad_()
        output = module(input_var)
        self.assertLess(abs(output.data.mean() - (1 - p)), 0.05)
        output.backward(input)
        self.assertLess(abs(input_var.grad.data.mean() - (1 - p)), 0.05)

        module = cls(p, True)
        input_var = input.clone().requires_grad_()
        output = module(input_var + 0)
        self.assertLess(abs(output.data.mean() - (1 - p)), 0.05)
        output.backward(input)
        self.assertLess(abs(input_var.grad.data.mean() - (1 - p)), 0.05)

        # check that eval mode doesn't change anything
        for inplace in [True, False]:
            module = cls(p, inplace).eval()
            self.assertEqual(input, module(input))

    def _test_alpha_dropout(self, cls, input):
        mean = input.mean()
        std = input.std()

        for p in [0.2, 0.5, 0.8]:
            module = cls(p)
            input_var = input.detach().clone().requires_grad_()
            output = module(input_var)
            # output mean and std should stay close to the input's
            self.assertLess(abs(output.data.mean() - mean), 0.1)
            self.assertLess(abs(output.data.std() - std), 0.1)
            output.backward(input)
    def test_parameters_and_named_parameters(self):
        def names(named_parameters):
            return [k for k, _ in named_parameters]

        l, n, s = self._create_basic_net()

        self.assertEqual(
            names(l.named_parameters()),
            ['layer_dummy_param'])

        self.assertEqual(
            names(n.named_parameters()),
            ['dummy_param', 'l1.layer_dummy_param'])

        self.assertEqual(len(list(n.parameters(recurse=False))), 1)
        self.assertEqual(
            names(n.named_parameters(recurse=False)),
            ['dummy_param'])

        self.assertEqual(
            names(s.named_parameters()),
            ['0.dummy_param', '0.l1.layer_dummy_param'])

    def test_buffers_and_named_buffers(self):
        def names(named_buffers):
            return [k for k, _ in named_buffers]

        l, n, s = self._create_basic_net()

        self.assertEqual(
            names(l.named_buffers()),
            ['layer_dummy_buf'])

        self.assertEqual(
            names(n.named_buffers()),
            ['dummy_buf', 'l1.layer_dummy_buf'])

        self.assertEqual(len(list(n.buffers(recurse=False))), 1)
        self.assertEqual(
            names(n.named_buffers(recurse=False)),
            ['dummy_buf'])

        self.assertEqual(
            names(s.named_buffers()),
            ['0.dummy_buf', '0.l1.layer_dummy_buf'])
    def test_call_supports_python_dict_output(self):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l1 = nn.Linear(10, 20)
                self.register_backward_hook(self.hook)
                self.check_backward_hook_flag = False

            def hook(self, module, grad_out, grad_in):
                self.check_backward_hook_flag = True

            def forward(self, inputs):
                return {"output": self.l1(inputs).sum()}

        net = Net()
        model_output = net(torch.randn([5, 10]))
        model_output["output"].backward()
        self.assertTrue(net.check_backward_hook_flag)

    def test_children(self):
        l1 = nn.Linear(2, 2)
        l2 = nn.Linear(2, 2)
        l3 = nn.Linear(2, 2)
        l4 = nn.Linear(2, 2)
        subnet = nn.Sequential(l3, l4)
        s = nn.Sequential(l1, l2, l1, l2, subnet)
        self.assertEqual(list(s.children()), [l1, l2, subnet])
    def test_dir(self):
        linear = nn.Linear(2, 2)
        linear._test_submodule = nn.Linear(2, 2)
        linear._test_parameter = Parameter(torch.Tensor(2, 2))
        linear.register_buffer('_test_buffer', torch.Tensor(2, 2))
        keys = dir(linear)
        self.assertIn('_test_submodule', keys)
        self.assertIn('_test_parameter', keys)
        self.assertIn('_test_buffer', keys)

        for key in keys:
            self.assertTrue(hasattr(linear, key))

    def test_repr(self):
        # empty Sequential
        empty_sequential = nn.Sequential()
        expected_repr_empty = 'Sequential()'
        self.assertEqual(repr(empty_sequential), expected_repr_empty)

        # Linear
        linear = nn.Linear(1, 1)
        expected_repr_linear = 'Linear(in_features=1, out_features=1, bias=True)'
        self.assertEqual(repr(linear), expected_repr_linear)

        # Sequential with a Linear child
        sequential = nn.Sequential(linear)
        expected_repr_sequential = 'Sequential(\n' \
            '  (0): Linear(in_features=1, out_features=1, bias=True)\n' \
            ')'
        self.assertEqual(repr(sequential), expected_repr_sequential)

    def test_dir_digit(self):
        model = nn.Sequential(nn.Linear(2, 2))
        keys = dir(model)
        self.assertNotIn('0', keys)
    def test_named_children(self):
        l1 = nn.Linear(2, 2)
        l2 = nn.Linear(2, 2)
        l3 = nn.Linear(2, 2)
        l4 = nn.Linear(2, 2)
        subnet = nn.Sequential(l3, l4)
        s = nn.Sequential()
        with self.assertRaises(KeyError):
            s.add_module('', l1)
        with self.assertRaises(KeyError):
            s.add_module('name.with.dot', l1)
        s.add_module('layer1', l1)
        s.add_module('layer2', l2)
        s.add_module('layer3', l1)
        s.add_module('layer4', l2)
        s.add_module('subnet', subnet)
        self.assertEqual(list(s.named_children()), [('layer1', l1), ('layer2', l2), ('subnet', subnet)])
    def test_modules(self):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l1 = l
                self.l2 = l
                self.param = torch.empty(3, 5)

        l = nn.Linear(10, 20)
        n = Net()
        s = nn.Sequential(n, n, n, n)
        self.assertEqual(list(s.modules()), [s, n, l])

    def test_named_modules(self):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l1 = l
                self.l2 = l
                self.param = torch.empty(3, 5)
                self.block = block

        l = nn.Linear(10, 20)
        l1 = nn.Linear(10, 20)
        l2 = nn.Linear(10, 20)
        block = nn.Sequential()
        block.add_module('linear1', l1)
        block.add_module('linear2', l2)
        n = Net()
        s = nn.Sequential(n, n, n, n)
        self.assertEqual(list(s.named_modules()), [('', s), ('0', n), ('0.l1', l),
                                                   ('0.block', block), ('0.block.linear1', l1),
                                                   ('0.block.linear2', l2)])
    def test_register_buffer_raises_error_if_name_is_not_string(self):
        m = nn.Module()
        expected_error = 'buffer name should be a string. Got '
        with self.assertRaisesRegex(TypeError, expected_error):
            m.register_buffer(1, torch.rand(5))
        with self.assertRaisesRegex(TypeError, expected_error):
            m.register_buffer(None, torch.rand(5))

    def test_register_buffer_raises_error_if_attr_exists(self):
        m = nn.Module()
        m.attribute_name = 5
        with self.assertRaises(KeyError):
            m.register_buffer('attribute_name', torch.rand(5))

        del m.attribute_name
        m.register_parameter('attribute_name', nn.Parameter())
        with self.assertRaises(KeyError):
            m.register_buffer('attribute_name', torch.rand(5))

        del m.attribute_name
        m.add_module('attribute_name', nn.Module())
        with self.assertRaises(KeyError):
            m.register_buffer('attribute_name', torch.rand(5))

    def test_register_buffer_raises_error_if_not_tensor(self):
        m = nn.Module()
        with self.assertRaises(TypeError):
            m.register_buffer('attribute_name', 5)

    def test_register_buffer_allows_overwriting_with_same_name(self):
        m = nn.Module()
        buffer1 = torch.rand(5)
        buffer2 = buffer1 + 5
        buffer3 = None
        m.register_buffer('buffer_name', buffer1)
        m.register_buffer('buffer_name', buffer2)
        m.register_buffer('buffer_name', buffer3)

    def test_register_parameter_raises_error_if_name_is_not_string(self):
        m = nn.Module()
        expected_error = 'parameter name should be a string. Got '
        with self.assertRaisesRegex(TypeError, expected_error):
            m.register_parameter(1, nn.Parameter())
        with self.assertRaisesRegex(TypeError, expected_error):
            m.register_parameter(None, nn.Parameter())

    def test_register_parameter_raises_error_if_attr_exists(self):
        m = nn.Module()
        m.attribute_name = 5
        with self.assertRaises(KeyError):
            m.register_parameter('attribute_name', nn.Parameter())

        del m.attribute_name
        m.register_buffer('attribute_name', torch.rand(5))
        with self.assertRaises(KeyError):
            m.register_parameter('attribute_name', nn.Parameter())

        del m.attribute_name
        m.add_module('attribute_name', nn.Module())
        with self.assertRaises(KeyError):
            m.register_parameter('attribute_name', nn.Parameter())

    def test_register_parameter_allows_overwriting_with_same_name(self):
        m = nn.Module()
        param1 = nn.Parameter(torch.rand(5))
        param2 = nn.Parameter(param1.data + 5)
        param3 = None
        m.register_parameter('param_name', param1)
        m.register_parameter('param_name', param2)
        m.register_parameter('param_name', param3)

    def test_add_module_raises_error_if_attr_exists(self):
        m = nn.Module()
        m.attribute_name = 5
        with self.assertRaises(KeyError):
            m.add_module('attribute_name', nn.Module())

        del m.attribute_name
        m.register_buffer('attribute_name', torch.rand(5))
        with self.assertRaises(KeyError):
            m.add_module('attribute_name', nn.Module())

        del m.attribute_name
        m.register_parameter('attribute_name', nn.Parameter())
        with self.assertRaises(KeyError):
            m.add_module('attribute_name', nn.Module())
    def test_Sequential_getitem(self):
        l1 = nn.Linear(10, 20)
        l2 = nn.Linear(20, 30)
        l3 = nn.Linear(30, 40)
        l4 = nn.Linear(40, 50)
        n = nn.Sequential(l1, l2, l3, l4)
        self.assertIs(n[0], l1)
        self.assertIs(n[1], l2)
        self.assertIs(n[2], l3)
        self.assertIs(n[3], l4)
        self.assertIs(n[torch.tensor(3, dtype=torch.int64)], l4)
        self.assertEqual(n[1:], nn.Sequential(l2, l3, l4))
        self.assertEqual(n[:-1], nn.Sequential(l1, l2, l3))
        self.assertEqual(n[::-1], nn.Sequential(l4, l3, l2, l1))
    def test_Sequential_setitem(self):
        l1 = nn.Linear(10, 20)
        l2 = nn.Linear(20, 30)
        l3 = nn.Linear(30, 40)
        l4 = nn.Linear(40, 50)
        n = nn.Sequential(l1, l2, l3)
        n[0] = l4
        n[1] = l1
        n[2] = l4
        self.assertIs(n[0], l4)
        self.assertIs(n[1], l1)
        self.assertIs(n[2], l4)

    def test_Sequential_setitem_named(self):
        l1 = nn.Linear(10, 20)
        l2 = nn.Linear(20, 30)
        l3 = nn.Linear(30, 40)
        l4 = nn.Linear(40, 50)
        n = nn.Sequential(OrderedDict([
            ('linear1', l1),
            ('linear2', l2),
            ('linear3', l3),
        ]))
        n[0] = l4
        self.assertIs(n[0], l4)

    def test_Sequential_delitem(self):
        l1 = nn.Linear(10, 20)
        l2 = nn.Linear(20, 30)
        l3 = nn.Linear(30, 40)
        l4 = nn.Linear(40, 50)
        n = nn.Sequential(l1, l2, l3, l4)
        del n[-1]
        self.assertEqual(n, nn.Sequential(l1, l2, l3))
        del n[1::2]
        self.assertEqual(n, nn.Sequential(l1, l3))
    def test_ModuleList(self):
        modules = [nn.ReLU(), nn.Linear(5, 5)]
        module_list = nn.ModuleList(modules)

        def check():
            self.assertEqual(len(module_list), len(modules))
            for m1, m2 in zip(modules, module_list):
                self.assertIs(m1, m2)
            for m1, m2 in zip(modules, module_list.children()):
                self.assertIs(m1, m2)
            for i in range(len(modules)):
                self.assertIs(module_list[i], modules[i])

        check()
        modules += [nn.Conv2d(3, 4, 3)]
        module_list += [modules[-1]]
        check()
        modules.insert(1, nn.Linear(3, 2))
        module_list.insert(1, modules[1])
        check()
        modules.append(nn.Tanh())
        module_list.append(modules[-1])
        check()
        next_modules = [nn.Linear(5, 5), nn.Sigmoid()]
        modules.extend(next_modules)
        module_list.extend(next_modules)
        check()
        modules[2] = nn.Conv2d(5, 3, 2)
        module_list[2] = modules[2]
        check()
        modules[-1] = nn.Conv2d(5, 2, 1)
        module_list[-1] = modules[-1]
        check()
        idx = torch.tensor(2, dtype=torch.int32)
        modules[2] = nn.Conv2d(5, 3, 2)
        module_list[idx] = modules[2]
        self.assertIs(module_list[idx], modules[2])
        check()
        self.assertEqual(module_list[1:], nn.ModuleList(modules[1:]))
        self.assertEqual(module_list[3:], nn.ModuleList(modules[3:]))
        self.assertEqual(module_list[:-1], nn.ModuleList(modules[:-1]))
        self.assertEqual(module_list[:-3], nn.ModuleList(modules[:-3]))
        self.assertEqual(module_list[::-1], nn.ModuleList(modules[::-1]))
        del module_list[-1]
        self.assertEqual(module_list, nn.ModuleList(modules[:-1]))
        del module_list[1::2]
        self.assertEqual(module_list, nn.ModuleList(modules[:-1][0::2]))

        with self.assertRaises(TypeError):
            module_list += nn.ReLU()
        with self.assertRaises(TypeError):
            module_list.extend(nn.ReLU())

        l1 = nn.Linear(1, 2)
        l2 = nn.Linear(2, 3)
        l3 = nn.Linear(3, 2)
        l4 = nn.Linear(2, 3)
        subnet = nn.Sequential(l3, l4)
        s = nn.Sequential(OrderedDict([
            ("layer1", l1),
            ("layer2", l2),
            ("layer3", l3),
            ("layer4", l4),
            ("subnet_layer", subnet)
        ]))
        modules = list(s.modules())
        module_list = nn.ModuleList()
        module_list.extend(s.modules())
        self.assertEqual(modules, list(module_list))
    def test_ModuleDict(self):
        modules = OrderedDict([
            ('conv', nn.Conv2d(10, 10, 5)),
            ('fc', nn.Linear(5, 5)),
        ])

        module_dict = nn.ModuleDict(modules)

        self.assertEqual(len(module_dict), len(modules))
        for k1, m2 in zip(modules, module_dict.children()):
            self.assertIs(modules[k1], m2)
        for k1, k2 in zip(modules, module_dict):
            self.assertIs(modules[k1], module_dict[k2])
        for k in module_dict:
            self.assertIs(module_dict[k], modules[k])
        for k in module_dict.keys():
            self.assertIs(module_dict[k], modules[k])
        for k, v in module_dict.items():
            self.assertIs(modules[k], v)
        for k1, m2 in zip(modules, module_dict.values()):
            self.assertIs(modules[k1], m2)
        for k in modules.keys():
            self.assertTrue(k in module_dict)

        modules['conv'] = nn.Conv2d(3, 4, 3)
        module_dict['conv'] = modules['conv']

        next_modules = [
            ('fc2', nn.Linear(5, 5)),
            ('act', nn.Sigmoid()),
        ]
        modules.update(next_modules)
        module_dict.update(next_modules)

        next_modules = OrderedDict([
            ('fc3', nn.Linear(5, 5)),
            ('act2', nn.Sigmoid()),
        ])
        modules.update(next_modules)
        module_dict.update(next_modules)

        next_modules = {
            'fc4': nn.Linear(5, 5),
            'act3': nn.Sigmoid()
        }
        modules.update(sorted(next_modules.items()))
        module_dict.update(next_modules)

        del module_dict['fc']
        del modules['fc']

        with self.assertRaises(TypeError):
            module_dict.update(nn.ReLU())

        with self.assertRaises(TypeError):
            module_dict.update([nn.ReLU()])

        with self.assertRaises(ValueError):
            module_dict.update([[nn.ReLU()]])

        with self.assertRaises(TypeError):
            module_dict[1] = nn.ReLU()

        s = nn.Sequential(modules)
        module_dict = nn.ModuleDict(s.named_children())

        c = module_dict.pop('conv')
        self.assertIs(c, modules['conv'])
    def test_ParameterList(self):
        def make_param():
            return Parameter(torch.randn(10, 10))
        parameters = [make_param(), make_param()]
        param_list = nn.ParameterList(parameters)

        def check():
            self.assertEqual(len(parameters), len(param_list))
            for p1, p2 in zip(parameters, param_list):
                self.assertIs(p1, p2)
            for p1, p2 in zip(parameters, param_list.parameters()):
                self.assertIs(p1, p2)
            for i in range(len(parameters)):
                self.assertIs(parameters[i], param_list[i])

        check()
        parameters += [make_param()]
        param_list += [parameters[-1]]
        check()
        parameters.append(make_param())
        param_list.append(parameters[-1])
        check()
        next_params = [make_param(), make_param()]
        parameters.extend(next_params)
        param_list.extend(next_params)
        check()
        parameters[2] = make_param()
        param_list[2] = parameters[2]
        check()
        parameters[-1] = make_param()
        param_list[-1] = parameters[-1]
        check()
        idx = torch.tensor(2, dtype=torch.int32)
        parameters[2] = make_param()
        param_list[idx] = parameters[2]
        self.assertIs(param_list[idx], parameters[2])
        check()

        self.assertEqual(param_list[1:], nn.ParameterList(parameters[1:]))
        self.assertEqual(param_list[3:], nn.ParameterList(parameters[3:]))
        self.assertEqual(param_list[:-1], nn.ParameterList(parameters[:-1]))
        self.assertEqual(param_list[:-3], nn.ParameterList(parameters[:-3]))
        self.assertEqual(param_list[::-1], nn.ParameterList(parameters[::-1]))

        with self.assertRaises(TypeError):
            param_list += make_param()
        with self.assertRaises(TypeError):
            param_list.extend(make_param())

        l1 = nn.Linear(1, 2)
        l2 = nn.Linear(2, 3)
        l3 = nn.Linear(3, 2)
        l4 = nn.Linear(2, 3)
        subnet = nn.Sequential(l3, l4)
        s = nn.Sequential(OrderedDict([
            ("layer1", l1),
            ("layer2", l2),
            ("layer3", l3),
            ("layer4", l4),
            ("subnet_layer", subnet)
        ]))
        parameters = list(s.parameters())
        param_list = nn.ParameterList()
        param_list.extend(s.parameters())
        self.assertEqual(parameters, list(param_list))
    def test_ParameterDict(self):
        parameters = OrderedDict([
            ('p1', Parameter(torch.randn(10, 10))),
            ('p2', Parameter(torch.randn(10, 10))),
            ('p3', Parameter(torch.randn(10, 10))),
        ])

        parameter_dict = nn.ParameterDict(parameters)

        def check():
            self.assertEqual(len(parameter_dict), len(parameters))
            for k1, m2 in zip(parameters, parameter_dict.parameters()):
                self.assertIs(parameters[k1], m2)
            for k1, k2 in zip(parameters, parameter_dict):
                self.assertIs(parameters[k1], parameter_dict[k2])
            for k in parameter_dict:
                self.assertIs(parameter_dict[k], parameters[k])
            for k in parameter_dict.keys():
                self.assertIs(parameter_dict[k], parameters[k])
            for k, v in parameter_dict.items():
                self.assertIs(v, parameters[k])
            for k1, m2 in zip(parameters, parameter_dict.values()):
                self.assertIs(parameters[k1], m2)
            for k in parameters.keys():
                self.assertTrue(k in parameter_dict)

        check()

        parameters['p4'] = Parameter(torch.randn(10, 10))
        parameter_dict['p4'] = parameters['p4']
        check()

        next_parameters = [
            ('p5', Parameter(torch.randn(10, 10))),
            ('p2', Parameter(torch.randn(10, 10))),
        ]
        parameters.update(next_parameters)
        parameter_dict.update(next_parameters)
        check()

        next_parameters = OrderedDict([
            ('p6', Parameter(torch.randn(10, 10))),
            ('p5', Parameter(torch.randn(10, 10))),
        ])
        parameters.update(next_parameters)
        parameter_dict.update(next_parameters)
        check()

        next_parameters = {
            'p8': Parameter(torch.randn(10, 10)),
            'p7': Parameter(torch.randn(10, 10))
        }
        parameters.update(sorted(next_parameters.items()))
        parameter_dict.update(next_parameters)
        check()

        del parameter_dict['p3']
        del parameters['p3']
        check()

        with self.assertRaises(TypeError):
            parameter_dict.update(1)

        with self.assertRaises(TypeError):
            parameter_dict.update([1])

        with self.assertRaises(ValueError):
            parameter_dict.update(Parameter(torch.randn(10, 10)))

        with self.assertRaises(TypeError):
            parameter_dict[1] = Parameter(torch.randn(10, 10))

        p_pop = parameter_dict.pop('p4')
        self.assertIs(p_pop, parameters['p4'])
        parameters.pop('p4')
        check()

        parameter_dict.clear()
        self.assertEqual(len(parameter_dict), 0)
    def test_add_module(self):
        l = nn.Linear(10, 20)
        net = nn.Sequential(l, l)
        net.add_module('empty', None)
        self.assertEqual(net.empty, None)
        net.add_module('l3', l)
        self.assertEqual(net.l3, l)
        l3 = nn.Linear(20, 10)
        net.add_module('l', l3)
        self.assertEqual(net.l, l3)
        self.assertRaises(TypeError, lambda: net.add_module('x', 'non-module'))
        self.assertRaisesRegex(TypeError, 'module name should be a string. Got int',
                               lambda: net.add_module(1, l))
        self.assertRaisesRegex(TypeError, 'module name should be a string. Got NoneType',
                               lambda: net.add_module(None, l))
    def test_module_to_argparse(self):
        net = nn.Sequential(nn.Linear(3, 3))
        cpu = torch.device('cpu')
        with self.assertRaises(TypeError):
            net.to(cpu, True)
        with self.assertRaises(TypeError):
            net.to(torch.long)
        with self.assertRaises(TypeError):
            net.to(None, True)
        with self.assertRaises(TypeError):
            net.to(cpu, torch.long, True)
        with self.assertRaises(TypeError):
            net.to(cpu, dtype=torch.long, non_blocking=True)
        with self.assertRaises(TypeError):
            net.to([])
        with self.assertRaises(TypeError):
            net.to({}, non_blocking=True)
        with self.assertRaises(TypeError):
            net.to(torch.tensor(3, dtype=torch.long), non_blocking=True)
        with self.assertRaises(TypeError):
            net.to(cpu, torch.tensor(3, dtype=torch.long), non_blocking=True)
    def test_type(self):
        l = nn.Linear(10, 20)
        net = nn.Sequential(l, l)
        net.add_module('empty', None)
        net.register_buffer('indices', torch.LongTensor(1))
        net.float()
        self.assertIsInstance(l.weight.data, torch.FloatTensor)
        self.assertIsInstance(l.bias.data, torch.FloatTensor)
        self.assertIsInstance(net.indices, torch.LongTensor)
        net.double()
        self.assertIsInstance(l.weight.data, torch.DoubleTensor)
        self.assertIsInstance(l.bias.data, torch.DoubleTensor)
        self.assertIsInstance(net.indices, torch.LongTensor)
        net.to(torch.half)
        self.assertIsInstance(l.weight.data, torch.HalfTensor)
        self.assertIsInstance(l.bias.data, torch.HalfTensor)
        self.assertIsInstance(net.indices, torch.LongTensor)
        if TEST_CUDA:
            net.float().cuda()
            self.assertIsInstance(l.weight.data, torch.cuda.FloatTensor)
            self.assertIsInstance(l.bias.data, torch.cuda.FloatTensor)
            self.assertIsInstance(net.indices, torch.cuda.LongTensor)
            net.cpu()
            self.assertIsInstance(l.weight.data, torch.FloatTensor)
            self.assertIsInstance(l.bias.data, torch.FloatTensor)
            self.assertIsInstance(net.indices, torch.LongTensor)
            net.to("cuda", torch.double, True)
            self.assertIsInstance(l.weight.data, torch.cuda.DoubleTensor)
            self.assertIsInstance(l.bias.data, torch.cuda.DoubleTensor)
            self.assertIsInstance(net.indices, torch.cuda.LongTensor)
            net.to(torch.empty(1, device="cuda:0", dtype=torch.half))
            self.assertIsInstance(l.weight.data, torch.cuda.HalfTensor)
            self.assertIsInstance(l.bias.data, torch.cuda.HalfTensor)
            self.assertIsInstance(net.indices, torch.cuda.LongTensor)
            net.to(torch.device("cpu"), non_blocking=True)
            self.assertIsInstance(l.weight.data, torch.HalfTensor)
            self.assertIsInstance(l.bias.data, torch.HalfTensor)
            self.assertIsInstance(net.indices, torch.LongTensor)
            net.type(torch.FloatTensor)
            self.assertIsInstance(l.weight.data, torch.FloatTensor)
            self.assertIsInstance(l.bias.data, torch.FloatTensor)
            net.to(torch.DoubleTensor(1))
            self.assertIsInstance(l.weight.data, torch.DoubleTensor)
            self.assertIsInstance(l.bias.data, torch.DoubleTensor)
            net.type(torch.cuda.FloatTensor)
            self.assertIsInstance(l.weight.data, torch.cuda.FloatTensor)
            self.assertIsInstance(l.bias.data, torch.cuda.FloatTensor)
    def test_non_leaf_parameters(self):
        l1 = nn.Linear(10, 10)
        l2 = nn.Linear(10, 10)

        def assign_weight():
            l2.weight = l1.weight + 2

        self.assertRaises(TypeError, assign_weight)
        # assigning a fresh Parameter directly should work
        l2.weight = Parameter(torch.randn(10, 10))
    def test_clip_grad_norm(self):
        l = nn.Linear(10, 10)
        max_norm = 2

        def compute_norm(norm_type):
            norm_type = float(norm_type)
            if norm_type != inf:
                total_norm = 0
                for p in l.parameters():
                    total_norm += p.grad.data.abs().pow(norm_type).sum()
                return pow(total_norm, 1. / norm_type)
            else:
                return max(p.grad.data.abs().max() for p in l.parameters())

        def compare_scaling(grads):
            p_scale = [p.grad.data.div(g).view(-1) for p, g in zip(l.parameters(), grads)]
            scale = torch.cat(p_scale)
            self.assertEqual(scale.std(), 0)
            return scale[0]

        grads = torch.arange(1., 101).view(10, 10), torch.ones(10).div(1000)
        for norm_type in [0.5, 1.5, 2, 4, 'inf']:
            for p, g in zip(l.parameters(), grads):
                p._grad = Variable(g.clone().view_as(p.data))
            norm_before = compute_norm(norm_type)
            norm = clip_grad_norm_(l.parameters(), max_norm, norm_type=norm_type)
            norm_after = compute_norm(norm_type)
            self.assertEqual(norm, norm_before)
            self.assertLessEqual(norm_after, norm_before)
            compare_scaling(grads)

        # small gradients should be left unchanged
        grads = torch.rand(10, 10).div(10000), torch.ones(10).div(500)
        for norm_type in [0.5, 1.5, 2, 4, 'inf']:
            for p, g in zip(l.parameters(), grads):
                p.grad.data.copy_(g)
            norm_before = compute_norm(norm_type)
            norm = clip_grad_norm_(l.parameters(), max_norm, norm_type=norm_type)
            norm_after = compute_norm(norm_type)
            self.assertEqual(norm_before, norm_after)
            self.assertLessEqual(norm_after, max_norm)
            scale = compare_scaling(grads)
            self.assertEqual(scale, 1)

        # should accept a single Tensor as input
        p1, p2 = torch.randn(10, 10), torch.randn(10, 10)
        g = torch.arange(1., 101).view(10, 10)
        p1._grad = g.clone()
        p2._grad = g.clone()
        for norm_type in [0.5, 1.5, 2, 4, 'inf']:
            clip_grad_norm_(p1, max_norm, norm_type=norm_type)
            clip_grad_norm_([p2], max_norm, norm_type=norm_type)
            self.assertEqual(p1.grad, p2.grad)
    def test_clip_grad_value(self):
        l = nn.Linear(10, 10)
        clip_value = 2.5

        grad_w, grad_b = torch.arange(-50., 50).view(10, 10).div_(5), torch.ones(10).mul_(2)
        for grad_list in [[grad_w, grad_b], [grad_w, None]]:
            for p, g in zip(l.parameters(), grad_list):
                p._grad = g.clone().view_as(p.data) if g is not None else g

            clip_grad_value_(l.parameters(), clip_value)
            for p in filter(lambda p: p.grad is not None, l.parameters()):
                self.assertLessEqual(p.grad.data.max(), clip_value)
                self.assertGreaterEqual(p.grad.data.min(), -clip_value)

        # should accept a single Tensor as input
        p1, p2 = torch.randn(10, 10), torch.randn(10, 10)
        g = torch.arange(-50., 50).view(10, 10).div_(5)
        p1._grad = g.clone()
        p2._grad = g.clone()
        clip_grad_value_(p1, clip_value)
        clip_grad_value_([p2], clip_value)
        self.assertEqual(p1.grad, p2.grad)
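
    # Illustrative sketch (not part of the original test suite): in a training
    # loop, gradients are typically clipped between backward() and the
    # optimizer step. The model and optimizer below are placeholders.
    def _clip_in_training_loop_sketch(self):
        model = nn.Linear(10, 10)
        opt = torch.optim.SGD(model.parameters(), lr=0.1)
        loss = model(torch.randn(4, 10)).pow(2).mean()
        loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=2.0)     # rescale large gradients
        clip_grad_value_(model.parameters(), clip_value=0.5)  # then clamp each element
        opt.step()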
    def test_parameters_to_vector(self):
        conv1 = nn.Conv2d(3, 10, 5)
        fc1 = nn.Linear(10, 20)
        model = nn.Sequential(conv1, fc1)

        vec = parameters_to_vector(model.parameters())
        self.assertEqual(vec.size(0), 980)

    def test_vector_to_parameters(self):
        conv1 = nn.Conv2d(3, 10, 5)
        fc1 = nn.Linear(10, 20)
        model = nn.Sequential(conv1, fc1)

        vec = Variable(torch.arange(0., 980))
        vector_to_parameters(vec, model.parameters())

        sample = next(model.parameters())[0, 0, 0]
        self.assertTrue(torch.equal(sample.data, vec.data[:5]))
    def _test_nonlinearity_propagate_nan(self, device):
        def test(nonlinearity, *args, **kwargs):
            x = torch.tensor([nan], device=device)
            fn = getattr(F, nonlinearity)
            try:
                self.assertTrue(math.isnan(fn(x, *args, **kwargs).item()))
            except Exception as e:
                if 'not implemented' not in str(e):
                    raise

        test('relu', inplace=True)
        test('rrelu', inplace=True)
        test('log_softmax', 0)
        test('leaky_relu', 0.2)
        test('threshold', 3, 2)
        test('threshold', 3, 2, inplace=True)

    def test_nonlinearity_propagate_nan(self):
        self._test_nonlinearity_propagate_nan('cpu')

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_nonlinearity_propagate_nan_cuda(self):
        self._test_nonlinearity_propagate_nan('cuda')
    def test_weight_norm(self):
        input = torch.randn(3, 5)
        m = nn.Linear(5, 7)
        expected_output = m(input)

        # add weight normalization
        m = torch.nn.utils.weight_norm(m)
        self.assertEqual(m.weight_v.size(), m.weight.size())
        self.assertEqual(m(input), expected_output)

        # remove weight norm
        m = torch.nn.utils.remove_weight_norm(m)
        self.assertFalse(hasattr(m, 'weight_g'))
        self.assertFalse(hasattr(m, 'weight_v'))
        self.assertEqual(m(input), expected_output)

        # test with dim=1
        m = torch.nn.utils.weight_norm(m, dim=1)
        self.assertEqual(m.weight_v.size(), m.weight.size())
        self.assertEqual(m(input), expected_output)

        # test with dim=None
        m = nn.Linear(5, 7)
        expected_output = m(input)
        m = torch.nn.utils.weight_norm(m, dim=None)
        self.assertEqual(m(input), expected_output)

    def test_weight_norm_pickle(self):
        m = torch.nn.utils.weight_norm(nn.Linear(5, 7))
        m = pickle.loads(pickle.dumps(m))
        self.assertIsInstance(m, nn.Linear)
    def test_spectral_norm(self):
        input = torch.randn(3, 5)
        m = nn.Linear(5, 7)
        m = torch.nn.utils.spectral_norm(m)

        self.assertEqual(m.weight_u.size(), torch.Size([m.weight.size(0)]))
        # weight_orig should be trainable
        self.assertTrue(hasattr(m, 'weight_orig'))
        self.assertTrue('weight_orig' in m._parameters)
        # weight_u and weight_v should just be buffers
        self.assertTrue(hasattr(m, 'weight_u'))
        self.assertTrue('weight_u' in m._buffers)
        self.assertTrue('weight_v' in m._buffers)
        # weight should be a plain attribute, not counted as a buffer or a parameter
        self.assertFalse('weight' in m._buffers)
        self.assertFalse('weight' in m._parameters)
        # it should also share storage with weight_orig
        self.assertEqual(m.weight_orig.storage(), m.weight.storage())
        self.assertEqual(m.weight_orig.size(), m.weight.size())
        self.assertEqual(m.weight_orig.stride(), m.weight.stride())

        m = torch.nn.utils.remove_spectral_norm(m)
        self.assertFalse(hasattr(m, 'weight_orig'))
        self.assertFalse(hasattr(m, 'weight_u'))
        # weight should be converted back to a parameter
        self.assertTrue(hasattr(m, 'weight'))
        self.assertTrue('weight' in m._parameters)
        # test correctness in training/eval modes and cpu/multi-gpu settings
        for apply_dp in (True, False):
            if apply_dp:
                if not TEST_MULTIGPU:
                    continue
                device = torch.device('cuda:0')

                def maybe_wrap(m):
                    return torch.nn.DataParallel(m, [0, 1])
            else:
                device = torch.device('cpu')

                def maybe_wrap(m):
                    return m

            for requires_grad in (True, False):
                m = nn.Linear(3, 4).to(device)
                m.weight.requires_grad_(requires_grad)
                m = torch.nn.utils.spectral_norm(m)
                wrapped_m = maybe_wrap(m)
                self.assertTrue(hasattr(m, 'weight_u'))
                u0 = m.weight_u.clone()
                v0 = m.weight_v.clone()

                # training forward passes update u and v
                input = torch.randn(2, 3, device=device)
                out = wrapped_m(input)

                # multiple forwards in training mode need u and v reset to the
                # same starting values for the finite-difference check to pass
                saved_u = m.weight_u.clone()
                saved_v = m.weight_v.clone()

                def fn(input):
                    m.weight_u.data.copy_(saved_u)
                    m.weight_v.data.copy_(saved_v)
                    out0 = wrapped_m(input)
                    out1 = wrapped_m(input)
                    return out0 + out1

                # test removing spectral norm
                pre_remove_out = wrapped_m(input)
                m = torch.nn.utils.remove_spectral_norm(m)
                self.assertEqual(wrapped_m(input), pre_remove_out)

                m = torch.nn.utils.spectral_norm(m)
                for _ in range(3):
                    pre_remove_out = wrapped_m(input)
                m = torch.nn.utils.remove_spectral_norm(m)
                self.assertEqual(wrapped_m(input), pre_remove_out)

                # eval behavior
                m = torch.nn.utils.spectral_norm(m)
                last_train_out = wrapped_m(input)
                last_train_u = m.weight_u.clone()
                last_train_v = m.weight_v.clone()
                wrapped_m.zero_grad()
                wrapped_m.eval()

                eval_out0 = wrapped_m(input)

                # multiple forwards in mixed training and eval modes
                saved_u = m.weight_u.clone()
                saved_v = m.weight_v.clone()

                def fn(input):
                    m.weight_u.data.copy_(saved_u)
                    m.weight_v.data.copy_(saved_v)
                    wrapped_m.train()
                    out0 = wrapped_m(input)
                    wrapped_m.eval()
                    out1 = wrapped_m(input)
                    wrapped_m.train()
                    out2 = wrapped_m(input)
                    wrapped_m.eval()
                    out3 = wrapped_m(input)
                    return out0 + out1 + out2 + out3

                def fn(weight):
                    return wrapped_m(input)
    def test_spectral_norm_load_state_dict(self):
        inp = torch.randn(2, 3)
        for activate_times in (0, 3):
            # test backward compatibility of the state-dict format
            snm = torch.nn.utils.spectral_norm(nn.Linear(3, 5))
            snm.train()
            for _ in range(activate_times):
                snm(inp)

            # craft a "version None" state dict: no metadata, no weight_v, weight stored directly
            version_none_state_dict = deepcopy(snm.state_dict())
            self.assertEqual({'weight_orig', 'bias', 'weight_u', 'weight_v'}, set(version_none_state_dict.keys()))
            self.assertIn('spectral_norm', version_none_state_dict._metadata[''])
            del version_none_state_dict._metadata['']['spectral_norm']
            del version_none_state_dict['weight_v']
            version_none_state_dict['weight'] = snm.weight.detach().clone()

            version_latest_state_dict = deepcopy(snm.state_dict())

            snm.eval()
            out0_eval = snm(inp)
            snm.train()
            out1_train = snm(inp)
            out2_train = snm(inp)
            snm.eval()
            out3_eval = snm(inp)

            snm.load_state_dict(version_none_state_dict)
            if activate_times > 0:
                # only check equivalence once u and v have gone through at least one power iteration
                snm.eval()
                self.assertEqual(out0_eval, snm(inp))
                snm.train()
                self.assertEqual(out1_train, snm(inp))
                self.assertEqual(out2_train, snm(inp))
                snm.eval()
                self.assertEqual(out3_eval, snm(inp))

            snm.load_state_dict(version_latest_state_dict)
    def test_spectral_norm_dim(self):
        inp = torch.randn(2, 3, 10, 12)
        m = nn.ConvTranspose2d(3, 4, (5, 6))
        m = torch.nn.utils.spectral_norm(m)
        # this should not run into incompatible shapes
        m(inp)
        # check that u refers to the same dimension
        self.assertEqual(m.weight_u.shape, m.weight_orig[0, :, 0, 0].shape)

    def test_spectral_norm_forward(self):
        input = torch.randn(3, 5)
        m = nn.Linear(5, 7)
        m = torch.nn.utils.spectral_norm(m)
        # naive forward: one power-iteration step followed by normalization
        _weight, _bias, _u = m.weight_orig, m.bias, m.weight_u
        _weight_mat = _weight.view(_weight.size(0), -1)
        _v = torch.mv(_weight_mat.t(), _u)
        _v = F.normalize(_v, dim=0, eps=1e-12)
        _u = torch.mv(_weight_mat, _v)
        _u = F.normalize(_u, dim=0, eps=1e-12)
        _weight.data /= torch.dot(_u, torch.matmul(_weight_mat, _v))
        out_hat = torch.nn.functional.linear(input, _weight, _bias)
        expect_out = m(input)
        self.assertAlmostEqual(expect_out, out_hat)

    def test_spectral_norm_pickle(self):
        m = torch.nn.utils.spectral_norm(nn.Linear(5, 7))
        m = pickle.loads(pickle.dumps(m))
        self.assertIsInstance(m, nn.Linear)
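
    # Illustrative sketch (not part of the original test suite): the power
    # iteration used by spectral_norm estimates the largest singular value,
    # which can be cross-checked against torch.svd on a random matrix.
    def _spectral_norm_power_iteration_sketch(self):
        torch.manual_seed(0)
        w = torch.randn(7, 5)
        u = F.normalize(torch.randn(7), dim=0)
        for _ in range(300):  # plenty of iterations for a small matrix
            v = F.normalize(torch.mv(w.t(), u), dim=0)
            u = F.normalize(torch.mv(w, v), dim=0)
        sigma_est = torch.dot(u, torch.mv(w, v))
        sigma_ref = torch.svd(w)[1][0]  # largest singular value
        self.assertLess(((sigma_est - sigma_ref).abs() / sigma_ref).item(), 1e-2)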
    def test_threshold_int(self):
        x = torch.tensor([-3, -2, -1, 0, 1, 2, 3])
        expected = torch.tensor([99, 99, 99, 99, 1, 2, 3])
        self.assertEqual(F.threshold(x, 0, 99), expected)
    def test_embedding_sparse_basic(self):
        embedding = nn.Embedding(10, 20, sparse=True)
        input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
        embedding(input).sum().backward()
        self.assertTrue(embedding.weight.grad.is_sparse)
        self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)
    def test_embedding_sparse_empty_tensor(self):
        embedding = nn.Embedding(0, 0, sparse=True)
        input = torch.tensor([], dtype=torch.long)
        embedding(input).sum().backward()
        self.assertTrue(embedding.weight.grad.is_sparse)
        self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)

        embedding = nn.Embedding(10, 0, sparse=True)
        input = torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]])
        embedding(input).sum().backward()
        self.assertTrue(embedding.weight.grad.is_sparse)
        self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)
    def test_embedding_padding_idx(self):
        embedding = nn.Embedding(10, 20, padding_idx=0)
        input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
        output = embedding(input)

        embedding = nn.Embedding(10, 20, padding_idx=0, sparse=True)
        input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
        output = embedding(input)

        # negative indexing for padding_idx
        embedding = nn.Embedding(10, 20, padding_idx=-2)
        input = Variable(torch.LongTensor([[0, 2, 8, 5], [4, 8, 0, 9]]))
        output = embedding(input)

        embedding = nn.Embedding(10, 20, padding_idx=-2, sparse=True)
        input = Variable(torch.LongTensor([[0, 2, 8, 5], [4, 8, 0, 9]]))
        output = embedding(input)

        # out-of-bounds padding_idx should be rejected
        self.assertRaises(AssertionError, nn.Embedding, num_embeddings=10, embedding_dim=20, padding_idx=25)
        self.assertRaises(AssertionError, nn.Embedding, num_embeddings=10, embedding_dim=20, padding_idx=-25)

        # the padding row should receive no gradient
        padding_idx = 0
        embedding = nn.Embedding(5, 2, padding_idx=padding_idx)
        for n in (1, 2):
            for other_indices in ([], [1, 3], [2]):
                indices = torch.LongTensor(other_indices + [padding_idx] * n)
                pre = embedding.weight[padding_idx].clone()
                embedding(indices).sum().backward()
                after = (embedding.weight + embedding.weight.grad)[padding_idx]
                embedding.zero_grad()
                self.assertEqual(after, pre)
    def test_embedding_max_norm(self):
        embedding = nn.Embedding(22, 5, max_norm=1.0)
        input = Variable(torch.LongTensor([2, 8, 8, 6]))
        output = embedding(input)
        self.assertTrue(output.data.norm(p=2, dim=1).le(1).all())
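
    # Illustrative sketch (not part of the original test suite): max_norm
    # renormalizes, in place, the rows of embedding.weight that appear in the
    # input, so looked-up vectors never exceed the requested norm.
    def _embedding_max_norm_sketch(self):
        embedding = nn.Embedding(10, 4, max_norm=1.0)
        with torch.no_grad():
            embedding.weight.mul_(10)  # blow up the norms on purpose
        out = embedding(torch.tensor([1, 2, 3], dtype=torch.long))
        self.assertTrue(out.norm(p=2, dim=1).le(1 + 1e-6).all())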
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_embedding_max_norm_cuda(self, dtype=torch.float):
        embedding = nn.Embedding(22, 5, max_norm=1.0).to("cuda", dtype=dtype)
        input = torch.tensor([2, 8, 8, 6], device="cuda", dtype=torch.long)
        output = embedding(input)
        self.assertTrue(output.data.norm(p=2, dim=1).le(1).all())
    def test_embedding_from_pretrained(self):
        a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
        embedding = nn.Embedding.from_pretrained(a)
        self.assertEqual(a, embedding.weight.data)

        input = torch.LongTensor([0, 1])
        output = embedding(input)
        self.assertEqual(a, output)
    def test_embedding_from_pretrained_options(self):
        a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
        opts = {
            "max_norm": 2.,
            "norm_type": .5,
            "scale_grad_by_freq": False,
            "sparse": True
        }
        embedding = nn.Embedding.from_pretrained(a, **opts)
        input = torch.LongTensor([0, 1])
        output = embedding(input)
        # the weight matrix should have been renormalized in place
        self.assertTrue(a.ne(torch.arange(1, 7, dtype=a.dtype).view(2, 3)).all())
        self.assertTrue(output.data.norm(p=opts["norm_type"], dim=1).le(opts["max_norm"]).all())
    def test_embedding_functional(self):
        a = torch.tensor([
            [1, 3, 2],
            [0, 2, 1]
        ], dtype=torch.long)
        embeddings = torch.rand(4, 3, requires_grad=True)

        embed_old = torch.nn.Embedding(4, 3)
        embed_old.weight.data = embeddings.data
        res_old = embed_old(a)

        res_F = F.embedding(a, embeddings)
        self.assertEqual(res_old, res_F)
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types([torch.float, torch.half])
    def test_softmax_dtype(self, dtype=torch.float):
        input = torch.rand(32, 100, device="cuda", dtype=dtype, requires_grad=True)
        inputf = input.to(torch.float).detach().requires_grad_(True)
        out = F.softmax(input, dim=-1, dtype=torch.float)
        outf = F.softmax(inputf, dim=-1)
        # the two results should be bitwise identical
        self.assertEqual(out, outf, prec=0)

        gO = torch.empty_like(outf).uniform_()
        out.backward(gO)
        outf.backward(gO)
        # the gradients should also match exactly after casting back
        self.assertEqual(input.grad, inputf.grad.to(dtype), prec=0)
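
    # Illustrative sketch (not part of the original test suite): the dtype
    # argument lets softmax accumulate in a wider type than its input, which
    # is what the CUDA test above relies on for half-precision inputs.
    def _softmax_dtype_sketch(self):
        x = torch.rand(8, 16)
        y = F.softmax(x, dim=-1, dtype=torch.double)
        self.assertEqual(y.dtype, torch.double)
        self.assertAlmostEqual(y.sum().item(), 8.0, places=4)  # each row sums to 1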
    def _test_softmax_backward(self, device):
        if device.type == 'cuda':
            dtypes = [torch.float, torch.half]
        else:
            dtypes = [torch.float]
        # include a zero-sized input
        sizes = [(0, 10), (32, 20)]
        for fn in [F.softmax, F.log_softmax]:
            for dtype in dtypes:
                for size in sizes:
                    input = torch.rand(size, device=device, dtype=dtype, requires_grad=True)
                    output = fn(input, dtype=torch.float, dim=1).sum()
                    grad_input, = torch.autograd.grad(output, input, create_graph=True)
                    grad_input.sum().backward()

    def test_softmax_backward(self):
        self._test_softmax_backward(torch.device('cpu'))

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_softmax_backward_cuda(self):
        self._test_softmax_backward(torch.device('cuda'))
    def _test_gumbel_softmax_st_shapes(self, cuda, dtype, shape, dim, count_expected):
        logits = torch.randn(shape, dtype=torch.float)
        logits = logits.to(dtype)
        if cuda:
            logits = logits.cuda()

        y_draw = F.gumbel_softmax(logits, hard=True, dim=dim)

        # all values are non-negative
        self.assertGreaterEqual(y_draw.min(), 0)
        # the shape is unchanged
        self.assertTrue(y_draw.shape == logits.shape)
        # one choice per draw
        self.assertEqual(y_draw.sum(), count_expected, prec=torch.finfo(y_draw.dtype).eps)
    def _test_gumbel_softmax_straight_through(self, cuda, dtype):
        num_draws = 100
        logits = torch.tensor([0.2, 0.8, 0.1])
        logits = logits.reshape([1, 3])
        logits = logits.to(dtype).requires_grad_()
        if cuda:
            logits = logits.cuda()
        probs = logits.softmax(dim=-1)

        counts = torch.zeros_like(logits)
        for _ in range(num_draws):
            y_draw = F.gumbel_softmax(logits, hard=True)
            counts = counts + y_draw

        # all values are non-negative
        self.assertGreaterEqual(y_draw.min(), 0)
        # each experiment results in exactly one draw
        self.assertEqual(counts.sum(), num_draws, prec=torch.finfo(counts.dtype).eps)

        # check that the counts are asymptotically as expected:
        # z is approximately N(0, 1) for an unbiased count
        expected = probs * num_draws
        z = (counts - expected) / (expected * (1 - probs)).sqrt()
        # 2.58 is roughly the two-sided 99% critical value
        self.assertLess(z.abs().max().item(), 2.58)
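
    # Illustrative sketch (not part of the original test suite), assuming scipy
    # is available: 2.58 above is approximately the two-sided 99% critical
    # value of the standard normal distribution.
    def _gumbel_z_threshold_sketch(self):
        crit = stats.norm.ppf(0.995)  # ~2.5758
        self.assertLess(abs(crit - 2.58), 0.01)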
    def _test_gumbel_softmax_grad(self, cuda, dtype):
        # "hard" and "not hard" should propagate the same gradient
        device = torch.device("cuda") if cuda else torch.device("cpu")
        logits_soft = torch.zeros(10, 10, dtype=dtype, device=device, requires_grad=True)
        logits_hard = torch.zeros(10, 10, dtype=dtype, device=device, requires_grad=True)

        # use the same random draws for both calls
        with freeze_rng_state():
            y_soft = F.gumbel_softmax(logits_soft, hard=False)
        with freeze_rng_state():
            y_hard = F.gumbel_softmax(logits_hard, hard=True)

        y_soft.sum().backward()
        y_hard.sum().backward()

        # 2 * eps: one addition plus one subtraction
        tol = 2 * torch.finfo(dtype).eps
        self.assertEqual(logits_soft.grad, logits_hard.grad, prec=tol)
    @repeat_test_for_types(NO_HALF_TENSORTYPES)
    def test_gumbel_softmax(self, dtype=torch.float):
        """NO_HALF_TENSORTYPES because many half ops don't work on CPU."""
        self._test_gumbel_softmax_straight_through(cuda=False, dtype=dtype)
        self._test_gumbel_softmax_grad(cuda=False, dtype=dtype)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_gumbel_softmax_cuda(self, dtype=torch.float):
        self._test_gumbel_softmax_straight_through(cuda=True, dtype=dtype)
        self._test_gumbel_softmax_grad(cuda=True, dtype=dtype)
    def _test_EmbeddingBag(self, cuda, mode, sparse, dtype=torch.double):
        # check a known test example
        device = torch.device("cuda") if cuda else torch.device("cpu")
        es = nn.EmbeddingBag(5, 2, mode=mode, sparse=sparse).to(device, dtype)
        es.weight.data.copy_(torch.arange(1, 11, device=device, dtype=dtype).view_as(es.weight))
        input = torch.tensor([3, 1, 1, 1, 4, 0], device=device, dtype=torch.long)
        offsets = torch.tensor([0, 0, 3, 3, 6], device=device, dtype=torch.long)
        grad_output = torch.tensor(
            [1, 2,
             3, 4], device=device, dtype=dtype).view(2, 2)
        grad_output_with_empty = torch.tensor(
            [99, 99,
             1, 2,
             99, 99,
             3, 4,
             99, 99], device=device, dtype=dtype).view(5, 2)

        if mode == "sum" or mode == "mean":
            denominator = 1 if mode == "sum" else 3
            expected_output = torch.tensor(
                [[13, 16],
                 [13, 16]], device=device, dtype=dtype) / denominator
            expected_output_with_empty = torch.tensor(
                [[0, 0],
                 [13, 16],
                 [0, 0],
                 [13, 16],
                 [0, 0]], device=device, dtype=dtype) / denominator
            expected_grad_weight = torch.tensor(
                [[3, 4],
                 [5, 8],
                 [0, 0],
                 [1, 2],
                 [3, 4]], device=device, dtype=dtype) / denominator
        elif mode == "max":
            expected_output = torch.tensor(
                [[7, 8],
                 [9, 10]], device=device, dtype=dtype)
            expected_output_with_empty = torch.tensor(
                [[0, 0],
                 [7, 8],
                 [0, 0],
                 [9, 10],
                 [0, 0]], device=device, dtype=dtype)
            expected_grad_weight = torch.tensor(
                [[0, 0],
                 [0, 0],
                 [0, 0],
                 [1, 2],
                 [3, 4]], device=device, dtype=dtype)
2333 output = es(input, offsets)
2334 output.backward(grad_output_with_empty)
2336 es_weight_grad = es.weight.grad.data
2338 es_weight_grad = es.weight.grad.to_dense()
2339 self.
assertEqual(output, expected_output_with_empty)
2340 self.
assertEqual(es_weight_grad, expected_grad_weight, dtype2prec[dtype])
2343 input = input.view(2, -1)
2346 output.backward(grad_output)
2348 es_weight_grad = es.weight.grad
2350 es_weight_grad = es.weight.grad.to_dense()
2352 self.
assertEqual(es_weight_grad, expected_grad_weight, dtype2prec[dtype])
2356 inputs =
torch.tensor([], dtype=torch.long, device=device)
2358 es(inputs, offsets).sum().backward()
2359 dense_grad = es.weight.grad
2360 if dense_grad.is_sparse:
2361 dense_grad = dense_grad.to_dense()
2362 self.
assertEqual(dense_grad, torch.zeros_like(es.weight))
2365 def _test_vs_Embedding(N, D, B, L, max_norm=None):
2366 es = nn.EmbeddingBag(N, D, mode=mode, sparse=sparse, max_norm=max_norm).to(device, dtype)
2367 e = nn.Embedding(N, D, max_norm=max_norm).to(device, dtype)
2368 e.weight.data.copy_(es.weight)
2369 input = torch.randint(N, (B, L), device=device, dtype=torch.long)
2370 offsets = torch.arange(0, B, device=device, dtype=torch.long).mul_(L)
2371 grad_output = torch.rand(B, D, device=device, dtype=dtype)
2373 output = es(input.view(-1), offsets)
2375 ref_output = e(input).sum(1)
2376 elif mode ==
'mean':
2377 ref_output = e(input).mean(1)
2379 ref_output = e(input).max(1)[0]
2381 self.
assertEqual(output, ref_output, dtype2prec[dtype])
2383 output.backward(grad_output)
2384 ref_output.backward(grad_output)
2385 es_weight_grad = es.weight.grad.data
2387 es_weight_grad = es.weight.grad.data.to_dense()
2390 needed_prec = dtype2prec[dtype] * 2
2391 self.
assertEqual(es_weight_grad, e.weight.grad, needed_prec)
2393 N, D, B, L = random.randint(1, 100), random.randint(1, 100), random.randint(1, 50), random.randint(1, 50)
2394 _test_vs_Embedding(N, D, B, L)
2395 for max_norm
in (
None, 3):
2396 for p
in itertools.product([1, 2], repeat=4):
2397 _test_vs_Embedding(*p, max_norm=max_norm)
2400 es = nn.EmbeddingBag(10, 20, mode=mode, sparse=sparse)
2401 input = torch.ones(3, 4)
2402 offset = torch.arange(0, 3)
2403 self.assertRaises(ValueError,
lambda: es(input, offset))
2404 self.assertRaises(ValueError,
lambda: es(input.view(-1)))
2406 self.assertRaises(ValueError,
lambda: es(input.view(-1), offset))
2409 self.assertRaises(ValueError,
lambda: es(input.view(-1), offset))
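
    # Illustrative sketch (not part of the original suite): with a 1-D input, `offsets`
    # marks where each bag starts, so offsets=[0, 0, 3] yields an empty bag, then
    # input[0:3], then input[3:]. The weights and indices below are only an example.
    def _embeddingbag_offsets_sketch(self):
        weight = torch.arange(1., 11.).view(5, 2)
        bag = nn.EmbeddingBag.from_pretrained(weight, mode='sum')
        input = torch.tensor([3, 1, 1, 1, 4, 0])
        offsets = torch.tensor([0, 0, 3])
        out = bag(input, offsets)
        # bag 0 is empty, bag 1 sums rows [3, 1, 1], bag 2 sums rows [1, 4, 0]
        assert torch.equal(out, torch.tensor([[0., 0.], [13., 16.], [13., 16.]]))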
    def test_embeddingbag_from_pretrained(self):
        a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
        embeddingbag = nn.EmbeddingBag.from_pretrained(a)
        self.assertEqual(a, embeddingbag.weight.data)

        input = torch.LongTensor([[0, 1]])
        output = embeddingbag(input)
        self.assertEqual(a.mean(0, keepdim=True), output)

    def test_embeddingbag_from_pretrained_options(self):
        a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
        opts = {
            "max_norm": 2.,
            "norm_type": .5,
            "scale_grad_by_freq": False,
            "mode": "max",
            "sparse": False,
        }
        embeddingbag = nn.EmbeddingBag.from_pretrained(a, **opts)

        input = torch.LongTensor([[0, 1]])
        output = embeddingbag(input)
        self.assertEqual(a.max(0, keepdim=True)[0], output)
        self.assertTrue(a.ne(torch.arange(1, 7, dtype=a.dtype).view(2, 3)).all())
        self.assertTrue(a.norm(p=opts["norm_type"], dim=1).le(opts["max_norm"]).all())

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_pool3d_size_one_feature_dim(self):
        # Tests unusual strides for a feature dim of size 1
        x = Variable(torch.randn(7, 1, 5, 3, 2, device="cuda"))
        strange_strides = [30, 1234, 6, 2, 1]
        y = x.as_strided(x.size(), strange_strides)
        x = x.cpu().as_strided(x.size(), strange_strides)

        to_test = {
            'max_pool3d': lambda t: F.max_pool3d(t, (5, 1, 1), stride=(5, 1, 1)),
            'avg_pool3d': lambda t: F.avg_pool3d(t, (5, 1, 1), stride=(5, 1, 1)),
        }

        for test, fn in to_test.items():
            # should not crash
            out_y = fn(y)
            out_x = fn(x)
            self.assertEqual(out_y, out_x.cuda(), test)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_AvgPool3d_backward_after_cat_dim1_cuda(self):
        # x has to have batch_size 1 to test the contiguity checks
        x = torch.randn(1, 3, 4, 4, 4, device="cuda", requires_grad=True)
        y = F.avg_pool3d(x, kernel_size=3, padding=1, stride=2)

        grad = torch.randn(y.size(), device="cuda")
        # increase the stride in dimension 0; the tensor is still contiguous because size[0] is 1
        stride = list(grad.stride())
        stride[0] = stride[0] * 2
        grad.set_(grad.storage(), 0, grad.size(), stride)
        assert grad.is_contiguous()

        y.backward(grad)

    @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
    def test_contig_wrong_stride_cudnn(self):
        # x has to have batch_size 1 to test the contiguity checks
        x = torch.randn(1, 16, 5, 5, device="cuda")
        stride = list(x.stride())
        stride[0] = 20
        # change the stride in dimension 0; the tensor is still contiguous because size[0] is 1
        x.set_(x.storage(), 0, x.size(), stride)
        self.assertTrue(x.is_contiguous())
        F.conv_transpose2d(x, torch.randn(16, 1, 1, 1, device="cuda"))
        F.conv2d(x, torch.randn(1, 16, 1, 1, device="cuda"))
    def test_embedding_bag(self):
        self._test_EmbeddingBag(False, 'sum', False)
        self._test_EmbeddingBag(False, 'mean', False)
        self._test_EmbeddingBag(False, 'max', False)

        self._test_EmbeddingBag(False, 'sum', True)
        self._test_EmbeddingBag(False, 'mean', True)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_embedding_bag_cuda(self, dtype=torch.float):
        self._test_EmbeddingBag(True, 'sum', False, dtype)
        self._test_EmbeddingBag(True, 'mean', False, dtype)
        self._test_EmbeddingBag(True, 'max', False, dtype)
        if dtype != torch.half:
            # sparse half tensors are not enabled on CUDA
            self._test_EmbeddingBag(True, 'sum', True, dtype)
            self._test_EmbeddingBag(True, 'mean', True, dtype)

    def test_fractional_max_pool2d(self):
        x = torch.randn(1, 2, 7, 7, requires_grad=True)
        samples = x.new(1, 2, 2).uniform_()

        def func(x):
            return F.fractional_max_pool2d(
                x, (2, 2), output_size=(3, 3), _random_samples=samples)

        gradcheck(func, [x])
        gradgradcheck(func, [x])

        x = torch.randn(2, 7, 7, requires_grad=True)
        samples = x.new(2, 2).uniform_()
        gradcheck(func, [x])
        gradgradcheck(func, [x])

    def test_Dropout(self):
        input = torch.Tensor(1000)
        self._test_dropout(nn.Dropout, False, input)

    def test_Dropout2d(self):
        b = random.randint(1, 5)
        w = random.randint(1, 5)
        h = random.randint(1, 5)
        num_features = 1000
        input = torch.Tensor(num_features, b, w, h)
        self._test_dropout(nn.Dropout2d, False, input)

    def test_Dropout3d(self):
        b = random.randint(1, 5)
        w = random.randint(1, 5)
        h = random.randint(1, 5)
        d = random.randint(1, 2)
        num_features = 1000
        input = torch.Tensor(num_features, b, d, w, h)
        self._test_dropout(nn.Dropout3d, False, input)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_Dropout_cuda(self):
        input = torch.Tensor(1000)
        self._test_dropout(nn.Dropout, True, input)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_Dropout2d_cuda(self):
        b = random.randint(1, 5)
        w = random.randint(1, 5)
        h = random.randint(1, 5)
        num_features = 1000
        input = torch.Tensor(num_features, b, w, h)
        self._test_dropout(nn.Dropout2d, True, input)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_Dropout3d_cuda(self):
        b = random.randint(1, 5)
        w = random.randint(1, 5)
        h = random.randint(1, 5)
        d = random.randint(1, 2)
        num_features = 1000
        input = torch.Tensor(num_features, b, d, w, h)
        self._test_dropout(nn.Dropout3d, True, input)

    def test_AlphaDropout(self):
        # generate a random tensor with zero mean and unit std
        input = torch.randn(5000)
        self._test_alpha_dropout(nn.AlphaDropout, input)

    def test_FeatureAlphaDropout(self):
        b = random.randint(1, 5)
        w = random.randint(1, 5)
        h = random.randint(1, 5)
        d = random.randint(1, 2)
        num_features = 1000
        input = torch.randn(num_features, b, d, w, h)
        self._test_alpha_dropout(nn.FeatureAlphaDropout, input)
    def _test_InstanceNorm_general(self, cls, input, device="cpu", dtype=torch.float):
        # default case: track_running_stats=False
        b, c = input.size(0), input.size(1)
        input_var = input.to(device=device, dtype=dtype).requires_grad_()

        IN = cls(c, eps=0).to(device, dtype)

        output = IN(input_var)
        out_reshaped = output.view(b * c, -1)

        mean = out_reshaped.mean(1)
        var = out_reshaped.var(1, unbiased=False)

        self.assertAlmostEqual(torch.abs(mean.data).mean(), 0, delta=1e-5)
        self.assertAlmostEqual(torch.abs(var.data - 1).mean(), 0, delta=1e-5)

        # check that eval mode doesn't change behavior
        grad_out = torch.randn_like(output)
        res1 = output.data.clone()
        output.backward(grad_out)
        grad1 = input_var.grad.data.clone()

        IN.eval()
        output = IN(input_var)
        input_var.grad = None
        output.backward(grad_out)
        res2 = output.data
        grad2 = input_var.grad.data
        self.assertEqual(res1, res2)
        self.assertEqual(grad1, grad2)

        # with track_running_stats=True and momentum=1, the running stats equal the
        # input statistics (with unbiased correction for the variance)
        IN = cls(c, momentum=1, eps=0, track_running_stats=True).to(device, dtype)

        output = IN(input_var)

        input_reshaped = input_var.transpose(1, 0).reshape(c, -1)
        mean = input_reshaped.mean(1)

        input_reshaped = input_var.transpose(1, 0).reshape(c, b, -1)
        var = input_reshaped.var(2, unbiased=True)[:, :]

        self.assertAlmostEqual(torch.abs(mean.data - IN.running_mean).mean(), 0, delta=1e-5)
        self.assertAlmostEqual(torch.abs(var.data.mean(1) - IN.running_var).mean(), 0, delta=1e-5)

        # in eval mode, adding X * std to a channel of the input should make the
        # corresponding channel of the output have mean X
        IN.eval()
        delta = IN.running_var.sqrt() * torch.arange(c, device=device, dtype=dtype)
        delta = delta.view(-1, *[1 for _ in range(2, input.dim())])
        output = IN(input_var + delta)
        self.assertEqual(output.transpose(0, 1).reshape(c, -1).mean(1), torch.arange(c))

    def _test_InstanceNorm_cuda_half(self, cls, input):
        # THNN path
        input = Variable(input.cuda().half().random_(1, 10), requires_grad=True)
        m = cls(input.size(1), affine=True, track_running_stats=True).to("cuda", torch.half)
        thnn_output = m(input)
        thnn_output.sum().backward()
        thnn_input_grad = input.grad.data.clone()
        self.assertEqual(thnn_output.type(), input.type())

        # cuDNN path
        if TEST_CUDNN:
            input.grad = None
            m = m.float()
            cudnn_output = m(input)
            cudnn_output.sum().backward()
            cudnn_input_grad = input.grad.data.clone()
            self.assertEqual(cudnn_output.type(), input.type())
    def test_InstanceNorm1d_general(self):
        b = random.randint(3, 5)
        c = random.randint(3, 5)
        d = random.randint(8, 10)

        input = torch.rand(b, c, d)
        self._test_InstanceNorm_general(nn.InstanceNorm1d, input, dtype=torch.float)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_InstanceNorm1d_general_cuda(self):
        b = random.randint(3, 5)
        c = random.randint(3, 5)
        d = random.randint(8, 10)

        input = torch.rand(b, c, d)
        self._test_InstanceNorm_general(nn.InstanceNorm1d, input, "cuda", torch.float)
        self._test_InstanceNorm_cuda_half(nn.InstanceNorm1d, input)

    def test_InstanceNorm2d_general(self):
        b = random.randint(3, 5)
        c = random.randint(3, 5)
        w = random.randint(3, 6)
        h = random.randint(6, 8)

        input = torch.rand(b, c, h, w)
        self._test_InstanceNorm_general(nn.InstanceNorm2d, input, dtype=torch.float)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_InstanceNorm2d_general_cuda(self):
        b = random.randint(3, 5)
        c = random.randint(3, 5)
        w = random.randint(3, 6)
        h = random.randint(6, 8)

        input = torch.rand(b, c, h, w)
        self._test_InstanceNorm_general(nn.InstanceNorm2d, input, "cuda", torch.float)
        self._test_InstanceNorm_cuda_half(nn.InstanceNorm2d, input)

    def test_InstanceNorm3d_general(self):
        b = random.randint(3, 5)
        c = random.randint(3, 5)
        w = random.randint(2, 5)
        h = random.randint(2, 5)
        d = random.randint(2, 5)

        input = torch.rand(b, c, h, w, d)
        self._test_InstanceNorm_general(nn.InstanceNorm3d, input, dtype=torch.float)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_InstanceNorm3d_general_cuda(self):
        b = random.randint(3, 5)
        c = random.randint(2, 5)
        w = random.randint(2, 5)
        h = random.randint(2, 5)
        d = random.randint(2, 5)

        input = torch.rand(b, c, h, w, d)
        self._test_InstanceNorm_general(nn.InstanceNorm3d, input, "cuda", torch.float)
        self._test_InstanceNorm_cuda_half(nn.InstanceNorm3d, input)
    def _test_LayerNorm_general(self, device="cpu", dtype=torch.float):
        for i in range(2, 6):
            shape = torch.randint(3, 6, (i,), dtype=torch.long).tolist()
            x = torch.empty(*shape, device=device, dtype=dtype).uniform_(0, 10)
            normalized_ndim = random.randint(1, i - 1)
            normalized_shape = shape[-normalized_ndim:]
            unnormalized_shape = shape[:-normalized_ndim]

            # test that LayerNorm normalizes to mean 0 and stddev 1
            ln = nn.LayerNorm(normalized_shape, eps=0).to(device, dtype)
            ln.weight.data.fill_(1)
            ln.bias.data.fill_(0)
            output = ln(x)
            out_reshaped = output.view(*(unnormalized_shape + [-1]))
            mean = out_reshaped.mean(-1)
            var = out_reshaped.var(-1, unbiased=False)
            self.assertAlmostEqual(torch.abs(mean.data).mean(), 0, delta=1e-5)
            self.assertAlmostEqual(torch.abs(var.data - 1).mean(), 0, delta=1e-5)

            # test that LayerNorm applies weight and bias correctly
            scale, bias = torch.empty(2).uniform_(0.2, 2).tolist()
            ln.weight.data.fill_(scale)
            ln.bias.data.fill_(bias)
            output = ln(x)
            out_reshaped = output.view(*(unnormalized_shape + [-1]))
            mean = out_reshaped.mean(-1)
            var = out_reshaped.var(-1, unbiased=False)
            self.assertAlmostEqual(torch.abs(mean.data - bias).mean(), 0, delta=1e-5)
            self.assertAlmostEqual(torch.abs(var.data - scale ** 2).mean(), 0, delta=1e-5)

        bad_norm_shape_input_shape = {
            (2, 3): (3,),
            (2,): (1, 2, 3),
            (10,): (2, 3),
            10: (2, 3),
        }
        for norm_shape, input_shape in bad_norm_shape_input_shape.items():
            ln = nn.LayerNorm(norm_shape)
            input = torch.empty(input_shape, device=device, dtype=dtype).uniform_(0, 10)
            self.assertRaises(RuntimeError, lambda: ln(input))

    def _test_LayerNorm_cuda_half(self):
        input = Variable(torch.empty(2, 3, 3, 2).to("cuda", torch.half).random_(1, 10), requires_grad=True)
        m = nn.LayerNorm([3, 2]).to("cuda", torch.half)
        output = m(input)
        output.sum().backward()
        self.assertEqual(output.type(), input.type())

    def test_LayerNorm_general(self):
        self._test_LayerNorm_general()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_LayerNorm_general_cuda(self):
        self._test_LayerNorm_general("cuda")
        self._test_LayerNorm_cuda_half()
    def _test_GroupNorm_general(self, device="cpu", dtype=torch.float):
        good_shape_g = {
            (1, 2, 3, 4): 2,
            (2, 3, 10): 3,
            (3, 1, 1, 1, 2): 1,
            (2, 6, 4, 2, 2): 3,
        }
        for shape, g in good_shape_g.items():
            x = torch.empty(*shape, device=device, dtype=dtype).uniform_(0, 10)
            b = shape[0]
            c = shape[1]

            # test that GroupNorm normalizes to mean 0 and stddev 1
            gn = nn.GroupNorm(g, c, eps=0).to(device, dtype)
            gn.weight.data.fill_(1)
            gn.bias.data.fill_(0)
            output = gn(x)
            out_reshaped = output.view(b, g, -1)
            mean = out_reshaped.mean(-1)
            var = out_reshaped.var(-1, unbiased=False)
            self.assertAlmostEqual(torch.abs(mean).mean(), 0, delta=1e-5)
            self.assertAlmostEqual(torch.abs(var - 1).mean(), 0, delta=1e-5)

            # test that GroupNorm applies weight and bias correctly
            scale = torch.empty(c, device=device, dtype=dtype).uniform_(0.2, 2)
            bias = torch.empty(c, device=device, dtype=dtype).uniform_(0.2, 2)
            gn.weight.data.copy_(scale)
            gn.bias.data.copy_(bias)
            output = gn(x)
            out_reshaped = output.view(b, c, -1)
            out_normed = (out_reshaped - bias.view(c, 1)) / scale.view(c, 1)
            out_normed_reshaped = out_normed.view(b, g, -1)
            mean = out_normed_reshaped.mean(-1)
            var = out_normed_reshaped.var(-1, unbiased=False)
            self.assertAlmostEqual(torch.abs(mean).mean(), 0, delta=1e-5)
            self.assertAlmostEqual(torch.abs(var - 1).mean(), 0, delta=1e-5)

        bad_shape_g = {
            (1, 2, 3, 4): 3,
            (2, 3, 10): 2,
            (3, 1, 1, 1, 2): 10,
            (2, 6, 4, 2, 2): 4,
        }
        for shape, g in bad_shape_g.items():
            gn = nn.GroupNorm(g, shape[1])
            input = torch.empty(*shape, device=device, dtype=dtype).uniform_(0, 10)
            self.assertRaises(RuntimeError, lambda: gn(input))

    def _test_GroupNorm_cuda_half(self):
        input = Variable(torch.empty(2, 3, 3, 2).to("cuda", torch.half).random_(1, 10), requires_grad=True)
        input = torch.zeros(2, 4, 3, 2, requires_grad=True).cuda().half().random_(1, 10)
        m = nn.GroupNorm(2, 4).to("cuda", torch.half)
        output = m(input)
        output.sum().backward()
        self.assertEqual(output.type(), input.type())

    def test_GroupNorm_general(self):
        self._test_GroupNorm_general(dtype=torch.float)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_GroupNorm_general_cuda(self):
        self._test_GroupNorm_general("cuda", torch.float)
        self._test_GroupNorm_cuda_half()
    def test_pad(self):
        inputs = torch.randn(1, 3, 4, 4, requires_grad=True)
        _assertGradAndGradgradChecks(self, lambda x: F.pad(x, (1, 1, 1, 1)), (inputs,))
        _assertGradAndGradgradChecks(self, lambda x: F.pad(x, (-1, 1, -2, 1)), (inputs,))
        _assertGradAndGradgradChecks(self, lambda x: F.pad(x, (-1, 1, -2, 1), value=2), (inputs,))
        self.assertTrue(gradcheck(lambda x: F.pad(x, (-1, 1, -2, 1), mode='replicate'), (inputs,)))
        self.assertTrue(gradcheck(lambda x: F.pad(x, (-1, 1, -2, 1), mode='reflect'), (inputs,)))

        inputs = torch.randn(1, 2, 3, 4, 4, requires_grad=True)
        self.assertTrue(gradcheck(lambda x: F.pad(x, (1, 1, 1, 1, 1, 1), mode='replicate'), (inputs,)))

        # reflection padding must reject pads that are at least as large as the input dimension
        expected_err_msg = r"Padding size should be less than the corresponding input dimension"
        self.assertRaisesRegex(RuntimeError, expected_err_msg,
                               lambda: F.pad(torch.randn(1, 1, 2, 3), (1, 1, 3, 0), mode='reflect'))
        self.assertRaisesRegex(RuntimeError, expected_err_msg,
                               lambda: F.pad(torch.randn(1, 1, 2), (2, 1), mode='reflect'))
    def _test_one_hot(self, use_cuda=False):
        device = torch.device('cuda' if use_cuda else 'cpu')
        with self.assertRaises(RuntimeError):
            F.one_hot(torch.tensor([3, 4, -1, 0], device=device), -1)

        with self.assertRaises(RuntimeError):
            F.one_hot(torch.tensor([3, 4, 1, 0], device=device), 3)

        t = F.one_hot(torch.tensor([3, 4, 1, 0], device=device))
        expected = torch.tensor([[0, 0, 0, 1, 0], [0, 0, 0, 0, 1],
                                 [0, 1, 0, 0, 0], [1, 0, 0, 0, 0]], device=device)
        self.assertEqual(t, expected)

        t = F.one_hot(torch.tensor([3, 4, 1, 0], device=device), -1)
        expected = torch.tensor([[0, 0, 0, 1, 0], [0, 0, 0, 0, 1],
                                 [0, 1, 0, 0, 0], [1, 0, 0, 0, 0]], device=device)
        self.assertEqual(t, expected)

        t = F.one_hot(torch.tensor([3, 4, 1, 0], device=device), 6)
        expected = torch.tensor([[0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0],
                                 [0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]], device=device)
        self.assertEqual(t, expected)

        t = F.one_hot(torch.tensor([[3, 4], [1, 0]], device=device))
        expected = torch.tensor([[[0, 0, 0, 1, 0], [0, 0, 0, 0, 1]],
                                 [[0, 1, 0, 0, 0], [1, 0, 0, 0, 0]]], device=device)
        self.assertEqual(t, expected)

        t = F.one_hot(torch.tensor(4, device=device))
        expected = torch.tensor([0, 0, 0, 0, 1], device=device)
        self.assertEqual(t, expected)

        t = F.one_hot(torch.empty([4, 0], dtype=torch.long, device=device), 100)
        expected = torch.empty([4, 0, 100])
        self.assertEqual(t, expected)

        with self.assertRaises(RuntimeError):
            F.one_hot(torch.empty([4, 0], dtype=torch.long, device=device))

        with self.assertRaises(RuntimeError):
            F.one_hot(torch.tensor([3, 4, 1, 0], device=device), -2)

    def test_one_hot(self):
        self._test_one_hot()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_one_hot_cuda(self):
        self._test_one_hot(use_cuda=True)
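
    # Illustrative sketch (not part of the original suite): F.one_hot maps integer class
    # indices to one-hot rows, inferring the number of classes from the maximum value
    # when num_classes is not given. The index values here are only an example.
    def _one_hot_usage_sketch(self):
        idx = torch.tensor([2, 0, 1])
        onehot = F.one_hot(idx, num_classes=4)   # shape (3, 4)
        recovered = onehot.argmax(dim=-1)        # back to class indices
        assert torch.equal(recovered, idx)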
    def test_pad_scalar_error(self):
        inputs = torch.tensor(0., requires_grad=True)
        self.assertRaises(AssertionError, lambda: F.pad(inputs, (1, 1)))
        self.assertRaises(AssertionError, lambda: F.pad(inputs, (1,)))
    def test_nn_scalars(self):
        # one-off checks that element-wise modules handle 0-dim inputs
        def verify_scalars(input, output):
            if input.dim() == 0:
                self.assertEqual((), output.shape)
            else:
                self.assertNotEqual((), output.shape)
            output.sum().backward()
            self.assertEqual(input.shape, input.grad.shape)

        devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
        for device in devices:
            for input_shape in [(5, 6), ()]:
                for module in [torch.nn.ELU, torch.nn.Hardtanh, torch.nn.LeakyReLU, torch.nn.LogSigmoid,
                               torch.nn.RReLU, torch.nn.Softshrink, torch.nn.Softplus, torch.nn.Sigmoid,
                               torch.nn.Tanh]:
                    input = torch.randn(input_shape, device=device, requires_grad=True)
                    m = module()
                    output = m(input)
                    verify_scalars(input, output)

    def test_nn_scalars_reductions(self):
        # one-off checks that loss reductions handle 0-dim inputs
        def verify_reduction_scalars(input, reduction, output):
            if reduction != 'none' or input.dim() == 0:
                self.assertEqual((), output.shape)
            else:
                self.assertNotEqual((), output.shape)
            output.sum().backward()
            self.assertEqual(input.shape, input.grad.shape)

        devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
        for device in devices:
            for input_shape in [(5, 6), ()]:
                for reduction in ['none', 'mean', 'sum']:
                    for module in [torch.nn.BCELoss, torch.nn.L1Loss, torch.nn.MSELoss,
                                   torch.nn.SmoothL1Loss, torch.nn.SoftMarginLoss]:
                        input = torch.randn(input_shape, device=device, requires_grad=True)
                        target = torch.empty(input_shape, device=device).random_(2)
                        sigmoid = nn.Sigmoid()

                        input = torch.randn(input_shape, device=device, requires_grad=True)
                        m = module(reduction=reduction)
                        output = m(sigmoid(input), target)
                        verify_reduction_scalars(input, reduction, output)

    def test_normalize(self):
        inputs = torch.randn(1, 3, 4, 4, requires_grad=True)
        self.assertTrue(gradcheck(lambda x: F.normalize(x, p=1, dim=-1), (inputs,)))
        self.assertTrue(gradcheck(lambda x: F.normalize(x, p=2, dim=-2), (inputs,)))

        inputs = torch.randn((), requires_grad=True)
        self.assertTrue(gradcheck(lambda x: F.normalize(x, p=1, dim=-1), (inputs,)))
    def _test_maxpool_indices(self, num_dim, adaptive=False, device="cpu", dtype=torch.float):
        def expected_indices(dim):
            if dim == 1:
                return torch.tensor([1, 3], dtype=torch.double).repeat(2, 2, 1)
            if dim == 2:
                return torch.tensor([[5, 7], [13, 15]], dtype=torch.double).repeat(2, 2, 1, 1)

        def expected_grad(dim):
            if dim == 1:
                return torch.tensor([0, 1, 0, 1], dtype=torch.double).repeat(2, 2, 1)
            grad = expected_grad(dim - 1)
            zero = torch.zeros(grad.size())
            return torch.stack((zero, grad, zero, grad), 2)

        def expected_output(dim):
            if dim == 1:
                return torch.arange(2, 17, 2).view(2, 2, 2)
            if dim == 2:
                col = torch.arange(6, 63, 8)
                return torch.stack([col, col + 2], 1).view(2, 2, 2, 2)

        if adaptive:
            cls_name = 'AdaptiveMaxPool{}d'.format(num_dim)
        else:
            cls_name = 'MaxPool{}d'.format(num_dim)
        module_cls = getattr(nn, cls_name)
        module = module_cls(2, return_indices=True).to(device, dtype=dtype)
        numel = 4 ** (num_dim + 1)
        input = torch.arange(1, numel + 1).view(2, 2, *repeat(4, num_dim)).to(device, dtype=dtype)
        input_var = input.clone().detach().requires_grad_()

        # check forward
        output, indices = module(input_var)
        if num_dim != 3:
            expected_indices = expected_indices(num_dim)
            expected_output = expected_output(num_dim)
            self.assertEqual(indices.data.squeeze(), expected_indices)
            self.assertEqual(output.data.squeeze(), expected_output)
        self.assertTrue(output.requires_grad)
        self.assertFalse(indices.requires_grad)

        # make sure backward works
        grad_output = torch.ones(output.size(), device=device, dtype=dtype)
        output.backward(grad_output, retain_graph=True)
        expected_grad = expected_grad(num_dim)
        self.assertEqual(input_var.grad.data, expected_grad.view_as(input))

        # make sure backward after changing indices results in an error
        indices.add_(1)
        self.assertRaises(RuntimeError, lambda: output.backward(grad_output))
    def test_adaptive_pooling_input_size(self):
        for numel in (2, 3):
            for pool_type in ('Max', 'Avg'):
                cls_name = 'Adaptive{}Pool{}d'.format(pool_type, numel)
                module_cls = getattr(nn, cls_name)
                output_size = (2,) * numel
                module = module_cls(output_size)

                input = torch.randn(output_size)
                self.assertRaises(ValueError, lambda: module(input))

    def test_adaptive_pooling_size_none(self):
        for numel in (2, 3):
            for pool_type in ('Max', 'Avg'):
                cls_name = 'Adaptive{}Pool{}d'.format(pool_type, numel)
                module_cls = getattr(nn, cls_name)
                output_size = (2,) * (numel - 1) + (None,)
                module = module_cls(output_size)

                input = torch.randn((4,) * (numel + 1))
                output = module(input)
                self.assertEqual(output.size(), (4,) + (2,) * (numel - 1) + (4,))

    def test_Conv2d_naive_groups(self):
        self._test_Conv2d_naive_groups()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_Conv2d_naive_groups_cuda(self, dtype=torch.float):
        self._test_Conv2d_naive_groups("cuda", dtype)

    def test_batchnorm_grad(self):
        self._test_batchnorm_grad()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_batchnorm_grad_cuda(self):
        self._test_batchnorm_grad("cuda")

    def test_batchnorm_eval(self):
        self._test_batchnorm_eval()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_batchnorm_eval_cuda(self, dtype=torch.float):
        self._test_batchnorm_eval("cuda", dtype)

    def test_batchnorm_simple_average(self):
        self._test_batchnorm_simple_average()

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    def test_batchnorm_simple_average_cuda(self):
        self._test_batchnorm_simple_average("cuda")

    def test_MaxPool1d_indices(self):
        self._test_maxpool_indices(1)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_MaxPool1d_indices_cuda(self, dtype=torch.float):
        self._test_maxpool_indices(1, device="cuda", dtype=dtype)

    def test_MaxPool2d_indices(self):
        self._test_maxpool_indices(2)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_MaxPool2d_indices_cuda(self, dtype=torch.float):
        self._test_maxpool_indices(2, device="cuda", dtype=dtype)

    def test_MaxPool3d_indices(self):
        self._test_maxpool_indices(3)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_MaxPool3d_indices_cuda(self, dtype=torch.float):
        self._test_maxpool_indices(3, device="cuda", dtype=dtype)

    def test_AdaptiveMaxPool1d_indices(self):
        self._test_maxpool_indices(1, adaptive=True)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_AdaptiveMaxPool1d_indices_cuda(self, dtype=torch.float):
        self._test_maxpool_indices(1, adaptive=True, device="cuda", dtype=dtype)

    def test_AdaptiveMaxPool2d_indices(self):
        self._test_maxpool_indices(2, adaptive=True)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_AdaptiveMaxPool2d_indices_cuda(self, dtype=torch.float):
        self._test_maxpool_indices(2, adaptive=True, device="cuda", dtype=dtype)

    def test_AdaptiveMaxPool3d_indices(self):
        self._test_maxpool_indices(3, adaptive=True)

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_AdaptiveMaxPool3d_indices_cuda(self, dtype=torch.float):
        self._test_maxpool_indices(3, adaptive=True, device="cuda", dtype=dtype)
    def _test_max_pool_nan(self, device, dtype=torch.float):
        for adaptive in ['', 'adaptive_']:
            for num_dim in [1, 2, 3]:
                fn_name = '{}max_pool{}d'.format(adaptive, num_dim)
                fn = getattr(F, fn_name)
                x = torch.full([1, 1] + num_dim * [3], nan)
                res = fn(x, 1 if adaptive else 3)
                self.assertTrue(math.isnan(res.item()))

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_max_pool_nan_cuda(self, dtype=torch.float):
        self._test_max_pool_nan(device="cuda", dtype=dtype)

    def test_max_pool_nan(self, dtype=torch.float):
        self._test_max_pool_nan(device="cpu")

    def _test_pool_large_size(self, device, dtype=torch.float):
        for op in ('max', 'avg'):
            for num_dim in [1, 2, 3]:
                fn_name = '{}_pool{}d'.format(op, num_dim)
                fn = getattr(F, fn_name)
                # 16777217 is the smallest integer not exactly representable in float32
                x = torch.ones([1, 1, 16777217] + (num_dim - 1) * [1],
                               device=device, dtype=dtype)
                res = fn(x, 1, stride=1, padding=0)
                # check that the output shape was still computed correctly
                self.assertEqual(x.shape[2], res.shape[2])

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_pool_large_size_cuda(self, dtype=torch.float):
        self._test_pool_large_size(device="cuda", dtype=dtype)

    def test_pool_large_size(self, dtype=torch.float):
        self._test_pool_large_size(device="cpu")

    def _test_scatter(self, tensor):
        x = tensor.detach().requires_grad_()
        result = dp.scatter(x, (0, 1))
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0], x[:2])
        self.assertEqual(result[0].get_device(), 0)
        self.assertEqual(result[1], x[2:])
        self.assertEqual(result[1].get_device(), 1)
        grad = result[0].data.clone().fill_(2)
        result[0].backward(grad)
        self.assertEqual(x.grad.data[:2], grad)
        self.assertEqual(x.grad.data[2:], grad.clone().zero_())
        _assertGradAndGradgradChecks(self, lambda y: dp.scatter(y, (0, 1)), (x,))

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_scatter_cpu(self):
        self._test_scatter(torch.randn(4, 4))

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_scatter_gpu(self):
        self._test_scatter(torch.randn(4, 4).cuda())
    def _test_gather(self, output_device):
        inputs = (
            torch.randn(2, 4, device='cuda:0', requires_grad=True),
            torch.randn(2, 4, device='cuda:1', requires_grad=True),
        )
        result = dp.gather(inputs, output_device)
        self.assertEqual(result.size(), torch.Size([4, 4]))
        self.assertEqual(result[:2], inputs[0])
        self.assertEqual(result[2:], inputs[1])
        if output_device != -1:
            self.assertEqual(result.get_device(), output_device)
        else:
            self.assertFalse(result.is_cuda)
        grad = torch.randn(4, 4)
        if output_device != -1:
            grad = grad.cuda(output_device)
        result.backward(grad)
        self.assertEqual(inputs[0].grad.data, grad[:2])
        self.assertEqual(inputs[1].grad.data, grad[2:])
        _assertGradAndGradgradChecks(self, lambda x, y: dp.gather((x, y), output_device), inputs)

        # test scalar inputs; they should be stacked into a vector
        inputs = (
            torch.randn((), device='cuda:0', requires_grad=True),
            torch.randn((), device='cuda:1', requires_grad=True),
        )
        result = dp.gather(inputs, output_device)
        self.assertEqual(result.size(), torch.Size([2]))
        self.assertEqual(result[0], inputs[0])
        self.assertEqual(result[1], inputs[1])
        if output_device != -1:
            self.assertEqual(result.get_device(), output_device)
        else:
            self.assertFalse(result.is_cuda)
        grad = torch.randn(2)
        if output_device != -1:
            grad = grad.cuda(output_device)
        result.backward(grad)
        self.assertEqual(inputs[0].grad, grad[0])
        self.assertEqual(inputs[1].grad, grad[1])
        _assertGradAndGradgradChecks(self, lambda x, y: dp.gather((x, y), output_device), inputs)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_gather_cpu(self):
        self._test_gather(-1)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_gather_gpu(self):
        self._test_gather(0)
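
    # Illustrative sketch (not part of the original suite): scatter splits a batch across
    # devices along dim 0 and gather concatenates the per-device results back onto one
    # device (-1 selects the CPU); this is the round trip DataParallel performs around
    # each forward call.
    def _scatter_gather_roundtrip_sketch(self):
        if not TEST_MULTIGPU:
            return
        x = torch.randn(4, 4)
        chunks = dp.scatter(x, (0, 1))   # one chunk per GPU
        back = dp.gather(chunks, -1)     # gather back onto the CPU
        assert torch.equal(back, x)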
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_gather_different_len_dicts(self):
        inputs = (
            {'a': Variable(torch.randn(1, 2).cuda(0), requires_grad=True)},
            {
                'b': Variable(torch.randn(1, 2).cuda(1), requires_grad=True),
                'a': Variable(torch.randn(1, 2).cuda(1), requires_grad=True),
            }
        )
        with self.assertRaises(ValueError):
            _ = dp.gather(inputs, target_device=0)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_broadcast_double_backwards_gpu(self):
        tensors = (torch.randn(4, 4, device='cuda', requires_grad=True),
                   torch.randn(4, 4, device='cuda', requires_grad=True),
                   torch.randn(4, 4, device='cuda', requires_grad=True))
        _assertGradAndGradgradChecks(self, lambda *i: Broadcast.apply((0, 1), *i), tensors)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_broadcast_not_requiring_grad(self):
        variables = [
            torch.randn(1, 2, device='cuda', requires_grad=True),
            torch.randn(1, 2, device='cuda', requires_grad=False),
            torch.randn(1, 2, device='cuda', requires_grad=False),
            torch.randn(1, 2, device='cuda', requires_grad=True),
            torch.randn(1, 2, device='cuda', requires_grad=True),
        ]
        broadcasted_variables = Broadcast.apply((0, 1), *variables)
        for output_idx, broadcasted_var in enumerate(broadcasted_variables):
            input_var = variables[output_idx % len(variables)]
            self.assertEqual(input_var.requires_grad, broadcasted_var.requires_grad)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_broadcast_no_grad(self):
        x = torch.randn(1, 2, dtype=torch.float32, requires_grad=True, device='cuda')
        with torch.no_grad():
            broadcasted = Broadcast.apply((0, 1), x)
        self.assertTrue(x.requires_grad)
        for output in broadcasted:
            self.assertFalse(output.requires_grad)
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_replicate(self):
        module = nn.Linear(10, 5).float().cuda()
        input = Variable(torch.randn(2, 10).float().cuda())
        expected_output = module(input).data
        replicas = dp.replicate(module, (0, 1))
        for i, replica in enumerate(replicas):
            for p in replica.parameters():
                self.assertEqual(p.get_device(), i)
            replica_input = input.cuda(i)
            self.assertEqual(replica(replica_input).data, expected_output)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_replicate_buffers(self):
        net = nn.Module()
        net.bn = nn.BatchNorm2d(10)
        net.cuda()
        replicas = dp.replicate(net, (0, 1))
        for i, replica in enumerate(replicas):
            self.assertEqual(replica.bn.running_mean.get_device(), i, 'buffer on wrong device')
            self.assertEqual(replica.bn.running_var.get_device(), i, 'buffer on wrong device')
            self.assertEqual(replica.bn.num_batches_tracked.get_device(), i, 'buffer on wrong device')

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_buffers_requiring_grad(self):
        class TestModule(nn.Module):
            def __init__(self, t):
                super(TestModule, self).__init__()
                self.register_buffer('t_rg', t)
                self.register_buffer('t_not_rg', t.clone().detach())

            def forward(self, x):
                return x * self.t_rg + self.t_not_rg

        m = TestModule(torch.randn(100, device='cuda', requires_grad=True))
        self.assertTrue(m.t_rg.requires_grad)

        dpm = nn.DataParallel(m, [0, 1])
        inp = torch.randn(2, 100, device='cuda')

        def fn(t):
            return dpm(inp)

        torch.autograd.gradcheck(fn, (m.t_rg,))
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_parallel_apply(self):
        l1 = nn.Linear(10, 5).to("cuda:0", torch.float)
        l2 = nn.Linear(10, 5).to("cuda:1", torch.float)
        i1 = torch.randn(2, 10, device="cuda:0", dtype=torch.float)
        i2 = torch.randn(2, 10, device="cuda:1", dtype=torch.float)
        expected1 = l1(i1).data
        expected2 = l2(i2).data
        modules = (l1, l2)
        expected_outputs = (expected1, expected2)

        # each input can be either a collection of positional arguments
        # or an object representing the single argument
        for inputs in [((i1,), (i2,)), (i1, i2)]:
            outputs = dp.parallel_apply(modules, inputs, None)
            for out, expected in zip(outputs, expected_outputs):
                self.assertEqual(out.data, expected)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_multiple_input(self):
        class TestModule(nn.Module):

            def forward(self, var1, var2, float1, var3=None):
                if var3 is None:
                    return float1 * (var1 * var2)
                else:
                    return float1 * (var1 * var2 + var3)

        m = TestModule()
        var1 = torch.randn(5, 5, dtype=torch.float, requires_grad=True)
        var2 = torch.randn(5, 5, dtype=torch.float, requires_grad=True)
        var3 = torch.randn(5, 5, dtype=torch.float, requires_grad=False)

        float1 = torch.randn(1).item()

        expected = m(var1, var2, float1)
        loss = expected.sum()
        loss.backward()
        gvar1_exp = var1.grad.clone()
        gvar2_exp = var2.grad.clone()

        def local_test(out):
            var1.grad.data.fill_(0.0)
            var2.grad.data.fill_(0.0)
            loss = out.sum()
            loss.backward()
            self.assertEqual(out, expected)
            self.assertEqual(gvar1_exp, var1.grad)
            self.assertEqual(gvar2_exp, var2.grad)

        out = dp.data_parallel(m, (var1, var2, float1), (0, 1))
        local_test(out)

        out = dp.data_parallel(m, (var1, var2, float1), (1, 0))
        local_test(out)

        out = dp.data_parallel(m, (var1, var2, float1), (0,))
        local_test(out)

        var1.grad.data.fill_(0.0)
        var2.grad.data.fill_(0.0)
        expected = m(var1, var2, float1, var3=var3)
        loss = expected.sum()
        loss.backward()
        gvar1_exp = var1.grad.clone()
        gvar2_exp = var2.grad.clone()

        dpm = nn.DataParallel(TestModule())
        out = dpm(var1, var2, float1, var3=var3)
        local_test(out)

        dpm = nn.DataParallel(TestModule(), device_ids=[0])
        out = dpm(var1, var2, float1, var3=var3)
        local_test(out)

        kwarg_wrap = {'var3': var3}
        out = dp.data_parallel(
            m, (var1, var2, float1), (0, 1), module_kwargs=kwarg_wrap)
        local_test(out)

        out = dp.data_parallel(
            m, (var1, var2, float1), (0,), module_kwargs=kwarg_wrap)
        local_test(out)
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_small_back(self):
        l = nn.Linear(10, 5).float().cuda()
        i = Variable(torch.randn(20, 10).float().cuda())
        out = dp.data_parallel(l, i, (0, 1))
        self.assertEqual(out, l(i))
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_model_device(self):
        r"""Test device[0] check at forward time."""
        l = nn.Linear(2, 2)
        inp = torch.randn(2, 2)
        inp_cuda0 = inp.cuda(0)
        inp_cuda1 = inp.cuda(1)

        error_msg = "module must have its parameters and buffers on device {}"

        @contextlib.contextmanager
        def dummy_ctx_manager():
            yield

        def test(inner_m, dp_device, inp, device_ids, should_fail):
            if device_ids is None:
                device_ids = list(range(torch.cuda.device_count()))

            if isinstance(device_ids[0], torch.device):
                expect_device = device_ids[0]
            else:
                expect_device = torch.device("cuda:{}".format(device_ids[0]))

            if should_fail:
                def assert_correct():
                    return self.assertRaisesRegex(RuntimeError, error_msg.format(expect_device))
            else:
                assert_correct = dummy_ctx_manager

            # test DataParallel module
            dpm = nn.DataParallel(inner_m, device_ids)
            if dp_device is not None:
                dpm = dpm.to(dp_device)

            with assert_correct():
                dpm(inp)

            # test the functional interface
            with assert_correct():
                nn.parallel.data_parallel(inner_m.to(dp_device), inp, device_ids)

        test(l.to('cpu'), None, inp, None, should_fail=True)
        test(l.cuda(1), None, inp_cuda0, None, should_fail=True)
        test(l.cuda(), None, inp_cuda0, [1, 0], should_fail=True)

        test(l.cuda(), None, inp_cuda0, None, should_fail=False)
        test(l.cpu(), 'cuda', inp_cuda0, None, should_fail=False)
        test(l.cuda(1), None, inp_cuda1, [1, 0], should_fail=False)
        test(l.cpu(), 'cuda:1', inp_cuda1, [1, 0], should_fail=False)

        s = nn.Sequential(l.cpu())
        test(s, None, inp, None, should_fail=True)
        test(s, None, inp, [0, 1], should_fail=True)
        test(s, None, inp, [1, 0], should_fail=True)

        s = nn.Sequential(deepcopy(l).cpu(), l.cuda())
        test(s, None, inp, None, should_fail=True)
        test(s, None, inp, [0, 1], should_fail=True)
        test(s, None, inp, [1, 0], should_fail=True)

        s = nn.Sequential(l.cuda(), deepcopy(l).cuda(1))
        test(s, None, inp, None, should_fail=True)
        test(s, None, inp, [0, 1], should_fail=True)
        test(s, None, inp, [1, 0], should_fail=True)

        s = nn.Sequential(l.cuda(), deepcopy(l).cuda())
        test(s, None, inp, None, should_fail=False)
        test(s, None, inp, [0, 1], should_fail=False)
        test(s, None, inp, [1, 0], should_fail=True)
        test(s.cpu(), None, inp, [1, 0], should_fail=True)
        test(s.cuda(1), None, inp, [1, 0], should_fail=False)
    @unittest.skipIf(not TEST_MULTIGPU or not PY3, "multi-GPU not supported")
    def test_data_parallel_model_no_refcycles(self):
        # Python 2.7 creates reference cycles with the following Module on
        # multiple GPUs, but Python 3 should not.
        import gc

        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.linear = nn.Linear(1, 1)

            def forward(self, x):
                return self.linear(x)

        gc.collect()
        model = nn.DataParallel(Model().cuda())
        data = Variable(torch.randn(1).cuda())
        model(data)

        refcycles = gc.collect()
        self.assertEqual(refcycles, 0)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_no_grad(self):
        test = self

        class Layer(nn.Module):
            def forward(self, x):
                test.assertFalse(torch.is_grad_enabled())
                return x

        l = Layer()
        i = Variable(torch.randn(20, 10).float().cuda())
        with torch.no_grad():
            dp.data_parallel(l, i, (0, 1))
        self.assertRaises(AssertionError, lambda: dp.data_parallel(l, i, (0, 1)))
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel(self):
        l = nn.Linear(10, 5).float().cuda()
        i = Variable(torch.randn(20, 10).float().cuda(1))
        l.cuda(1)
        expected_out = l(i)
        loss = expected_out.sum()
        loss.backward()
        expected_grads = []
        for param in l.parameters():
            expected_grads.append(param.grad.clone())
        dev_ids_list = [(0, 1), (1, 0)]
        for dev_id in dev_ids_list:
            with torch.cuda.device(dev_id[0]):
                l.cuda()
                l.zero_grad()
                out = dp.data_parallel(l, i, dev_id)
                loss = out.sum()
                loss.backward()
                self.assertEqual(out.get_device(), dev_id[0])
                self.assertEqual(out.data, expected_out.data)
                for expected, param in zip(expected_grads, l.parameters()):
                    self.assertEqual(param.grad.data, expected.data)

        # check for None device_ids
        l = l.cuda()
        out = dp.data_parallel(l, i)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_sparse(self):
        l = nn.Embedding(10, 5, sparse=True).to("cuda:1")
        i = torch.randint(10, (20, 5), device="cuda:1", dtype=torch.long)
        expected_out = l(i)
        loss = expected_out.sum()
        loss.backward()
        expected_grads = []
        for param in l.parameters():
            expected_grads.append(param.grad.clone())
        dev_ids_list = [(0, 1), (1, 0)]
        for dev_id in dev_ids_list:
            with torch.cuda.device(dev_id[0]):
                l.cuda()
                l.zero_grad()
                out = dp.data_parallel(l, i, dev_id)
                loss = out.sum()
                loss.backward()
                self.assertEqual(out.get_device(), dev_id[0])
                self.assertEqual(out.data, expected_out.data)
                for expected, param in zip(expected_grads, l.parameters()):
                    self.assertEqual(param.grad.data, expected.data)

        # check for None device_ids
        l = l.cuda()
        out = dp.data_parallel(l, i)
    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_nested_output(self):
        def fn(input):
            return [
                input, (input.sin(), input.cos(), [input.add(1)]), input,
                OrderedDict(a=input, b=[input.sin()])
            ]

        class Net(nn.Module):
            def forward(self, input):
                return fn(input)

        i = torch.randn(2, 2).float().cuda(1)
        gpus = range(torch.cuda.device_count())
        output = dp.data_parallel(Net(), i, gpus)
        self.assertEqual(output, fn(i))
        self.assertIsInstance(output[0], torch.Tensor)
        self.assertIsInstance(output[1], tuple)
        self.assertIsInstance(output[1][0], torch.Tensor)
        self.assertIsInstance(output[1][1], torch.Tensor)
        self.assertIsInstance(output[1][2], list)
        self.assertIsInstance(output[1][2][0], torch.Tensor)
        self.assertIsInstance(output[2], torch.Tensor)
        self.assertIsInstance(output[3], dict)
        self.assertEqual(len(output[3]), 2)
        self.assertIn('a', output[3])
        self.assertIn('b', output[3])
        self.assertIsInstance(output[3]['a'], torch.Tensor)
        self.assertIsInstance(output[3]['b'], list)
        self.assertIsInstance(output[3]['b'][0], torch.Tensor)

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_nested_input(self):
        def fn(input):
            return input[1][0]

        class Net(nn.Module):
            def forward(self, *input):
                return fn(input)

        i = Variable(torch.randn(20, 3).float().cuda(1))
        input = (i.cos(), (i.sin(), i), i.sin())
        gpus = range(torch.cuda.device_count())
        output = dp.data_parallel(Net(), input, gpus)
        self.assertEqual(output, fn(input))
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_data_parallel_module(self, dtype=torch.float):
        l = nn.Linear(10, 5).to("cuda", dtype)
        i = torch.randn(20, 10, device="cuda", dtype=dtype)
        expected_out = l(i).data
        net = nn.DataParallel(l)
        out = net(i)
        self.assertEqual(out.get_device(), 0)
        self.assertEqual(out.data, expected_out, dtype2prec[dtype])

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_data_parallel_module_kwargs_only(self, dtype=torch.float):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l = l

            def forward(self, input):
                return self.l(input)

        l = nn.Linear(10, 5).to("cuda", dtype)
        i = torch.randn(20, 10, device="cuda", dtype=dtype)
        expected_out = l(i).data
        n = nn.DataParallel(Net())
        out = n(input=i)
        self.assertEqual(out.get_device(), 0)
        self.assertEqual(out.data, expected_out, dtype2prec[dtype])

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_data_parallel_module_kwargs_only_empty_list(self, dtype=torch.float):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l = l

            def forward(self, input):
                return self.l(input['data'])

        l = nn.Linear(10, 5).to("cuda", dtype)
        i = torch.randn(20, 10, device="cuda", dtype=dtype)
        expected_out = l(i).data
        n = nn.DataParallel(Net())
        out = n(input={'data': i, 'unused': []})
        self.assertEqual(out.get_device(), 0)
        self.assertEqual(out.data, expected_out, dtype2prec[dtype])

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_data_parallel_module_kwargs_only_empty_dict(self, dtype=torch.float):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l = l

            def forward(self, input):
                return self.l(input['data'])

        l = nn.Linear(10, 5).to("cuda", dtype)
        i = torch.randn(20, 10, device="cuda", dtype=dtype)
        expected_out = l(i).data
        n = nn.DataParallel(Net())
        out = n(input={'data': i, 'unused': {}})
        self.assertEqual(out.get_device(), 0)
        self.assertEqual(out.data, expected_out, dtype2prec[dtype])

    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_data_parallel_module_kwargs_only_empty_tuple(self, dtype=torch.float):
        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.l = l

            def forward(self, input):
                return self.l(input['data'])

        l = nn.Linear(10, 5).to("cuda", dtype)
        i = torch.randn(20, 10, device="cuda", dtype=dtype)
        expected_out = l(i).data
        n = nn.DataParallel(Net())
        out = n(input={'data': i, 'unused': ()})
        self.assertEqual(out.get_device(), 0)
        self.assertEqual(out.data, expected_out, dtype2prec[dtype])

    @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
    def test_data_parallel_device_args(self):
        cuda0 = torch.device('cuda:0')
        cuda1 = torch.device('cuda:1')

        # test output_device
        l = nn.Linear(10, 5).to(cuda0, torch.float)
        i = torch.randn(20, 10, dtype=torch.float, device=cuda0, requires_grad=True)
        out = dp.data_parallel(l, i, device_ids=(0, 1), output_device=cuda0)
        self.assertEqual(out, l(i))

        # test device_ids
        l = nn.Linear(10, 5).to(cuda0, torch.float)
        i = torch.randn(20, 10, dtype=torch.float, device=cuda0, requires_grad=True)
        out = dp.data_parallel(l, i, device_ids=(cuda0, cuda1), output_device=cuda0)
        self.assertEqual(out, l(i))
    def test_state_dict(self):
        l = nn.Linear(5, 5)
        block = nn.Module()
        block.conv = nn.Conv2d(3, 3, 3, bias=False)
        net = nn.Module()
        net.linear1 = l
        net.linear2 = l
        net.bn = nn.BatchNorm2d(2)
        net.block = block
        net.add_module('empty', None)

        state_dict = net.state_dict()
        self.assertEqual(len(state_dict), 10)
        self.assertEqual(len(state_dict._metadata), 6)
        self.assertIn('', state_dict._metadata)
        self.assertIn('linear1', state_dict._metadata)
        self.assertIn('linear1.weight', state_dict)
        self.assertIn('linear1.bias', state_dict)
        self.assertIn('linear2', state_dict._metadata)
        self.assertIn('linear2.weight', state_dict)
        self.assertIn('linear2.bias', state_dict)
        self.assertIn('block', state_dict._metadata)
        self.assertIn('block.conv', state_dict._metadata)
        self.assertIn('block.conv.weight', state_dict)
        self.assertIn('block.conv.weight', state_dict)
        self.assertNotIn('block.conv.bias', state_dict)
        self.assertIn('bn', state_dict._metadata)
        self.assertIn('bn.weight', state_dict)
        self.assertIn('bn.bias', state_dict)
        self.assertIn('bn.running_var', state_dict)
        self.assertIn('bn.running_mean', state_dict)
        self.assertIn('bn.num_batches_tracked', state_dict)
        self.assertFalse(any(map(lambda k: k.startswith('empty'), state_dict.keys())))
        for k, v in state_dict.items():
            param = net
            for component in k.split('.'):
                param = getattr(param, component)
                if isinstance(param, Parameter):
                    param = param.data
            self.assertEqual(v.data_ptr(), param.data_ptr())

        l = nn.Linear(5, 5)
        state_dict = l.state_dict()
        self.assertEqual(len(state_dict), 2)
        self.assertEqual(len(state_dict._metadata), 1)
        self.assertIn('', state_dict._metadata)
        self.assertTrue(state_dict._metadata['']['version'] >= 0)
        self.assertEqual(state_dict['weight'].data_ptr(), l.weight.data_ptr())
        self.assertEqual(state_dict['bias'].data_ptr(), l.bias.data_ptr())
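
    # Illustrative sketch (not part of the original suite): state_dict flattens nested
    # submodules into dot-separated keys, so a child registered as `net.block.conv`
    # contributes keys such as 'block.conv.weight'. The module names here are only an
    # example.
    def _state_dict_keys_sketch(self):
        net = nn.Module()
        net.block = nn.Module()
        net.block.conv = nn.Conv2d(3, 3, 3, bias=False)
        assert set(net.state_dict().keys()) == {'block.conv.weight'}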
    def test_load_state_dict(self):
        l = nn.Linear(5, 5)
        block = nn.Module()
        block.conv1 = nn.Conv2d(3, 3, 3, bias=True)
        block.conv2 = nn.Conv2d(3, 3, 3, bias=False)
        net = nn.Module()
        net.linear1 = l
        net.linear2 = l
        net.bn = nn.BatchNorm2d(2)
        net.block = block
        net.add_module('empty', None)

        state_dict = net.state_dict()
        state_dict.update({
            'linear1.weight': torch.ones(5, 5),
            'block.conv1.bias': torch.arange(1, 4),
            'bn.running_mean': torch.randn(2),
        })
        net.load_state_dict(state_dict)
        self.assertEqual(net.linear1.weight.data, state_dict['linear1.weight'])
        self.assertEqual(net.block.conv1.bias.data, state_dict['block.conv1.bias'])
        self.assertEqual(net.bn.running_mean, state_dict['bn.running_mean'])

        state_dict = net.state_dict()
        state_dict.update({'extra': torch.ones(5)})
        self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))

        state_dict = net.state_dict()
        state_dict.update({'extra.param': torch.ones(5)})
        self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))

        state_dict = net.state_dict()
        del state_dict['linear1.weight']
        self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))

        state_dict = net.state_dict()
        state_dict.update({'bn.running_mean': torch.rand(14, 4)})  # wrong size
        self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))

        state_dict = net.state_dict()
        old_state_dict = deepcopy(state_dict)
        state_dict = {
            'linear1.weight': torch.ones(5, 5),
            'block.conv1.bias': torch.arange(1, 4),
            'bn.running_mean': torch.randn(2),
            'nonexistent_key': torch.rand(3)
        }
        net.load_state_dict(state_dict, strict=False)
        self.assertEqual(net.linear1.weight.data, state_dict['linear1.weight'])
        self.assertEqual(net.block.conv1.bias.data, state_dict['block.conv1.bias'])
        self.assertEqual(net.bn.running_mean, state_dict['bn.running_mean'])
        new_state_dict = net.state_dict()
        del old_state_dict['linear1.weight']
        del old_state_dict['block.conv1.bias']
        del old_state_dict['bn.running_mean']
        for k, v in old_state_dict.items():
            self.assertTrue(v.equal(new_state_dict[k]))
    def test_load_state_dict_BC(self):
        # BatchNormNd added the num_batches_tracked buffer at version 2; state dicts
        # from earlier versions (or with no version) should get a default value of 0.
        bn = nn.BatchNorm2d(3)
        state_dict = bn.state_dict()
        del state_dict['num_batches_tracked']
        state_dict._metadata['']['version'] = 1  # version 1
        bn.load_state_dict(state_dict)
        self.assertEqual(bn.num_batches_tracked.dtype, torch.long)
        self.assertEqual(bn.num_batches_tracked.item(), 0)
        del state_dict._metadata['']['version']  # no version
        bn.load_state_dict(state_dict)
        self.assertEqual(bn.num_batches_tracked.dtype, torch.long)
        self.assertEqual(bn.num_batches_tracked.item(), 0)

    def test_parameter_assignment(self):
        l = nn.Linear(5, 5)

        def num_params():
            return len(list(l.parameters()))

        self.assertEqual(num_params(), 2)

        new_param = Parameter(torch.randn(5, 5))
        l.param_name = new_param
        self.assertEqual(num_params(), 3)
        self.assertObjectIn(new_param, l.parameters())

        var = torch.randn(5, 5)
        l.var_name = var
        self.assertEqual(num_params(), 3)
        self.assertNotIn(id(var), map(id, l.parameters()))

        # make sure plain tensors are not saved as parameters
        l.variable_attr = torch.empty(5, 5)
        self.assertEqual(num_params(), 3)
        l.param_attr = Parameter(torch.empty(5, 5))
        self.assertEqual(num_params(), 4)

        # it should not be possible to replace a parameter with a plain tensor
        def assign_var():
            l.param_attr = torch.empty(5, 5)

        self.assertRaises(TypeError, assign_var)
        # but replacing it with None should be fine
        l.param_attr = None
        self.assertEqual(num_params(), 3)
    def test_assignment(self):
        l = nn.Module()
        a = nn.Parameter(torch.randn(2))
        b = nn.Parameter(torch.randn(3))
        c = nn.Parameter(torch.randn(4))
        q = nn.Module()
        r = nn.Module()
        w = nn.Module()

        def test_assignments(get_list, a, b, c):
            # single assignment
            l.a = None
            self.assertIsNone(l.a)
            self.assertIn('a', l.__dict__)
            l.a = a
            self.assertIs(l.a, a)
            self.assertEqual(get_list(), [a])
            self.assertNotIn('a', l.__dict__)

            # assign second object
            l.b = None
            self.assertIsNone(l.b)
            self.assertIn('b', l.__dict__)
            l.b = b
            self.assertIs(l.b, b)
            self.assertEqual(get_list(), [a, b])
            self.assertNotIn('b', l.__dict__)

            # remove and add the object back; order should be unchanged
            l.a = None
            self.assertIsNone(l.a)
            self.assertEqual(get_list(), [b])
            l.a = a
            self.assertIs(l.a, a)
            self.assertEqual(get_list(), [a, b])

            # replace the object with another one; order should be unchanged
            l.a = c
            self.assertIs(l.a, c)
            self.assertEqual(get_list(), [c, b])

            # delete and reassign; the attribute should appear at the end of the list
            del l.a
            self.assertFalse(hasattr(l, 'a'))
            l.a = a
            self.assertIs(l.a, a)
            self.assertEqual(get_list(), [b, a])

        test_assignments(lambda: list(l.parameters()), a, b, c)
        del l.a, l.b
        self.assertEqual(list(l.parameters()), [])

        test_assignments(lambda: list(l.children()), q, r, w)
        del l.a, l.b
        self.assertEqual(list(l.children()), [])

        buf = torch.randn(10)
        l.register_buffer('buf', buf)
        self.assertIs(l.buf, buf)
        l.buf = None
        self.assertIs(l.buf, None)
        self.assertNotIn('buf', l.__dict__)  # it is stored in l._buffers
        l.buf = buf
        self.assertIn('buf', l.state_dict())
        self.assertEqual(l.state_dict()['buf'], buf)
    def test_Conv2d_inconsistent_types(self):
        inputs = Variable(torch.randn(4, 1, 7, 7).float())
        weights = Variable(torch.randn(1, 1, 3, 3).double())
        # inconsistent types should raise an exception
        self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
        # but it should work with the same type
        nn.functional.conv2d(inputs.float(), weights.float())

    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    def test_Conv2d_inconsistent_types_on_GPU_without_cudnn(self):
        inputs = Variable(torch.randn(4, 1, 7, 7).float().cuda())
        weights = Variable(torch.randn(1, 1, 3, 3).double().cuda())
        bias = Variable(torch.randn(1).double().cuda())

        with cudnn.flags(enabled=False):
            # inconsistent types should raise an exception
            self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
            self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias))

            # but it should work with the same type
            nn.functional.conv2d(inputs.float(), weights.float(), bias.float())

    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
    def test_Conv2d_inconsistent_types_on_GPU_with_cudnn(self):
        inputs = Variable(torch.randn(4, 1, 7, 7).float().cuda())
        weights = Variable(torch.randn(1, 1, 3, 3).double().cuda())
        bias = Variable(torch.randn(1).double().cuda())

        with cudnn.flags(enabled=True):
            # inconsistent types should raise an exception
            self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
            self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias))

            # but it should work with the same type
            nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
    def test_cudnn_multiple_threads_same_device(self):
        # Issue convolutions from several threads that share one device but use
        # private streams; every thread must end up with its own correct result.
        weight = torch.ones((1, 1, 2, 2), device='cuda')

        results = {}
        num_threads = 2
        trials = 3
        test_iters = 1000

        def _worker(t, input):
            my_stream = torch.cuda.Stream()
            results[t] = input
            with torch.cuda.stream(my_stream):
                for _ in range(test_iters):
                    # conv2d with an all-ones 2x2 kernel shrinks each spatial dim by 1
                    # and scales constant regions by 4; the div_ keeps values at 1.
                    results[t] = F.conv2d(results[t], weight, padding=0)
                    results[t].div_(4.0)
            torch.cuda.current_stream().wait_stream(my_stream)

        for _ in range(trials):
            for t in range(num_threads):
                results[t] = torch.ones((1, 1, 2048, 2048), device='cuda')

            threads = [threading.Thread(target=_worker,
                                        args=(t, results[t])) for t in range(num_threads)]

            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()

            for t in range(num_threads):
                self.assertEqual(results[t].sum().item(),
                                 (2048 - test_iters) * (2048 - test_iters))
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_Conv2d_deterministic_cudnn(self, dtype=torch.float):
        inputs = torch.randn(2, 3, 5, 5, device="cuda", dtype=dtype, requires_grad=True)
        with cudnn.flags(enabled=True, benchmark=True, deterministic=True):
            conv1 = torch.nn.Conv2d(3, 3, 3).to("cuda", dtype)
            conv2 = torch.nn.Conv2d(3, 3, 3).to("cuda", dtype)
            conv2.bias.data.copy_(conv1.bias.data)
            conv2.weight.data.copy_(conv1.weight.data)
            out1 = conv1(inputs)
            out2 = conv2(inputs)
            self.assertEqual(out1, out2, prec=0.0)
            y = torch.randn(out1.size(), device="cuda", dtype=dtype)
            out1.backward(y)
            out2.backward(y)
            self.assertEqual(conv1.bias.grad.data, conv2.bias.grad.data, prec=0.0)
            self.assertEqual(conv1.weight.grad.data, conv2.weight.grad.data, prec=0.0)
    def test_Conv2d_missing_argument(self):
        c = nn.Conv2d(3, 3, 3)
        self.assertRaises(TypeError, lambda: c(None))

    def test_Conv2d_backward_twice(self):
        input = torch.randn(2, 3, 5, 5)
        c = nn.Conv2d(3, 3, 3)
        o1 = c(input)
        o1.sum().backward()
        self.assertRaisesRegex(RuntimeError, 'Specify retain_graph=True',
                               lambda: o1.sum().backward())

    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_Conv2d_large_workspace(self, dtype=torch.float):
        # These sizes require huge cuDNN workspaces; make sure we pick a
        # reasonable algorithm that does not run out of memory
        sizes = [
            (1, 256, 109, 175),
            (1, 256, 80, 128),
            (1, 256, 120, 192),
        ]

        def run_test(benchmark):
            with cudnn.flags(benchmark=benchmark):
                conv = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1).to("cuda", dtype)
                for size in sizes:
                    x = torch.randn(size, device="cuda", dtype=dtype)
                    out = conv(x.detach().clone().requires_grad_())
                    out.backward(torch.ones_like(out))

        run_test(benchmark=False)
        run_test(benchmark=True)

    def test_conv_modules_raise_error_on_incorrect_input_size(self):
        modules = [nn.Conv1d(3, 8, 3), nn.ConvTranspose1d(3, 8, 3),
                   nn.Conv2d(3, 8, 3), nn.ConvTranspose2d(3, 8, 3),
                   nn.Conv3d(3, 8, 3), nn.ConvTranspose3d(3, 8, 3)]

        invalid_input_dims = [(2, 4), (2, 4),
                              (3, 5), (3, 5),
                              (4, 6), (4, 6)]

        for invalid_dims, module in zip(invalid_input_dims, modules):
            for dims in invalid_dims:
                input = torch.empty(torch.Size((3, ) * dims))
                self.assertRaises(RuntimeError, lambda: module(input))
    def test_conv_shapecheck(self):
        def test(should_raise, module, input_size):
            input = torch.empty(3, *input_size)
            if should_raise:
                self.assertRaises(RuntimeError, lambda: module(input))
            else:
                # just run it to ensure no exception is raised
                module(input)

        # Conv1d
        test(True, nn.Conv1d(1, 1, 3), (1, 2))
        test(True, nn.Conv1d(1, 1, 3, stride=2), (1, 2))
        test(False, nn.Conv1d(1, 1, 2), (1, 2))
        test(False, nn.Conv1d(1, 1, 2, stride=2), (1, 2))
        test(False, nn.Conv1d(1, 1, 3, stride=2, padding=1), (1, 2))

        # Conv2d
        test(True, nn.Conv2d(1, 1, (3, 3)), (1, 2, 2))
        test(False, nn.Conv2d(1, 1, (3, 3)), (1, 3, 3))
        test(False, nn.Conv2d(1, 1, (3, 3), padding=1), (1, 2, 2))

        # Conv3d
        test(True, nn.Conv3d(1, 1, (3, 3, 3)), (1, 2, 2, 2))
        test(False, nn.Conv3d(1, 1, (3, 3, 3)), (1, 3, 3, 3))
        test(False, nn.Conv3d(1, 1, (3, 3, 3), padding=1), (1, 2, 2, 2))
    def test_ConvTranspose2d_output_size(self):
        m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2)
        i = torch.randn(2, 3, 6, 6)
        for h in range(15, 22):
            for w in range(15, 22):
                if 18 <= h <= 20 and 18 <= w <= 20:
                    output = m(i, output_size=(h, w))
                    self.assertEqual(output.size()[2:], (h, w))
                else:
                    self.assertRaises(ValueError, lambda: m(i, (h, w)))

    def test_ConvTranspose3d_correct_output_size(self):
        # ConvTranspose3d should accept a 5-d output_size
        m = nn.ConvTranspose3d(2, 2, 2)
        i = torch.rand(1, 2, 1, 1, 1)
        out = m(i, output_size=(1, 2, 2, 2, 2))
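
    # Illustrative sketch (not part of the original suite): for ConvTranspose2d the base
    # output size is (in - 1) * stride - 2 * padding + kernel_size, and output_size may
    # request up to stride - 1 extra rows/columns, which is why only 18 <= h, w <= 20
    # are accepted above (in=6, kernel=3, stride=3, padding=0).
    def _conv_transpose_output_size_sketch(self):
        in_size, kernel, stride, padding = 6, 3, 3, 0
        base = (in_size - 1) * stride - 2 * padding + kernel   # == 18
        valid = range(base, base + stride)                     # 18, 19, 20
        assert list(valid) == [18, 19, 20]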
    def _test_Conv2d_naive_groups(self, device="cpu", dtype=torch.float):
        # check that a grouped convolution matches two half convolutions
        m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype)
        i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
        output = m(i)
        grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype)
        output.backward(grad_output)

        m1 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype)
        m1.weight.data.copy_(m.weight.data[:2])
        m1.bias.data.copy_(m.bias.data[:2])
        i1 = Variable(i.data[:, :2].contiguous(), requires_grad=True)
        output1 = m1(i1)
        output1.backward(grad_output[:, :2].contiguous())

        m2 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype)
        m2.weight.data.copy_(m.weight.data[2:])
        m2.bias.data.copy_(m.bias.data[2:])
        i2 = Variable(i.data[:, 2:].contiguous(), requires_grad=True)
        output2 = m2(i2)
        output2.backward(grad_output[:, 2:].contiguous())

        self.assertEqual(output, torch.cat([output1, output2], 1))
        self.assertEqual(i.grad.data,
                         torch.cat([i1.grad.data, i2.grad.data], 1),
                         prec=dtype2prec[dtype])
        self.assertEqual(m.bias.grad.data,
                         torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0),
                         prec=dtype2prec[dtype])
        self.assertEqual(m.weight.grad.data,
                         torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
                         prec=dtype2prec[dtype])
    def test_Conv2d_groups_nobias(self):
        dev_dtypes = [("cpu", torch.float)]
        if TEST_CUDA:
            dev_dtypes += [("cuda", torch.float), ("cuda", torch.half)]
        for device, dtype in dev_dtypes:
            m = nn.Conv2d(4, 4, kernel_size=3, groups=2, bias=False).to(device, dtype)
            i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
            output = m(i)
            grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype)
            output.backward(grad_output)

            m1 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype)
            m1.weight.data.copy_(m.weight.data[:2])
            i1 = Variable(i.data[:, :2].contiguous(), requires_grad=True)
            output1 = m1(i1)
            output1.backward(grad_output[:, :2].contiguous())

            m2 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype)
            m2.weight.data.copy_(m.weight.data[2:])
            i2 = Variable(i.data[:, 2:].contiguous(), requires_grad=True)
            output2 = m2(i2)
            output2.backward(grad_output[:, 2:].contiguous())

            self.assertEqual(output, torch.cat([output1, output2], 1))
            self.assertEqual(i.grad.data,
                             torch.cat([i1.grad.data, i2.grad.data], 1),
                             dtype2prec[dtype])
            self.assertEqual(m.weight.grad.data,
                             torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
                             1e-1 if dtype == torch.half else dtype2prec[dtype])
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_Conv2d_depthwise_naive_groups_cuda(self, dtype=torch.float):
        for depth_multiplier in [1, 2]:
            m = nn.Conv2d(2, 2 * depth_multiplier, kernel_size=3, groups=2).to("cuda", dtype)
            i = torch.randn(2, 2, 6, 6, device="cuda", dtype=dtype).div_(2).requires_grad_()
            output = m(i)
            grad_output = torch.randn(2, 2 * depth_multiplier, 4, 4, device="cuda", dtype=dtype) / 2
            output.backward(grad_output)

            offset = 1 * depth_multiplier

            m1 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to("cuda", dtype)
            m1.weight.data = m.weight.data[:offset].clone()
            m1.bias.data = m.bias.data[:offset].clone()
            i1 = i.detach()[:, :1].clone().requires_grad_()
            output1 = m1(i1)
            output1.backward(grad_output[:, :offset].contiguous())

            m2 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to("cuda", dtype)
            m2.weight.data.copy_(m.weight.data[offset:])
            m2.bias.data.copy_(m.bias.data[offset:])
            i2 = i.detach()[:, 1:].clone().requires_grad_()
            output2 = m2(i2)
            output2.backward(grad_output[:, offset:].contiguous())

            self.assertEqual(output, torch.cat([output1, output2], 1),
                             prec=dtype2prec[dtype])
            self.assertEqual(i.grad.data,
                             torch.cat([i1.grad.data, i2.grad.data], 1),
                             prec=dtype2prec[dtype])
            self.assertEqual(m.bias.grad.data,
                             torch.cat([m1.bias.grad.data,
                                        m2.bias.grad.data], 0),
                             prec=dtype2prec[dtype])
            self.assertEqual(m.weight.grad.data,
                             torch.cat([m1.weight.grad.data,
                                        m2.weight.grad.data], 0),
                             prec=dtype2prec[dtype])
    def test_MaxUnpool2d_output_size(self):
        m = nn.MaxPool2d(3, stride=2, return_indices=True)
        mu = nn.MaxUnpool2d(3, stride=2)
        big_t = torch.rand(1, 1, 6, 6)
        big_t[0][0][4][4] = 100
        output_big, indices_big = m(big_t)
        self.assertRaises(RuntimeError, lambda: mu(output_big, indices_big))

        small_t = torch.rand(1, 1, 5, 5)
        for i in range(0, 4, 2):
            for j in range(0, 4, 2):
                small_t[:, :, i, j] = 100
        output_small, indices_small = m(Variable(small_t))
        for h in range(3, 10):
            for w in range(3, 10):
                if 4 <= h <= 6 and 4 <= w <= 6:
                    size = (h, w)
                    if h == 6:
                        size = (1, 1) + size

                    mu(output_small, indices_small, output_size=size)
                else:
                    self.assertRaises(ValueError, lambda: mu(output_small, indices_small, (h, w)))

    def test_container_copy(self):
        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.linear = nn.Linear(4, 5)

            def forward(self, input):
                return self.linear(input)

        input = torch.randn(2, 4)

        model = Model()
        model_cp = deepcopy(model)
        self.assertEqual(model(input).data, model_cp(input).data)

        model_cp.linear.weight.data[:] = 2
        self.assertNotEqual(model(input).data, model_cp(input).data)

    def test_RNN_cell(self):
        # this is just a smoke test; these modules are implemented through
        # autograd so no Jacobian test is needed
        for module in (nn.RNNCell, nn.GRUCell):
            for bias in (True, False):
                input = torch.randn(3, 10)
                hx = torch.randn(3, 20)
                cell = module(10, 20, bias=bias)
                for _ in range(6):
                    hx = cell(input, hx)

                hx.sum().backward()
    def _test_loss_equal_input_target_shape(self, cast):
        # Tests losses whose inputs should have the same size.
        losses = {
            'mse_loss': lambda x, y: F.mse_loss(x, y),
            'l1_loss': lambda x, y: F.l1_loss(x, y),
            'smooth_l1_loss': lambda x, y: F.smooth_l1_loss(x, y),
            'kl_div': lambda x, y: F.kl_div(x, y),
            'poisson_nll_loss': lambda x, y: F.poisson_nll_loss(x, y),
        }

        input = Variable(cast(torch.randn(3, 5)))
        target = Variable(cast(torch.randn(5, 3)))
        for _name, fn in losses.items():
            self.assertRaises(Exception, lambda: fn(input, target))

    def test_loss_equal_input_target_shape(self):
        self._test_loss_equal_input_target_shape(lambda x: x)
    def test_nll_loss_mismatched_batch(self):
        x = torch.randn((10, 3), requires_grad=True)
        # t should have size (10,)
        t = torch.zeros((3,), dtype=torch.int64)
        with self.assertRaisesRegex(ValueError, 'Expected.*batch_size'):
            F.nll_loss(x, t)
    def test_nll_loss_out_of_bounds_ignore_index(self):
        x = torch.randn(6, 3, requires_grad=True)
        t = torch.tensor([0, 1, 255, 0, 1, 2], dtype=torch.int64)
        for reduction in ['mean', 'none']:
            F.nll_loss(x, t, ignore_index=255, reduction=reduction).sum().backward()
    def test_poisson_nll_loss_reduction_modes(self):
        input = torch.tensor([0.5, 1.5, 2.5])
        target = torch.tensor([1., 2., 3.])
        component_wise_loss = torch.exp(input) - target * input
        self.assertEqual(component_wise_loss,
                         F.poisson_nll_loss(input, target, reduction='none'))
        self.assertEqual(torch.sum(component_wise_loss),
                         F.poisson_nll_loss(input, target, reduction='sum'))
        self.assertEqual(torch.mean(component_wise_loss),
                         F.poisson_nll_loss(input, target, reduction='mean'))
        with self.assertRaisesRegex(ValueError, 'is not valid'):
            F.poisson_nll_loss(input, target, reduction='total')
    def test_KLDivLoss_batch_mean(self):
        input_shape = (2, 5)
        log_prob1 = F.log_softmax(torch.randn(input_shape), 1)
        prob2 = F.softmax(torch.randn(input_shape), 1)

        loss = nn.KLDivLoss(reduction='batchmean')
        l = loss(log_prob1, prob2)

        loss_none_reduce = nn.KLDivLoss(reduction='sum')(log_prob1, prob2)
        expected = loss_none_reduce / input_shape[0]

        self.assertEqual(l, expected)
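    # 'batchmean' divides the summed KL divergence by the batch size (the first input
    # dimension), which is exactly what the comparison above checks; plain 'mean'
    # would instead average over every element of the input.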
    @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 7000), "needs cudnn >= 7.0")
    def test_CTCLoss_cudnn(self):
        target_lengths = [30, 25, 20]
        input_lengths = [50, 50, 50]
        targets = torch.randint(1, 15, (sum(target_lengths),), dtype=torch.int)
        log_probs = torch.randn(50, 3, 15, dtype=torch.float, device='cuda').log_softmax(2)
        res = torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
        expected = ctcloss_reference(log_probs, targets.cuda(), input_lengths, target_lengths).float()
        with torch.backends.cudnn.flags(enabled=False):
            res2 = torch.nn.functional.ctc_loss(log_probs, targets.cuda().long(), input_lengths, target_lengths)
        self.assertEqual(res, expected)
        self.assertEqual(res2, res)
    def test_CTCLoss_typechecks(self):
        target_lengths = torch.tensor([30, 25, 20])
        input_lengths = torch.tensor([50, 50, 50])
        targets = torch.randint(1, 15, (sum(target_lengths),), dtype=torch.int)
        log_probs = torch.randn(50, 3, 15, dtype=torch.float).log_softmax(2)
        with self.assertRaises(RuntimeError):
            _input_lengths = input_lengths.to(dtype=torch.float)
            torch.nn.functional.ctc_loss(log_probs, targets, _input_lengths, target_lengths)
        with self.assertRaises(RuntimeError):
            target_lengths = target_lengths.to(dtype=torch.float)
            torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    def test_CTCLoss_lengthchecks_cuda(self):
        target_lengths = [30, 25, 20]
        input_lengths = [50, 50, 50]
        targets = torch.randint(1, 15, (3, 29), dtype=torch.long, device='cuda')
        log_probs = torch.randn(50, 3, 15, dtype=torch.float, device='cuda').log_softmax(2)
        with self.assertRaises(RuntimeError):
            torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
    def test_CTCLoss_lengthchecks_cpu(self):
        target_lengths = [30, 25, 20]
        input_lengths = [50, 50, 50]
        targets = torch.randint(1, 15, (3, 29), dtype=torch.int)
        log_probs = torch.randn(50, 3, 15, dtype=torch.float).log_softmax(2)
        with self.assertRaises(RuntimeError):
            torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    def test_CTCLoss_zero_infinity(self):
        target_lengths = [60, 25, 20]
        input_lengths = [50, 50, 50]
        targets = torch.randint(1, 15, (sum(target_lengths),), dtype=torch.int)
        log_probs = torch.randn(50, 3, 15, dtype=torch.float, device='cuda').log_softmax(2).requires_grad_()
        res = torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths,
                                           reduction='sum', zero_infinity=True)
        with torch.backends.cudnn.flags(enabled=False):
            res2 = torch.nn.functional.ctc_loss(log_probs, targets.cuda().long(), input_lengths, target_lengths,
                                                reduction='sum', zero_infinity=True)
        res_cpu = torch.nn.functional.ctc_loss(log_probs.cpu(), targets.cpu(), input_lengths, target_lengths,
                                               reduction='sum', zero_infinity=True)

        self.assertAlmostEqual(res2, res, delta=1e-4)
        self.assertAlmostEqual(res_cpu, res.cpu(), delta=1e-4)
        g1, = torch.autograd.grad(res, log_probs, torch.ones_like(res))
        g2, = torch.autograd.grad(res2, log_probs, torch.ones_like(res2))
        g3, = torch.autograd.grad(res_cpu, log_probs, torch.ones_like(res_cpu))
        self.assertAlmostEqual(g2, g3, delta=1e-4)
        self.assertAlmostEqual(g1, g2, delta=1e-4)
        self.assertTrue((g1 == g1).all().item())  # check that the gradient is not NaN
    def test_RNN_cell_no_broadcasting(self):
        def test(cell_module, input, hx, input_size, hidden_size):
            cell = cell_module(input_size, hidden_size)
            self.assertRaises(RuntimeError, lambda: cell(input, hx))

        def test_all(hidden_size, bad_hx, good_hx, input_size, input):
            test(nn.RNNCell, input, bad_hx, input_size, hidden_size)
            test(nn.GRUCell, input, bad_hx, input_size, hidden_size)
            test(nn.LSTMCell, input, (bad_hx, good_hx), input_size, hidden_size)
            test(nn.LSTMCell, input, (good_hx, bad_hx), input_size, hidden_size)

        hidden_size = 20
        input_size = 10
        input = torch.randn(3, input_size)
        bad_hx = torch.randn(1, hidden_size)
        good_hx = torch.randn(3, hidden_size)

        # hidden state with the wrong batch size
        test_all(hidden_size, bad_hx, good_hx, input_size, input)

        # hidden state with the wrong hidden size
        bad_hx = torch.randn(3, 1)
        test_all(hidden_size, bad_hx, good_hx, input_size, input)

        # input with the wrong input size
        bad_input = torch.randn(3, 1)
        test_all(hidden_size, good_hx, good_hx, input_size, bad_input)
    def test_invalid_dropout_p(self):
        v = torch.ones(1)
        self.assertRaises(ValueError, lambda: nn.Dropout(-0.1))
        self.assertRaises(ValueError, lambda: nn.Dropout(1.1))
        self.assertRaises(ValueError, lambda: nn.Dropout2d(-0.1))
        self.assertRaises(ValueError, lambda: nn.Dropout2d(1.1))
        self.assertRaises(ValueError, lambda: nn.Dropout3d(-0.1))
        self.assertRaises(ValueError, lambda: nn.Dropout3d(1.1))
        self.assertRaises(ValueError, lambda: F.dropout(v, -0.1))
        self.assertRaises(ValueError, lambda: F.dropout(v, 1.1))
    def test_pad_sequence(self):
        def pad(tensor, length):
            return torch.cat(
                [tensor.data, tensor.data.new(
                    length - tensor.size(0), *tensor.size()[1:]).zero_()])

        # single dimensional
        a = torch.tensor([1, 2, 3])
        b = torch.tensor([4, 5])
        c = torch.tensor([6])

        # batch_first = True
        expected = torch.tensor([[4, 5, 0], [1, 2, 3], [6, 0, 0]])
        padded = rnn_utils.pad_sequence([b, a, c], True)
        self.assertEqual(padded, expected)

        # batch_first = False
        padded = rnn_utils.pad_sequence([b, a, c])
        self.assertEqual(padded, expected.transpose(0, 1))

        # pad with non-zero value
        expected = torch.tensor([[4, 5, 1], [1, 2, 3], [6, 1, 1]])
        padded = rnn_utils.pad_sequence([b, a, c], True, 1)
        self.assertEqual(padded, expected)

        # sequences already sorted by length
        expected = torch.tensor([[1, 2, 3], [4, 5, 0], [6, 0, 0]])
        padded = rnn_utils.pad_sequence([a, b, c], True)
        self.assertEqual(padded, expected)

        # more dimensions
        maxlen = 9
        for num_dim in (0, 1, 2, 3):
            sequences = []
            trailing_dims = [4] * num_dim
            for i in range(1, maxlen + 1):
                seq_len = i * i
                sequences.append(torch.rand(seq_len, 5, *trailing_dims))
            random.shuffle(sequences)
            expected = []
            for seq in sequences:
                expected.append(pad(seq, maxlen * maxlen))
            # batch_first = True
            expected = torch.stack(expected)
            padded = rnn_utils.pad_sequence(sequences, True)
            self.assertEqual(padded, expected)

            # batch_first = False
            padded = rnn_utils.pad_sequence(sequences)
            self.assertEqual(padded, expected.transpose(0, 1))
    def test_pack_sequence(self):
        def _compatibility_test(sequences, lengths, batch_first, enforce_sorted=False):
            padded = rnn_utils.pad_sequence(sequences, batch_first)
            packed = rnn_utils.pack_sequence(sequences, enforce_sorted)
            unpacked = rnn_utils.pad_packed_sequence(packed, batch_first)
            self.assertEqual(padded, unpacked[0])
            pack_padded = rnn_utils.pack_padded_sequence(
                padded, lengths, batch_first, enforce_sorted)
            self.assertEqual(packed, pack_padded)

        # single dimensional
        a = torch.tensor([1, 2, 3])
        b = torch.tensor([4, 5])
        c = torch.tensor([6])
        packed = rnn_utils.pack_sequence([a, b, c], enforce_sorted=False)
        expected = torch.tensor([1, 4, 6, 2, 5, 3])
        self.assertEqual(packed.batch_sizes, [3, 2, 1])
        self.assertEqual(packed.data.data, expected)
        self.assertEqual(packed.sorted_indices, [0, 1, 2])
        self.assertEqual(packed.unsorted_indices, [0, 1, 2])

        packed_unsorted = rnn_utils.pack_sequence([b, c, a], enforce_sorted=False)
        self.assertEqual(packed_unsorted.batch_sizes, [3, 2, 1])
        self.assertEqual(packed_unsorted.data.data, expected)
        self.assertEqual(packed_unsorted.sorted_indices, [2, 0, 1])
        self.assertEqual(packed_unsorted.unsorted_indices, [1, 2, 0])

        # single dimensional, enforce_sorted = True
        packed_enforce_sorted = rnn_utils.pack_sequence([a, b, c], enforce_sorted=True)
        self.assertEqual(packed_enforce_sorted.batch_sizes, [3, 2, 1])
        self.assertEqual(packed_enforce_sorted.data.data, expected)
        self.assertTrue(packed_enforce_sorted.sorted_indices is None)
        self.assertTrue(packed_enforce_sorted.unsorted_indices is None)

        with self.assertRaisesRegex(RuntimeError, 'must be sorted in decreasing order'):
            rnn_utils.pack_sequence([b, c, a], enforce_sorted=True)

        with self.assertRaisesRegex(RuntimeError, 'You can pass `enforce_sorted=False`'):
            rnn_utils.pack_sequence([b, c, a], enforce_sorted=True)

        # more dimensions
        maxlen = 9
        for num_dim in (0, 1, 2, 3):
            sequences = []
            lengths = []
            trailing_dims = [4] * num_dim
            for i in range(maxlen, 0, -1):
                seq_len = i * i
                lengths.append(seq_len)
                sequences.append(torch.rand(seq_len, 5, *trailing_dims))
            unsorted_sequences = [s.clone() for s in sequences]
            random.shuffle(unsorted_sequences)
            unsorted_sequences_lengths = [t.size(0) for t in unsorted_sequences]

            # compatibility with other utilities
            for batch_first in (True, False):
                for enforce_sorted in (True, False):
                    _compatibility_test(sequences, lengths, batch_first, enforce_sorted)
                _compatibility_test(unsorted_sequences, unsorted_sequences_lengths,
                                    batch_first)
4601 def test_pack_padded_sequence(self):
4602 def generate_test_case(sorted_lengths, should_shuffle):
4603 def pad(tensor, length):
4604 return torch.cat([tensor, tensor.new(length - tensor.size(0), *tensor.size()[1:]).zero_()])
4606 max_length = sorted_lengths[0]
4607 batch_sizes = [sum(map(bool, filter(
lambda x: x >= i, sorted_lengths)))
4608 for i
in range(1, max_length + 1)]
4610 padded = torch.cat([pad(i * 100 + torch.arange(1., 5 * l + 1).view(l, 1, 5), max_length)
4611 for i, l
in enumerate(sorted_lengths, 1)], 1)
4612 expected_data = [[torch.arange(1., 6) + (i + 1) * 100 + 5 * n
for i
in range(batch_size)]
4613 for n, batch_size
in enumerate(batch_sizes)]
4614 expected_data = list(itertools.chain.from_iterable(expected_data))
4615 expected_data = torch.stack(expected_data, dim=0)
4619 permutation = list(range(len(sorted_lengths)))
4620 random.shuffle(permutation)
4623 padded = padded.index_select(1, unsorted_indices)
4624 lengths =
torch.tensor(sorted_lengths).index_select(0, unsorted_indices)
4626 unsorted_indices =
None 4627 lengths = sorted_lengths
4629 return padded.requires_grad_(), lengths, expected_data, batch_sizes, unsorted_indices
4633 [[10, 8, 4, 2, 2, 2, 1],
False],
4634 [[11, 10, 8, 6, 4, 3, 1],
False],
4635 [[11, 10, 8, 6, 4, 3, 1],
True],
4638 for test_case, batch_first
in itertools.product(test_cases, (
True,
False)):
4639 sorted_lengths, should_shuffle = test_case
4640 padded, lengths, expected_data, batch_sizes, unsorted_indices = generate_test_case(
4641 sorted_lengths, should_shuffle)
4645 src = src.transpose(0, 1)
4648 packed = rnn_utils.pack_padded_sequence(src, lengths, batch_first=batch_first,
4649 enforce_sorted=
not should_shuffle)
4650 self.assertEqual(packed.data.data, expected_data)
4651 self.assertEqual(packed.batch_sizes, batch_sizes)
4652 self.assertEqual(packed.unsorted_indices, unsorted_indices)
4655 unpacked, unpacked_len = rnn_utils.pad_packed_sequence(packed, batch_first=batch_first)
4656 self.assertEqual(unpacked, src)
4657 self.assertEqual(unpacked_len, lengths)
4660 if padded.grad
is not None:
4661 padded.grad.data.zero_()
4662 grad_output = unpacked.data.clone().normal_()
4663 unpacked.backward(grad_output)
4665 grad_output.transpose_(0, 1)
4666 for i, l
in enumerate(lengths):
4667 self.assertEqual(padded.grad.data[:l, i], grad_output[:l, i])
4669 self.assertEqual(padded.grad.data[l:, i].abs().sum(), 0)
4672 with self.assertRaisesRegex(RuntimeError,
'You can pass `enforce_sorted=False`'):
4673 packed = rnn_utils.pack_padded_sequence(torch.randn(3, 3), [1, 3, 2])
4675 def _test_variable_sequence(self, device="cpu", dtype=torch.float):
4676 def pad(var, length):
4677 if var.size(0) == length:
4679 return torch.cat([var, var.new_zeros(length - var.size(0), *var.size()[1:])])
4681 def maybe_index_tuple(maybe_tuple_of_tensors, index):
4682 if maybe_tuple_of_tensors
is None:
4684 return tuple(maybe_tuple_of_tensors[j][:, index:index + 1, :].contiguous()
4687 def check_lengths(lengths, enforce_sorted, use_default_hiddens):
4691 bidirectional =
True 4693 max_length = max(lengths)
4694 x_leaf = torch.randn(max_length, len(lengths), input_size, device=device,
4695 dtype=dtype, requires_grad=
True)
4696 num_directions = 2
if bidirectional
else 1
4697 lstm = nn.LSTM(input_size, hidden_size, bidirectional=bidirectional,
4698 num_layers=num_layers).to(device, dtype)
4699 lstm2 = deepcopy(lstm).to(device, dtype)
4703 if not use_default_hiddens:
4704 hidden0 = tuple(torch.randn(num_directions * num_layers, len(lengths), hidden_size,
4705 device=device, dtype=dtype)
4711 for i, l
in enumerate(lengths):
4712 hidden_i = maybe_index_tuple(hidden0, i)
4713 out, hid = lstm2(x[:l, i:i + 1], hidden_i)
4714 out_pad = pad(out, max_length)
4715 seq_outs.append(out_pad)
4716 seq_hiddens.append(hid)
4717 seq_out = torch.cat(seq_outs, 1)
4718 seq_hidden = tuple(torch.cat(hids, 1)
for hids
in zip(*seq_hiddens))
4721 packed = rnn_utils.pack_padded_sequence(x, lengths, enforce_sorted=enforce_sorted)
4722 packed_out, packed_hidden = lstm(packed, hidden0)
4723 unpacked, unpacked_len = rnn_utils.pad_packed_sequence(packed_out)
4726 prec = dtype2prec[dtype]
4727 self.assertEqual(packed_hidden, seq_hidden, prec)
4728 self.assertEqual(unpacked, seq_out, prec)
4729 self.assertEqual(unpacked_len, lengths, prec)
4732 seq_out.sum().backward()
4733 grad_x = x_leaf.grad.data.clone()
4734 x_leaf.grad.data.zero_()
4735 unpacked.sum().backward()
4737 self.assertEqual(x_leaf.grad, grad_x, dtype2prec[dtype])
4738 for p1, p2
in zip(lstm.parameters(), lstm2.parameters()):
4739 prec = dtype2prec[dtype]
4740 if dtype == torch.float16:
4742 self.assertEqual(p1.grad, p2.grad, prec)
4748 [
True, [10, 10, 6, 2, 2, 1, 1]],
4749 [
False, [10, 10, 6, 2, 2, 1, 1]],
4750 [
False, [2, 1, 3, 2, 10, 5, 3]],
4753 for enforce_sorted, seq_lens,
in tests:
4754 for use_default_hiddens
in (
True,
False):
4755 check_lengths(seq_lens, enforce_sorted, use_default_hiddens)
4757 def test_variable_sequence(self):
4758 self._test_variable_sequence()
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_variable_sequence_cuda(self, dtype=torch.float):
        self._test_variable_sequence("cuda", dtype)

    def test_LSTM_cell(self):
        # this is just a smoke test; these modules are implemented through
        # autograd so no Jacobian test is needed
        for bias in (True, False):
            input = torch.randn(3, 10)
            hx = torch.randn(3, 20)
            cx = torch.randn(3, 20)
            lstm = nn.LSTMCell(10, 20, bias=bias)
            for _ in range(6):
                hx, cx = lstm(input, (hx, cx))

            (hx + cx).sum().backward()
    @unittest.skipIf(not (TEST_CUDNN and TEST_MULTIGPU), 'CUDNN or multi-gpu not available')
    def test_cudnn_rnn_dropout_states_device(self):
        rnn = nn.RNN(10, 20, num_layers=2, dropout=.5)
        device = 1
        input = torch.randn(5, 4, 10).cuda(device)
        rnn.cuda(device)
        hx = torch.randn(2, 4, 20).cuda(device)
        output = rnn(input, hx)
4787 @unittest.skipIf(
not TEST_CUDNN,
'CUDNN not available')
4789 def test_cudnn_weight_format(self):
4791 nn.LSTM(10, 20, batch_first=
True),
4792 nn.GRU(10, 20, batch_first=
True),
4793 nn.RNN(10, 20, batch_first=
True)
4798 input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=
True)
4799 hx = Variable(torch.randn(1, 5, 20).cuda(), requires_grad=
True)
4800 all_vars = [input, hx] + list(rnn.parameters())
4801 if isinstance(rnn, nn.LSTM):
4802 cx = Variable(torch.randn(1, 5, 20).cuda(), requires_grad=
True)
4803 all_vars[2:2] = [cx]
4806 output = rnn(input, hx)
4807 output[0].sum().backward()
4808 grads = [v.grad.data.clone()
for v
in all_vars]
4813 weight = all_vars[4]
4814 weight_data = weight.data.clone()
4815 with torch.no_grad():
4816 weight.set_(weight_data)
4819 with warnings.catch_warnings(record=
True)
as w:
4820 output_noncontig = rnn(input, hx)
4822 self.assertEqual(len(w), 1)
4823 self.assertIn(
'weights are not part of single contiguous chunk of memory', w[0].message.args[0])
4825 warnings.resetwarnings()
4826 output_noncontig[0].sum().backward()
4827 grads_noncontig = [v.grad.data.clone()
for v
in all_vars]
4830 self.assertEqual(output, output_noncontig)
4831 self.assertEqual(grads_noncontig, grads)
4835 self.assertEqual(weight_data, all_vars[4].data)
4837 @unittest.skipIf(
not TEST_CUDNN,
'CUDNN not available')
4838 def test_cudnn_weight_tying(self):
4840 nn.LSTM(10, 20, batch_first=
True, bidirectional=
True),
4841 nn.GRU(10, 20, batch_first=
True, bidirectional=
True),
4842 nn.RNN(10, 20, batch_first=
True, bidirectional=
True)
4845 rnn.bias_ih_l0_reverse = rnn.bias_ih_l0
4847 input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=
True)
4848 hx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=
True)
4849 all_vars = [input, hx] + list(rnn.parameters())
4850 opt = torch.optim.SGD(rnn.parameters(), lr=0.1)
4852 if isinstance(rnn, nn.LSTM):
4853 cx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=
True)
4854 all_vars[2:2] = [cx]
4857 with warnings.catch_warnings(record=
True)
as w:
4858 output = rnn(input, hx)
4859 output[0].sum().backward()
4862 with warnings.catch_warnings(record=
True)
as w:
4863 output_cuda = rnn(input, hx)
4865 hx = (hx[0].cpu(), hx[1].cpu())
if isinstance(rnn, nn.LSTM)
else hx.cpu()
4866 output_cpu = rnn(input.cpu(), hx)
4867 self.assertEqual(output_cuda, output_cpu)
4869 @unittest.skipIf(
not TEST_CUDA,
'CUDA not available')
4870 @repeat_test_for_types(NO_HALF_TENSORTYPES)
4871 def test_cuda_rnn_fused(self, dtype=torch.float):
4873 def copy_rnn(rnn1, rnn2):
4874 for x_layer, y_layer
in zip(rnn1.all_weights, rnn2.all_weights):
4875 for x, y
in zip(x_layer, y_layer):
4876 x.data.copy_(y.data)
4878 def check_rnn_grads(rnn1, rnn2):
4879 for x_layer, y_layer
in zip(rnn1.all_weights, rnn2.all_weights):
4880 for x, y
in zip(x_layer, y_layer):
4881 self.assertEqual(x.grad, y.grad, prec=5e-5)
4888 input_val = torch.randn(seq_length, batch, input_size, dtype=dtype)
4889 grad_output = torch.randn(seq_length, batch, hidden_size, dtype=dtype)
4890 hx_val = torch.randn(num_layers, batch, hidden_size, dtype=dtype)
4891 grad_hy = torch.randn(num_layers, batch, hidden_size, dtype=dtype)
4893 for module
in (nn.GRU, nn.LSTM):
4894 for bias
in (
True,
False):
4895 rnn = module(input_size, hidden_size, num_layers, bias=bias).to(dtype)
4896 rnn_cuda = module(input_size, hidden_size, num_layers, bias=bias).to(
"cuda", dtype)
4897 copy_rnn(rnn, rnn_cuda)
4899 is_lstm = isinstance(rnn, nn.LSTM)
4901 hx = (Variable(hx_val.clone(), requires_grad=
True),
4902 Variable(hx_val.clone().add(1), requires_grad=
True))
4903 hx_cuda = (Variable(hx_val.clone().cuda(), requires_grad=
True),
4904 Variable(hx_val.clone().cuda().add(1), requires_grad=
True))
4906 hx = Variable(hx_val.clone(), requires_grad=
True)
4907 hx_cuda = Variable(hx_val.clone().cuda(), requires_grad=
True)
4909 inp = Variable(input_val.clone(), requires_grad=
True)
4910 inp_cu = Variable(input_val.clone().cuda(), requires_grad=
True)
4911 output1, hy1 = rnn(inp, hx)
4912 output2, hy2 = rnn_cuda(inp_cu, hx_cuda)
4915 [output1, hy1[0], hy1[1]], [grad_output, grad_hy, grad_hy + 1]
4918 [output2, hy2[0], hy2[1]],
4919 [grad_output.cuda(), grad_hy.cuda(), (grad_hy + 1).cuda()]
4925 self.assertEqual(output1, output2)
4926 self.assertEqual(hy1, hy2)
4928 check_rnn_grads(rnn, rnn_cuda)
4929 self.assertEqual(inp.grad.data, inp_cu.grad.data)
4931 self.assertEqual(hx[0].grad.data, hx_cuda[0].grad.data)
4932 self.assertEqual(hx[1].grad.data, hx_cuda[1].grad.data)
4934 self.assertEqual(hx.grad.data, hx_cuda.grad.data)
4936 def test_rnn_args_check(self):
4945 def test(input_shape, hidden_shape, mode):
4946 for input, hidden
in get_inputs(input_shape, hidden_shape, mode):
4947 model = getattr(nn, mode)(input_size, hidden_size, num_layers)
4948 self.assertRaises(RuntimeError,
lambda:
model(input, hidden))
4950 correct_input_shape = (seq_len, batch_size, input_size)
4951 correct_hidden_shape = (num_layers * num_directions, batch_size, hidden_size)
4953 def update_shape(shape, dim, new_dim_size):
4954 new_shape = list(shape)
4955 new_shape[dim] = new_dim_size
4956 return tuple(new_shape)
4958 def get_inputs(input_shape, hidden_shape, mode):
4959 '''returns list( tuple(input, hidden) ) 4960 where input, hidden are inputs to a model''' 4961 input = torch.randn(input_shape)
4962 hidden = torch.randn(hidden_shape)
4964 return [(input, hidden)]
4965 if hidden_shape == correct_hidden_shape:
4966 return [(input, (hidden, hidden))]
4967 good_hidden = torch.randn(correct_hidden_shape)
4969 (input, (hidden, good_hidden)),
4970 (input, (good_hidden, hidden)),
4973 rnn_modes = [
'RNN',
'GRU',
'LSTM']
4974 for mode
in rnn_modes:
4976 input_shape = update_shape(correct_input_shape, 1, bad_size)
4977 hidden_shape = correct_hidden_shape
4978 test(input_shape, hidden_shape, mode)
4981 input_shape = correct_input_shape
4982 hidden_shape = update_shape(correct_hidden_shape, 1, bad_size)
4983 test(input_shape, hidden_shape, mode)
4986 input_shape = update_shape(correct_input_shape, 2, bad_size)
4987 hidden_shape = correct_hidden_shape
4988 test(input_shape, hidden_shape, mode)
4991 input_shape = correct_input_shape
4992 hidden_shape = update_shape(correct_hidden_shape, 2, bad_size)
4993 test(input_shape, hidden_shape, mode)
4996 input_shape = correct_input_shape
4997 hidden_shape = update_shape(correct_hidden_shape, 0, bad_size)
4998 test(input_shape, hidden_shape, mode)
5000 @unittest.skipIf(
not TEST_MULTIGPU,
"multi-GPU not supported")
5001 def test_rnn_check_device(self):
5009 correct_input_shape = (seq_len, batch_size, input_size)
5010 correct_hidden_shape = (num_layers * num_directions, batch_size, hidden_size)
5011 rnn_modes = [
'RNN',
'GRU',
'LSTM']
5013 for mode
in rnn_modes:
5014 model = getattr(nn, mode)(input_size, hidden_size, num_layers)
5015 input = torch.randn(correct_input_shape)
5016 hidden = torch.randn(correct_hidden_shape)
5019 with self.assertRaisesRegex(RuntimeError,
5020 "Input and parameter tensors are not at the same device"):
5021 model(input.to(
'cuda:0'))
5024 with self.assertRaisesRegex(RuntimeError,
5025 r"Input and hidden tensors are not at the same device"):
5027 model(input, (hidden.to(
'cuda:0'), hidden.to(
'cuda:0')))
5029 model(input, (hidden.to(
'cuda:0')))
5033 with self.assertRaisesRegex(RuntimeError,
5034 "Input and hidden tensors are not at the same device"):
5035 model(input.to(
'cuda:0'), (hidden.to(
'cuda:0'), hidden.to(
'cuda:1')))
    def test_rnn_initial_hidden_state(self):
        rnn_modes = ['RNN', 'GRU', 'LSTM']
        for mode in rnn_modes:
            rnn = getattr(nn, mode)(30, 20, 2)
            input = torch.randn(10, 32, 30)
            hidden = torch.zeros(2, 32, 20)

            if mode == 'LSTM':
                hidden = (hidden, hidden)
            output1, hidden1 = rnn(input, hidden)
            output2, hidden2 = rnn(input)
            self.assertEqual(output1, output2)
            self.assertEqual(hidden1, hidden2)
    def _test_rnn_retain_variables(self, device="cpu", dtype=torch.double):
        rnns = [nn.LSTM(10, 20, num_layers=2).to(device, dtype),
                nn.GRU(10, 20, num_layers=2).to(device, dtype),
                nn.RNN(10, 20, num_layers=2).to(device, dtype)]
        for rnn in rnns:
            input = torch.randn(5, 6, 10, device=device, dtype=dtype, requires_grad=True)
            output = rnn(input)
            output[0].sum().backward(retain_graph=True)
            grads = [input.grad.data.clone()] + [p.grad.data.clone() for p in rnn.parameters()]
            for _ in range(4):
                rnn.zero_grad()
                input.grad.data.zero_()
                output[0].sum().backward(retain_graph=True)
                grads2 = [input.grad.data] + [p.grad.data for p in rnn.parameters()]
                self.assertEqual(grads, grads2)

    def test_rnn_retain_variables(self):
        self._test_rnn_retain_variables()

    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_rnn_retain_variables_cuda(self, dtype=torch.float):
        with torch.backends.cudnn.flags(enabled=False):
            self._test_rnn_retain_variables("cuda", dtype)
        self._test_rnn_retain_variables("cuda", dtype)
5077 def _test_RNN_cpu_vs_cudnn(self, dropout):
5079 def forward_backward(cuda, rnn, input_val, hx_val, grad_output, grad_hy, weights_val):
5080 is_lstm = isinstance(rnn, nn.LSTM)
5082 for x_layer, y_layer
in zip(rnn.all_weights, weights_val):
5083 for x, y
in zip(x_layer, y_layer):
5084 x.data.copy_(y.data)
5086 if isinstance(input_val, rnn_utils.PackedSequence):
5087 input = rnn_utils.PackedSequence(
5088 Variable(input_val.data.data, requires_grad=
True), input_val.batch_sizes)
5089 input_var = input.data
5091 input = Variable(input_val.clone(), requires_grad=
True)
5094 hx = (Variable(hx_val.clone(), requires_grad=
True),
5095 Variable(hx_val.add(1), requires_grad=
True))
5097 hx = Variable(hx_val.clone(), requires_grad=
True)
5101 input_var.data = input_var.data.cuda()
5103 hx[0].data = hx[0].data.cuda()
5104 hx[1].data = hx[1].data.cuda()
5106 hx.data = hx.data.cuda()
5107 grad_hy = grad_hy.cuda()
5108 grad_output = grad_output.cuda()
5110 output, hy = rnn(input, hx)
5112 if isinstance(output, rnn_utils.PackedSequence):
5113 output = output.data
5120 return {
'output': output.data,
5121 'hy': hy[0].data
if is_lstm
else hy.data,
5122 'weights': rnn.all_weights,
5123 'grad_input': input_var.grad.data,
5124 'grad_hx': hx[0].grad.data
if is_lstm
else hx.grad.data,
5125 'cy': hy[1].data
if is_lstm
else None,
5126 'grad_cx': hx[1].grad.data
if is_lstm
else None}
        def make_noncontig(tensor):
            ndim = tensor.dim()
            return torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)

        def compare_cpu_gpu(outputs_cpu, outputs_gpu):
            self.assertEqual(list(outputs_cpu.keys()), list(outputs_gpu.keys()))
            for key in outputs_cpu.keys():
                if key != 'weights':
                    self.assertEqual(outputs_cpu[key], outputs_gpu[key], prec=5e-5, message=key)

            # check grad weights separately, as nested lists
            for cpu_layer_weight, gpu_layer_weight in zip(outputs_cpu['weights'], outputs_gpu['weights']):
                for (cpu_weight, gpu_weight) in zip(cpu_layer_weight, gpu_layer_weight):
                    self.assertEqual(cpu_weight.grad.data, gpu_weight.grad.data, prec=5e-5)
5149 for module
in (nn.RNN, nn.LSTM, nn.GRU):
5150 for bias, bidirectional, batch_first, contig, variable_len, lens_as_tensor \
5151 in product((
True,
False), repeat=6):
5153 num_directions = 2
if bidirectional
else 1
5155 input_val = torch.randn(batch, seq_length, input_size)
5156 grad_output = torch.randn(batch, seq_length, hidden_size * num_directions)
5158 input_val = torch.randn(seq_length, batch, input_size)
5159 grad_output = torch.randn(seq_length, batch, hidden_size * num_directions)
5162 grad_output = make_noncontig(grad_output)
5163 grad_hy = make_noncontig(grad_hy)
5164 input_var = make_noncontig(input_val)
5165 hx_val = make_noncontig(hx_val)
5167 hx_val = torch.randn(num_layers * num_directions, batch, hidden_size)
5168 grad_hy = torch.randn(num_layers * num_directions, batch, hidden_size)
5171 lengths = [7, 5, 5, 2, 1, 1]
5174 input_val = rnn_utils.pack_padded_sequence(input_val, lengths, batch_first=batch_first)
5175 grad_output = rnn_utils.pack_padded_sequence(grad_output, lengths, batch_first=batch_first).data
5177 rnn = module(input_size,
5182 bidirectional=bidirectional,
5183 batch_first=batch_first)
5185 outputs_cpu = forward_backward(
5186 False, rnn, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
5188 rnn_gpu = module(input_size,
5193 bidirectional=bidirectional,
5194 batch_first=batch_first)
5196 outputs_gpu = forward_backward(
5197 True, rnn_gpu, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
5199 compare_cpu_gpu(outputs_cpu, outputs_gpu)
        for nonlinearity in ('tanh', 'relu'):
            hx_val = torch.randn(num_layers, batch, hidden_size)
            input_val = torch.randn(seq_length, batch, input_size)
            grad_output = torch.randn(
                seq_length, batch, hidden_size * num_directions)
            grad_hy = torch.randn(
                num_layers * num_directions, batch, hidden_size)

            rnn = nn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
            outputs_cpu = forward_backward(False, rnn, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)

            rnn_gpu = nn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
            outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)

            compare_cpu_gpu(outputs_cpu, outputs_gpu)
    @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
    @default_tensor_type(torch.FloatTensor)
    def test_RNN_cpu_vs_cudnn_no_dropout(self):
        self._test_RNN_cpu_vs_cudnn(0)
    @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
    def test_RNN_cudnn_weight_norm(self):
        input_size = 10
        hidden_size = 6
        num_layers = 2
        seq_length = 7
        batch = 6
        m = nn.LSTM(input_size, hidden_size, num_layers).cuda()
        input = torch.randn(seq_length, batch, input_size).cuda()
        expected_output = m(input)
        # add weight normalization
        name = 'weight_hh_l0'
        m = torch.nn.utils.weight_norm(m, name=name)
        # otherwise, subsequent warnings would be hidden, and further tests rely on them
        warnings.simplefilter("always")
        self.assertEqual(m(input), expected_output)

        # remove weight norm
        m = torch.nn.utils.remove_weight_norm(m, name=name)
        self.assertEqual(m(input), expected_output)
    @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 5103), "needs cudnn >= 5.1")
    @default_tensor_type(torch.FloatTensor)
    def test_RNN_cpu_vs_cudnn_with_dropout(self):
        # Because of dropout randomness, only dropout=0 and dropout=1 can be compared exactly
        self._test_RNN_cpu_vs_cudnn(1)
5249 @unittest.skipIf(
not (TEST_CUDNN
and TEST_CUDNN_VERSION >= 5103),
"needs cudnn >= 5.1")
5250 def test_RNN_dropout(self):
5253 for p
in (0, 0.276, 0.731, 1):
5254 for train
in (
True,
False):
5255 for cuda
in (
True,
False):
5256 rnn = nn.RNN(10, 1000, 2, bias=
False, dropout=p, nonlinearity=
'relu')
5264 rnn.weight_ih_l0.data.fill_(1)
5265 rnn.weight_hh_l0.data.fill_(1)
5266 rnn.weight_ih_l1.data.fill_(1)
5267 rnn.weight_hh_l1.data.fill_(1)
5268 input = torch.ones(1, 1, 10)
5269 hx = torch.zeros(2, 1, 1000)
5271 input = input.cuda()
5274 output, hy = rnn(input, hx)
5275 self.assertEqual(output.data.min(), output.data.max())
5276 output_val = output.data[0][0][0]
5277 if p == 0
or not train:
5278 self.assertEqual(output_val, 10000)
5280 self.assertEqual(output_val, 0)
5282 self.assertGreater(output_val, 8000)
5283 self.assertLess(output_val, 12000)
5284 denorm_mod = (output_val * (1 - p)) % 10
5285 self.assertLess(min(denorm_mod, 10 - denorm_mod), 1e-2)
5287 self.assertEqual(hy[0].data.min(), hy[0].data.max())
5288 self.assertEqual(hy[1].data.min(), hy[1].data.max())
5289 self.assertEqual(hy.data[0][0][0], 10)
5290 self.assertEqual(hy.data[1][0][0], output_val)
5292 @unittest.skipIf(
not (TEST_CUDNN
and TEST_CUDNN_VERSION >= 5103),
"needs cudnn >= 5.1")
5293 def test_RNN_dropout_state(self):
5295 if sys.version_info[0] == 2:
5296 import cPickle
as pickle
5299 for p
in (0, 0.1234):
5300 for train
in (
True,
False):
5301 for cuda
in (
True,
False):
5302 rnn = nn.RNN(100, 100, 2, bias=
False, dropout=p, nonlinearity=
'relu')
5310 input = torch.rand(1, 1, 100)
5311 hx = torch.rand(2, 1, 100)
5313 input = input.cuda()
5316 output1, hy1 = rnn(input, hx)
5317 output2, hy2 = rnn(input, hx)
5319 rnn_pickle = pickle.dumps(rnn)
5320 rnn2 = pickle.loads(rnn_pickle)
5321 rnn2.flatten_parameters()
5322 output3, hy3 = rnn2(input, hx)
5324 if p == 0
or not train:
5325 self.assertEqual(output1, output2)
5326 self.assertEqual(output1, output3)
5327 self.assertEqual(hy1, hy2)
5328 self.assertEqual(hy1, hy3)
5330 self.assertNotEqual(output1, output2)
5331 self.assertNotEqual(output1, output3)
5332 self.assertNotEqual(hy1, hy2)
5333 self.assertNotEqual(hy1, hy3)
5335 @unittest.skipIf(
not (TEST_CUDNN
and TEST_CUDNN_VERSION >= 5103),
"needs cudnn >= 5.1")
5336 def test_RNN_change_dropout(self):
5337 for train, cuda
in product((
True,
False), repeat=2):
5338 rnn = nn.RNN(100, 100, 2, dropout=0, nonlinearity=
'relu')
5339 input = torch.rand(3, 2, 100)
5341 input.data = input.data.cuda()
5350 for p
in (0, 0.5, 0, 0.7, 0.2, 1, 0.2, 0):
5352 output1, hy1 = rnn(input)
5353 output2, hy2 = rnn(input)
5355 if p == 0
or p == 1
or not train:
5356 self.assertEqual(output1, output2)
5357 self.assertEqual(hy1, hy2)
5359 self.assertNotEqual(output1, output2)
5360 self.assertNotEqual(hy1, hy2)
5362 if prev_output
is not None:
5364 self.assertEqual(output1.data, prev_output)
5365 self.assertEqual(output2.data, prev_output)
5367 self.assertNotEqual(output1.data, prev_output)
5368 self.assertNotEqual(output2.data, prev_output)
5369 prev_output = output1.data
    def _verify_pixel_shuffle(self, input, output, upscale_factor):
        for c in range(output.size(1)):
            for h in range(output.size(2)):
                for w in range(output.size(3)):
                    height_idx = h // upscale_factor
                    weight_idx = w // upscale_factor
                    channel_idx = (upscale_factor * (h % upscale_factor)) + (w % upscale_factor) + \
                                  (c * upscale_factor ** 2)
                    self.assertEqual(output[:, c, h, w], input[:, channel_idx, height_idx, weight_idx])
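    # PixelShuffle rearranges a (N, C * r^2, H, W) tensor into (N, C, H * r, W * r):
    # output[n, c, h, w] comes from input[n, c * r^2 + r * (h % r) + (w % r), h // r, w // r],
    # which is exactly the index arithmetic the helper above checks element by element.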
    def test_inplace_thnn(self):
        modules = [nn.ReLU, nn.ELU, nn.SELU, nn.CELU, nn.RReLU]
        for mod in modules:
            r = mod(inplace=True)
            input = torch.randn(5, 5, requires_grad=True)
            output = r(input + 0)
            grad_output = torch.randn(5, 5)
            grad_output_clone = grad_output.clone()
            output.backward(grad_output)
            self.assertEqual(grad_output, grad_output_clone)
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_noncontig_conv_grad_cuda(self, dtype=torch.float):
        module = nn.Conv2d(3, 5, kernel_size=3, padding=1).to("cuda", dtype)
        input = torch.randn(2, 3, 10, 10, dtype=dtype, device="cuda", requires_grad=True)
        output = module(input)

        grad = torch.randn(2, 2, 5, 10, 10, dtype=dtype, device="cuda")[:, 1]
        assert not grad.is_contiguous()
        output.backward(grad, retain_graph=True)
        self.assertIsNotNone(input.grad)
        result = input.grad.data.clone()
        input.grad.data.zero_()

        output.backward(grad.contiguous())
        self.assertEqual(result, input.grad.data, dtype2prec[dtype])
    def test_pixel_shuffle(self):
        batch_size = random.randint(1, 3)
        upscale_factor = random.randint(2, 5)
        channels = random.randint(1, 4) * upscale_factor ** 2
        height = random.randint(5, 10)
        width = random.randint(5, 10)

        input = torch.rand(batch_size, channels, height, width, requires_grad=True)
        ps = nn.PixelShuffle(upscale_factor)
        output = ps(input)
        self._verify_pixel_shuffle(input.data, output.data, upscale_factor)
        output.backward(output.data)
        self.assertEqual(input.data, input.grad.data)
    def test_elu_inplace_view(self):
        v = torch.tensor([1.0, -1.0, 1.0, -1.0], requires_grad=True)

        def func(root):
            x = root.clone()
            view = x.narrow(0, 1, 2)
            res = F.elu(view, inplace=True)
            self.assertIs(res, view)
            return x

        gradcheck(func, [v])
        gradgradcheck(func, [v])

    def test_relu_inplace_view(self):
        v = torch.tensor([1.0, -1.0, 1.0, -1.0], requires_grad=True)

        def func(root):
            x = root.clone()
            view = x.narrow(0, 1, 2)
            res = F.relu(view, inplace=True)
            self.assertIs(res, view)
            return x

        gradcheck(func, [v])
        gradgradcheck(func, [v])
    @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
    def test_PReLU_backward_requires_grad_false(self):
        m = nn.PReLU().to('cuda')
        x = torch.randn(2, 3, 4, 5, requires_grad=False, device='cuda')
        y = m(x)
        y.mean().backward()
        self.assertEqual(x.grad, None)
    def test_bce_loss_always_nonnegative(self):
        target = torch.ones(5)
        input = torch.ones(5)
        self.assertEqual((nn.BCELoss()(input, target) < 0).sum(), 0)

        target = torch.zeros(5)
        input = torch.zeros(5)
        self.assertEqual((nn.BCELoss()(input, target) < 0).sum(), 0)
    def test_bce_with_logits_raises_if_target_and_input_are_different_size(self):
        target = torch.rand(5)
        input = torch.rand(5, 1)
        with self.assertRaises(ValueError):
            nn.BCEWithLogitsLoss()(input, target)

        target = torch.rand(5, 1)
        input = torch.rand(5)
        with self.assertRaises(ValueError):
            nn.BCEWithLogitsLoss()(input, target)
    def test_bce_with_logits_gives_same_result_as_sigmoid_and_bce_loss(self):
        sigmoid = nn.Sigmoid()

        target = torch.rand(64, 4)
        output = torch.rand(64, 4) - 0.5

        self.assertEqual(nn.BCEWithLogitsLoss()(output, target), nn.BCELoss()(sigmoid(output), target))

        weight = torch.rand(4)
        self.assertEqual(nn.BCEWithLogitsLoss(weight)(output, target), nn.BCELoss(weight)(sigmoid(output), target))

        target = torch.zeros(4, 1, dtype=torch.float)
        output = torch.empty(4, 1, dtype=torch.float).fill_(-100)

        self.assertEqual(nn.BCEWithLogitsLoss()(output, target), nn.BCELoss()(sigmoid(output), target))

        self.assertEqual(nn.BCEWithLogitsLoss(reduction='none')(output, target),
                         nn.BCELoss(reduction='none')(sigmoid(output), target))

        weight = torch.rand(1, dtype=torch.float)
        self.assertEqual(nn.BCEWithLogitsLoss(weight)(output, target), nn.BCELoss(weight)(sigmoid(output), target))
    def test_bce_with_logits_has_correct_grad_at_zero(self):
        output = torch.zeros(3, 1, requires_grad=True)
        target = torch.zeros(3, 1)
        nn.BCEWithLogitsLoss(reduction='sum')(output, target).backward()
        expected_grad = torch.empty(3, 1).fill_(0.5)
        self.assertEqual(output.grad, expected_grad)
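    # For a zero target, d/dx BCEWithLogits(x, 0) = sigmoid(x), so at x = 0 each element
    # of the 'sum'-reduced loss contributes a gradient of sigmoid(0) = 0.5, which is the
    # expected_grad checked above.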
    def test_bce_with_logits_broadcasts_weights(self):
        target = torch.rand(16, 4)
        output = torch.rand(16, 4) - 0.5

        weight = torch.rand(4)
        out1 = nn.BCEWithLogitsLoss(weight)(output, target)

        weight = weight.expand(16, 4).contiguous()
        out2 = nn.BCEWithLogitsLoss(weight)(output, target)

        self.assertEqual(out1, out2)

        weight = torch.rand(16, 1)
        out1 = nn.BCEWithLogitsLoss(weight)(output, target)

        weight = weight.expand(16, 4).contiguous()
        out2 = nn.BCEWithLogitsLoss(weight)(output, target)

        self.assertEqual(out1, out2)
    def test_bce_with_logits_ones_in_pos_weights_are_the_same_as_none(self):
        target = torch.rand(64, 4)
        output = torch.rand(64, 4) - 0.5
        pos_weight = torch.ones(64, 4)

        self.assertEqual(nn.BCEWithLogitsLoss()(output, target),
                         nn.BCEWithLogitsLoss(pos_weight=pos_weight)(output, target))

    def test_bce_with_logits_broadcasts_pos_weights(self):
        target = torch.rand(64, 4)
        output = torch.rand(64, 4) - 0.5
        pos_weight = torch.rand(4)
        out1 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(output, target)

        pos_weight1 = pos_weight.expand(1, 4)
        out2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight1)(output, target)

        pos_weight2 = pos_weight.expand(64, 4)
        out3 = nn.BCEWithLogitsLoss(pos_weight=pos_weight2)(output, target)

        self.assertEqual(out1, out2)
        self.assertEqual(out1, out3)
    def test_bce_with_logits_with_pos_weight_has_correct_grad_at_zero(self):
        output = torch.zeros(3, 1, requires_grad=True)
        target = torch.zeros(3, 1)
        pos_weight = torch.ones(3, 1)
        nn.BCEWithLogitsLoss(pos_weight=pos_weight, reduction='sum')(output, target).backward()
        expected_grad = torch.empty(3, 1).fill_(0.5)
        grad = output.grad
        self.assertEqual(grad, expected_grad)
5559 def test_bce_with_logits_stability(self):
5564 out1 = nn.BCEWithLogitsLoss()(output, target)
5565 self.assertTrue(torch.isfinite(out1).all().item())
5567 out2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(output, target)
5568 self.assertTrue(torch.isfinite(out2).all().item())
    def test_bce_loss_broadcasts_weights(self):
        sigmoid = nn.Sigmoid()
        target = torch.rand(16, 4)
        output = torch.rand(16, 4) - 0.5

        weight = torch.rand(4)
        out1 = nn.BCELoss(weight)(sigmoid(output), target)

        weight = weight.expand(16, 4).contiguous()
        out2 = nn.BCELoss(weight)(sigmoid(output), target)

        self.assertEqual(out1, out2)

        weight = torch.rand(16, 1)
        out1 = nn.BCELoss(weight)(sigmoid(output), target)

        weight = weight.expand(16, 4).contiguous()
        out2 = nn.BCELoss(weight)(sigmoid(output), target)

        self.assertEqual(out1, out2)
    def test_elu_inplace_gradgrad(self):
        v = torch.randn(8, requires_grad=True)

        def func(root):
            x = root.clone()
            return F.elu(x, inplace=True)

        gradcheck(func, [v])
        gradgradcheck(func, [v])

    def test_hardtanh_inplace_gradgrad(self):
        v = torch.randn(8, requires_grad=True)

        def func(root):
            x = root.clone()
            return F.hardtanh(x, inplace=True)

        gradcheck(func, [v])
        gradgradcheck(func, [v])
5611 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
5612 def test_batchnorm_cudnn_half(self):
5614 input = torch.randint(1, 10, (2, 3, 2, 2), dtype=torch.half, device=
"cuda", requires_grad=
True)
5615 m = nn.BatchNorm2d(3).half().cuda()
5616 thnn_output = m(input)
5617 thnn_output.sum().backward()
5618 thnn_input_grad = input.grad.data.clone()
5619 self.assertEqual(thnn_output.type(), input.type())
5624 cudnn_output = m(input)
5625 cudnn_output.sum().backward()
5626 cudnn_input_grad = input.grad.data.clone()
5627 self.assertEqual(cudnn_output.type(), input.type())
5628 self.assertEqual(cudnn_output, thnn_output)
5629 self.assertAlmostEqual(cudnn_input_grad, thnn_input_grad, delta=1e-3)
    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
    @repeat_test_for_types([torch.float, torch.half])
    def test_batchnorm_large_batch(self, dtype=torch.float):
        bn = nn.BatchNorm1d(1).to('cuda', dtype)
        data = torch.rand(131072, 1, device="cuda", dtype=dtype)
        out = bn(data).sum().backward()
5638 def _test_batchnorm_update_stats(self, device="cpu", dtype=torch.float):
5639 module = nn.BatchNorm1d(3).to(device, dtype)
5641 data = torch.rand(4, 3, device=device, dtype=dtype)
5644 old_running_mean = module.running_mean.clone()
5645 old_running_var = module.running_var.clone()
5646 old_num_batches_tracked = module.num_batches_tracked.clone()
5648 self.assertNotEqual(old_running_mean, module.running_mean)
5649 self.assertNotEqual(old_running_var, module.running_var)
5650 self.assertEqual(old_num_batches_tracked + 1, module.num_batches_tracked)
5654 old_running_mean = module.running_mean.clone()
5655 old_running_var = module.running_var.clone()
5656 old_num_batches_tracked = module.num_batches_tracked.clone()
5658 self.assertEqual(old_running_mean, module.running_mean)
5659 self.assertEqual(old_running_var, module.running_var)
5660 self.assertEqual(old_num_batches_tracked, module.num_batches_tracked)
5662 def test_batchnorm_update_stats(self):
5663 self._test_batchnorm_update_stats()
5665 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
5666 def test_batchnorm_update_stats_cuda(self):
5667 self._test_batchnorm_update_stats(
"cuda", torch.float)
5670 self._test_batchnorm_update_stats(
"cuda", torch.float)
    def test_batchnorm_raises_error_if_running_mean_is_not_same_size_as_input(self):
        input = torch.rand(2, 10)
        running_var = torch.rand(10)
        wrong_sizes = [9, 11]
        for size in wrong_sizes:
            with self.assertRaises(RuntimeError):
                F.batch_norm(input, torch.rand(size), running_var)

    def test_batchnorm_raises_error_if_running_var_is_not_same_size_as_input(self):
        input = torch.rand(2, 10)
        running_mean = torch.rand(10)
        wrong_sizes = [9, 11]
        for size in wrong_sizes:
            with self.assertRaises(RuntimeError):
                F.batch_norm(input, running_mean, torch.rand(size))

    def test_batchnorm_raises_error_if_weight_is_not_same_size_as_input(self):
        input = torch.rand(2, 10)
        running_mean = torch.rand(10)
        running_var = torch.rand(10)
        wrong_sizes = [9, 11]
        for size in wrong_sizes:
            with self.assertRaises(RuntimeError):
                F.batch_norm(input, running_mean, running_var, weight=Parameter(torch.rand(size)))

    def test_batchnorm_raises_error_if_bias_is_not_same_size_as_input(self):
        input = torch.rand(2, 10)
        running_mean = torch.rand(10)
        running_var = torch.rand(10)
        wrong_sizes = [9, 11]
        for size in wrong_sizes:
            with self.assertRaises(RuntimeError):
                F.batch_norm(input, running_mean, running_var, bias=Parameter(torch.rand(size)))
    def _test_batchnorm_grad(self, device="cpu", dtype=torch.double):
        bs, n_feat, size_feat = 4, 5, 6
        input = torch.arange(bs * n_feat * size_feat, device=device,
                             requires_grad=True, dtype=dtype).view(bs, n_feat, size_feat)
        weight = torch.arange(1, n_feat + 1, device=device, requires_grad=True, dtype=dtype)
        bias = torch.arange(n_feat, device=device, requires_grad=True, dtype=dtype)
        running_mean = 1 - torch.arange(n_feat, device=device, dtype=dtype)
        running_var = 2 * torch.arange(n_feat, device=device, dtype=dtype)
        for training in [False, True]:
            _assertGradAndGradgradChecks(self, F.batch_norm, (input, running_mean, running_var, weight, bias,
                                                              training, 0.1, 0.0001))
5718 def _test_batchnorm_eval(self, device="cpu", dtype=torch.float):
5719 module = nn.BatchNorm1d(3).to(device, dtype)
5722 data = torch.rand(4, 3, device=device, dtype=dtype, requires_grad=
True)
5723 grad = torch.rand(4, 3, device=device, dtype=dtype)
5728 grad1 = data.grad.clone()
5731 if data.grad
is not None:
5732 data.grad.data.zero_()
5736 grad2 = data.grad.clone()
5737 self.assertEqual(res1, res2)
5738 self.assertEqual(grad1, grad2)
5741 module = nn.BatchNorm1d(3, track_running_stats=
False).to(device, dtype)
5743 data = torch.rand(4, 3, device=device, dtype=dtype, requires_grad=
True)
5744 grad = torch.rand(4, 3, device=device, dtype=dtype)
5749 grad1 = data.grad.clone()
5755 if data.grad
is not None:
5756 data.grad.data.zero_()
5760 grad2 = data.grad.clone()
5761 self.assertEqual(res1, res2)
5762 self.assertEqual(grad1, grad2)
5764 def _test_batchnorm_simple_average(self, test_type=torch.FloatTensor):
5765 module = nn.BatchNorm1d(3, momentum=
None).type(test_type)
5766 zeros = torch.zeros(3).type(test_type)
5767 ones = torch.ones(3).type(test_type)
5768 self.assertEqual(module.running_mean, zeros)
5769 self.assertEqual(module.running_var, ones)
5771 data1 = torch.rand(4, 3).type(test_type)
5772 data2 = torch.rand(4, 3).type(test_type)
5775 res1 = module(data1)
5776 running_mean1 = module.running_mean.clone()
5777 running_var1 = module.running_var.clone()
5778 self.assertNotEqual(running_mean1, zeros)
5779 self.assertNotEqual(running_var1, ones)
5782 module.reset_running_stats()
5783 self.assertEqual(module.running_mean, zeros)
5784 self.assertEqual(module.running_var, ones)
5787 res2 = module(data2)
5788 running_mean2 = module.running_mean.clone()
5789 running_var2 = module.running_var.clone()
5790 self.assertNotEqual(running_mean2, zeros)
5791 self.assertNotEqual(running_var2, ones)
5794 module.reset_running_stats()
5795 self.assertEqual(module.running_mean, zeros)
5796 self.assertEqual(module.running_var, ones)
5799 res3 = module(data1)
5800 res4 = module(data2)
5801 self.assertEqual(res3, res1)
5802 self.assertEqual(res4, res2)
5803 self.assertAlmostEqual(module.running_mean, (running_mean1 + running_mean2) / 2)
5804 self.assertAlmostEqual(module.running_var, (running_var1 + running_var2) / 2)
    def test_pairwise_distance(self):
        input1 = torch.randn(4, 4, requires_grad=True)
        input2 = torch.randn(4, 4, requires_grad=True)
        self.assertTrue(gradcheck(lambda x, y: F.pairwise_distance(x, y), (input1, input2)))
5812 def test_pdist(self):
5813 for device, trans
in itertools.product(device_(), [
False,
True]):
5814 inp = torch.randn(4, 5, dtype=torch.double, device=device, requires_grad=
True)
5816 inp = inp.transpose(0, 1)
5817 for p
in [0, 1, 2, 0.5, 1.5, 2.5, float(
'inf')]:
5818 self.assertTrue(gradcheck(
lambda x: F.pdist(x, p), (inp,)))
5821 """Test that grad is still valid when dist is 0""" 5822 for device
in device_():
5823 inp = torch.randn(1, 3, dtype=torch.double, device=device, requires_grad=
True).repeat([2, 1])
5824 for p
in [0, 1, 2, 0.5, 1.5, 2.5, float(
'inf')]:
5825 self.assertTrue(gradcheck(
lambda x: F.pdist(x, p), (inp,)))
5827 def test_pdist_empty_row(self):
5828 for device
in device_():
5829 inp = torch.randn(1, 3, dtype=torch.double, device=device, requires_grad=
True)
5830 self.assertTrue(gradcheck(F.pdist, (inp,)))
5832 def test_pdist_empty_col(self):
5833 for device
in device_():
5834 inp = torch.randn(4, 0, dtype=torch.double, device=device, requires_grad=
True)
5835 self.assertTrue(gradcheck(F.pdist, (inp,)))
5837 @unittest.expectedFailure
5838 def test_pdist_cpu_gradgrad_unimplemented(self):
5839 inp = torch.randn(4, 5, requires_grad=
True)
5840 gradgradcheck(F.pdist, (inp,))
5843 @unittest.expectedFailure
5844 def test_pdist_cuda_gradgrad_unimplemented(self):
5845 inp = torch.randn(4, 5, device=
'cuda', requires_grad=
True)
5846 gradgradcheck(F.pdist, (inp,))
    def test_cosine_embedding_loss_no_reduce(self):
        input1 = torch.randn(15, 10, requires_grad=True)
        input2 = torch.randn(15, 10, requires_grad=True)
        target = torch.randn(15).sign()
        self.assertTrue(gradcheck(lambda x, y, z: F.cosine_embedding_loss(
            x, y, z, reduction='none'), (input1, input2, target)))
        self.assertEqual(F.cosine_embedding_loss(input1, input2, target, reduction='none'),
                         loss_reference_fns['CosineEmbeddingLoss'](input1, input2, target, reduction='none'))

    def test_cosine_embedding_loss_margin_no_reduce(self):
        input1 = torch.randn(15, 10, requires_grad=True)
        input2 = torch.randn(15, 10, requires_grad=True)
        target = torch.randn(15).sign()
        self.assertTrue(gradcheck(lambda x, y, z: F.cosine_embedding_loss(
            x, y, z, margin=0.5, reduction='none'), (input1, input2, target)))
        self.assertEqual(F.cosine_embedding_loss(input1, input2, target, margin=0.5, reduction='none'),
                         loss_reference_fns['CosineEmbeddingLoss'](input1, input2, target,
                                                                   margin=0.5, reduction='none'))

    def test_margin_ranking_loss_no_reduce(self):
        input1 = torch.randn(15).mul_(10).requires_grad_()
        input2 = torch.randn(15).mul_(10).requires_grad_()
        target = torch.randn(15).sign()
        self.assertTrue(gradcheck(lambda x, y, z: F.margin_ranking_loss(
            x, y, z, reduction='none'), (input1, input2, target)))
        self.assertEqual(F.margin_ranking_loss(input1, input2, target, reduction='none'),
                         loss_reference_fns['MarginRankingLoss'](input1, input2, target, reduction='none'))

    def test_margin_ranking_loss_margin_no_reduce(self):
        input1 = torch.randn(15).mul_(10).requires_grad_()
        input2 = torch.randn(15).mul_(10).requires_grad_()
        target = torch.randn(15).sign()
        self.assertTrue(gradcheck(lambda x, y, z: F.margin_ranking_loss(
            x, y, z, margin=0.5, reduction='none'), (input1, input2, target)))
        self.assertEqual(F.margin_ranking_loss(input1, input2, target, margin=0.5, reduction='none'),
                         loss_reference_fns['MarginRankingLoss'](input1, input2, target, margin=0.5, reduction='none'))
5885 def test_triplet_margin_loss(self):
5886 input1 = torch.randn(5, 10, requires_grad=
True)
5887 input2 = torch.randn(5, 10, requires_grad=
True)
5888 input3 = torch.randn(5, 10, requires_grad=
True)
5889 self.assertTrue(gradcheck(
lambda x1, x2, x3: F.triplet_margin_loss(
5890 x1, x2, x3), (input1, input2, input3)))
5891 self.
assertEqual(F.triplet_margin_loss(input1, input2, input3),
5892 loss_reference_fns[
'TripletMarginLoss'](input1, input2, input3))
5894 def test_triplet_margin_loss_swap(self):
5895 input1 = torch.randn(5, 10, requires_grad=
True)
5896 input2 = torch.randn(5, 10, requires_grad=
True)
5897 input3 = torch.randn(5, 10, requires_grad=
True)
5898 self.assertTrue(gradcheck(
lambda x1, x2, x3: F.triplet_margin_loss(
5899 x1, x2, x3, swap=
True), (input1, input2, input3)))
5900 self.
assertEqual(F.triplet_margin_loss(input1, input2, input3, swap=
True),
5901 loss_reference_fns[
'TripletMarginLoss'](input1, input2, input3, swap=
True))
5903 def test_triplet_margin_loss_no_reduce(self):
5904 input1 = torch.randn(5, 10, requires_grad=
True)
5905 input2 = torch.randn(5, 10, requires_grad=
True)
5906 input3 = torch.randn(5, 10, requires_grad=
True)
5907 self.assertTrue(gradcheck(
lambda x1, x2, x3: F.triplet_margin_loss(
5908 x1, x2, x3, reduction=
'none'), (input1, input2, input3)))
5909 self.
assertEqual(F.triplet_margin_loss(input1, input2, input3, reduction=
'none'),
5910 loss_reference_fns[
'TripletMarginLoss'](input1, input2, input3, reduction=
'none'))
5912 def test_triplet_margin_loss_swap_no_reduce(self):
5913 input1 = torch.randn(5, 10, requires_grad=
True)
5914 input2 = torch.randn(5, 10, requires_grad=
True)
5915 input3 = torch.randn(5, 10, requires_grad=
True)
5916 self.assertTrue(gradcheck(
lambda x1, x2, x3: F.triplet_margin_loss(
5917 x1, x2, x3, swap=
True, reduction=
'none'), (input1, input2, input3)))
5918 self.
assertEqual(F.triplet_margin_loss(input1, input2, input3, swap=
True, reduction=
'none'),
5919 loss_reference_fns[
'TripletMarginLoss'](input1, input2, input3, swap=
True, reduction=
'none'))
    def test_pointwise_loss_target_grad_none_reduction(self):
        i = torch.randn(5, 10)
        t = torch.randn(5, 10, requires_grad=True)
        self.assertEqual(F.mse_loss(i, t, reduction='none').size(), t.size())
        self.assertEqual(F.l1_loss(i, t, reduction='none').size(), t.size())

    def test_pointwise_loss_broadcast(self):
        losses = {
            'mse_loss': lambda x, y, r: F.mse_loss(x, y, reduction=r),
            'l1_loss': lambda x, y, r: F.l1_loss(x, y, reduction=r),
            'smooth_l1_loss': lambda x, y, r: F.smooth_l1_loss(x, y, reduction=r),
        }

        input = torch.randn(2, 1, requires_grad=True)
        for _name, fn in losses.items():
            for requires_grad in [True, False]:
                target = torch.randn(2, 10, requires_grad=requires_grad)
                for reduction in ['none', 'mean', 'sum']:
                    l = fn(input, target, reduction)
                    if reduction == 'none':
                        self.assertEqual(l.size(), target.size())
                    self.assertTrue(gradcheck(fn, (input, target, reduction)))
    def test_cosine_similarity(self):
        input1 = torch.randn(4, 4, requires_grad=True)
        input2 = torch.randn(4, 4, requires_grad=True)
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y), (input1, input2)))

        input1 = torch.randn(4, 5, 6, requires_grad=True)
        input2 = torch.randn(4, 5, 6, requires_grad=True)
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=0), (input1, input2)))
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2)))

        input1 = torch.randn((), requires_grad=True)
        input2 = torch.randn((), requires_grad=True)
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=0), (input1, input2)))
        self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2)))

        # check that the output has the expected shape
        input_size = (1, 3, 2, 1)
        expected_size = (1, 2, 1)
        input1 = torch.randn(input_size, requires_grad=True)
        input2 = torch.randn(input_size, requires_grad=True)
        self.assertEqual(F.cosine_similarity(input1, input2, dim=1).size(), expected_size)

        # check numerical precision: identical vectors should not exceed similarity 1
        vv1 = torch.tensor(list([float(i) for i in range(84)])).unsqueeze(0)
        vv2 = torch.tensor(list([float(i) for i in range(84)])).unsqueeze(0)
        out = F.cosine_similarity(vv1, vv2)
        self.assertLessEqual(out, 1.0)
5973 def test_grid_sample_error_checking(self):
5974 input = torch.empty(1, 1, 2, 2)
5975 grid = torch.empty(1, 1, 1, 2)
5978 F.grid_sample(input, grid)
5981 F.grid_sample(input, grid, mode=
'garbage')
5984 F.grid_sample(input, grid, padding_mode=
'garbage')
5986 with self.
assertRaisesRegex(RuntimeError,
"expected input and grid to have same dtype"):
5987 F.grid_sample(input.float(), grid.double())
5990 F.grid_sample(input[0], grid)
5992 with self.
assertRaisesRegex(RuntimeError,
"grid with same number of dimensions"):
5993 F.grid_sample(input, torch.empty(1, 1, 1, 1, 3))
5995 with self.
assertRaisesRegex(RuntimeError,
"expected grid and input to have same batch size"):
5996 F.grid_sample(input, torch.empty(2, 1, 1, 2))
5998 with self.
assertRaisesRegex(RuntimeError,
"expected grid to have size 2 in last dimension"):
5999 F.grid_sample(input, torch.empty(1, 1, 1, 3))
6001 with self.
assertRaisesRegex(RuntimeError,
"expected input to have non-empty spatial dimensions"):
6002 F.grid_sample(torch.empty(1, 1, 0, 2), grid)
6005 with self.
assertRaisesRegex(RuntimeError,
"expected input and grid to be on same device"):
6006 F.grid_sample(input.cuda(), grid)
6008 def test_grid_sample(self):
6009 def test(N, C, H, W, mode, padding_mode):
6010 def test_shape(N, C, IH, IW, H, W, mode, padding_mode):
6011 for grid_dim_contig_order
in [(0, 1, 2, 3), (0, 3, 1, 2), (3, 0, 1, 2), (0, 2, 1, 3)]:
6018 grid_shape = [N, H, W, 2]
6019 grid_init_shape = [grid_shape[d]
for d
in grid_dim_contig_order]
6020 grid_fwd_permute = [
None,
None,
None,
None]
6021 for i, d
in enumerate(grid_dim_contig_order):
6022 grid_fwd_permute[d] = i
6024 def get_grid(device='cpu', data=None):
6025 if data
is not None:
6026 assert list(data.shape) == grid_shape
6027 data = data.permute(grid_dim_contig_order).to(device)
6029 data = torch.randn(grid_init_shape, device=device)
6030 grid = data.permute(grid_fwd_permute)
6031 assert grid.permute(grid_dim_contig_order).is_contiguous()
6034 input_cpu = torch.randn(C, N, IH, IW).transpose(0, 1).requires_grad_()
6035 grid_cpu = get_grid().requires_grad_()
6036 out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6037 self.assertTrue(out_cpu.size() == torch.Size([N, C, H, W]))
6039 gradients = torch.randn_like(out_cpu)
6040 out_cpu.backward(gradients)
6043 input_cuda = input_cpu.detach().transpose(0, 1).cuda().transpose(0, 1).requires_grad_()
6044 grid_cuda = get_grid(
'cuda', grid_cpu.detach()).requires_grad_()
6045 out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6048 out_cuda.backward(gradients.cuda())
6050 self.
assertEqual(grid_cpu.grad, grid_cuda.grad, prec=5e-5)
6053 base_input = torch.randn(N, C, 1, IW)
6054 input_cpu = base_input.expand_as(input_cuda).requires_grad_()
6055 out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6057 input_cuda = base_input.cuda().expand_as(input_cuda).requires_grad_()
6058 out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6062 test_shape(N, C, H, W, H, W, mode, padding_mode)
6065 N = random.randint(2, 8)
6066 C = random.randint(2, 8)
6067 IH = random.randint(2, 8)
6068 IW = random.randint(2, 8)
6069 H = random.randint(IH + 1, 12)
6070 W = random.randint(IW + 1, 12)
6071 test_shape(N, C, IH, IW, H, W, mode, padding_mode)
6074 N = random.randint(2, 8)
6075 C = random.randint(2, 8)
6076 IH = random.randint(2, 8)
6077 IW = random.randint(2, 8)
6078 H = random.randint(2, IH)
6079 W = random.randint(2, IW)
6080 test_shape(N, C, IH, IW, H, W, mode, padding_mode)
6083 N = random.randint(2, 8)
6084 C = random.randint(2, 8)
6087 H = random.randint(2, 5)
6088 W = random.randint(2, 5)
6089 test_shape(N, C, IH, IW, H, W, mode, padding_mode)
6092 N = random.randint(2, 8)
6093 C = random.randint(2, 8)
6094 IH = random.randint(2, 8)
6095 IW = random.randint(2, 8)
6096 W = random.randint(3, IW + 2)
6097 test_shape(N, C, IH, IW, 0, W, mode, padding_mode)
6100 N = random.randint(2, 8)
6101 IH = random.randint(2, 8)
6102 IW = random.randint(2, 8)
6103 H = random.randint(3, IH + 2)
6104 W = random.randint(3, IW + 2)
6105 test_shape(N, 0, IH, IW, H, W, mode, padding_mode)
6108 C = random.randint(2, 8)
6109 IH = random.randint(2, 8)
6110 IW = random.randint(2, 8)
6111 H = random.randint(3, IH + 2)
6112 W = random.randint(3, IW + 2)
6113 test_shape(0, C, IH, IW, H, W, mode, padding_mode)
        for mode in ('bilinear', 'nearest'):
            for padding_mode in ('zeros', 'border', 'reflection'):
                # test known input on CPU
                input = torch.arange(1., 11).view(1, 1, 2, 5)
                grid = torch.tensor(
                    [[[-0.9, -4.1], [0, 0.2000], [1, -1], [-0.333, 1e-10], [0.5, 1.0]],
                     [[-1.0, -0.5], [0, 0.3333], [1, -1], [-0.200, 1e-10], [1.5, 0.5]]]).view(1, 2, 5, 2)
                if mode == 'bilinear':
                    if padding_mode == 'zeros':
                        groundtruth = torch.tensor(
                            [[0.0000, 6.0000000000, 5.0000, 4.8340, 9.0000],
                             [2.2500, 6.3332500450, 5.0000, 5.1000, 0.0000]]).view(1, 1, 2, 5)
                    elif padding_mode == 'border':
                        groundtruth = torch.tensor(
                            [[1.2000, 6.0000000000, 5.0000, 4.8340, 9.0000],
                             [2.2500, 6.3332500450, 5.0000, 5.1000, 8.7500]]).view(1, 1, 2, 5)
                    elif padding_mode == 'reflection':
                        groundtruth = torch.tensor(
                            [[3.4500, 6.0000000000, 5.0000, 4.8340, 9.0000],
                             [2.2500, 6.3332500450, 5.0000, 5.1000, 7.7500]]).view(1, 1, 2, 5)
                    else:
                        assert False, "missing groundtruth test for padding mode '{}'".format(padding_mode)
                elif mode == 'nearest':
                    if padding_mode == 'zeros':
                        groundtruth = torch.tensor(
                            [[0., 8., 5., 7., 9.],
                             [1., 8., 5., 8., 0.]]).view(1, 1, 2, 5)
                    elif padding_mode == 'border':
                        groundtruth = torch.tensor(
                            [[1., 8., 5., 7., 9.],
                             [1., 8., 5., 8., 10.]]).view(1, 1, 2, 5)
                    elif padding_mode == 'reflection':
                        groundtruth = torch.tensor(
                            [[1., 8., 5., 7., 9.],
                             [1., 8., 5., 8., 9.]]).view(1, 1, 2, 5)
                    else:
                        assert False, "missing groundtruth test for padding mode '{}'".format(padding_mode)
                else:
                    assert False, "missing groundtruth test for interpolation mode '{}'".format(mode)
                output = F.grid_sample(input, grid, mode=mode, padding_mode=padding_mode)
                self.assertEqual(output, groundtruth,
                                 "groundtruth comparison failed for mode={}, "
                                 "padding_mode={}".format(mode, padding_mode))

                # do gradcheck
                N = random.randint(2, 8)
                C = random.randint(2, 6)
                H = random.randint(2, 8)
                W = random.randint(2, 8)
                input = torch.randn(N, C, H, W, requires_grad=True)
                grid = torch.randn(N, H, W, 2, requires_grad=True)
                self.assertTrue(gradcheck(
                    lambda inp, grid: F.grid_sample(inp, grid, mode=mode, padding_mode=padding_mode),
                    (input, grid)))

                test(N, C, H, W, mode, padding_mode)

                with cudnn.flags(enabled=False):
                    test(N, C, H, W, mode, padding_mode)
6175 def test_grid_sample_3d(self):
6176 def test(N, C, D, H, W, mode, padding_mode):
6177 def test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode):
6178 input_cpu = torch.randn(C, N, ID, IH, IW).transpose(0, 1).requires_grad_()
6179 grid_cpu = torch.randn(D, N, H, W, 3).transpose(0, 1).requires_grad_()
6180 out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6181 self.assertTrue(out_cpu.size() == torch.Size([N, C, D, H, W]))
6183 gradients = torch.randn_like(out_cpu)
6184 out_cpu.backward(gradients)
6187 input_cuda = input_cpu.detach().transpose(0, 1).cuda().transpose(0, 1).requires_grad_()
6188 grid_cuda = grid_cpu.detach().transpose(0, 1).cuda().transpose(0, 1).requires_grad_()
6189 out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6192 out_cuda.backward(gradients.cuda())
6194 self.
assertEqual(grid_cpu.grad, grid_cuda.grad, prec=5e-5)
6197 base_input = torch.randn(N, C, 1, IH, IW)
6198 input_cpu = base_input.expand_as(input_cuda).requires_grad_()
6199 grid_cpu = torch.randn(N, D, H, W, 3, requires_grad=
True)
6200 out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6202 input_cuda = base_input.cuda().expand_as(input_cuda).requires_grad_()
6203 grid_cuda = grid_cpu.detach().cuda().requires_grad_()
6204 out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6208 test_shape(N, C, D, H, W, D, H, W, mode, padding_mode)
6211 N = random.randint(2, 7)
6212 C = random.randint(2, 5)
6213 ID = random.randint(2, 7)
6214 IH = random.randint(2, 7)
6215 IW = random.randint(2, 7)
6216 D = random.randint(ID + 1, 10)
6217 H = random.randint(IH + 1, 10)
6218 W = random.randint(IW + 1, 10)
6219 test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode)
6222 N = random.randint(2, 7)
6223 C = random.randint(2, 5)
6224 ID = random.randint(2, 7)
6225 IH = random.randint(2, 7)
6226 IW = random.randint(2, 7)
6227 D = random.randint(2, ID)
6228 H = random.randint(2, IH)
6229 W = random.randint(2, IW)
6230 test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode)
6233 N = random.randint(2, 7)
6234 C = random.randint(2, 7)
6238 H = random.randint(2, 5)
6239 W = random.randint(2, 5)
6240 test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode)
6243 N = random.randint(2, 7)
6244 C = random.randint(2, 5)
6245 ID = random.randint(2, 7)
6246 IH = random.randint(2, 7)
6247 IW = random.randint(2, 7)
6248 D = random.randint(3, ID + 2)
6249 W = random.randint(3, IW + 2)
6250 test_shape(N, C, ID, IH, IW, D, 0, W, mode, padding_mode)
6253 N = random.randint(2, 7)
6254 ID = random.randint(2, 5)
6255 IH = random.randint(2, 7)
6256 IW = random.randint(2, 7)
6257 D = random.randint(3, ID + 2)
6258 H = random.randint(3, IH + 2)
6259 W = random.randint(3, IW + 2)
6260 test_shape(N, 0, ID, IH, IW, D, H, W, mode, padding_mode)
6263 C = random.randint(2, 5)
6264 ID = random.randint(2, 7)
6265 IH = random.randint(2, 7)
6266 IW = random.randint(2, 7)
6267 D = random.randint(3, ID + 2)
6268 H = random.randint(3, IH + 2)
6269 W = random.randint(3, IW + 2)
6270 test_shape(0, C, ID, IH, IW, D, H, W, mode, padding_mode)
6272 for mode
in (
'bilinear',
'nearest'):
6273 for padding_mode
in (
'zeros',
'border',
'reflection'):
6275 N = random.randint(2, 5)
6276 C = random.randint(2, 4)
6277 D = random.randint(2, 5)
6278 H = random.randint(2, 5)
6279 W = random.randint(2, 5)
6280 input = torch.randn(N, C, D, H, W, requires_grad=
True)
6281 grid = torch.randn(N, D, H, W, 3, requires_grad=
True)
6282 self.assertTrue(gradcheck(
6283 lambda inp, grid: F.grid_sample(inp, grid, mode=mode, padding_mode=padding_mode),
6286 test(N, C, D, H, W, mode, padding_mode)
    def test_affine_grid(self):
        # test known input on CPU
        input = torch.arange(1., 7).view(1, 2, 3)
        output = F.affine_grid(input, torch.Size([1, 1, 2, 2]))
        groundtruth = torch.Tensor(
            [[[0, -3], [2, 5]], [[4, 7], [6, 15]]]).view(1, 2, 2, 2)
        self.assertEqual(output, groundtruth)

        # do gradcheck
        N = random.randint(1, 8)
        C = random.randint(1, 8)
        H = random.randint(1, 8)
        W = random.randint(1, 8)
        sz = torch.Size([N, C, H, W])
        inp = torch.randn(N, 2, 3, requires_grad=True)
        self.assertTrue(gradcheck(lambda inp: F.affine_grid(inp, sz), (inp,)))

        # compare CPU against CUDA
        if TEST_CUDA:
            input_cpu = torch.randn(N, 2, 3, requires_grad=True)
            out_cpu = F.affine_grid(input_cpu, sz)
            gradients = torch.randn(out_cpu.size())
            out_cpu.backward(gradients)
            input_gpu = input_cpu.detach().cuda().requires_grad_()
            out_cuda = F.affine_grid(input_gpu, sz)
            out_cuda.backward(gradients.cuda())
            self.assertEqual(out_cpu, out_cuda)
            self.assertEqual(input_cpu.grad, input_gpu.grad)
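    # Illustrative sketch (our addition): affine_grid produces the sampling grid that
    # grid_sample consumes, so an identity 2x3 affine matrix should reproduce the
    # input when the requested output size matches the input size.
    def _sketch_affine_grid_identity(self):
        inp = torch.randn(1, 1, 4, 4)
        theta = torch.tensor([[[1., 0., 0.],
                               [0., 1., 0.]]])
        grid = F.affine_grid(theta, torch.Size([1, 1, 4, 4]))
        out = F.grid_sample(inp, grid)
        self.assertEqual(out, inp, prec=1e-5)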
    @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                     "Scipy v1.0 and/or numpy not found")
    def test_affine_2d_rotate0(self):
6323 for device
in device_():
6324 input_size = [1, 1, 3, 3]
6325 input_ary = np.array(np.random.random(input_size), dtype=np.float32)
6326 output_size = [1, 1, 5, 5]
6329 transform_tensor, transform_ary, offset = \
6330 _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6332 scipy_ary = scipy.ndimage.affine_transform(
6336 output_shape=output_size[2:],
6343 torch.Size(output_size)
6349 padding_mode=
'border' 6352 assert np.abs(scipy_ary.mean() - gridsample_ary.mean()) < 1e-6
6353 assert np.abs(scipy_ary - gridsample_ary).max() < 1e-6
    @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                     "Scipy v1.0 and/or numpy not found")
    def test_affine_2d_rotate90(self):
6361 for device, input_size2dsq, output_size2dsq
in \
6362 itertools.product(device_(), input_size2dsq_(), output_size2dsq_()):
6363 input_size = input_size2dsq
6364 input_ary = np.array(np.random.random(input_size), dtype=np.float32)
6365 output_size = output_size2dsq
6366 angle_rad = 0.25 * math.pi * 2
6368 transform_tensor, transform_ary, offset = \
6369 _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6371 scipy_ary = scipy.ndimage.affine_transform(
6375 output_shape=output_size[2:],
6380 if input_size2dsq == output_size2dsq:
6381 assert np.abs(scipy_ary.mean() - input_ary.mean()) < 1e-6
6382 assert np.abs(scipy_ary[0, 0] - input_ary[0, 0, 0, -1]).max() < 1e-6
6383 assert np.abs(scipy_ary[0, -1] - input_ary[0, 0, -1, -1]).max() < 1e-6
6384 assert np.abs(scipy_ary[-1, -1] - input_ary[0, 0, -1, 0]).max() < 1e-6
6385 assert np.abs(scipy_ary[-1, 0] - input_ary[0, 0, 0, 0]).max() < 1e-6
6389 torch.Size(output_size)
6395 padding_mode=
'border' 6398 assert np.abs(scipy_ary.mean() - gridsample_ary.mean()) < 1e-6
6399 assert np.abs(scipy_ary - gridsample_ary).max() < 1e-6
    @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                     "Scipy v1.0 and/or numpy not found")
    def test_affine_2d_rotate45(self):
6407 for device
in device_():
6408 input_size = [1, 1, 3, 3]
6409 input_ary = np.array(np.zeros(input_size), dtype=np.float32)
6410 input_ary[0, 0, 0, :] = 0.5
6411 input_ary[0, 0, 2, 2] = 1.0
6412 output_size = [1, 1, 3, 3]
6413 angle_rad = 0.125 * math.pi * 2
6415 transform_tensor, transform_ary, offset = \
6416 _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6418 scipy_ary = scipy.ndimage.affine_transform(
6422 output_shape=output_size[2:],
6429 torch.Size(output_size)
6435 padding_mode=
'border' 6438 assert np.abs(scipy_ary - gridsample_ary).max() < 1e-6
    @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                     "Scipy v1.0 and/or numpy not found")
    def test_affine_2d_rotateRandom(self):
6446 for device, angle_rad, input_size2d, output_size2d
in \
6447 itertools.product(device_(), angle_rad_(), input_size2d_(), output_size2d_()):
6449 input_size = input_size2d
6450 input_ary = np.array(np.random.random(input_size), dtype=np.float32).round(3)
6451 output_size = output_size2d
6453 input_ary[0, 0, 0, 0] = 2
6454 input_ary[0, 0, 0, -1] = 4
6455 input_ary[0, 0, -1, 0] = 6
6456 input_ary[0, 0, -1, -1] = 8
6458 transform_tensor, transform_ary, grid_ary = \
6459 _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6461 scipy_ary = scipy.ndimage.affine_transform(
6464 output_shape=output_size[2:],
6471 torch.Size(output_size)
6477 padding_mode=
'border' 6480 affine_tensor = affine_tensor.to(
'cpu')
6482 for r
in range(affine_tensor.size(1)):
6483 for c
in range(affine_tensor.size(2)):
6484 grid_out = np.dot(grid_ary, [r, c, 1])
6485 assert np.allclose(affine_tensor[0, r, c], grid_out[:2], atol=1e-5)
6487 assert np.abs(scipy_ary - gridsample_ary).max() < 1e-5
    @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
                     "Scipy v1.0 and/or numpy not found")
    def test_affine_3d_rotateRandom(self):
6495 for device, angle_rad, axis_vector, input_size3d, output_size3d
in \
6496 itertools.product(device_(), angle_rad_(), axis_vector_(), input_size3d_(), output_size3d_()):
6497 input_size = input_size3d
6498 input_ary = np.array(np.random.random(input_size), dtype=np.float32)
6499 output_size = output_size3d
6501 input_ary[0, 0, 0, 0, 0] = 2
6502 input_ary[0, 0, 0, 0, -1] = 3
6503 input_ary[0, 0, 0, -1, 0] = 4
6504 input_ary[0, 0, 0, -1, -1] = 5
6505 input_ary[0, 0, -1, 0, 0] = 6
6506 input_ary[0, 0, -1, 0, -1] = 7
6507 input_ary[0, 0, -1, -1, 0] = 8
6508 input_ary[0, 0, -1, -1, -1] = 9
6510 transform_tensor, transform_ary, grid_ary = \
6511 _buildEquivalentAffineTransforms3d(device, input_size, output_size, angle_rad, axis_vector)
6513 scipy_ary = scipy.ndimage.affine_transform(
6516 output_shape=output_size[2:],
6523 torch.Size(output_size)
6529 padding_mode=
'border' 6532 affine_tensor = affine_tensor.to(
'cpu')
6534 for i
in range(affine_tensor.size(1)):
6535 for r
in range(affine_tensor.size(2)):
6536 for c
in range(affine_tensor.size(3)):
6537 grid_out = np.dot(grid_ary, [i, r, c, 1])
6538 assert np.allclose(affine_tensor[0, i, r, c], grid_out[:3], atol=1e-5)
6540 assert np.abs(scipy_ary - gridsample_ary).max() < 1e-5
    def test_upsamplingNearest1d(self):
        m = nn.Upsample(size=4, mode='nearest')
        in_t = torch.ones(1, 1, 2)
        with warnings.catch_warnings(record=True) as w:
            out_t = m(in_t)
        self.assertEqual(torch.ones(1, 1, 4), out_t.data)

        input = torch.randn(1, 1, 2, requires_grad=True)
        gradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
    def test_upsamplingLinear1d(self):
        for align_corners in [True, False]:
            kwargs = dict(mode='linear', align_corners=align_corners)

            # test float scale factor up- and downsampling
            for scale_factor in [0.5, 1.5, 2]:
                m = nn.Upsample(scale_factor=scale_factor, **kwargs)
                in_t = torch.ones(1, 1, 2)
                out_size = int(math.floor(in_t.shape[-1] * scale_factor))
                with warnings.catch_warnings(record=True) as w:
                    out_t = m(in_t)
                self.assertEqual(torch.ones(1, 1, out_size), out_t.data)

                input = torch.randn(1, 1, 2, requires_grad=True)
                gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), (input,))
    def test_upsamplingLinear1d_spatial_invariance(self):
        m = nn.Upsample(scale_factor=3, mode='linear', align_corners=False)
        in_t_9 = torch.zeros(1, 1, 9)
        in_t_9[:, :, :4].normal_()
        with warnings.catch_warnings(record=True) as w:
            out_t_9 = m(in_t_9)
            out_t_5 = m(in_t_9[:, :, :5])
        self.assertEqual(out_t_9[:, :, :15], out_t_5)
    def test_upsamplingNearest2d(self):
        m = nn.Upsample(size=4, mode='nearest')
        in_t = torch.ones(1, 1, 2, 2)
        with warnings.catch_warnings(record=True) as w:
            out_t = m(Variable(in_t))
        self.assertEqual(torch.ones(1, 1, 4, 4), out_t.data)

        input = torch.randn(1, 1, 2, 2, requires_grad=True)
        self.assertEqual(
            F.interpolate(input, 4, mode='nearest'),
            F.interpolate(input, scale_factor=2, mode='nearest'))
        gradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
        gradgradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
    def test_upsamplingBilinear2d(self):
        for align_corners in [True, False]:
            kwargs = dict(mode='bilinear', align_corners=align_corners)

            # test float scale factor up- and downsampling
            for scale_factor in [0.5, 1.5, 2]:
                m = nn.Upsample(scale_factor=scale_factor, **kwargs)
                in_t = torch.ones(1, 1, 2, 2)
                out_size = int(math.floor(in_t.shape[-1] * scale_factor))
                with warnings.catch_warnings(record=True) as w:
                    out_t = m(in_t)
                self.assertEqual(torch.ones(1, 1, out_size, out_size), out_t.data)

                input = torch.randn(1, 1, 2, 2, requires_grad=True)
                gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
    def test_upsamplingBicubic2d(self):
        # test output against a known input
        in_t = torch.arange(4).view(1, 1, 2, 2).type(torch.FloatTensor)
        expected_out_t = torch.Tensor(
            [[[[0.00000, 0.31481, 0.68519, 1.00000],
               [0.62963, 0.94444, 1.31481, 1.62963],
               [1.37037, 1.68518, 2.05556, 2.37037],
               [2.00000, 2.31481, 2.68519, 3.00000]]]])
        out_t = F.interpolate(in_t, scale_factor=2, mode='bicubic', align_corners=True)
        torch.set_printoptions(precision=5)
        self.assertEqual(out_t, expected_out_t)

        for align_corners in [True, False]:
            kwargs = dict(mode='bicubic', align_corners=align_corners)

            # test float scale factor up- and downsampling
            for scale_factor in [0.5, 1.5, 2]:
                in_t = torch.ones(2, 2, 2, 2)
                out_t = F.interpolate(in_t, scale_factor=scale_factor, **kwargs)
                out_size = int(math.floor(in_t.shape[-1] * scale_factor))
                self.assertEqual(torch.ones(2, 2, out_size, out_size), out_t.data)

                input = torch.randn(2, 2, 2, 2, requires_grad=True)
                gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
    def test_upsamplingBilinear2d_spatial_invariance(self):
        m = nn.Upsample(scale_factor=3, mode='bilinear', align_corners=False)
        in_t_9 = torch.zeros(1, 1, 9, 9)
        in_t_9[:, :, :4, :4].normal_()
        with warnings.catch_warnings(record=True) as w:
            out_t_9 = m(in_t_9)
            out_t_5 = m(in_t_9[:, :, :5, :5])
        self.assertEqual(out_t_9[:, :, :15, :15], out_t_5)
    def test_upsamplingNearest3d(self):
        m = nn.Upsample(size=4, mode='nearest')
        in_t = torch.ones(1, 1, 2, 2, 2)
        with warnings.catch_warnings(record=True) as w:
            out_t = m(Variable(in_t))
        self.assertEqual(torch.ones(1, 1, 4, 4, 4), out_t.data)

        input = torch.randn(1, 1, 2, 2, 2, requires_grad=True)
        gradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
    def test_upsamplingTrilinear3d(self):
        for align_corners in [True, False]:
            kwargs = dict(mode='trilinear', align_corners=align_corners)

            # test float scale factor up- and downsampling
            for scale_factor in [0.5, 1.5, 2]:
                m = nn.Upsample(scale_factor=scale_factor, **kwargs)
                in_t = torch.ones(1, 1, 2, 2, 2)
                out_size = int(math.floor(in_t.shape[-1] * scale_factor))
                with warnings.catch_warnings(record=True) as w:
                    out_t = m(in_t)
                self.assertEqual(torch.ones(1, 1, out_size, out_size, out_size), out_t.data)

                input = torch.randn(1, 1, 2, 2, 2, requires_grad=True)
                self.assertEqual(
                    F.interpolate(input, (out_size, out_size, out_size), **kwargs),
                    F.interpolate(input, scale_factor=scale_factor, **kwargs))
                gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
                gradgradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
    def test_upsamplingTrilinear3d_spatial_invariance(self):
        m = nn.Upsample(scale_factor=3, mode='trilinear', align_corners=False)
        in_t_9 = torch.zeros(1, 1, 9, 9, 9)
        in_t_9[:, :, :4, :4, :4].normal_()
        with warnings.catch_warnings(record=True) as w:
            out_t_9 = m(in_t_9)
            out_t_5 = m(in_t_9[:, :, :5, :5, :5])
        self.assertEqual(out_t_9[:, :, :15, :15, :15], out_t_5)
    def test_interpolate(self):
        def _test_interpolate_helper(in_t, scale_factor, layer):
            out_size = int(math.floor(in_t.shape[-1] * scale_factor))
            dim = len(in_t.shape) - 2
            out_shape = [1, 1] + [out_size] * dim
            with warnings.catch_warnings(record=True) as w:
                out_t = layer(in_t)
            self.assertEqual(torch.ones(out_shape), out_t.data)

            self.assertEqual(
                F.interpolate(in_t, (out_size,) * dim, **kwargs),
                F.interpolate(in_t, scale_factor=scale_factor, **kwargs))
            gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [in_t])
            gradgradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [in_t])

        def _make_input(dim):
            size = [1, 1]
            size += [2] * dim
            return torch.ones(size, requires_grad=True)

        device_list = ['cpu']
        if TEST_CUDA:
            device_list.append('cuda')

        for device in device_list:
            for scale_factor in [0.5, 1.5, 2]:
                for mode in ['nearest', 'area']:
                    kwargs = dict(mode=mode)
                    m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
                    for input in [_make_input(1), _make_input(2), _make_input(3)]:
                        _test_interpolate_helper(input, scale_factor, m)

                for align_corners in [True, False]:
                    kwargs = dict(mode='linear', align_corners=align_corners)
                    m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
                    _test_interpolate_helper(_make_input(1), scale_factor, m)

                    kwargs = dict(mode='bilinear', align_corners=align_corners)
                    m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
                    _test_interpolate_helper(_make_input(2), scale_factor, m)

                    kwargs = dict(mode='bicubic', align_corners=align_corners)

                    def m(t):
                        return F.interpolate(t, scale_factor=scale_factor, **kwargs).to(device)
                    _test_interpolate_helper(_make_input(2), scale_factor, m)

                    kwargs = dict(mode='trilinear', align_corners=align_corners)
                    m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
                    _test_interpolate_helper(_make_input(3), scale_factor, m)
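    # Illustrative sketch (our addition): the upsampling tests above lean on the fact
    # that an explicit size and a scale_factor are interchangeable whenever
    # size == floor(input_size * scale_factor).
    def _sketch_interpolate_size_vs_scale(self):
        x = torch.randn(1, 1, 5)
        for scale_factor in [0.5, 1.5, 2]:
            out_size = int(math.floor(x.shape[-1] * scale_factor))
            self.assertEqual(F.interpolate(x, size=out_size, mode='nearest'),
                             F.interpolate(x, scale_factor=scale_factor, mode='nearest'))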
    def test_linear_broadcasting(self):
        m = nn.Linear(5, 8)
        inp = torch.randn(2, 3, 5)
        expected = m(inp.view(6, 5)).view(2, 3, 8)
        self.assertEqual(expected, m(inp))
    def test_bilinear(self):
        module = nn.Bilinear(10, 10, 8)
        input1 = torch.randn(4, 10, requires_grad=True)
        input2 = torch.randn(4, 10, requires_grad=True)
        grad_output = torch.randn(4, 8)

        res = module(input1, input2)
        expected = (torch.einsum("bi,kij,bj->bk", input1, module.weight, input2) +
                    module.bias)
        self.assertEqual(res, expected)
        grads = torch.autograd.grad(res, [module.weight, module.bias, input1, input2], grad_output)
        grads_expected = torch.autograd.grad(expected, [module.weight, module.bias, input1, input2], grad_output)
        for g, ge in zip(grads, grads_expected):
            self.assertEqual(g, ge)
    def test_bilinear_no_bias(self):
        module = nn.Bilinear(10, 10, 8)
        module_no_bias = nn.Bilinear(10, 10, 8, False)

        module.bias.data.zero_()
        module.weight.data.copy_(module_no_bias.weight)

        input1 = torch.randn(4, 10, requires_grad=True)
        input2 = torch.randn(4, 10, requires_grad=True)
        grad_output = torch.randn(4, 8)

        def run(net):
            input1.grad = input2.grad = None
            output = net(input1, input2)
            output.backward(grad_output)

            return output.data, input1.grad.data, input2.grad.data

        out, g1, g2 = run(module)
        out_nb, g1_nb, g2_nb = run(module_no_bias)

        self.assertEqual(out, out_nb)
        self.assertEqual(g1, g1_nb)
        self.assertEqual(g2, g2_nb)

        _assertGradAndGradgradChecks(self,
                                     lambda x1, x2: F.bilinear(x1, x2, module_no_bias.weight, module_no_bias.bias),
                                     (input1, input2))
    def test_bilinear_broadcasting(self):
        m = nn.Bilinear(5, 6, 8)
        input1 = torch.randn(2, 3, 5)
        input2 = torch.randn(2, 3, 6)
        expected = m(input1.view(6, 5), input2.view(6, 6)).view(2, 3, 8)
        self.assertEqual(expected, m(input1, input2))
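    # Illustrative sketch (our addition): restates the contraction nn.Bilinear computes,
    # i.e. out[b, k] = input1[b] @ weight[k] @ input2[b] + bias[k], which is the same
    # einsum used in test_bilinear above.
    def _sketch_bilinear_definition(self):
        m = nn.Bilinear(5, 6, 8)
        x1 = torch.randn(4, 5)
        x2 = torch.randn(4, 6)
        expected = torch.einsum('bi,kij,bj->bk', x1, m.weight, x2) + m.bias
        self.assertEqual(m(x1, x2), expected, prec=1e-4)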
    def test_conv_tbc(self):
        inp = torch.randn(9, 4, 5, requires_grad=True)
        weight = torch.randn(3, 5, 6, requires_grad=True)
        bias = torch.randn(6, requires_grad=True)

        gradcheck(lambda i, w, b, pad: F.conv_tbc(i, w, b, pad), (inp, weight, bias, 3))
6796 def _test_conv_noncontig_weights(self, device):
6797 for dim
in (1, 2, 3):
6798 for grouped
in (
False,
True):
6800 groups = 3
if grouped
else 1
6801 w = torch.randn([3] * dim, device=device)
6802 w = w.expand([nc, int(nc / groups)] + list(w.shape))
6803 w = w.detach().requires_grad_()
6804 x = torch.randn([1, nc] + ([5] * dim), device=device, requires_grad=
True)
6805 y = getattr(F,
'conv{}d'.format(dim))(x, w, groups=groups)
6807 y = getattr(F,
'conv_transpose{}d'.format(dim))(x, w, groups=groups)
6810 def test_conv_noncontig_weights(self):
6813 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
6814 def test_conv_noncontig_weights_cuda(self):
6818 def _test_conv_noncontig_weights_and_bias(self, device):
6820 for bias
in [
True,
False]:
6821 conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
6822 bias=bias).to(device, torch.float)
6824 input_nc = torch.randn((1, 3, 224, 224, 2), device=device, dtype=torch.float)[:, :, :, :, 1]
6825 input_c = input_nc.contiguous()
6827 weight_nc = torch.randn((64, 3, 7, 7, 2), device=device, dtype=torch.float)[:, :, :, :, 1]
6828 conv1.weight = nn.Parameter(weight_nc)
6829 weight_c = conv1.weight.contiguous()
6832 bias_nc = torch.randn((64, 2), device=device, dtype=torch.float)[:, 1]
6833 conv1.bias = nn.Parameter(bias_nc)
6834 bias_c = conv1.bias.contiguous()
6836 out1 = conv1(input_nc)
6837 conv1.weight = nn.Parameter(weight_c)
6839 conv1.bias = nn.Parameter(bias_c)
6840 out2 = conv1(input_c)
6843 def test_conv_noncontig_weights_and_bias(self):
6846 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
6847 def test_conv_noncontig_weights_and_bias_cuda(self):
6850 def run_conv_double_back_test(self, kern, stride, padding, chan_in, chan_out, batch_size,
6851 inp_size, dilation, no_weight, groups=1, use_cuda=
False,
6852 use_bias=
True, dtype=torch.double):
6854 device = torch.device(
"cuda")
6856 device = torch.device(
"cpu")
6858 x = torch.randn(batch_size, chan_in, inp_size, inp_size, device=device,
6859 dtype=dtype, requires_grad=
True)
6860 weight = torch.randn(chan_out, chan_in // groups, kern, kern, device=device,
6861 dtype=dtype, requires_grad=
not no_weight)
6863 bias = torch.randn(chan_out, device=device, dtype=dtype, requires_grad=
True)
6869 lx, lweight, lbias = inputs
6871 lx, lweight = inputs
6874 with cudnn.flags(enabled=
False):
6875 out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
6879 inputs = x, weight, bias
6883 dummy_out = func(*inputs)
6884 grad_y = torch.randn_like(dummy_out, device=device, dtype=dtype, requires_grad=
True)
6888 if dtype == torch.float:
6890 return g.requires_grad
6892 return gradgradcheck(func, inputs, (grad_y,))
6894 def test_conv_double_backward(self):
6896 for kern, inp_size, dilations
in [(3, 6, [1, 2]), (3, 7, [1]), (4, 9, [1])]:
6897 for stride, padding, chan_in, chan_out, dilation
in \
6898 product([1, 2], [0, 1, 2], [2], [3], dilations):
6899 for no_weight
in (
True,
False):
6900 for dtype
in (torch.float, torch.double):
6902 padding, chan_in, chan_out,
6903 batch_size, inp_size, dilation,
6904 no_weight, dtype=dtype)
6905 self.assertTrue(result,
6906 "Conv double backward test failed with parameters:" +
6907 "\nkern: " + str(kern) +
6908 "\nstride: " + str(stride) +
6909 "\npadding: " + str(padding) +
6910 "\nchan_in: " + str(chan_in) +
6911 "\nchan_out: " + str(chan_out) +
6912 "\nbatch_size: " + str(batch_size) +
6913 "\ninp_size: " + str(inp_size) +
6914 "\ndilation: " + str(dilation) +
6915 "\ndtype: " + str(dtype))
6917 def test_conv_double_backward_no_bias(self):
6920 chan_in, chan_out = 2, 4
6928 padding, chan_in, chan_out,
6929 batch_size, inp_size, dilation,
6930 no_weight, use_bias=use_bias)
6931 self.assertTrue(result,
6932 "Conv double backward test failed with parameters:" +
6933 "\nkern: " + str(kern) +
6934 "\nstride: " + str(stride) +
6935 "\npadding: " + str(padding) +
6936 "\nchan_in: " + str(chan_in) +
6937 "\nchan_out: " + str(chan_out) +
6938 "\nbatch_size: " + str(batch_size) +
6939 "\ninp_size: " + str(inp_size) +
6940 "\ndilation: " + str(dilation))
6942 def test_conv_double_backward_groups(self):
6946 chan_in, chan_out = 2, 4
6953 padding, chan_in * groups, chan_out * groups,
6954 batch_size, inp_size, dilation,
6955 no_weight, groups=groups)
6956 self.assertTrue(result,
6957 "Conv double backward test failed with parameters:" +
6958 "\nkern: " + str(kern) +
6959 "\nstride: " + str(stride) +
6960 "\npadding: " + str(padding) +
6961 "\nchan_in: " + str(chan_in) +
6962 "\nchan_out: " + str(chan_out) +
6963 "\nbatch_size: " + str(batch_size) +
6964 "\ninp_size: " + str(inp_size) +
6965 "\ndilation: " + str(dilation) +
6966 "\ngroups: " + str(groups))
6968 def test_conv_double_backward_stride(self):
6972 for kern, inp_size, dilations
in [(3, 5, [1, 2]), (3, 7, [1])]:
6973 for stride, padding, chan_in, chan_out, dilation
in product([2], [0, 1], [1], [2], dilations):
6976 padding, chan_in, chan_out,
6977 batch_size, inp_size, dilation,
6980 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
6981 def test_cudnn_noncontiguous_weight(self):
6984 input = Variable(torch.cuda.DoubleTensor([1, 1, 1]).view(1, 1, 3))
6985 weights1 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2))
6986 weights2 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2)).contiguous()
6987 self.
assertEqual(F.conv1d(input, weights1, bias=
None, stride=2, dilation=2),
6988 F.conv1d(input, weights2, bias=
None, stride=2, dilation=2))
6990 @unittest.skipIf(
not TEST_CUDA,
"CUDA unavailable")
6991 @repeat_test_for_types(DOUBLE_TENSORTYPES)
6992 def test_conv_double_backward_cuda(self, dtype=torch.double):
6995 for kern, inp_size, dilations
in [(3, 5, [1, 2]), (4, 9, [1])]:
6996 for stride, padding, chan_in, chan_out, dilation
in product([1], [2], [2], [3], dilations):
6997 no_weight = stride == 2
6999 padding, chan_in, chan_out,
7000 batch_size, inp_size, dilation,
7001 no_weight, use_cuda=
True, dtype=dtype)
7002 self.assertTrue(result,
7003 "Conv double backward test failed with parameters:" +
7004 "\nkern: " + str(kern) +
7005 "\nstride: " + str(stride) +
7006 "\npadding: " + str(padding) +
7007 "\nchan_in: " + str(chan_in) +
7008 "\nchan_out: " + str(chan_out) +
7009 "\nbatch_size: " + str(batch_size) +
7010 "\ninp_size: " + str(inp_size) +
7011 "\ndilation: " + str(dilation))
7013 def run_grad_conv_test(self, func_forward, func_backward, dim=1, gradient='input'):
7014 for kern, inp_size
in [(3, 6), (3, 7), (4, 9)]:
7015 for batch, stride, padding, chan_in, chan_out, dilation
in \
7016 product([1, 2], [1, 2], [0, 1, 2], [2], [3], [1]):
7018 for has_bias
in [
True,
False]:
7019 input_shape = [batch, chan_in]
7020 weight_shape = [chan_out, chan_in]
7021 for _
in range(dim):
7022 input_shape.append(inp_size)
7023 weight_shape.append(kern)
7025 input = torch.randn(input_shape, requires_grad=
True)
7026 weight = torch.randn(weight_shape, requires_grad=
True)
7028 bias = torch.randn([chan_out], requires_grad=
True)
7029 output = func_forward(input, weight, stride=stride, padding=padding, dilation=dilation, bias=bias)
7031 gradient_o = torch.randn(output.shape)
7032 gradient_w =
torch.autograd.grad(output, input
if (gradient ==
'input')
else weight, gradient_o)
7036 input_shape
if (gradient ==
'input')
else input,
7037 weight_shape
if (gradient ==
'weight')
else weight,
7043 def test_grad_conv1d_input(self):
7046 def test_grad_conv1d_weight(self):
7049 def test_grad_conv2d_input(self):
7052 def test_grad_conv2d_weight(self):
7055 def test_grad_conv3d_input(self):
7058 def test_grad_conv3d_weight(self):
7061 @unittest.skipIf(
not torch._nnpack_available(),
"NNPACK unavailable")
7062 def test_nnpack_conv(self):
7063 for kern, inp_size
in [(3, 6), (3, 7), (4, 9)]:
7064 for batch, padding, chan_in, chan_out
in \
7065 product([1, 2], [0, 1, 2], [2], [3]):
7067 for has_bias
in [
True,
False]:
7068 input_shape = [batch, chan_in]
7069 weight_shape = [chan_out, chan_in]
7071 input_shape.append(inp_size)
7072 weight_shape.append(kern)
7074 input = torch.randn(input_shape, requires_grad=
True, dtype=torch.float)
7075 weight = torch.randn(weight_shape, requires_grad=
True, dtype=torch.float)
7077 bias = torch.randn([chan_out], requires_grad=
True, dtype=torch.float)
7078 output = torch._nnpack_spatial_convolution(input, weight, padding=padding, bias=bias)
7082 gradient_o = torch.randn(output.shape, dtype=torch.float)
7086 for gr, gr_expected
in zip(grads, grads_expected):
    def test_fold_invalid_arg(self):
        # input of the wrong dimensionality
        fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3))
        with self.assertRaisesRegex(NotImplementedError, r"Only 3D input Tensors are supported"):
            fold(torch.randn(1, 5))

        # input.size(1) not divisible by prod(kernel_size)
        fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3))
        with self.assertRaisesRegex(RuntimeError, r"be divisible by the product of kernel_size"):
            fold(torch.randn(1, 5, 9))

        with self.assertRaisesRegex(RuntimeError, r"be divisible by the product of kernel_size"):
            fold(torch.randn(1, 19, 9))

        # input.size(2) not matching the total number of sliding blocks
        with self.assertRaisesRegex(RuntimeError, r"match the calculated number of sliding blocks"):
            fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3))
            fold(torch.randn(1, 6, 10))

        with self.assertRaisesRegex(RuntimeError, r"match the calculated number of sliding blocks"):
            fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3), stride=(2, 2))
            fold(torch.randn(1, 6, 5))

        with self.assertRaisesRegex(RuntimeError, r"match the calculated number of sliding blocks"):
            fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3), stride=(2, 2), dilation=(1, 2), padding=(2, 0))
            fold(torch.randn(1, 6, 5))
    def test_unfold_invalid_arg(self):
        # input of the wrong dimensionality
        unfold = nn.Unfold(kernel_size=(2, 3))
        with self.assertRaisesRegex(NotImplementedError, r"Only 4D input Tensors are supported"):
            unfold(torch.randn(1, 5, 2))

        # each of the following configurations yields a non-positive number of
        # sliding blocks, i.e. a calculated output shape that is too small
        unfold = nn.Unfold(kernel_size=(2, 3))
        unfold(torch.randn(1, 2, 2, 2))

        unfold = nn.Unfold(kernel_size=(5, 3), padding=(1, 1))
        unfold(torch.randn(1, 2, 2, 3))

        unfold = nn.Unfold(kernel_size=(1, 3), padding=(1, 1), dilation=(1, 2))
        unfold(torch.randn(1, 2, 2, 2))
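    # Illustrative sketch (our addition): the "number of sliding blocks" referenced by
    # the Fold/Unfold error messages is, per spatial dimension,
    #   n = floor((size + 2 * padding - dilation * (kernel - 1) - 1) / stride) + 1,
    # and nn.Unfold returns a tensor of shape (N, C * prod(kernel_size), prod(n)).
    def _sketch_unfold_output_shape(self):
        x = torch.randn(2, 3, 10, 12)
        kernel, padding, dilation, stride = (2, 3), (1, 1), (1, 2), (2, 2)
        unfold = nn.Unfold(kernel_size=kernel, padding=padding, dilation=dilation, stride=stride)
        out = unfold(x)

        def blocks(size, k, p, d, s):
            return (size + 2 * p - d * (k - 1) - 1) // s + 1

        n_h = blocks(10, kernel[0], padding[0], dilation[0], stride[0])
        n_w = blocks(12, kernel[1], padding[1], dilation[1], stride[1])
        self.assertEqual(tuple(out.shape), (2, 3 * kernel[0] * kernel[1], n_h * n_w))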
    def test_softmin(self):
        x = torch.randn(2, 16)
        self.assertEqual(F.softmin(x, 1), F.softmax(-x, 1))
        self.assertEqual(F.softmin(x, 0), F.softmax(-x, 0))
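    # Illustrative sketch (our addition): softmin is softmax of the negated input,
    # i.e. softmin(x)_i = exp(-x_i) / sum_j exp(-x_j).
    def _sketch_softmin_definition(self):
        x = torch.randn(3, 7)
        manual = torch.exp(-x) / torch.exp(-x).sum(dim=1, keepdim=True)
        self.assertEqual(F.softmin(x, 1), manual, prec=1e-6)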
    def test_adaptive_log_softmax(self):
        # args validation
        with self.assertRaises(ValueError):
            _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 15, 15], div_value=2.)

        with self.assertRaises(ValueError):
            _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 15, 10], div_value=2.)

        with self.assertRaises(ValueError):
            _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 25], div_value=2.)

        with self.assertRaisesRegex(ValueError, "cutoffs should be a sequence of unique,"):
            _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 20], div_value=2.)

        # a well-formed cutoff list should not raise
        _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 19], div_value=2.)

        # input shapes
        with self.assertRaisesRegex(RuntimeError, r"Input and target should have the same size"):
            asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
            x = torch.randn(2, 16)

        asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
        x = torch.randn(2, 16)

        # cluster sizes
        asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
        x = torch.randn(2, 16)

        self.assertEqual(asfm.head.weight.size(), (5 + 3, 16))   # 5 shortlist targets + 3 clusters, dim 16
        self.assertEqual(asfm.tail[0][1].weight.size(), (5, 8))  # 5 targets in this cluster, dim 8
        self.assertEqual(asfm.tail[1][1].weight.size(), (5, 4))
        self.assertEqual(asfm.tail[2][1].weight.size(), (5, 2))

        # log_prob returns genuine log-probabilities
        asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 4, [2], div_value=2.)
        x = torch.randn(4, 8)
        logprob_out = asfm.log_prob(x)

        self.assertEqual(torch.exp(logprob_out).data.sum(1), torch.ones(4))

        # forward returns the same values as log_prob
        for v in [0, 1, 2, 3]:
            y = torch.full((4,), v, dtype=torch.long)
            out, loss = asfm(x, y)

            self.assertEqual(out, logprob_out.gather(1, y.unsqueeze(1)).squeeze())
            self.assertEqual(loss, F.nll_loss(logprob_out, y))

        # predict
        x = torch.randn(64, 8).abs_()

        # argmax in shortlist
        asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
        asfm.head.weight.data.abs_()
        asfm.head.bias.data.abs_()
        asfm.head.weight.data[asfm.shortlist_size:, :].zero_()

        out = asfm.predict(x)
        self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))

        # argmax outside of shortlist
        asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
        asfm.head.weight.data.abs_()
        asfm.head.bias.data.abs_()
        asfm.head.weight.data[:asfm.shortlist_size, :].zero_()

        out = asfm.predict(x)
        self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))

        # half of the argmax in shortlist, half in clusters
        asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
        asfm.head.weight.data.abs_()
        asfm.head.bias.data.abs_()

        x[:32, :asfm.shortlist_size].zero_()
        x[32:, asfm.shortlist_size:].zero_()

        asfm.head.weight.data[:asfm.shortlist_size, asfm.shortlist_size:].zero_()
        asfm.head.weight.data[asfm.shortlist_size:, :asfm.shortlist_size].zero_()

        out = asfm.predict(x)
        self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))
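    # Illustrative sketch (our addition): minimal end-to-end use of
    # AdaptiveLogSoftmaxWithLoss. The module returns the log-probability of each
    # sample's true class together with the mean negative log-likelihood.
    def _sketch_adaptive_log_softmax_usage(self):
        asfm = nn.AdaptiveLogSoftmaxWithLoss(in_features=16, n_classes=20,
                                             cutoffs=[5, 10, 15], div_value=2.)
        x = torch.randn(8, 16)
        y = torch.randint(0, 20, (8,), dtype=torch.long)
        out, loss = asfm(x, y)
        self.assertEqual(out.size(), (8,))
        self.assertEqual(loss, -out.mean(), prec=1e-6)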
    def setUp(self):
        super(TestNNInit, self).setUp()

    def _is_normal(self, tensor, mean, std):
        samples = tensor.view(-1).tolist()
        p_value = stats.kstest(samples, 'norm', args=(mean, std))[1]
        return p_value > 0.0001

    def _is_uniform(self, tensor, a, b):
        samples = tensor.view(-1).tolist()
        p_value = stats.kstest(samples, 'uniform', args=(a, (b - a)))[1]
        return p_value > 0.0001

    def _create_random_nd_tensor(self, dims, size_min, size_max):
        size = [random.randint(size_min, size_max) for _ in range(dims)]
        tensor = torch.zeros(size)
        return tensor

    def _random_float(self, a, b):
        return (b - a) * random.random() + a
7262 def test_calculate_gain_linear(self):
7263 for fn
in [
'linear',
'conv1d',
'conv2d',
'conv3d',
'conv_transpose2d',
'conv_transpose2d',
'conv_transpose3d']:
7264 gain = init.calculate_gain(fn)
7267 def test_calculate_gain_nonlinear(self):
7268 for fn
in [
'sigmoid',
'tanh',
'relu',
'leaky_relu']:
7269 gain = init.calculate_gain(fn)
7276 elif fn ==
'leaky_relu':
7279 def test_calculate_gain_leaky_relu(self):
7280 for param
in [
None, 0, 0.01, 10]:
7281 gain = init.calculate_gain(
'leaky_relu', param)
7291 def test_calculate_gain_leaky_relu_only_accepts_numbers(self):
7292 for param
in [
True, [1], {
'a':
'b'}]:
7293 with self.assertRaises(ValueError):
7294 init.calculate_gain(
'leaky_relu', param)
7296 def test_calculate_gain_only_accepts_valid_nonlinearities(self):
7297 for n
in [2, 5, 25]:
7299 random_string =
''.join([random.choice(string.ascii_lowercase)
for i
in range(n)])
7300 with self.assertRaises(ValueError):
7301 init.calculate_gain(random_string)
7303 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7304 def test_uniform(self):
7305 for dims
in [1, 2, 4]:
7309 init.uniform_(input_tensor, a=a, b=b)
7312 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7313 def test_normal(self):
7314 for dims
in [1, 2, 4]:
7318 init.normal_(input_tensor, mean=mean, std=std)
7320 assert self.
_is_normal(input_tensor, mean, std)
7322 def test_constant(self):
7323 for dims
in [1, 2, 4]:
7326 init.constant_(input_tensor, val)
7328 self.
assertEqual(input_tensor, input_tensor.clone().fill_(val))
7330 def test_ones_and_zeros(self):
7331 for init_fn_, val
in zip([init.ones_, init.zeros_], [1, 0]):
7332 for dims
in [1, 2, 4]:
7334 init_fn_(input_tensor)
7336 self.
assertEqual(input_tensor, input_tensor.clone().fill_(val))
7340 init.eye_(input_tensor)
7343 for i
in range(input_tensor.size(0)):
7344 for j
in range(input_tensor.size(1)):
7346 assert input_tensor[i][j] == 1
7348 assert input_tensor[i][j] == 0
7350 def test_eye_only_works_on_2d_inputs(self):
7352 with self.assertRaises(ValueError):
    def test_max_unpool(self):
        # 1D
        output, indices = F.max_pool1d(torch.randn([1, 1, 4]), 2, stride=2, return_indices=True)
        self.assertEqual(F.max_unpool1d(output, indices, 2), F.max_unpool1d(output, indices, 2, stride=2))

        # output_size given as a list / tuple
        input = torch.randn([1, 1, 5])
        output, indices = F.max_pool1d(input, 2, stride=2, return_indices=True)
        self.assertEqual(F.max_unpool1d(output, indices, 2, stride=2, output_size=input.shape),
                         F.max_unpool1d(output, indices, 2, stride=2, output_size=input.size()))

        # 2D
        output, indices = F.max_pool2d(torch.randn([1, 1, 4, 4]), 2, stride=2, return_indices=True)
        self.assertEqual(F.max_unpool2d(output, indices, 2), F.max_unpool2d(output, indices, 2, stride=2))

        # 3D
        output, indices = F.max_pool3d(torch.randn([4, 4, 4, 4, 4]), 2, stride=2, return_indices=True)
        self.assertEqual(F.max_unpool3d(output, indices, 2), F.max_unpool3d(output, indices, 2, stride=2))
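    # Illustrative sketch (our addition): max_unpool scatters the pooled values back to
    # the positions recorded in `indices`; with non-negative inputs, pooling the
    # unpooled result again therefore recovers the pooled tensor.
    def _sketch_max_unpool_roundtrip(self):
        x = torch.rand(1, 1, 8)
        pooled, indices = F.max_pool1d(x, 2, stride=2, return_indices=True)
        unpooled = F.max_unpool1d(pooled, indices, 2, stride=2)
        repooled, _ = F.max_pool1d(unpooled, 2, stride=2, return_indices=True)
        self.assertEqual(repooled, pooled)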
7375 def test_dirac_properties(self):
7376 for dims
in [3, 4, 5]:
7378 init.dirac_(input_tensor)
7380 c_out, c_in = input_tensor.size(0), input_tensor.size(1)
7381 min_d = min(c_out, c_in)
7383 assert torch.nonzero(input_tensor).size(0) == min_d
7387 def test_dirac_identity(self):
7388 batch, in_c, out_c, size, kernel_size = 8, 3, 4, 5, 3
7390 input_var = torch.randn(batch, in_c, size)
7391 filter_var = torch.zeros(out_c, in_c, kernel_size)
7392 init.dirac_(filter_var)
7393 output_var = F.conv1d(input_var, filter_var)
7394 input_tensor, output_tensor = input_var.data, output_var.data
7395 self.
assertEqual(input_tensor[:, :, 1:-1], output_tensor[:, :in_c, :])
7396 assert torch.nonzero(output_tensor[:, in_c:, :]).numel() == 0
7399 input_var = torch.randn(batch, in_c, size, size)
7400 filter_var = torch.zeros(out_c, in_c, kernel_size, kernel_size)
7401 init.dirac_(filter_var)
7402 output_var = F.conv2d(input_var, filter_var)
7403 input_tensor, output_tensor = input_var.data, output_var.data
7404 self.
assertEqual(input_tensor[:, :, 1:-1, 1:-1], output_tensor[:, :in_c, :, :])
7405 assert torch.nonzero(output_tensor[:, in_c:, :, :]).numel() == 0
7408 input_var = torch.randn(batch, in_c, size, size, size)
7409 filter_var = torch.zeros(out_c, in_c, kernel_size, kernel_size, kernel_size)
7410 init.dirac_(filter_var)
7411 output_var = F.conv3d(input_var, filter_var)
7412 input_tensor, output_tensor = input_var.data, output_var.data
7413 self.
assertEqual(input_tensor[:, :, 1:-1, 1:-1, 1:-1], output_tensor[:, :in_c, :, :])
7414 assert torch.nonzero(output_tensor[:, in_c:, :, :, :]).numel() == 0
7416 def test_dirac_only_works_on_3_4_5d_inputs(self):
7417 for dims
in [1, 2, 6]:
7418 with self.assertRaises(ValueError):
7422 def test_xavier_uniform_errors_on_inputs_smaller_than_2d(self):
7425 with self.assertRaises(ValueError):
7426 init.xavier_uniform_(tensor)
7428 def test_xavier_normal_errors_on_inputs_smaller_than_2d(self):
7431 with self.assertRaises(ValueError):
7432 init.xavier_normal_(tensor)
7434 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7435 def test_xavier_uniform(self):
7436 for use_gain
in [
True,
False]:
7443 init.xavier_uniform_(input_tensor, gain=gain)
7445 init.xavier_uniform_(input_tensor)
7447 fan_in = input_tensor.size(1)
7448 fan_out = input_tensor.size(0)
7449 if input_tensor.dim() > 2:
7450 fan_in *= input_tensor[0, 0].numel()
7451 fan_out *= input_tensor[0, 0].numel()
7453 expected_std = gain * math.sqrt(2.0 / (fan_in + fan_out))
7454 bounds = expected_std * math.sqrt(3)
7455 assert self.
_is_uniform(input_tensor, -bounds, bounds)
7457 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7458 def test_xavier_normal(self):
7459 for use_gain
in [
True,
False]:
7466 init.xavier_normal_(input_tensor, gain=gain)
7468 init.xavier_normal_(input_tensor)
7470 fan_in = input_tensor.size(1)
7471 fan_out = input_tensor.size(0)
7472 if input_tensor.dim() > 2:
7473 fan_in *= input_tensor[0, 0].numel()
7474 fan_out *= input_tensor[0, 0].numel()
7476 expected_std = gain * math.sqrt(2.0 / (fan_in + fan_out))
7477 assert self.
_is_normal(input_tensor, 0, expected_std)
7479 def test_kaiming_uniform_errors_on_inputs_smaller_than_2d(self):
7481 with self.assertRaises(ValueError):
7483 init.kaiming_uniform_(tensor)
7485 def test_kaiming_normal_errors_on_inputs_smaller_than_2d(self):
7487 with self.assertRaises(ValueError):
7489 init.kaiming_normal_(tensor)
7491 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7492 def test_kaiming_uniform(self):
7493 for use_a
in [
True,
False]:
7495 for mode
in [
'fan_in',
'fan_out']:
7499 init.kaiming_uniform_(input_tensor, a=a, mode=mode)
7502 init.kaiming_uniform_(input_tensor, mode=mode)
7504 fan_in = input_tensor.size(1)
7505 fan_out = input_tensor.size(0)
7506 if input_tensor.dim() > 2:
7507 fan_in *= input_tensor[0, 0].numel()
7508 fan_out *= input_tensor[0, 0].numel()
7510 if mode ==
'fan_in':
7515 expected_std = math.sqrt(2.0 / ((1 + a**2) * n))
7516 bounds = expected_std * math.sqrt(3.0)
7517 assert self.
_is_uniform(input_tensor, -bounds, bounds)
7519 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7520 def test_kaiming_normal(self):
7521 for use_a
in [
True,
False]:
7523 for mode
in [
'fan_in',
'fan_out']:
7527 init.kaiming_normal_(input_tensor, a=a, mode=mode)
7530 init.kaiming_normal_(input_tensor, mode=mode)
7532 fan_in = input_tensor.size(1)
7533 fan_out = input_tensor.size(0)
7534 if input_tensor.dim() > 2:
7535 fan_in *= input_tensor[0, 0].numel()
7536 fan_out *= input_tensor[0, 0].numel()
7538 if mode ==
'fan_in':
7543 expected_std = math.sqrt(2.0 / ((1 + a**2) * n))
7544 assert self.
_is_normal(input_tensor, 0, expected_std)
7546 def test_sparse_only_works_on_2d_inputs(self):
7548 with self.assertRaises(ValueError):
7551 init.sparse_(tensor, sparsity)
7553 @unittest.skipIf(
not TEST_SCIPY,
"Scipy not found.")
7554 def test_sparse_default_std(self):
7555 for use_random_std
in [
True,
False]:
7557 rows, cols = input_tensor.size(0), input_tensor.size(1)
7563 init.sparse_(input_tensor, sparsity=sparsity, std=std)
7565 init.sparse_(input_tensor, sparsity=sparsity)
7567 for col_idx
in range(input_tensor.size(1)):
7568 column = input_tensor[:, col_idx]
7569 assert column[column == 0].nelement() >= math.ceil(sparsity * rows)
7571 assert self.
_is_normal(input_tensor[input_tensor != 0], 0, std)
    @skipIfNoLapack
    def test_orthogonal(self):
        for use_gain in [True, False]:
            for tensor_size in [[3, 4], [4, 3], [20, 2, 3, 4], [2, 3, 4, 5]]:
                input_tensor = torch.zeros(tensor_size)
                gain = 1.0

                if use_gain:
                    gain = self._random_float(0.1, 2)
                    init.orthogonal_(input_tensor, gain=gain)
                else:
                    init.orthogonal_(input_tensor)

                rows, cols = tensor_size[0], reduce(mul, tensor_size[1:])
                flattened_tensor = input_tensor.view(rows, cols)
                if rows > cols:
                    self.assertEqual(torch.mm(flattened_tensor.t(), flattened_tensor),
                                     torch.eye(cols) * gain ** 2, prec=1e-6)
                else:
                    self.assertEqual(torch.mm(flattened_tensor, flattened_tensor.t()),
                                     torch.eye(rows) * gain ** 2, prec=1e-6)
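    # Illustrative sketch (our addition): for a tall matrix, init.orthogonal_ yields
    # orthonormal columns (gain 1 by default), which is the property the test above
    # checks through flattened_tensor.t() @ flattened_tensor.
    def _sketch_orthogonal_columns(self):
        w = torch.zeros(10, 4)
        init.orthogonal_(w)
        self.assertEqual(torch.mm(w.t(), w), torch.eye(4), prec=1e-6)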
    def test_deprecation(self):
        x = torch.randn(3, 3)
        self.assertWarnsRegex(fn, 'deprecated',
                              'methods not suffixed with underscore should be deprecated')
7603 def add_test(test, decorator=None):
7604 def add(test_name, fn):
7605 if hasattr(TestNN, test_name):
7606 raise RuntimeError(
'Found two tests with the same name: ' + test_name)
7607 if decorator
is not None:
7609 setattr(TestNN, test_name, fn)
7611 test_name = test.get_name()
7612 add(test_name,
lambda self, test=test:
test(self))
7613 cuda_test_name = test_name +
'_cuda' 7616 if 'extra_args' in get_function_arglist(test.test_cuda):
7617 kwargs[
'extra_args'] = test.extra_args
7619 if 'dtype' in get_function_arglist(test.test_cuda):
7620 add(cuda_test_name +
'_float',
lambda self,
7621 test=test, kwargs=kwargs: test.test_cuda(self, dtype=torch.float, **kwargs))
7622 add(cuda_test_name +
'_double',
lambda self,
7623 test=test, kwargs=kwargs: test.test_cuda(self, dtype=torch.double, **kwargs))
7625 def test_half(self, test=test, kwargs=kwargs):
7626 test.test_cuda(self, dtype=torch.half, **kwargs)
7627 if getattr(test,
'check_half',
True):
7628 add(cuda_test_name +
'_half', test_half)
7630 add(cuda_test_name,
lambda self, test=test, kwargs=kwargs: test.test_cuda(self, **kwargs))
7633 new_criterion_tests = [
7635 module_name=
'BCEWithLogitsLoss',
7636 input_fn=
lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
7637 target_fn=
lambda: torch.randn(15, 10).gt(0).double(),
7640 module_name=
'BCEWithLogitsLoss',
7641 constructor_args=(torch.rand(10),),
7642 input_fn=
lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
7643 target_fn=
lambda: torch.randn(15, 10).gt(0).double(),
7647 module_name=
'BCEWithLogitsLoss',
7648 constructor_args=(torch.rand(()),),
7649 input_fn=
lambda: torch.rand(()).clamp_(1e-2, 1 - 1e-2),
7650 target_fn=
lambda: torch.randn(()).gt(0).double(),
7651 desc=
'scalar_weights' 7654 module_name=
'NLLLoss',
7655 input_size=(2, 3, 5, 5),
7656 target_fn=
lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
7657 reference_fn=
lambda i, t, m:
7658 loss_reference_fns[
'NLLLossNd'](i, t, reduction=get_reduction(m)),
7659 check_sum_reduction=
True,
7663 module_name=
'NLLLoss',
7664 constructor_args_fn=
lambda: (torch.rand(3),),
7665 input_size=(2, 3, 5, 5),
7666 target=torch.rand(2, 5, 5).mul(3).floor().long(),
7667 reference_fn=
lambda i, t, m:
7668 loss_reference_fns[
'NLLLossNd'](i, t, weight=get_weight(m)),
7672 module_name=
'NLLLoss',
7673 constructor_args=(
None,
None, 1),
7674 input_size=(2, 3, 5, 5),
7675 target_fn=
lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
7676 reference_fn=
lambda i, t, m:
7677 loss_reference_fns[
'NLLLossNd'](i, t, ignore_index=1),
7678 desc=
'2d_ignore_index',
7681 module_name=
'NLLLoss',
7682 input_size=(2, 3, 5, 5, 2, 2),
7683 target_fn=
lambda: torch.rand(2, 5, 5, 2, 2).mul(3).floor().long(),
7684 reference_fn=
lambda i, t, m:
7685 loss_reference_fns[
'NLLLossNd'](i, t, reduction=get_reduction(m)),
7686 check_sum_reduction=
True,
7690 module_name=
'NLLLoss',
7691 input_size=(2, 3, 5),
7692 target_fn=
lambda: torch.rand(2, 5).mul(3).floor().long(),
7693 reference_fn=
lambda i, t, m:
7694 loss_reference_fns[
'NLLLossNd'](i, t, reduction=get_reduction(m)),
7695 check_sum_reduction=
True,
7699 module_name=
'PoissonNLLLoss',
7700 input_size=(2, 3, 4, 5),
7701 target_fn=
lambda: torch.randn(2, 3, 4, 5).floor_().abs_(),
7702 desc=
'no_full_loss',
7705 module_name=
'PoissonNLLLoss',
7706 constructor_args=(
False,),
7707 input_fn=
lambda: torch.randn(2, 3, 4, 5).abs_().add_(0.001),
7708 target_fn=
lambda: torch.randn(2, 3, 4, 5).floor_().abs_(),
7712 module_name=
'L1Loss',
7715 reference_fn=
lambda i, t, _: 1. / i.numel() * (i - t).abs().sum(),
7719 module_name=
'KLDivLoss',
7720 input_fn=
lambda: torch.rand(()).log(),
7721 target_fn=
lambda: torch.rand(()),
7722 reference_fn=
lambda i, t, m:
7723 kldivloss_reference(i, t, get_reduction(m)),
7724 check_sum_reduction=
True,
7728 module_name=
'MSELoss',
7731 reference_fn=
lambda i, t, m: ((i - t).abs().pow(2).sum() /
7732 (i.numel()
if get_reduction(m) ==
'mean' else 1)),
7733 check_sum_reduction=
True,
7737 module_name=
'MSELoss',
7738 input_fn=
lambda: torch.ones(5, 68, 64, 64, dtype=torch.float) / 10,
7739 target_fn=
lambda: torch.zeros(5, 68, 64, 64, dtype=torch.float),
7740 reference_fn=
lambda i, t, m: ((i - t).abs().pow(2).sum() /
7741 (i.numel()
if get_reduction(m) ==
'mean' else 1)),
7742 check_forward_only=
True,
7746 module_name=
'BCELoss',
7747 constructor_args_fn=
lambda: (torch.rand(()),),
7748 input_fn=
lambda: torch.rand(()).clamp_(1e-2, 1 - 1e-2),
7749 target_fn=
lambda: torch.rand(()).gt(0).double(),
7750 reference_fn=
lambda i, t, m: -((t * i.log() + (1 - t) * (1 - i).log()) * get_weight(m)).sum() /
7751 (i.numel()
if get_reduction(m) ==
'mean' else 1),
7752 desc=
'scalar_weights',
7753 check_gradgrad=
False,
7756 module_name=
'HingeEmbeddingLoss',
7757 constructor_args=(0.5,),
7759 target_fn=
lambda: torch.randn(()).gt(0).double().mul_(2).sub(1),
7760 desc=
'scalar_margin',
7761 check_sum_reduction=
True,
7764 module_name=
'SmoothL1Loss',
7767 check_sum_reduction=
True,
7768 reference_fn=
lambda i, t, m:
7769 smoothl1loss_reference(i, t, reduction=get_reduction(m)),
7773 module_name=
'MultiLabelSoftMarginLoss',
7774 constructor_args=(torch.rand(10),),
7775 input_fn=
lambda: torch.randn(5, 10),
7776 target_fn=
lambda: torch.rand(5, 10).mul(2).floor(),
7777 reference_fn=
lambda i, t, m: -((t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()) * get_weight(m)).sum() /
7778 (i.numel()
if get_reduction(m) ==
'mean' else i.size(1)
if get_reduction(m) ==
'sum' else 1),
7780 check_sum_reduction=
True,
7781 check_gradgrad=
False,
7784 module_name=
'CTCLoss',
7785 constructor_args=(14,),
7786 extra_args=([50, 50, 50], [30, 25, 20]),
7787 input_fn=
lambda: torch.randn(50, 3, 15).log_softmax(2),
7788 target_fn=
lambda: torch.randint(0, 14, (3, 30), dtype=torch.long),
7789 reference_fn=
lambda i, t, il, tl, m:
7790 ctcloss_reference(i, t, il, tl, blank=14, reduction=get_reduction(m)),
7791 check_sum_reduction=
True,
7792 check_gradgrad=
False,
7796 module_name=
'CTCLoss',
7798 constructor_args=(14,),
7799 extra_args=([50, 50, 50], [30, 25, 20]),
7800 input_fn=
lambda: torch.randn(50, 3, 15).log_softmax(2),
7801 target_fn=
lambda: torch.randint(0, 14, (3, 30), dtype=torch.long),
7802 reference_fn=
lambda i, t, il, tl, m:
7803 ctcloss_reference(i, t, il, tl, blank=14, reduction=get_reduction(m)),
7804 check_sum_reduction=
True,
7805 check_gradgrad=
False,
7809 module_name=
'CTCLoss',
7810 desc=
'2d_int_target',
7811 constructor_args=(0,),
7812 extra_args=([50, 50, 50], [30, 25, 20]),
7813 input_fn=
lambda: torch.randn(50, 3, 15).log_softmax(2),
7814 target_fn=
lambda: torch.randint(1, 15, (3, 30), dtype=torch.int),
7815 reference_fn=
lambda i, t, il, tl, m:
7816 ctcloss_reference(i, t, il, tl, blank=0, reduction=get_reduction(m)),
7817 check_sum_reduction=
True,
7818 check_gradgrad=
False,
7820 convert_target=
False,
7823 module_name=
'CTCLoss',
7824 desc=
'2d_lengths_tensors',
7825 constructor_args=(0,),
7827 input_fn=
lambda: torch.randn(50, 3, 15).log_softmax(2),
7828 target_fn=
lambda: torch.randint(1, 15, (3, 30), dtype=torch.int),
7829 reference_fn=
lambda i, t, il, tl, m:
7830 ctcloss_reference(i, t, il, tl, blank=0, reduction=get_reduction(m)),
7831 check_sum_reduction=
True,
7832 check_gradgrad=
False,
7834 convert_target=
False,
7839 for test_params
in module_tests + new_module_tests:
7841 if 'constructor' not in test_params:
7842 name = test_params.pop(
'module_name')
7843 test_params[
'constructor'] = getattr(nn, name)
7844 decorator = test_params.pop(
'decorator',
None)
7846 add_test(test, decorator)
7847 if 'check_eval' in test_params:
7849 desc = test_params.get(
'desc',
None)
7850 test_params[
'desc'] =
'eval' if desc
is None else desc +
'_eval' 7852 def gen_eval_constructor(constructor):
7853 def eval_constructor(*args, **kwargs):
7854 cons = constructor(*args, **kwargs)
7855 cons.training =
False 7857 eval_constructor.__name__ = constructor.__name__
7858 return eval_constructor
7860 test_params[
'constructor'] = gen_eval_constructor(test_params[
'constructor'])
7862 add_test(test, decorator)
7864 for test_params
in criterion_tests + new_criterion_tests:
7865 name = test_params.pop(
'module_name')
7866 test_params[
'constructor'] = getattr(nn, name)
7868 decorator = test_params.pop(
'decorator',
None)
7869 add_test(test, decorator)
7870 if 'check_sum_reduction' in test_params:
7871 desc = test_params.get(
'desc',
None)
7872 test_params[
'desc'] =
'sum_reduction' if desc
is None else desc +
'_sum_reduction' 7874 def gen_sum_reduction_constructor(constructor):
7875 def sum_reduction_constructor(*args, **kwargs):
7876 cons = constructor(*args, reduction=
'sum', **kwargs)
7878 sum_reduction_constructor.__name__ = constructor.__name__
7879 return sum_reduction_constructor
7881 test_params[
'constructor'] = gen_sum_reduction_constructor(test_params[
'constructor'])
7883 add_test(test, decorator)
7887 def __init__(self, pool, unpool):
7888 super(UnpoolingNet, self).__init__()
7892 def forward(self, input):
7898 nn.MaxPool1d(2, return_indices=
True),
7900 input_size=(1, 1, 4),
7901 fullname=
'MaxUnpool1d_net',))
7904 nn.MaxPool2d(2, return_indices=
True),
7906 input_size=(1, 1, 2, 4),
7907 fullname=
'MaxUnpool2d_net',))
7910 nn.MaxPool3d(2, return_indices=
True),
7912 input_size=(1, 1, 2, 4, 6),
7913 fullname=
'MaxUnpool3d_net',
7914 check_gradgrad=
False,))
7918 def __call__(self, input):
7920 return nn.AdaptiveLogSoftmaxWithLoss.__call__(self, input, t).output
7925 fullname=
'AdaptiveLogSoftmax'))
def device_():
    return ['cpu', 'cuda']


def angle_rad_():
    return [r * math.pi * 2 for r in [0.0, 0.5, 0.25, 0.125, random.random()]]


def axis_vector_():
    t = (random.random(), random.random(), random.random())
    l = sum(x ** 2 for x in t) ** 0.5
    return [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0), tuple(x / l for x in t)]


def input_size2d_():
    return [[1, 1, 3, 5], [1, 1, 3, 3], [1, 1, 4, 4], [1, 1, 3, 4]]


def output_size2d_():
    return [[1, 1, 5, 3], [1, 1, 3, 5], [1, 1, 4, 3], [1, 1, 5, 5], [1, 1, 6, 6]]


def input_size2dsq_():
    return [[1, 1, 2, 2], [1, 1, 3, 3], [1, 1, 4, 4], [1, 1, 6, 6]]


def output_size2dsq_():
    return [[1, 1, 2, 2], [1, 1, 3, 3], [1, 1, 4, 4], [1, 1, 5, 5], [1, 1, 6, 6]]


def input_size3d_():
    return [[1, 1, 2, 2, 2], [1, 1, 2, 3, 4], [1, 1, 3, 3, 3], [1, 1, 4, 4, 4], [1, 1, 3, 4, 5]]


def input_size3dsq_():
    return [[1, 1, 2, 2, 2], [1, 1, 3, 3, 3], [1, 1, 4, 4, 4], [1, 1, 6, 6, 6]]


def output_size3dsq_():
    return [[1, 1, 2, 2, 2], [1, 1, 3, 3, 3], [1, 1, 4, 4, 4], [1, 1, 5, 5, 5], [1, 1, 6, 6, 6]]


def output_size3d_():
    return [[1, 1, 2, 2, 2], [1, 1, 3, 3, 3], [1, 1, 3, 4, 5], [1, 1, 4, 3, 2], [1, 1, 5, 5, 5], [1, 1, 6, 6, 6]]
def _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad):
    input_center = [(x - 1) / 2.0 for x in input_size]
    output_center = [(x - 1) / 2.0 for x in output_size]

    s = math.sin(angle_rad)
    c = math.cos(angle_rad)

    # Homogeneous 3x3 matrices; the last row [0, 0, 1] closes each transform.
    intrans_ary = np.array([
        [1, 0, input_center[2]],
        [0, 1, input_center[3]],
        [0, 0, 1],
    ], dtype=np.float64)

    inscale_ary = np.array([
        [input_center[2], 0, 0],
        [0, input_center[3], 0],
        [0, 0, 1],
    ], dtype=np.float64)

    rotation_ary = np.array([
        [c, -s, 0],
        [s, c, 0],
        [0, 0, 1],
    ], dtype=np.float64)

    outscale_ary = np.array([
        [1.0 / output_center[2], 0, 0],
        [0, 1.0 / output_center[3], 0],
        [0, 0, 1],
    ], dtype=np.float64)

    outtrans_ary = np.array([
        [1, 0, -output_center[2]],
        [0, 1, -output_center[3]],
        [0, 0, 1],
    ], dtype=np.float64)

    # Swap (row, col) indexing into grid_sample's (x, y) coordinate order.
    reorder_ary = np.array([
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
    ], dtype=np.float64)

    transform_ary = np.dot(np.dot(np.dot(np.dot(
        intrans_ary,
        inscale_ary),
        rotation_ary.T),
        outscale_ary),
        outtrans_ary)
    grid_ary = np.dot(np.dot(np.dot(reorder_ary, rotation_ary.T), outscale_ary), outtrans_ary)

    transform_tensor = torch.from_numpy(rotation_ary).to(device, torch.float32)
    transform_tensor = transform_tensor[:2].unsqueeze(0)

    return transform_tensor, transform_ary, grid_ary
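
# A minimal usage sketch (assuming torch.nn.functional is imported as F, as in
# the tests that consume these helpers): `transform_tensor` is the 1x2x3 theta
# for F.affine_grid, `grid_ary` is the equivalent homogeneous matrix in
# grid_sample's normalized (x, y) coordinates, and `transform_ary` is the
# array-coordinate matrix that a scipy.ndimage.affine_transform reference
# implementation would apply.
#
#     theta, transform_ary, grid_ary = _buildEquivalentAffineTransforms2d(
#         'cpu', [1, 1, 3, 5], [1, 1, 5, 3], math.pi / 4)
#     grid = F.affine_grid(theta, torch.Size([1, 1, 5, 3]))
#     out = F.grid_sample(torch.randn(1, 1, 3, 5), grid)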
def _buildEquivalentAffineTransforms3d(device, input_size, output_size, angle_rad, axis_vector):
    input_center = [(x - 1) / 2.0 for x in input_size]
    output_center = [(x - 1) / 2.0 for x in output_size]

    s = math.sin(angle_rad)
    c = math.cos(angle_rad)
    c1 = 1 - c

    # Homogeneous 4x4 matrices; the last row [0, 0, 0, 1] closes each transform.
    intrans_ary = np.array([
        [1, 0, 0, input_center[2]],
        [0, 1, 0, input_center[3]],
        [0, 0, 1, input_center[4]],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    inscale_ary = np.array([
        [input_center[2], 0, 0, 0],
        [0, input_center[3], 0, 0],
        [0, 0, input_center[4], 0],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    l, m, n = axis_vector
    scipyRotation_ary = np.array([
        [l * l * c1 + c, m * l * c1 - n * s, n * l * c1 + m * s, 0],
        [l * m * c1 + n * s, m * m * c1 + c, n * m * c1 - l * s, 0],
        [l * n * c1 - m * s, m * n * c1 + l * s, n * n * c1 + c, 0],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    # The same rotation, with the axis components unpacked in reverse order.
    z, y, x = axis_vector
    torchRotation_ary = np.array([
        [x * x * c1 + c, y * x * c1 - z * s, z * x * c1 + y * s, 0],
        [x * y * c1 + z * s, y * y * c1 + c, z * y * c1 - x * s, 0],
        [x * z * c1 - y * s, y * z * c1 + x * s, z * z * c1 + c, 0],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    outscale_ary = np.array([
        [1.0 / output_center[2], 0, 0, 0],
        [0, 1.0 / output_center[3], 0, 0],
        [0, 0, 1.0 / output_center[4], 0],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    outtrans_ary = np.array([
        [1, 0, 0, -output_center[2]],
        [0, 1, 0, -output_center[3]],
        [0, 0, 1, -output_center[4]],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    # Reverse the spatial axes into grid_sample's (x, y, z) coordinate order.
    reorder_ary = np.array([
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 0, 1],
    ], dtype=np.float64)

    transform_ary = np.dot(np.dot(np.dot(np.dot(
        intrans_ary,
        inscale_ary),
        np.linalg.inv(scipyRotation_ary)),
        outscale_ary),
        outtrans_ary)
    grid_ary = np.dot(np.dot(np.dot(reorder_ary, np.linalg.inv(scipyRotation_ary)), outscale_ary), outtrans_ary)

    transform_tensor = torch.from_numpy(torchRotation_ary).to(device, torch.float32)
    transform_tensor = transform_tensor[:3].unsqueeze(0)

    return transform_tensor, transform_ary, grid_ary
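
# Both rotation matrices above are instances of Rodrigues' rotation formula for
# a rotation by angle theta about a unit axis (l, m, n):
#
#     R = I + sin(theta) * K + (1 - cos(theta)) * K^2
#
# where K is the skew-symmetric cross-product matrix of the axis; expanded,
# the diagonal entries are l*l*(1 - c) + c etc., exactly as written above.
# The two copies differ only in how the axis components are unpacked
# ((l, m, n) vs the reversed (z, y, x)), to match the index-order conventions
# of the scipy reference and of torch's grid utilities respectively.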
if __name__ == '__main__':
    run_tests()