test_nn.py
1 import math
2 import random
3 import string
4 import unittest
5 import itertools
6 import contextlib
7 import warnings
8 import pickle
9 from copy import deepcopy
10 from itertools import repeat, product
11 from functools import wraps, reduce
12 from operator import mul
13 from collections import OrderedDict
14 import hashlib
15 import os
16 import threading
17 
18 import torch
19 from torch._six import inf, nan
20 import torch.backends.cudnn as cudnn
21 import torch.nn as nn
22 import torch.nn.functional as F
23 import torch.nn.parallel as dp
24 import torch.nn.init as init
25 import torch.nn.utils.rnn as rnn_utils
26 from torch.nn.utils import clip_grad_norm_, clip_grad_value_
27 from torch.nn.utils import parameters_to_vector, vector_to_parameters
28 from torch.autograd import Variable, gradcheck
29 from torch.autograd.gradcheck import gradgradcheck
30 from torch.nn import Parameter
31 from torch.nn.parallel._functions import Broadcast
32 from common_utils import freeze_rng_state, run_tests, TestCase, skipIfNoLapack, skipIfRocm, TEST_WITH_ROCM, \
33  TEST_NUMPY, TEST_SCIPY, IS_WINDOWS, download_file, PY3, PY34, to_gpu, \
34  get_function_arglist, skipCUDAMemoryLeakCheckIf, load_tests
35 from common_cuda import TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN, \
36  TEST_CUDNN_VERSION
37 from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \
38  module_tests, criterion_tests, loss_reference_fns, get_reduction, \
39  get_weight, smoothl1loss_reference, kldivloss_reference, \
40  ctcloss_reference, new_module_tests
41 
42 # load_tests from common_utils is used to automatically filter tests for
43 # sharding on sandcastle. This line silences flake warnings
44 load_tests = load_tests
45 
46 if TEST_SCIPY:
47  from scipy import stats
48  import scipy.ndimage
49 
50 if TEST_NUMPY:
51  import numpy as np
52 
53 ALL_TENSORTYPES = [torch.float,
54  torch.double,
55  torch.half]
56 
57 NO_HALF_TENSORTYPES = [torch.float,
58  torch.double]
59 
60 DOUBLE_TENSORTYPES = [torch.double]
61 
62 dtype2prec = {torch.float: 1e-5,
63  torch.double: 1e-5,
64  torch.half: 1e-2}
65 
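# Illustrative usage sketch (not part of the original file; the helper name is
# hypothetical): dtype2prec supplies the comparison tolerance for a dtype and
# is passed as the precision argument of common_utils.TestCase.assertEqual.
def _example_compare_with_dtype_tolerance(test_case, result, expected, dtype):
    # dtype2prec[torch.half] == 1e-2, loosening the check for half precision.
    test_case.assertEqual(result, expected, dtype2prec[dtype])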
66 
67 # WARNING: If you add a new top-level test case to this file, you MUST
68 # update test/run_test.py to list it, otherwise it will NOT be run in
69 # CI.
70 
71 
72 # Used to run the same test with different tensor types
73 def repeat_test_for_types(dtypes):
74  def repeat_helper(f):
75  @wraps(f)
76  def call_helper(self, *args):
77  for dtype in dtypes:
78  if PY34:
79  with TestCase.subTest(self, dtype=dtype):
80  f(self, *args, dtype=dtype)
81  else:
82  f(self, *args, dtype=dtype)
83 
84  return call_helper
85  return repeat_helper
86 
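# Illustrative usage sketch (not part of the original file; the class and test
# names are hypothetical): the decorated test must accept a `dtype` keyword
# argument, and it is invoked once per entry of the dtype list.
class _ExampleRepeatForTypes(TestCase):
    @repeat_test_for_types(ALL_TENSORTYPES)
    def test_ones_dtype(self, dtype=torch.float):
        # On Python 3.4+ each dtype runs inside its own subTest, so one
        # failing dtype does not hide the results for the others.
        x = torch.ones(2, 2, dtype=dtype)
        self.assertEqual(x.dtype, dtype)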
87 
88 class PackedSequenceTest(TestCase):
89 
90  _type_by_name = {
91  'torch.DoubleTensor': (torch.DoubleTensor, 'double'),
92  'torch.FloatTensor': (torch.FloatTensor, 'float'),
93  # We leave out `'torch.HalfTensor': (torch.HalfTensor, 'half'),`
94  # because of an error in `pad_packed_sequence`
95  # > AttributeError: 'torch.HalfTensor' object has no attribute 'fill_'
96  'torch.LongTensor': (torch.LongTensor, 'long'),
97  'torch.IntTensor': (torch.IntTensor, 'int'),
98  'torch.ShortTensor': (torch.ShortTensor, 'short'),
99  'torch.CharTensor': (torch.CharTensor, 'char'),
100  'torch.ByteTensor': (torch.ByteTensor, 'byte'),
101  }
102 
103  def __init__(self, *args, **kwargs):
104  super(PackedSequenceTest, self).__init__(*args, **kwargs)
105  self.batch_size = 5
106  self.max_length = 6
107 
108  def _ordered_sequence(self, tensor_type):
109  """Create ordered list of random sequences"""
110  seqs = [tensor_type(random.randint(1, self.max_length))
111  for _ in range(self.batch_size)]
112  seqs = [s.random_(-128, 128) for s in seqs]
113  ordered = sorted(seqs, key=len, reverse=True)
114  return ordered
115 
116  def _padded_sequence(self, tensor_type):
117  """Create Tensor of random padded sequences"""
118  ordered = self._ordered_sequence(tensor_type)
119  lengths = list(map(len, ordered))
120  padded_tensor = rnn_utils.pad_sequence(ordered)
121  return padded_tensor, lengths
122 
123  def test_type_casts(self):
124  """Test type casting of `PackedSequence` against type casting of tensor"""
125  for _, (input_type, _) in self._type_by_name.items():
126  for expected_type_str, (_, cast_str) in self._type_by_name.items():
127  for enforce_sorted in [True, False]:
128  padded, lengths = self._padded_sequence(input_type)
129  packed = rnn_utils.pack_padded_sequence(
130  padded, lengths, enforce_sorted=enforce_sorted)
131  # Apply cast to `PackedSequence` instance and unpack
132  masked = getattr(packed, cast_str)()
133  unpacked, lengths_out = rnn_utils.pad_packed_sequence(masked)
134  self.assertEqual(unpacked.type(), expected_type_str)
135 
136  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
137  def test_cuda_mask(self):
138  for enforce_sorted in [True, False]:
139  tensor_type = torch.FloatTensor
140  cuda_type_str = 'torch.cuda.FloatTensor'
141  padded, lengths = self._padded_sequence(tensor_type)
142  packed = rnn_utils.pack_padded_sequence(
143  padded, lengths, enforce_sorted=enforce_sorted)
144  self.assertFalse(packed.is_cuda)
145  packed = packed.cuda()
146  self.assertTrue(packed.is_cuda)
147  unpacked, _ = rnn_utils.pad_packed_sequence(packed)
148  self.assertEqual(unpacked.type(), cuda_type_str)
149 
150  def test_wrong_order(self):
151  a = torch.ones(25, 300)
152  b = torch.ones(22, 300)
153  b_a = rnn_utils.pad_sequence([b, a])
154  self.assertRaises(
155  RuntimeError,
156  lambda: rnn_utils.pack_padded_sequence(b_a, [22, 25], enforce_sorted=True))
157 
158  def test_total_length(self):
159  padded, lengths = self._padded_sequence(torch.FloatTensor)
160  max_length = max(lengths)
161  packed = rnn_utils.pack_padded_sequence(padded, lengths)
162  # test ValueError if total_length < max_length
163  for total_length in (-1, 0, max_length - 1):
164  for batch_first in (True, False):
165  def err_fn():
166  rnn_utils.pad_packed_sequence(packed, batch_first=batch_first,
167  total_length=total_length)
168  self.assertRaisesRegex(ValueError,
169  r'Expected total_length to be at least the '
170  r'length of the longest sequence in input',
171  err_fn)
172  # test that pad_packed_sequence returns results of correct length
173  for batch_first in (True, False):
174  no_extra_pad, _ = rnn_utils.pad_packed_sequence(packed, batch_first=batch_first)
175  for total_length_delta in (0, 1, 8):
176  total_length = max_length + total_length_delta
177  unpacked, lengths_out = rnn_utils.pad_packed_sequence(packed, batch_first=batch_first,
178  total_length=total_length)
179  self.assertEqual(lengths, lengths_out)
180  self.assertEqual(unpacked.size(1 if batch_first else 0), total_length)
181  if total_length_delta == 0:
182  ref_output = no_extra_pad
183  elif batch_first:
184  extra_pad = no_extra_pad.new_zeros(self.batch_size, total_length_delta)
185  ref_output = torch.cat([no_extra_pad, extra_pad], 1)
186  else:
187  extra_pad = no_extra_pad.new_zeros(total_length_delta, self.batch_size)
188  ref_output = torch.cat([no_extra_pad, extra_pad], 0)
189  self.assertEqual(unpacked, ref_output)
190 
191  def test_to(self):
192  for enforce_sorted in (True, False):
193  padded, lengths = self._padded_sequence(torch.IntTensor)
194  a = rnn_utils.pack_padded_sequence(
195  padded, lengths, enforce_sorted=enforce_sorted).cpu()
196 
197  self.assertIs(a, a.to('cpu'))
198  self.assertIs(a, a.to('cpu', dtype=torch.int32))
199  self.assertEqual(a.long(), a.to(torch.int64))
200 
201 if torch.cuda.is_available():
202  for cuda in ['cuda', 'cuda:0' if torch.cuda.device_count() == 1 else 'cuda:1']:
203  b = a.cuda(device=cuda)
204  self.assertIs(b, b.to(cuda))
205  self.assertEqual(a, b.to('cpu'))
206  self.assertEqual(b, a.to(cuda))
207  self.assertEqual(a, b.to('cpu', dtype=torch.int32))
208  self.assertIs(b, b.to(dtype=torch.int32))
209  self.assertEqual(b.long(), b.to(dtype=torch.int64))
210 
211 
212 def default_tensor_type(type):
213  type_str = torch.typename(type)
214 
215  def decorator(fn):
216  @wraps(fn)
217  def wrapper(*args, **kwargs):
218  old_type = torch.Tensor().type()
219  torch.set_default_tensor_type(type_str)
220  try:
221  return fn(*args, **kwargs)
222  finally:
223  torch.set_default_tensor_type(old_type)
224 
225  return wrapper
226 
227  return decorator
228 
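# Illustrative usage sketch (not part of the original file; the function name
# is hypothetical): the decorator swaps the global default tensor type for the
# duration of the wrapped call and restores the old default in `finally`.
@default_tensor_type(torch.DoubleTensor)
def _example_runs_in_double_precision():
    # With torch.DoubleTensor as the default, factory calls without an
    # explicit dtype produce float64 tensors.
    assert torch.empty(2, 2).dtype == torch.float64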
229 
230 def _assertGradAndGradgradChecks(test_case, apply_fn, inputs):
231  # Call the assert functions rather than returning a bool, so that a
232  # failure reports whether it came from gradcheck or gradgradcheck.
233  test_case.assertTrue(gradcheck(apply_fn, inputs))
234  test_case.assertTrue(gradgradcheck(apply_fn, inputs))
235 
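# Illustrative usage sketch (not part of the original file; the function name
# is hypothetical): gradcheck/gradgradcheck expect double-precision inputs
# with requires_grad=True so the finite-difference comparison is reliable.
def _example_grad_and_gradgrad_check(test_case):
    # tanh is an arbitrary smooth elementwise op; any differentiable fn works.
    x = torch.randn(3, 4, dtype=torch.double, requires_grad=True)
    _assertGradAndGradgradChecks(test_case, lambda t: t.tanh(), (x,))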
236 
237 class InputVariableMixin(object):
238  def _get_input(self):
239  input = TestBase._get_input(self, False)
240 
241  def map_variables(i):
242  if isinstance(i, torch.Tensor):
243  if i.is_floating_point():
244  i.requires_grad = True
245  return i
246  else:
247  return type(i)(map_variables(elem) for elem in i)
248 
249  return map_variables(input)
250 
251 
252 class NewModuleTest(InputVariableMixin, ModuleTest):
253  def __init__(self, *args, **kwargs):
254  super(NewModuleTest, self).__init__(*args, **kwargs)
255  self.cudnn = kwargs.get('cudnn', False)
256  self.check_inplace = kwargs.get('check_inplace', False)
257  self.check_gradgrad = kwargs.get('check_gradgrad', True)
258  self.skip_double = kwargs.get('skip_double', False)
259 
260  def _do_test(self, test_case, module, input):
261  test_case.check_jacobian(module, input, self.jacobian_input)
262 
263  if self.check_gradgrad:
264  # could probably unify check_jacobian above with this.
265  params = tuple(x for x in module.parameters())
266  _assertGradAndGradgradChecks(test_case,
267  lambda x, *args, **kw: test_case._forward(module, x), (input,) + params)
268 
269  # check if module can be printed
270  module.__repr__()
271 
272  if self.check_inplace:
273  # check if the inplace variant of the module gives the same result
274  # as the out-of-place
275 
276  module_ip = self.constructor(*self.constructor_args, inplace=True)
277 
278  input_version = input._version
279  with freeze_rng_state():
280  output = module(input)
281  test_case.assertEqual(input._version, input_version)
282 
283  input_ip = deepcopy(input)
284  input_ip_clone = input_ip.clone()
285  with freeze_rng_state():
286  output_ip = module_ip(input_ip_clone)
287  test_case.assertNotEqual(input_ip_clone._version, input_version)
288  test_case.assertEqual(output, output_ip)
289  grad = output.data.clone().normal_()
290  input.grad.data.zero_()
291  output.backward(grad)
292  output_ip.backward(grad)
293  test_case.assertEqual(input.grad, input_ip.grad)
294 
295  if isinstance(input, torch.LongTensor) and TEST_CUDA:
296  # check that cuda() moves module parameters to correct GPU device,
297  # and that float() casts parameters correctly
298 
299  input = input.cuda()
300  module.float().cuda()
301  module(input)
302  for p in module.parameters():
303  test_case.assertIsInstance(p, torch.cuda.FloatTensor)
304  test_case.assertEqual(p.get_device(), 0)
305 
306  if torch.cuda.device_count() > 1:
307  input = input.cuda(1)
308  module.cuda(1)
309  with torch.cuda.device(1):
310  module(input)
311  for p in module.parameters():
312  test_case.assertIsInstance(p, torch.cuda.FloatTensor)
313  test_case.assertEqual(p.get_device(), 1)
314  else:
315  # check that float()/double() casters work correctly
316 
317  # to float
318  if not isinstance(input, torch.LongTensor):
319  input = input.float()
320  module.float()
321  module(input)
322  for p in module.parameters():
323  test_case.assertIsInstance(p, torch.FloatTensor)
324 
325  # and back to double
326  if not isinstance(input, torch.LongTensor):
327  input = input.double()
328  module.double()
329  module(input)
330  for p in module.parameters():
331  test_case.assertIsInstance(p, torch.DoubleTensor)
332 
333  if TEST_CUDA and self.should_test_cuda:
334  # check that cuda() moves module parameters to correct GPU device,
335  # and that float() casts parameters correctly
336 
337  # to GPU0
338  input = input.float().cuda()
339  module.float().cuda()
340  module(input)
341  for p in module.parameters():
342  test_case.assertIsInstance(p, torch.cuda.FloatTensor)
343  test_case.assertEqual(p.get_device(), 0)
344 
345  # to CPU
346  input = input.cpu()
347  module.cpu()
348  module(input)
349  for p in module.parameters():
350  test_case.assertIsInstance(p, torch.FloatTensor)
351 
352  # back to GPU0
353  input = input.cuda()
354  module.cuda()
355  module(input)
356  for p in module.parameters():
357  test_case.assertIsInstance(p, torch.cuda.FloatTensor)
358  test_case.assertEqual(p.get_device(), 0)
359 
360  # test that the forward pass of the module runs correctly without cuDNN
361  if self.cudnn:
362  with torch.backends.cudnn.flags(enabled=False):
363  module(input)
364  for p in module.parameters():
365  test_case.assertIsInstance(p, torch.cuda.FloatTensor)
366  test_case.assertEqual(p.get_device(), 0)
367 
368  if torch.cuda.device_count() >= 2:
369  # test cross-GPU transfer works
370  # to GPU1
371  input = input.cuda(1)
372  module.cuda(1)
373  with torch.cuda.device(1):
374  module(input)
375  for p in module.parameters():
376  test_case.assertIsInstance(p, torch.cuda.FloatTensor)
377  test_case.assertEqual(p.get_device(), 1)
378 
379  if not self.skip_double:
380  # test double()
381  input = input.double().cuda()
382  module.double().cuda()
383  module(input)
384  for p in module.parameters():
385  test_case.assertIsInstance(p, torch.cuda.DoubleTensor)
386  test_case.assertEqual(p.get_device(), 0)
387 
388  # test half()
389  input = input.half().cuda()
390  module.half().cuda()
391  module(input)
392  for p in module.parameters():
393  test_case.assertIsInstance(p, torch.cuda.HalfTensor)
394  test_case.assertEqual(p.get_device(), 0)
395 
396  def _get_target(self):
397  return self._get_arg('target', False)
398 
399  @property
400  def constructor_args(self):
401  return self._get_arg('constructor_args', False)
402 
403 
404 class NewCriterionTest(InputVariableMixin, CriterionTest):
405  # TODO: check that criterions don't ignore grad_output
406 
407  def __init__(self, *args, **kwargs):
408  super(NewCriterionTest, self).__init__(*args, **kwargs)
409  self.check_gradgrad = kwargs.get('check_gradgrad', True)
410  self.check_half = kwargs.get('check_half', True)
411  self.convert_target = kwargs.get('convert_target', True)
412 
413  def _do_extra_tests(self, test_case, module, input, target):
414  if not self.check_gradgrad:
415  return
416 
417  test_case.assertFalse(target.requires_grad)
418 
419  params = tuple(x for x in module.parameters())
420  if not isinstance(input, tuple):
421  inputs = (input,) + params
422 
423  def apply_fn(input, *params):
424  return module(input, target)
425  else:
426  inputs = input + params
427 
428  def apply_fn(input1, input2, *params):
429  return module(input1, input2, target)
430 
431  # TODO: we don't pass `target` as part of inputs because we don't
432  # currently compute the gradient w.r.t. target for loss functions.
433  gradcheck(apply_fn, inputs)
434  gradgradcheck(apply_fn, inputs)
435 
436  def test_cuda(self, test_case, dtype=None, extra_args=None):
437  def convert_dtype(obj, dtype, requires_grad=False):
438  if isinstance(obj, torch.Tensor):
439  return obj.detach().to(dtype=dtype).requires_grad_(requires_grad)
440  elif isinstance(obj, torch.Tensor):
441  return obj.to(dtype)
442  elif isinstance(obj, tuple):
443  return tuple(convert_dtype(o, dtype, requires_grad) for o in obj)
444  else:
445  return obj
446 
447  if not TEST_CUDA or not self.should_test_cuda:
448  raise unittest.SkipTest('Excluded from CUDA tests')
449  try:
450  cpu_input = self._get_input()
451  cpu_target = self._get_target()
452  cpu_module = self.constructor(*self.constructor_args)
453  gpu_module = self.constructor(*self.constructor_args)
454 
455  # Convert input, target and module parameters to dtype
456  if dtype is not None:
457  cpu_input = convert_dtype(cpu_input, dtype, True)
458  # NLLLoss requires target to be LongTensor
459  if not isinstance(cpu_target, torch.LongTensor) and self.convert_target:
460  cpu_target = convert_dtype(cpu_target, dtype)
461  cpu_module.type(dtype)
462  gpu_module.type(dtype)
463 
464  # GPU setup
465  gpu_input = to_gpu(cpu_input)
466  gpu_target = to_gpu(cpu_target)
467  gpu_module.cuda()
468 
469  # torch.HalfTensor doesn't support most operations, converting back to default
470  if dtype == torch.half:
471  cpu_input = self._get_input()
472  cpu_target = self._get_target()
473  # Loss modules with weights require consistent input/module weight types
474  cpu_module = self.constructor(*self.constructor_args)
475 
476  cpu_output = test_case._forward_criterion(cpu_module, cpu_input, cpu_target, extra_args=extra_args)
477  gpu_output = test_case._forward_criterion(gpu_module, gpu_input, gpu_target, extra_args=extra_args)
478  # dtype can be None, so set precision in this way instead of a precision map
479  test_case.assertEqual(cpu_output, gpu_output, 1e-1 if dtype == torch.half else 4e-4)
480 
481  cpu_gradInput = test_case._backward_criterion(cpu_module, cpu_input, cpu_target, extra_args=extra_args)
482  gpu_gradInput = test_case._backward_criterion(gpu_module, gpu_input, gpu_target, extra_args=extra_args)
483  test_case.assertEqual(cpu_gradInput, gpu_gradInput, 1e-1 if dtype == torch.half else 4e-4)
484  except NotImplementedError:
485  pass
486 
487  def _get_target(self):
488  return self._get_arg('target', False)
489 
490  @property
491  def constructor_args(self):
492  return self._get_arg('constructor_args', False)
493 
494  @property
495  def extra_args(self):
496  return self._get_arg('extra_args', False)
497 
498 
499 class TestNN(NNTestCase):
500  _do_cuda_memory_leak_check = True
501 
502  def _forward(self, module, input):
503  with freeze_rng_state():
504  return module(input)
505 
506  def _backward(self, module, input, output, grad_output, create_graph=False):
507  output.backward(grad_output, retain_graph=True, create_graph=create_graph)
508  if input.grad is None:
509  return None
510  return input.grad.data
511 
512  def _forward_criterion(self, criterion, input, target, extra_args=None):
513  if extra_args is None:
514  extra_args = tuple()
515  if isinstance(input, tuple):
516  args = input + (target,) + extra_args
517  output = criterion(*args)
518  else:
519  output = criterion(input, target, *extra_args)
520  return output
521 
522  def _backward_criterion(self, criterion, input, target, gradOutput=None, extra_args=None):
523  if extra_args is None:
524  extra_args = tuple()
525  input_tuple = input if isinstance(input, tuple) else (input,)
526  for i in input_tuple:
527  if i.grad is not None:
528  i.grad.data.zero_()
529  args = input_tuple + (target,) + extra_args
530  if gradOutput is None:
531  gradOutput = torch.ones(())
532  criterion(*args).backward(gradOutput.type_as(input_tuple[0]))
533  if isinstance(input, tuple):
534  return tuple(map(lambda i: i.grad.data, input))
535  else:
536  return input.grad.data
537 
538  def _zero_grad_parameters(self, module):
539  for p in module.parameters():
540  if p.grad is not None:
541  with torch.no_grad():
542  p.grad.zero_()
543  p.grad.detach_()
544 
545  def _get_parameters(self, module):
546  params = []
547  d_params = []
548  for p in module.parameters():
549  params.append(p)
550  d_params.append(p.grad)
551  return params, d_params
552 
553  def _create_basic_net(self):
554  class Layer(nn.Module):
555  def __init__(self):
556  super(Layer, self).__init__()
557  self.layer_dummy_param = Parameter(torch.Tensor(3, 5))
558  self.register_buffer('layer_dummy_buf', torch.zeros(1, 3, 3, 7))
559 
560  class Net(nn.Module):
561  def __init__(self):
562  super(Net, self).__init__()
563  self.l1 = Layer()
564  self.dummy_param = Parameter(torch.Tensor(3, 5))
565  self.register_buffer('dummy_buf', torch.zeros(7, 3, 3, 1))
566 
567  l = Layer()
568  n = Net()
569  s = nn.Sequential(n, n)
570 
571  return l, n, s
572 
573  def test_module_backcompat(self):
574  from torch.serialization import SourceChangeWarning
575  path = download_file('https://download.pytorch.org/test_data/linear.pt')
576  with warnings.catch_warnings():
577  warnings.simplefilter('ignore', SourceChangeWarning)
578  m = torch.load(path)
579  input = torch.randn(2, 3, dtype=torch.float)
580  self.assertEqual(m(input).size(), (2, 5))
581 
582  def test_share_memory(self):
583  class Net(nn.Module):
584  def __init__(self):
585  super(Net, self).__init__()
586  self.p = nn.Parameter(torch.eye(5))
587  self.par = nn.ParameterList()
588  self.par.append(nn.Parameter(torch.randn(10)))
589 
590  def forward(self, inp):
591  return inp.clone()
592 
593  net = Net()
594  for p in net.parameters():
595  self.assertFalse(p.storage().is_shared())
596  for b in net.buffers():
597  self.assertFalse(b.storage().is_shared())
598  net.share_memory()
599  for p in net.parameters():
600  self.assertTrue(p.storage().is_shared())
601  for b in net.buffers():
602  self.assertTrue(b.storage().is_shared())
603 
604  def test_hooks(self):
605  module = nn.Sigmoid()
606  input = torch.ones(5, 5, requires_grad=True)
607 
608  counter = {
609  'forwards': 0,
610  'backwards': 0
611  }
612 
613  def fw_hook(inc, h_module, input, output):
614  self.assertIsInstance(input, tuple)
615  self.assertTrue(isinstance(output, torch.Tensor))
616  self.assertTrue(h_module is module)
617  self.assertEqual(input[0].data, torch.ones(5, 5))
618  self.assertEqual(output.data, torch.Tensor(5, 5).fill_(1 / (1 + 1 / math.e)))
619  counter['forwards'] += inc
620 
621  def bw_hook(inc, h_module, grad_input, grad_output):
622  self.assertIsInstance(grad_input, tuple)
623  self.assertIsInstance(grad_output, tuple)
624  self.assertTrue(h_module is module)
625  self.assertEqual(grad_output[0].data, torch.ones(5, 5) * 2)
626  counter['backwards'] += inc
627 
628  test_fwd = module.register_forward_hook(lambda *args: fw_hook(1, *args))
629 
630  module(input)
631  module(input)
632  self.assertEqual(counter['forwards'], 2)
633  self.assertEqual(counter['backwards'], 0)
634 
635  test_bwd = module.register_backward_hook(
636  lambda *args: bw_hook(1, *args))
637 
638  output = module(input)
639  self.assertEqual(counter['forwards'], 3)
640  self.assertEqual(counter['backwards'], 0)
641 
642  output.backward(torch.ones(5, 5) * 2, retain_graph=True)
643  self.assertEqual(counter['forwards'], 3)
644  self.assertEqual(counter['backwards'], 1)
645 
646  output.backward(torch.ones(5, 5) * 2, retain_graph=True)
647  self.assertEqual(counter['forwards'], 3)
648  self.assertEqual(counter['backwards'], 2)
649 
650  test2_fwd = module.register_forward_hook(lambda *args: fw_hook(2, *args))
651 
652  output = module(input)
653  self.assertEqual(counter['forwards'], 6)
654  self.assertEqual(counter['backwards'], 2)
655 
656  test2_bwd = module.register_backward_hook(lambda *args: bw_hook(2, *args))
657 
658  module(input).backward(torch.ones(5, 5) * 2)
659  self.assertEqual(counter['forwards'], 9)
660  self.assertEqual(counter['backwards'], 5)
661 
662  test2_bwd.remove()
663 
664  module(input).backward(torch.ones(5, 5) * 2)
665  self.assertEqual(counter['forwards'], 12)
666  self.assertEqual(counter['backwards'], 6)
667 
668  test2_fwd.remove()
669 
670  module(input).backward(torch.ones(5, 5) * 2)
671  self.assertEqual(counter['forwards'], 13)
672  self.assertEqual(counter['backwards'], 7)
673 
674  test_fwd.remove()
675  test_bwd.remove()
676 
677  def test_hook_cpp(self):
678  counter = [0]
679  bn = nn.BatchNorm1d(5)
680 
681  def hook(module, grad_inputs, grad_outputs):
682  counter[0] += 1
683  self.assertEqual(len(grad_inputs), 3)
684  self.assertEqual(len(grad_outputs), 1)
685  self.assertEqual(module, bn)
686 
687  bn.register_backward_hook(hook)
688  output = bn(torch.randn(5, 5, requires_grad=True))
689  output.sum().backward()
690 
691  def test_hook_fail(self):
692  module = nn.Sigmoid()
693  input = torch.randn(5, 5, requires_grad=True)
694 
695  def fw_fail1(self, input, output):
696  return output
697 
698  def fw_fail2(self, input, output):
699  return input
700 
701  def bw_fail1(self, grad_input, grad_output):
702  return grad_input[:-1]
703 
704  def bw_fail2(self, grad_input, grad_output):
705  return grad_input + (torch.randn(2, 2),)
706 
707  with module.register_forward_hook(fw_fail1):
708  with self.assertRaises(RuntimeError) as err:
709  module(input)
710  self.assertIn("fw_fail", err.exception.args[0])
711  self.assertIn("didn't return None", err.exception.args[0])
712 
713  with module.register_forward_hook(fw_fail2):
714  with self.assertRaises(RuntimeError) as err:
715  module(input)
716  self.assertIn("fw_fail2", err.exception.args[0])
717  self.assertIn("didn't return None", err.exception.args[0])
718 
719  with module.register_backward_hook(bw_fail1):
720  with self.assertRaises(RuntimeError) as err:
721  module(input).sum().backward()
722  self.assertIn("bw_fail", err.exception.args[0])
723  self.assertIn("got 0, but expected 1", err.exception.args[0])
724 
725  with module.register_backward_hook(bw_fail2):
726  with self.assertRaises(RuntimeError) as err:
727  module(input).sum().backward()
728  self.assertIn("bw_fail2", err.exception.args[0])
729  self.assertIn("got 2, but expected 1", err.exception.args[0])
730 
731  def test_hook_writeable(self):
732  module = nn.Linear(5, 5)
733  input = torch.randn(5, 5, requires_grad=True)
734 
735  def bw_hook(module, grad_input, grad_output):
736  for grad in grad_input:
737  self.assertTrue(isinstance(grad, torch.Tensor))
738  for grad in grad_output:
739  self.assertTrue(isinstance(grad, torch.Tensor))
740  return tuple(gi * 2 for gi in grad_input)
741 
742  module.register_backward_hook(bw_hook)
743  module(input).backward(torch.ones(5, 5))
744  expected_grad = torch.ones(5, 5).mm(module.weight.data) * 2
745  self.assertEqual(input.grad.data, expected_grad)
746 
747  def test_to(self):
748  m = nn.Linear(3, 5)
749  self.assertIs(m, m.to('cpu'))
750  self.assertIs(m, m.to('cpu', dtype=torch.float32))
751  self.assertEqual(m.double(), m.to(torch.float64))
752  self.assertRaises(RuntimeError, lambda: m.to('cpu', copy=True))
753 
754 if torch.cuda.is_available():
755  for cuda in ['cuda', 'cuda:0' if torch.cuda.device_count() == 1 else 'cuda:1']:
756  m2 = m.cuda(device=cuda)
757  self.assertIs(m2, m2.to(cuda))
758  self.assertEqual(m, m2.to('cpu'))
759  self.assertEqual(m2, m.to(cuda))
760  self.assertIs(m2, m2.to(dtype=torch.float32))
761  self.assertEqual(m2.double(), m2.to(dtype=torch.float64))
762 
763  def test_zero_grad(self):
764  i = torch.randn(2, 5, requires_grad=True)
765  module = nn.Linear(5, 5)
766  for p in module.parameters():
767  p.requires_grad = False
768  module.zero_grad()
769 
770  module.weight.requires_grad = True
771  module.zero_grad()
772  self.assertIsNone(module.weight.grad) # uninitialized grad
773 
774  module(i).sum().backward()
775  self.assertIsNotNone(module.weight.grad)
776  self.assertGreater(module.weight.grad.data.abs().sum(), 0)
777  module.zero_grad()
778  self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
779 
780  module.bias.requires_grad = True
781  module.zero_grad()
782  self.assertIsNotNone(module.weight.grad)
783  self.assertIsNone(module.bias.grad)
784  module(i).sum().backward()
785  self.assertIsNotNone(module.weight.grad)
786  self.assertIsNotNone(module.bias.grad)
787  self.assertGreater(module.weight.grad.data.abs().sum(), 0)
788  self.assertGreater(module.bias.grad.data.abs().sum(), 0)
789  module.zero_grad()
790  self.assertEqual(module.weight.grad.data, module.weight.data.clone().zero_())
791  self.assertEqual(module.bias.grad.data, module.bias.data.clone().zero_())
792 
793  def test_no_grad(self):
794  module = nn.Conv2d(2, 5, kernel_size=3, padding=1)
795  input = torch.randn(1, 2, 10, 10)
796  x = input
797  y = input.clone()
798 
799  output = module(x)
800  self.assertTrue(output.requires_grad)
801  output.backward(torch.ones(1, 5, 10, 10))
802 
803  with torch.no_grad():
804  output2 = module(y)
805  self.assertFalse(output2.requires_grad)
806  self.assertRaises(RuntimeError, lambda: output2.backward(torch.ones(1, 5, 10, 10)))
807 
808  def test_invalid_conv1d(self):
809  module = nn.Conv1d(in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True)
810  input = torch.randn(1, 3, 4)
811  with self.assertRaisesRegex(RuntimeError,
812  r'Calculated padded input size per channel: \(4\). ' +
813  r'Kernel size: \(10\). Kernel size can\'t be greater than actual input size'):
814  module(input)
815 
816  # Negative stride check
817  module = nn.Conv1d(in_channels=3, out_channels=6, kernel_size=3, stride=-1, bias=True)
818  input = torch.randn(1, 3, 4)
819  with self.assertRaisesRegex(RuntimeError, 'negative stride is not supported'):
820  module(input)
821 
822  def test_invalid_conv2d(self):
823  module = torch.nn.Conv2d(1, 1, kernel_size=3, dilation=2, stride=2)
824  input = torch.empty(1, 1, 4, 4)
825  self.assertRaises(RuntimeError, lambda: module(input))
826 
827  module = nn.Conv2d(in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True)
828  input = torch.randn(1, 3, 1, 1)
829  with self.assertRaisesRegex(RuntimeError,
830  r'Calculated padded input size per channel: \(1 x 1\). ' +
831  r'Kernel size: \(10 x 10\). Kernel size can\'t be greater than actual input size'):
832  module(input)
833 
834  # Negative stride check
835  module = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=4, stride=-1, bias=True)
836  input = torch.randn(1, 3, 4, 4)
837  with self.assertRaisesRegex(RuntimeError, 'negative stride is not supported'):
838  module(input)
839 
840  def test_invalid_conv3d(self):
841  module = torch.nn.Conv3d(1, 1, kernel_size=3, dilation=2, stride=2)
842  input = torch.empty(1, 1, 4, 4, 4)
843  self.assertRaises(RuntimeError, lambda: module(input))
844 
845  # Negative stride check
846  module = torch.nn.Conv3d(1, 1, kernel_size=3, stride=-2)
847  input = torch.empty(1, 1, 4, 4, 4)
848  with self.assertRaisesRegex(RuntimeError, 'negative stride is not supported'):
849  module(input)
850 
851  def _test_dropout(self, cls, cuda, input):
852  p = 0.2
853  device = torch.device("cuda") if cuda else torch.device("cpu")
854  input = input.to(device).fill_(1 - p)
855 
856  module = cls(p)
857  input_var = input.clone().requires_grad_()
858  output = module(input_var)
859  self.assertLess(abs(output.data.mean() - (1 - p)), 0.05)
860  output.backward(input)
861  self.assertLess(abs(input_var.grad.data.mean() - (1 - p)), 0.05)
862 
863  module = cls(p, True)
864  input_var = input.clone().requires_grad_()
865  output = module(input_var + 0)
866  self.assertLess(abs(output.data.mean() - (1 - p)), 0.05)
867  output.backward(input)
868  self.assertLess(abs(input_var.grad.data.mean() - (1 - p)), 0.05)
869 
870  # check eval mode doesn't change anything
871  for inplace in [True, False]:
872  module = cls(p, inplace).eval()
873  self.assertEqual(input, module(input))
874 
875  # Check that these don't raise errors
876  module.__repr__()
877  str(module)
878 
879  def _test_alpha_dropout(self, cls, input):
880  mean = input.mean()
881  std = input.std()
882 
883  for p in [0.2, 0.5, 0.8]:
884  module = cls(p)
885  input_var = input.detach().clone().requires_grad_()
886  output = module(input_var)
887  # output mean should be close to input mean
888  self.assertLess(abs(output.data.mean() - mean), 0.1)
889  # output std should be close to input std
890  self.assertLess(abs(output.data.std() - std), 0.1)
891  output.backward(input)
892 
893  def test_parameters_and_named_parameters(self):
894  def names(named_parameters):
895  return [k for k, _ in named_parameters]
896 
897  l, n, s = self._create_basic_net()
898 
899  self.assertEqual(len(list(l.parameters())), 1)
900  self.assertEqual(
901  names(l.named_parameters()),
902  ['layer_dummy_param'])
903 
904  self.assertEqual(len(list(n.parameters())), 2)
905  self.assertEqual(
906  names(n.named_parameters()),
907  ['dummy_param', 'l1.layer_dummy_param'])
908 
909  self.assertEqual(len(list(n.parameters(recurse=False))), 1)
910  self.assertEqual(
911  names(n.named_parameters(recurse=False)),
912  ['dummy_param'])
913 
914  self.assertEqual(len(list(s.parameters())), 2)
915  self.assertEqual(
916  names(s.named_parameters()),
917  ['0.dummy_param', '0.l1.layer_dummy_param'])
918 
919  def test_buffers_and_named_buffers(self):
920  def names(named_buffers):
921  return [k for k, _ in named_buffers]
922 
923  l, n, s = self._create_basic_net()
924 
925  self.assertEqual(len(list(l.buffers())), 1)
926  self.assertEqual(
927  names(l.named_buffers()),
928  ['layer_dummy_buf'])
929 
930  self.assertEqual(len(list(n.buffers())), 2)
931  self.assertEqual(
932  names(n.named_buffers()),
933  ['dummy_buf', 'l1.layer_dummy_buf'])
934 
935  self.assertEqual(len(list(n.buffers(recurse=False))), 1)
936  self.assertEqual(
937  names(n.named_buffers(recurse=False)),
938  ['dummy_buf'])
939 
940  self.assertEqual(len(list(s.buffers())), 2)
941  self.assertEqual(
942  names(s.named_buffers()),
943  ['0.dummy_buf', '0.l1.layer_dummy_buf'])
944 
945  def test_call_supports_python_dict_output(self):
946  class Net(nn.Module):
947  def __init__(self):
948  super(Net, self).__init__()
949  self.l1 = nn.Linear(10, 20)
950  self.register_backward_hook(self.hook)
951  self.check_backward_hook_flag = False
952 
953  def hook(self, module, grad_out, grad_in):
954  self.check_backward_hook_flag = True
955 
956  def forward(self, inputs):
957  return {"output": self.l1(inputs).sum()}
958 
959  net = Net()
960  model_output = net(torch.randn([5, 10]))
961  model_output["output"].backward()
962  self.assertTrue(net.check_backward_hook_flag)
963 
964  def test_children(self):
965  l1 = nn.Linear(2, 2)
966  l2 = nn.Linear(2, 2)
967  l3 = nn.Linear(2, 2)
968  l4 = nn.Linear(2, 2)
969  subnet = nn.Sequential(l3, l4)
970  s = nn.Sequential(l1, l2, l1, l2, subnet)
971  self.assertEqual(list(s.children()), [l1, l2, subnet])
972 
973  def test_dir(self):
974  linear = nn.Linear(2, 2)
975  linear._test_submodule = nn.Linear(2, 2)
976  linear._test_parameter = Parameter(torch.Tensor(2, 2))
977  linear.register_buffer('_test_buffer', torch.Tensor(2, 2))
978  keys = dir(linear)
979  self.assertIn('_test_submodule', keys)
980  self.assertIn('_test_parameter', keys)
981  self.assertIn('_test_buffer', keys)
982 
983  for key in keys:
984  self.assertTrue(hasattr(linear, key))
985 
986  def test_repr(self):
987  # no extra information or sub-modules
988  empty_sequential = nn.Sequential()
989  expected_repr_empty = 'Sequential()'
990  self.assertEqual(repr(empty_sequential), expected_repr_empty)
991 
992  # one liner extra information
993  linear = nn.Linear(1, 1)
994  expected_repr_linear = 'Linear(in_features=1, out_features=1, bias=True)'
995  self.assertEqual(repr(linear), expected_repr_linear)
996 
997  # sub-modules repr
998  sequential = nn.Sequential(linear)
999  expected_repr_sequential = 'Sequential(\n' \
1000  ' (0): Linear(in_features=1, out_features=1, bias=True)\n' \
1001  ')'
1002  self.assertEqual(repr(sequential), expected_repr_sequential)
1003 
1004  def test_dir_digit(self):
1005  model = nn.Sequential(nn.Linear(2, 2))
1006  keys = dir(model)
1007  self.assertNotIn('0', keys)
1008 
1009  def test_named_children(self):
1010  l1 = nn.Linear(2, 2)
1011  l2 = nn.Linear(2, 2)
1012  l3 = nn.Linear(2, 2)
1013  l4 = nn.Linear(2, 2)
1014  subnet = nn.Sequential(l3, l4)
1015  s = nn.Sequential()
1016  with self.assertRaises(KeyError):
1017  s.add_module('', l1)
1018  with self.assertRaises(KeyError):
1019  s.add_module('name.with.dot', l1)
1020  s.add_module('layer1', l1)
1021  s.add_module('layer2', l2)
1022  s.add_module('layer3', l1)
1023  s.add_module('layer4', l2)
1024  s.add_module('subnet', subnet)
1025  self.assertEqual(list(s.named_children()), [('layer1', l1), ('layer2', l2), ('subnet', subnet)])
1026 
1027  def test_modules(self):
1028  class Net(nn.Module):
1029  def __init__(self):
1030  super(Net, self).__init__()
1031  self.l1 = l
1032  self.l2 = l
1033  self.param = torch.empty(3, 5)
1034 
1035  l = nn.Linear(10, 20)
1036  n = Net()
1037  s = nn.Sequential(n, n, n, n)
1038  self.assertEqual(list(s.modules()), [s, n, l])
1039 
1040  def test_named_modules(self):
1041  class Net(nn.Module):
1042  def __init__(self):
1043  super(Net, self).__init__()
1044  self.l1 = l
1045  self.l2 = l
1046  self.param = torch.empty(3, 5)
1047  self.block = block
1048  l = nn.Linear(10, 20)
1049  l1 = nn.Linear(10, 20)
1050  l2 = nn.Linear(10, 20)
1051  block = nn.Sequential()
1052  block.add_module('linear1', l1)
1053  block.add_module('linear2', l2)
1054  n = Net()
1055  s = nn.Sequential(n, n, n, n)
1056  self.assertEqual(list(s.named_modules()), [('', s), ('0', n), ('0.l1', l),
1057  ('0.block', block), ('0.block.linear1', l1),
1058  ('0.block.linear2', l2)])
1059 
1060  def test_register_buffer_raises_error_if_name_is_not_string(self):
1061  m = nn.Module()
1062  expected_error = 'buffer name should be a string. Got '
1063  with self.assertRaisesRegex(TypeError, expected_error + 'int'):
1064  m.register_buffer(1, torch.rand(5))
1065  with self.assertRaisesRegex(TypeError, expected_error + 'NoneType'):
1066  m.register_buffer(None, torch.rand(5))
1067 
1068  def test_register_buffer_raises_error_if_attr_exists(self):
1069  m = nn.Module()
1070  m.attribute_name = 5
1071  with self.assertRaises(KeyError):
1072  m.register_buffer('attribute_name', torch.rand(5))
1073 
1074  del m.attribute_name
1075  m.register_parameter('attribute_name', nn.Parameter())
1076  with self.assertRaises(KeyError):
1077  m.register_buffer('attribute_name', torch.rand(5))
1078 
1079  del m.attribute_name
1080  m.add_module('attribute_name', nn.Module())
1081  with self.assertRaises(KeyError):
1082  m.register_buffer('attribute_name', torch.rand(5))
1083 
1084  def test_register_buffer_raises_error_if_not_tensor(self):
1085  m = nn.Module()
1086  with self.assertRaises(TypeError):
1087  m.register_buffer('attribute_name', 5)
1088 
1089  def test_register_buffer_allows_overwriting_with_same_name(self):
1090  m = nn.Module()
1091  buffer1 = torch.rand(5)
1092  buffer2 = buffer1 + 5
1093  buffer3 = None
1094  m.register_buffer('buffer_name', buffer1)
1095  self.assertEqual(m.buffer_name, buffer1)
1096  m.register_buffer('buffer_name', buffer2)
1097  self.assertEqual(m.buffer_name, buffer2)
1098  m.register_buffer('buffer_name', buffer3)
1099  self.assertEqual(m.buffer_name, buffer3)
1100 
1101  def test_register_parameter_raises_error_if_name_is_not_string(self):
1102  m = nn.Module()
1103  expected_error = 'parameter name should be a string. Got '
1104  with self.assertRaisesRegex(TypeError, expected_error + 'int'):
1105  m.register_parameter(1, nn.Parameter())
1106  with self.assertRaisesRegex(TypeError, expected_error + 'NoneType'):
1107  m.register_parameter(None, nn.Parameter())
1108 
1109  def test_register_parameter_raises_error_if_attr_exists(self):
1110  m = nn.Module()
1111  m.attribute_name = 5
1112  with self.assertRaises(KeyError):
1113  m.register_parameter('attribute_name', nn.Parameter())
1114 
1115  del m.attribute_name
1116  m.register_buffer('attribute_name', torch.rand(5))
1117  with self.assertRaises(KeyError):
1118  m.register_parameter('attribute_name', nn.Parameter())
1119 
1120  del m.attribute_name
1121  m.add_module('attribute_name', nn.Module())
1122  with self.assertRaises(KeyError):
1123  m.register_parameter('attribute_name', nn.Parameter())
1124 
1125  def test_register_parameter_allows_overwriting_with_same_name(self):
1126  m = nn.Module()
1127  param1 = nn.Parameter(torch.rand(5))
1128  param2 = nn.Parameter(param1.data + 5)
1129  param3 = None
1130  m.register_parameter('param_name', param1)
1131  self.assertEqual(m.param_name, param1)
1132  m.register_parameter('param_name', param2)
1133  self.assertEqual(m.param_name, param2)
1134  m.register_parameter('param_name', param3)
1135  self.assertEqual(m.param_name, param3)
1136 
1137  def test_add_module_raises_error_if_attr_exists(self):
1138  m = nn.Module()
1139  m.attribute_name = 5
1140  with self.assertRaises(KeyError):
1141  m.add_module('attribute_name', nn.Module())
1142 
1143  del m.attribute_name
1144  m.register_buffer('attribute_name', torch.rand(5))
1145  with self.assertRaises(KeyError):
1146  m.add_module('attribute_name', nn.Module())
1147 
1148  del m.attribute_name
1149  m.register_parameter('attribute_name', nn.Parameter())
1150  with self.assertRaises(KeyError):
1151  m.add_module('attribute_name', nn.Module())
1152 
1153  def test_Sequential_getitem(self):
1154  l1 = nn.Linear(10, 20)
1155  l2 = nn.Linear(20, 30)
1156  l3 = nn.Linear(30, 40)
1157  l4 = nn.Linear(40, 50)
1158  n = nn.Sequential(l1, l2, l3, l4)
1159  self.assertIs(n[0], l1)
1160  self.assertIs(n[1], l2)
1161  self.assertIs(n[2], l3)
1162  self.assertIs(n[3], l4)
1163  self.assertIs(n[torch.tensor(3, dtype=torch.int64)], l4)
1164  self.assertEqual(n[1:], nn.Sequential(l2, l3, l4))
1165  self.assertEqual(n[3:], nn.Sequential(l4))
1166  self.assertEqual(n[:-1], nn.Sequential(l1, l2, l3))
1167  self.assertEqual(n[:-3], nn.Sequential(l1))
1168  self.assertEqual(n[::-1], nn.Sequential(l4, l3, l2, l1))
1169 
1170  def test_Sequential_setitem(self):
1171  l1 = nn.Linear(10, 20)
1172  l2 = nn.Linear(20, 30)
1173  l3 = nn.Linear(30, 40)
1174  l4 = nn.Linear(40, 50)
1175  n = nn.Sequential(l1, l2, l3)
1176  n[0] = l4
1177  n[-1] = l4
1178  n[torch.tensor(1, dtype=torch.int16)] = l1
1179  self.assertIs(n[0], l4)
1180  self.assertIs(n[1], l1)
1181  self.assertIs(n[2], l4)
1182 
1183  def test_Sequential_setitem_named(self):
1184  l1 = nn.Linear(10, 20)
1185  l2 = nn.Linear(20, 30)
1186  l3 = nn.Linear(30, 40)
1187  l4 = nn.Linear(40, 50)
1188  n = nn.Sequential(OrderedDict([
1189  ('linear1', l1),
1190  ('linear2', l2),
1191  ('linear3', l3),
1192  ]))
1193 
1194  n[0] = l4
1195  n[-1] = l4
1196  self.assertEqual(n.linear1, l4)
1197  self.assertEqual(n.linear3, l4)
1198 
1199  def test_Sequential_delitem(self):
1200  l1 = nn.Linear(10, 20)
1201  l2 = nn.Linear(20, 30)
1202  l3 = nn.Linear(30, 40)
1203  l4 = nn.Linear(40, 50)
1204  n = nn.Sequential(l1, l2, l3, l4)
1205  del n[-1]
1206  self.assertEqual(n, nn.Sequential(l1, l2, l3))
1207  del n[1::2]
1208  self.assertEqual(n, nn.Sequential(l1, l3))
1209 
1210  def test_ModuleList(self):
1211  modules = [nn.ReLU(), nn.Linear(5, 5)]
1212  module_list = nn.ModuleList(modules)
1213 
1214  def check():
1215  self.assertEqual(len(module_list), len(modules))
1216  for m1, m2 in zip(modules, module_list):
1217  self.assertIs(m1, m2)
1218  for m1, m2 in zip(modules, module_list.children()):
1219  self.assertIs(m1, m2)
1220  for i in range(len(modules)):
1221  self.assertIs(module_list[i], modules[i])
1222 
1223  check()
1224  modules += [nn.Conv2d(3, 4, 3)]
1225  module_list += [modules[-1]]
1226  check()
1227  modules.insert(1, nn.Linear(3, 2))
1228  module_list.insert(1, modules[1])
1229  check()
1230  modules.append(nn.Tanh())
1231  module_list.append(modules[-1])
1232  check()
1233  next_modules = [nn.Linear(5, 5), nn.Sigmoid()]
1234  modules.extend(next_modules)
1235  module_list.extend(next_modules)
1236  check()
1237  modules[2] = nn.Conv2d(5, 3, 2)
1238  module_list[2] = modules[2]
1239  check()
1240  modules[-1] = nn.Conv2d(5, 2, 1)
1241  module_list[-1] = modules[-1]
1242  check()
1243  idx = torch.tensor(2, dtype=torch.int32)
1244  modules[2] = nn.Conv2d(5, 3, 2)
1245  module_list[idx] = modules[2]
1246  self.assertIs(module_list[idx], modules[2])
1247  check()
1248  self.assertEqual(module_list[1:], nn.ModuleList(modules[1:]))
1249  self.assertEqual(module_list[3:], nn.ModuleList(modules[3:]))
1250  self.assertEqual(module_list[:-1], nn.ModuleList(modules[:-1]))
1251  self.assertEqual(module_list[:-3], nn.ModuleList(modules[:-3]))
1252  self.assertEqual(module_list[::-1], nn.ModuleList(modules[::-1]))
1253  del module_list[-1]
1254  self.assertEqual(module_list, nn.ModuleList(modules[:-1]))
1255  del module_list[1::2]
1256  self.assertEqual(module_list, nn.ModuleList(modules[:-1][0::2]))
1257 
1258  with self.assertRaises(TypeError):
1259  module_list += nn.ReLU()
1260  with self.assertRaises(TypeError):
1261  module_list.extend(nn.ReLU())
1262 
1263  l1 = nn.Linear(1, 2)
1264  l2 = nn.Linear(2, 3)
1265  l3 = nn.Linear(3, 2)
1266  l4 = nn.Linear(2, 3)
1267  subnet = nn.Sequential(l3, l4)
1268  s = nn.Sequential(
1269  OrderedDict([
1270  ("layer1", l1),
1271  ("layer2", l2),
1272  ("layer3", l3),
1273  ("layer4", l4),
1274  ("subnet_layer", subnet)
1275  ])
1276  )
1277  modules = list(s.modules())
1278  module_list = nn.ModuleList()
1279  module_list.extend(s.modules())
1280  check()
1281 
1282  def test_ModuleDict(self):
1283  modules = OrderedDict([
1284  ('act', nn.ReLU()),
1285  ('conv', nn.Conv2d(10, 10, 5)),
1286  ('fc', nn.Linear(5, 5)),
1287  ])
1288 
1289  module_dict = nn.ModuleDict(modules)
1290 
1291  def check():
1292  self.assertEqual(len(module_dict), len(modules))
1293  for k1, m2 in zip(modules, module_dict.children()):
1294  self.assertIs(modules[k1], m2)
1295  for k1, k2 in zip(modules, module_dict):
1296  self.assertIs(modules[k1], module_dict[k2])
1297  for k in module_dict:
1298  self.assertIs(module_dict[k], modules[k])
1299  for k in module_dict.keys():
1300  self.assertIs(module_dict[k], modules[k])
1301  for k, v in module_dict.items():
1302  self.assertIs(modules[k], v)
1303  for k1, m2 in zip(modules, module_dict.values()):
1304  self.assertIs(modules[k1], m2)
1305  for k in modules.keys():
1306  self.assertTrue(k in module_dict)
1307  check()
1308 
1309  modules['conv'] = nn.Conv2d(3, 4, 3)
1310  module_dict['conv'] = modules['conv']
1311  check()
1312 
1313  next_modules = [
1314  ('fc2', nn.Linear(5, 5)),
1315  ('act', nn.Sigmoid()),
1316  ]
1317  modules.update(next_modules)
1318  module_dict.update(next_modules)
1319  check()
1320 
1321  next_modules = OrderedDict([
1322  ('fc3', nn.Linear(5, 5)),
1323  ('act2', nn.Sigmoid()),
1324  ])
1325  modules.update(next_modules)
1326  module_dict.update(next_modules)
1327  check()
1328 
1329  next_modules = {
1330  'fc4': nn.Linear(5, 5),
1331  'act3': nn.Sigmoid()
1332  }
1333  modules.update(sorted(next_modules.items()))
1334  module_dict.update(next_modules)
1335  check()
1336 
1337  del module_dict['fc']
1338  del modules['fc']
1339  check()
1340 
1341  with self.assertRaises(TypeError):
1342  module_dict.update(nn.ReLU())
1343 
1344  with self.assertRaises(TypeError):
1345  module_dict.update([nn.ReLU()])
1346 
1347  with self.assertRaises(ValueError):
1348  module_dict.update([[nn.ReLU()]])
1349 
1350  with self.assertRaises(TypeError):
1351  module_dict[1] = nn.ReLU()
1352 
1353  s = nn.Sequential(modules)
1354  module_dict = nn.ModuleDict(s.named_children())
1355  check()
1356 
1357  c = module_dict.pop('conv')
1358  self.assertIs(c, modules['conv'])
1359  modules.pop('conv')
1360  check()
1361 
1362  module_dict.clear()
1363  self.assertEqual(len(module_dict), 0)
1364  modules.clear()
1365  check()
1366 
1367  def test_ParameterList(self):
1368  def make_param():
1369  return Parameter(torch.randn(10, 10))
1370  parameters = [make_param(), make_param()]
1371  param_list = nn.ParameterList(parameters)
1372 
1373  def check():
1374  self.assertEqual(len(parameters), len(param_list))
1375  for p1, p2 in zip(parameters, param_list):
1376  self.assertIs(p1, p2)
1377  for p1, p2 in zip(parameters, param_list.parameters()):
1378  self.assertIs(p1, p2)
1379  for i in range(len(parameters)):
1380  self.assertIs(parameters[i], param_list[i])
1381 
1382  check()
1383  parameters += [make_param()]
1384  param_list += [parameters[-1]]
1385  check()
1386  parameters.append(make_param())
1387  param_list.append(parameters[-1])
1388  check()
1389  next_params = [make_param(), make_param()]
1390  parameters.extend(next_params)
1391  param_list.extend(next_params)
1392  check()
1393  parameters[2] = make_param()
1394  param_list[2] = parameters[2]
1395  check()
1396  parameters[-1] = make_param()
1397  param_list[-1] = parameters[-1]
1398  check()
1399  idx = torch.tensor(2, dtype=torch.int32)
1400  parameters[2] = make_param()
1401  param_list[idx] = parameters[2]
1402  self.assertIs(param_list[idx], parameters[2])
1403  check()
1404  self.assertEqual(param_list[1:], nn.ParameterList(parameters[1:]))
1405  self.assertEqual(param_list[3:], nn.ParameterList(parameters[3:]))
1406  self.assertEqual(param_list[:-1], nn.ParameterList(parameters[:-1]))
1407  self.assertEqual(param_list[:-3], nn.ParameterList(parameters[:-3]))
1408  self.assertEqual(param_list[::-1], nn.ParameterList(parameters[::-1]))
1409 
1410  with self.assertRaises(TypeError):
1411  param_list += make_param()
1412  with self.assertRaises(TypeError):
1413  param_list.extend(make_param())
1414 
1415  l1 = nn.Linear(1, 2)
1416  l2 = nn.Linear(2, 3)
1417  l3 = nn.Linear(3, 2)
1418  l4 = nn.Linear(2, 3)
1419  subnet = nn.Sequential(l3, l4)
1420  s = nn.Sequential(
1421  OrderedDict([
1422  ("layer1", l1),
1423  ("layer2", l2),
1424  ("layer3", l3),
1425  ("layer4", l4),
1426  ("subnet_layer", subnet)
1427  ])
1428  )
1429  parameters = list(s.parameters())
1430  param_list = nn.ParameterList()
1431  param_list.extend(s.parameters())
1432  check()
1433 
1434  def test_ParameterDict(self):
1435  parameters = OrderedDict([
1436  ('p1', Parameter(torch.randn(10, 10))),
1437  ('p2', Parameter(torch.randn(10, 10))),
1438  ('p3', Parameter(torch.randn(10, 10))),
1439  ])
1440 
1441  parameter_dict = nn.ParameterDict(parameters)
1442 
1443  def check():
1444  self.assertEqual(len(parameter_dict), len(parameters))
1445  for k1, m2 in zip(parameters, parameter_dict.parameters()):
1446  self.assertIs(parameters[k1], m2)
1447  for k1, k2 in zip(parameters, parameter_dict):
1448  self.assertIs(parameters[k1], parameter_dict[k2])
1449  for k in parameter_dict:
1450  self.assertIs(parameter_dict[k], parameters[k])
1451  for k in parameter_dict.keys():
1452  self.assertIs(parameter_dict[k], parameters[k])
1453  for k, v in parameter_dict.items():
1454  self.assertIs(v, parameters[k])
1455  for k1, m2 in zip(parameters, parameter_dict.values()):
1456  self.assertIs(parameters[k1], m2)
1457  for k in parameters.keys():
1458  self.assertTrue(k in parameter_dict)
1459 
1460  check()
1461 
1462  parameters['p4'] = Parameter(torch.randn(10, 10))
1463  parameter_dict['p4'] = parameters['p4']
1464  check()
1465 
1466  next_parameters = [
1467  ('p5', Parameter(torch.randn(10, 10))),
1468  ('p2', Parameter(torch.randn(10, 10))),
1469  ]
1470  parameters.update(next_parameters)
1471  parameter_dict.update(next_parameters)
1472  check()
1473 
1474  next_parameters = OrderedDict([
1475  ('p6', Parameter(torch.randn(10, 10))),
1476  ('p5', Parameter(torch.randn(10, 10))),
1477  ])
1478  parameters.update(next_parameters)
1479  parameter_dict.update(next_parameters)
1480  check()
1481 
1482  next_parameters = {
1483  'p8': Parameter(torch.randn(10, 10)),
1484  'p7': Parameter(torch.randn(10, 10))
1485  }
1486  parameters.update(sorted(next_parameters.items()))
1487  parameter_dict.update(next_parameters)
1488  check()
1489 
1490  del parameter_dict['p3']
1491  del parameters['p3']
1492  check()
1493 
1494  with self.assertRaises(TypeError):
1495  parameter_dict.update(1)
1496 
1497  with self.assertRaises(TypeError):
1498  parameter_dict.update([1])
1499 
1500  with self.assertRaises(ValueError):
1501  parameter_dict.update(Parameter(torch.randn(10, 10)))
1502 
1503  with self.assertRaises(TypeError):
1504  parameter_dict[1] = Parameter(torch.randn(10, 10))
1505 
1506  p_pop = parameter_dict.pop('p4')
1507  self.assertIs(p_pop, parameters['p4'])
1508  parameters.pop('p4')
1509  check()
1510 
1511  parameter_dict.clear()
1512  self.assertEqual(len(parameter_dict), 0)
1513  parameters.clear()
1514  check()
1515 
1516  def test_add_module(self):
1517  l = nn.Linear(10, 20)
1518  net = nn.Module()
1519  net.l = l
1520  net.l2 = l
1521  net.add_module('empty', None)
1522  self.assertEqual(net.l, l)
1523  self.assertEqual(net.l2, l)
1524  self.assertEqual(net.empty, None)
1525  net.add_module('l3', l)
1526  self.assertEqual(net.l3, l)
1527  l3 = nn.Linear(20, 10)
1528  net.add_module('l', l3)
1529  self.assertEqual(net.l, l3)
1530  self.assertRaises(TypeError, lambda: net.add_module('x', 'non-module'))
1531  self.assertRaisesRegex(TypeError, 'module name should be a string. Got int',
1532  lambda: net.add_module(1, l))
1533  self.assertRaisesRegex(TypeError, 'module name should be a string. Got NoneType',
1534  lambda: net.add_module(None, l))
1535 
1536  def test_module_to_argparse(self):
1537  net = nn.Sequential(nn.Linear(3, 3))
1538  cpu = torch.device('cpu')
1539  with self.assertRaises(TypeError):
1540  net.to(cpu, True)
1541  with self.assertRaises(TypeError):
1542  net.to(torch.long)
1543  with self.assertRaises(TypeError):
1544  net.to(None, True)
1545  with self.assertRaises(TypeError):
1546  net.to(cpu, torch.long, True)
1547  with self.assertRaises(TypeError):
1548  net.to(cpu, dtype=torch.long, non_blocking=True)
1549  with self.assertRaises(TypeError):
1550  net.to([])
1551  with self.assertRaises(TypeError):
1552  net.to({}, non_blocking=True)
1553  with self.assertRaises(TypeError):
1554  net.to(torch.tensor(3, dtype=torch.long), non_blocking=True)
1555  with self.assertRaises(TypeError):
1556  net.to(cpu, torch.tensor(3, dtype=torch.long), non_blocking=True)
1557 
1558  def test_type(self):
1559  l = nn.Linear(10, 20)
1560  net = nn.Module()
1561  net.l = l
1562  net.l2 = l
1563  net.add_module('empty', None)
1564  net.register_buffer('indices', torch.LongTensor(1))
1565  net.float()
1566  self.assertIsInstance(l.weight.data, torch.FloatTensor)
1567  self.assertIsInstance(l.bias.data, torch.FloatTensor)
1568  self.assertIsInstance(net.indices, torch.LongTensor)
1569  net.double()
1570  self.assertIsInstance(l.weight.data, torch.DoubleTensor)
1571  self.assertIsInstance(l.bias.data, torch.DoubleTensor)
1572  self.assertIsInstance(net.indices, torch.LongTensor)
1573  net.to(torch.half)
1574  self.assertIsInstance(l.weight.data, torch.HalfTensor)
1575  self.assertIsInstance(l.bias.data, torch.HalfTensor)
1576  self.assertIsInstance(net.indices, torch.LongTensor)
1577  if TEST_CUDA:
1578  net.float().cuda()
1579  self.assertIsInstance(l.weight.data, torch.cuda.FloatTensor)
1580  self.assertIsInstance(l.bias.data, torch.cuda.FloatTensor)
1581  self.assertIsInstance(net.indices, torch.cuda.LongTensor)
1582  net.cpu()
1583  self.assertIsInstance(l.weight.data, torch.FloatTensor)
1584  self.assertIsInstance(l.bias.data, torch.FloatTensor)
1585  self.assertIsInstance(net.indices, torch.LongTensor)
1586  net.to("cuda", torch.double, True)
1587  self.assertIsInstance(l.weight.data, torch.cuda.DoubleTensor)
1588  self.assertIsInstance(l.bias.data, torch.cuda.DoubleTensor)
1589  self.assertIsInstance(net.indices, torch.cuda.LongTensor)
1590  net.to(torch.empty(1, device="cuda:0", dtype=torch.half))
1591  self.assertIsInstance(l.weight.data, torch.cuda.HalfTensor)
1592  self.assertIsInstance(l.bias.data, torch.cuda.HalfTensor)
1593  self.assertIsInstance(net.indices, torch.cuda.LongTensor)
1594  net.to(torch.device("cpu"), non_blocking=True)
1595  self.assertIsInstance(l.weight.data, torch.HalfTensor)
1596  self.assertIsInstance(l.bias.data, torch.HalfTensor)
1597  self.assertIsInstance(net.indices, torch.LongTensor)
1598  net.type(torch.FloatTensor)
1599  self.assertIsInstance(l.weight.data, torch.FloatTensor)
1600  self.assertIsInstance(l.bias.data, torch.FloatTensor)
1601  net.to(torch.DoubleTensor(1))
1602  self.assertIsInstance(l.weight.data, torch.DoubleTensor)
1603  self.assertIsInstance(l.bias.data, torch.DoubleTensor)
1604  if TEST_CUDA:
1605  net.type(torch.cuda.FloatTensor)
1606  self.assertIsInstance(l.weight.data, torch.cuda.FloatTensor)
1607  self.assertIsInstance(l.bias.data, torch.cuda.FloatTensor)
1608 
1609  def test_non_leaf_parameters(self):
1610  l1 = nn.Linear(10, 10)
1611  l2 = nn.Linear(10, 10)
1612 
1613  def assign_weight():
1614  l2.weight = l1.weight + 2
1615 
1616  self.assertRaises(TypeError, assign_weight)
1617  # This should work though
1618  l2.weight = Parameter(torch.randn(10, 10))
1619 
1620  def test_clip_grad_norm(self):
1621  l = nn.Linear(10, 10)
1622  max_norm = 2
1623 
1624  def compute_norm(norm_type):
1625  norm_type = float(norm_type)
1626  if norm_type != inf:
1627  total_norm = 0
1628  for p in l.parameters():
1629  total_norm += p.grad.data.abs().pow(norm_type).sum()
1630  return pow(total_norm, 1. / norm_type)
1631  else:
1632  return max(p.grad.data.abs().max() for p in l.parameters())
1633 
1634  def compare_scaling(grads):
1635  p_scale = [p.grad.data.div(g).view(-1) for p, g in zip(l.parameters(), grads)]
1636  scale = torch.cat(p_scale)
1637  self.assertEqual(scale.std(), 0)
1638  return scale[0]
1639 
1640  grads = torch.arange(1., 101).view(10, 10), torch.ones(10).div(1000)
1641  for norm_type in [0.5, 1.5, 2, 4, 'inf']:
1642  for p, g in zip(l.parameters(), grads):
1643  p._grad = Variable(g.clone().view_as(p.data))
1644  norm_before = compute_norm(norm_type)
1645  norm = clip_grad_norm_(l.parameters(), max_norm, norm_type=norm_type)
1646  norm_after = compute_norm(norm_type)
1647  self.assertEqual(norm, norm_before)
1648  self.assertEqual(norm_after, max_norm)
1649  self.assertLessEqual(norm_after, norm_before)
1650  compare_scaling(grads)
1651 
1652  # Small gradients should be left unchanged
1653  grads = torch.rand(10, 10).div(10000), torch.ones(10).div(500)
1654  for norm_type in [0.5, 1.5, 2, 4, 'inf']:
1655  for p, g in zip(l.parameters(), grads):
1656  p.grad.data.copy_(g)
1657  norm_before = compute_norm(norm_type)
1658  norm = clip_grad_norm_(l.parameters(), max_norm, norm_type=norm_type)
1659  norm_after = compute_norm(norm_type)
1660  self.assertEqual(norm, norm_before)
1661  self.assertEqual(norm_before, norm_after)
1662  self.assertLessEqual(norm_after, max_norm)
1663  scale = compare_scaling(grads)
1664  self.assertEqual(scale, 1)
1665 
1666  # Should accept a single Tensor as input
1667  p1, p2 = torch.randn(10, 10), torch.randn(10, 10)
1668  g = torch.arange(1., 101).view(10, 10)
1669  p1._grad = g.clone()
1670  p2._grad = g.clone()
1671  for norm_type in [0.5, 1.5, 2, 4, 'inf']:
1672  clip_grad_norm_(p1, max_norm, norm_type=norm_type)
1673  clip_grad_norm_([p2], max_norm, norm_type=norm_type)
1674  self.assertEqual(p1.grad, p2.grad)
1675 
1676  def test_clip_grad_value(self):
1677  l = nn.Linear(10, 10)
1678  clip_value = 2.5
1679 
1680  grad_w, grad_b = torch.arange(-50., 50).view(10, 10).div_(5), torch.ones(10).mul_(2)
1681  for grad_list in [[grad_w, grad_b], [grad_w, None]]:
1682  for p, g in zip(l.parameters(), grad_list):
1683  p._grad = g.clone().view_as(p.data) if g is not None else g
1684 
1685  clip_grad_value_(l.parameters(), clip_value)
1686  for p in filter(lambda p: p.grad is not None, l.parameters()):
1687  self.assertLessEqual(p.grad.data.max(), clip_value)
1688  self.assertGreaterEqual(p.grad.data.min(), -clip_value)
1689 
1690  # Should accept a single Tensor as input
1691  p1, p2 = torch.randn(10, 10), torch.randn(10, 10)
1692  g = torch.arange(-50., 50).view(10, 10).div_(5)
1693  p1._grad = g.clone()
1694  p2._grad = g.clone()
1695  clip_grad_value_(p1, clip_value)
1696  clip_grad_value_([p2], clip_value)
1697  self.assertEqual(p1.grad, p2.grad)
1698 
1699  def test_parameters_to_vector(self):
1700  conv1 = nn.Conv2d(3, 10, 5)
1701  fc1 = nn.Linear(10, 20)
1702  model = nn.Sequential(conv1, fc1)
1703 
1704  vec = parameters_to_vector(model.parameters())
1705  self.assertEqual(vec.size(0), 980)
1706 
1707  def test_vector_to_parameters(self):
1708  conv1 = nn.Conv2d(3, 10, 5)
1709  fc1 = nn.Linear(10, 20)
1710  model = nn.Sequential(conv1, fc1)
1711 
1712  vec = Variable(torch.arange(0., 980))
1713  vector_to_parameters(vec, model.parameters())
1714 
1715  sample = next(model.parameters())[0, 0, 0]
1716  self.assertTrue(torch.equal(sample.data, vec.data[:5]))
1717 
1718  # We don't want to make propagating NaN a hard requirement on ops, but for
1719  # these easy ones, we should make them do so.
1720  def _test_nonlinearity_propagate_nan(self, device):
1721  def test(nonlinearity, *args, **kwargs):
1722  x = torch.tensor([nan], device=device)
1723  fn = getattr(F, nonlinearity)
1724  try:
1725  self.assertTrue(math.isnan(fn(x, *args, **kwargs).item()))
1726  except Exception as e:
1727  if 'not implemented' not in str(e):
1728  raise
1729 
1730  test('relu')
1731  test('relu', inplace=True)
1732  test('relu6')
1733  test('elu')
1734  test('selu')
1735  test('celu')
1736  test('rrelu')
1737  test('rrelu', inplace=True)
1738  test('hardtanh')
1739  test('tanh')
1740  test('sigmoid')
1741  test('logsigmoid')
1742  test('hardshrink')
1743  test('tanhshrink')
1744  test('softsign')
1745  test('softmin', 0)
1746  test('softmax', 0)
1747  test('log_softmax', 0)
1748  test('leaky_relu', 0.2)
1749  test('threshold', 3, 2)
1750  test('threshold', 3, 2, inplace=True)
1751 
1752  def test_nonlinearity_propagate_nan(self):
1753  self._test_nonlinearity_propagate_nan('cpu')
1754 
1755  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
1756  def test_nonlinearity_propagate_nan_cuda(self):
1757  self._test_nonlinearity_propagate_nan('cuda')
1758 
1759  def test_weight_norm(self):
1760  input = torch.randn(3, 5)
1761  m = nn.Linear(5, 7)
1762  expected_output = m(input)
1763 
1764  # add weight normalization
1765  m = torch.nn.utils.weight_norm(m)
1766  self.assertEqual(m.weight_v.size(), m.weight.size())
1767  self.assertEqual(m.weight_g.size(), (7, 1))
1768  self.assertEqual(m(input), expected_output)
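# weight_norm reparameterizes the weight as w = g * v / ||v||, with the norm
# taken over every dimension except `dim`. For this (7, 5) Linear weight and
# the default dim=0, g stores one magnitude per output row, hence the (7, 1)
# shape checked above; with dim=1 (tested below) it becomes (1, 5).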
1769 
1770  # remove weight norm
1771  m = torch.nn.utils.remove_weight_norm(m)
1772  self.assertFalse(hasattr(m, 'weight_g'))
1773  self.assertFalse(hasattr(m, 'weight_v'))
1774  self.assertEqual(m(input), expected_output)
1775 
1776  # test with dim=1
1777  m = torch.nn.utils.weight_norm(m, dim=1)
1778  self.assertEqual(m.weight_v.size(), m.weight.size())
1779  self.assertEqual(m.weight_g.size(), (1, 5))
1780  self.assertEqual(m(input), expected_output)
1781 
1782  # test with dim=None
1783  m = nn.Linear(5, 7)
1784  expected_output = m(input)
1785  m = torch.nn.utils.weight_norm(m, dim=None)
1786  self.assertEqual(m(input), expected_output)
1787 
1788  with self.assertRaisesRegex(RuntimeError, 'register two weight_norm hooks'):
1789  m = torch.nn.utils.weight_norm(m)
1790  m = torch.nn.utils.weight_norm(m)
1791 
1792  def test_weight_norm_pickle(self):
1793  m = torch.nn.utils.weight_norm(nn.Linear(5, 7))
1794  m = pickle.loads(pickle.dumps(m))
1795  self.assertIsInstance(m, nn.Linear)
1796 
1797  @skipIfRocm
1798  def test_spectral_norm(self):
1799  input = torch.randn(3, 5)
1800  m = nn.Linear(5, 7)
1801  m = torch.nn.utils.spectral_norm(m)
1802 
1803  self.assertEqual(m.weight_u.size(), torch.Size([m.weight.size(0)]))
1804  # weight_orig should be trainable
1805  self.assertTrue(hasattr(m, 'weight_orig'))
1806  self.assertTrue('weight_orig' in m._parameters)
1807  # weight_u should be just a reused buffer
1808  self.assertTrue(hasattr(m, 'weight_u'))
1809  self.assertTrue('weight_u' in m._buffers)
1810  self.assertTrue('weight_v' in m._buffers)
1811  # weight should be a plain attribute, not counted as a buffer or a param
1812  self.assertFalse('weight' in m._buffers)
1813  self.assertFalse('weight' in m._parameters)
1814  # it should also be sharing storage with `weight_orig`
1815  self.assertEqual(m.weight_orig.storage(), m.weight.storage())
1816  self.assertEqual(m.weight_orig.size(), m.weight.size())
1817  self.assertEqual(m.weight_orig.stride(), m.weight.stride())
1818 
1819  m = torch.nn.utils.remove_spectral_norm(m)
1820  self.assertFalse(hasattr(m, 'weight_orig'))
1821  self.assertFalse(hasattr(m, 'weight_u'))
1822  # weight should be converted back to a parameter
1823  self.assertTrue(hasattr(m, 'weight'))
1824  self.assertTrue('weight' in m._parameters)
1825 
1826  with self.assertRaisesRegex(RuntimeError, 'register two spectral_norm hooks'):
1827  m = torch.nn.utils.spectral_norm(m)
1828  m = torch.nn.utils.spectral_norm(m)
1829 
1830  # test correctness in training/eval modes and cpu/multi-gpu settings
1831  for apply_dp in (True, False):
1832  if apply_dp:
1833  if not TEST_MULTIGPU:
1834  continue
1835  device = torch.device('cuda:0')
1836 
1837  def maybe_wrap(m):
1838  return torch.nn.DataParallel(m, [0, 1])
1839  else:
1840  device = torch.device('cpu')
1841 
1842  def maybe_wrap(m):
1843  return m
1844 
1845  for requires_grad in (True, False):
1846  m = nn.Linear(3, 4).to(device)
1847  m.weight.requires_grad_(requires_grad)
1848  m = torch.nn.utils.spectral_norm(m)
1849  wrapped_m = maybe_wrap(m)
1850  self.assertTrue(hasattr(m, 'weight_u'))
1851  u0 = m.weight_u.clone()
1852  v0 = m.weight_v.clone()
1853 
1854  # TEST TRAINING BEHAVIOR
1855 
1856  # assert that u and v are updated
1857  input = torch.randn(2, 3, device=device)
1858  out = wrapped_m(input)
1859  self.assertNotEqual(u0, m.weight_u)
1860  self.assertNotEqual(v0, m.weight_v)
1861 
1862  # assert that backprop reaches weight_orig
1863  # can't use gradcheck because the function changes every time we
1864  # forward through it in training mode
1865  if requires_grad:
1866  torch.autograd.grad(out.sum(), m.weight_orig)
1867 
1868  # test backward works with multiple forwards
1869  # it uses training mode so we need to reset the `u` and `v` vectors
1870  # to the same value at the beginning for the finite difference test to pass
1871  saved_u = m.weight_u.clone()
1872  saved_v = m.weight_v.clone()
1873 
1874  def fn(input):
1875  m.weight_u.data.copy_(saved_u)
1876  m.weight_v.data.copy_(saved_v)
1877  out0 = wrapped_m(input)
1878  out1 = wrapped_m(input)
1879  return out0 + out1
1880 
1881  torch.autograd.gradcheck(fn, (input.clone().requires_grad_(),))
1882 
1883  # test removing
1884  pre_remove_out = wrapped_m(input)
1885  m = torch.nn.utils.remove_spectral_norm(m)
1886  self.assertEqual(wrapped_m(input), pre_remove_out)
1887 
1888  m = torch.nn.utils.spectral_norm(m)
1889  for _ in range(3):
1890  pre_remove_out = wrapped_m(input)
1891  m = torch.nn.utils.remove_spectral_norm(m)
1892  self.assertEqual(wrapped_m(input), pre_remove_out)
1893 
1894  # TEST EVAL BEHAVIOR
1895 
1896  m = torch.nn.utils.spectral_norm(m)
1897  wrapped_m(input)
1898  last_train_out = wrapped_m(input)
1899  last_train_u = m.weight_u.clone()
1900  last_train_v = m.weight_v.clone()
1901  wrapped_m.zero_grad()
1902  wrapped_m.eval()
1903 
1904  eval_out0 = wrapped_m(input)
1905  # assert eval gives same result as last training iteration
1906  self.assertEqual(eval_out0, last_train_out)
1907  # assert that doing more iterations in eval doesn't change things
1908  self.assertEqual(eval_out0, wrapped_m(input))
1909  self.assertEqual(last_train_u, m.weight_u)
1910  self.assertEqual(last_train_v, m.weight_v)
1911 
1912  # FIXME: the code below is flaky when executed with DataParallel
1913  # see https://github.com/pytorch/pytorch/issues/13818
1914  if apply_dp:
1915  continue
1916 
1917  # test backward works with multiple forwards in mixed training
1918  # and eval modes
1919  # it uses training mode so we need to reset the `u` and `v` vectors
1920  # to the same value at the beginning for the finite difference test to pass
1921  saved_u = m.weight_u.clone()
1922  saved_v = m.weight_v.clone()
1923 
1924  def fn(input):
1925  m.weight_u.data.copy_(saved_u)
1926  m.weight_v.data.copy_(saved_v)
1927  wrapped_m.train()
1928  out0 = wrapped_m(input)
1929  wrapped_m.eval()
1930  out1 = wrapped_m(input)
1931  wrapped_m.train()
1932  out2 = wrapped_m(input)
1933  wrapped_m.eval()
1934  out3 = wrapped_m(input)
1935  return out0 + out1 + out2 + out3
1936 
1937  torch.autograd.gradcheck(fn, (input.clone().requires_grad_(),))
1938 
1939  # assert that backprop reaches weight_orig in eval
1940  if requires_grad:
1941  def fn(weight):
1942  return wrapped_m(input)
1943 
1944  torch.autograd.gradcheck(fn, (m.weight_orig,))
1945 
1946  def test_spectral_norm_load_state_dict(self):
1947  inp = torch.randn(2, 3)
1948  for activate_times in (0, 3):
1949  # Test backward compatibility
1950  # At version None -> 1: weight stops being a buffer and the v vector becomes a buffer
1951  m = nn.Linear(3, 5)
1952  snm = torch.nn.utils.spectral_norm(m)
1953  snm.train()
1954  for _ in range(activate_times):
1955  snm(inp)
1956 
1957  # craft a version None state_dict
1958  version_none_state_dict = deepcopy(snm.state_dict())
1959  self.assertEqual({'weight_orig', 'bias', 'weight_u', 'weight_v'}, set(version_none_state_dict.keys()))
1960  self.assertIn('spectral_norm', version_none_state_dict._metadata[''])
1961  del version_none_state_dict._metadata['']['spectral_norm'] # remove metadata info
1962  del version_none_state_dict['weight_v'] # remove v vector
1963  version_none_state_dict['weight'] = snm.weight.detach().clone() # set W as a buffer
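# The crafted dict now looks like a checkpoint from the pre-versioning
# spectral_norm implementation: no version metadata, no `weight_v` buffer,
# and the computed `weight` stored directly, which load_state_dict is
# expected to migrate to the current layout.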
1964 
1965  # normal state_dict
1966  version_latest_state_dict = deepcopy(snm.state_dict())
1967 
1968  snm.eval()
1969  out0_eval = snm(inp)
1970  snm.train()
1971  out1_train = snm(inp)
1972  out2_train = snm(inp)
1973  snm.eval()
1974  out3_eval = snm(inp)
1975 
1976  snm.load_state_dict(version_none_state_dict)
1977  if activate_times > 0:
1978  # since when loading a version None state dict, we assume that the
1979  # values in the state dict have gone through at least one
1980  # forward, we only test for equivalence when activate_times > 0.
1981  snm.eval()
1982  self.assertEqual(out0_eval, snm(inp))
1983  snm.train()
1984  self.assertEqual(out1_train, snm(inp))
1985  self.assertEqual(out2_train, snm(inp))
1986  snm.eval()
1987  self.assertEqual(out3_eval, snm(inp))
1988 
1989  # Test normal loading
1990  snm.load_state_dict(version_latest_state_dict)
1991  snm.eval()
1992  self.assertEqual(out0_eval, snm(inp))
1993  snm.train()
1994  self.assertEqual(out1_train, snm(inp))
1995  self.assertEqual(out2_train, snm(inp))
1996  snm.eval()
1997  self.assertEqual(out3_eval, snm(inp))
1998 
1999  def test_spectral_norm_dim(self):
2000  inp = torch.randn(2, 3, 10, 12)
2001  m = nn.ConvTranspose2d(3, 4, (5, 6))
2002  m = torch.nn.utils.spectral_norm(m)
2003  # this should not run into incompatible shapes
2004  x = m(inp)
2005  # check that u refers to the same dimension
2006  self.assertEqual(m.weight_u.shape, m.weight_orig[0, :, 0, 0].shape)
2007 
2008  def test_spectral_norm_forward(self):
2009  input = torch.randn(3, 5)
2010  m = nn.Linear(5, 7)
2011  m = torch.nn.utils.spectral_norm(m)
2012  # naive forward
2013  _weight, _bias, _u = m.weight_orig, m.bias, m.weight_u
2014  _weight_mat = _weight.view(_weight.size(0), -1)
2015  _v = torch.mv(_weight_mat.t(), _u)
2016  _v = F.normalize(_v, dim=0, eps=1e-12)
2017  _u = torch.mv(_weight_mat, _v)
2018  _u = F.normalize(_u, dim=0, eps=1e-12)
2019  _weight.data /= torch.dot(_u, torch.matmul(_weight_mat, _v))
2020  out_hat = torch.nn.functional.linear(input, _weight, _bias)
2021  expect_out = m(input)
2022  self.assertAlmostEqual(expect_out, out_hat)
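# The naive forward above mirrors the spectral_norm estimate: one power
# iteration step, v = normalize(W^T u) followed by u = normalize(W v), gives
# sigma(W) ~= u^T W v, and the effective weight applied by m(input) is
# W / sigma(W).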
2023 
2024  def test_spectral_norm_pickle(self):
2025  m = torch.nn.utils.spectral_norm(nn.Linear(5, 7))
2026  m = pickle.loads(pickle.dumps(m))
2027  self.assertIsInstance(m, nn.Linear)
2028 
2029  def test_threshold_int(self):
2030  x = torch.tensor([-3, -2, -1, 0, 1, 2, 3])
2031  expected = torch.tensor([99, 99, 99, 99, 1, 2, 3])
2032  self.assertEqual(F.threshold(x, 0, 99), expected)
2033 
2034  def test_embedding_sparse_basic(self):
2035  embedding = nn.Embedding(10, 20, sparse=True)
2036  input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
2037  embedding(input).sum().backward()
2038  self.assertTrue(embedding.weight.grad.is_sparse)
2039  self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)
2040 
2041  def test_embedding_sparse_empty_tensor(self):
2042  embedding = nn.Embedding(0, 0, sparse=True)
2043  input = torch.tensor([], dtype=torch.int64)
2044  embedding(input).sum().backward()
2045  self.assertTrue(embedding.weight.grad.is_sparse)
2046  self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)
2047 
2048  embedding = nn.Embedding(10, 0, sparse=True)
2049  input = torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]])
2050  embedding(input).sum().backward()
2051  self.assertTrue(embedding.weight.grad.is_sparse)
2052  self.assertEqual(embedding.weight.grad.shape, embedding.weight.shape)
2053 
2054  def test_embedding_padding_idx(self):
2055  embedding = nn.Embedding(10, 20, padding_idx=0)
2056  input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
2057  output = embedding(input)
2058  self.assertEqual(output[0][0].sum(), 0)
2059  self.assertEqual(output[1][2].sum(), 0)
2060 
2061  embedding = nn.Embedding(10, 20, padding_idx=0, sparse=True)
2062  input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]]))
2063  output = embedding(input)
2064  self.assertEqual(output[0][0].sum(), 0)
2065  self.assertEqual(output[1][2].sum(), 0)
2066 
2067  # negative indexing check for padding_idx
2068  # padding_idx=-2, num_embeddings=10 ==> index 8 padded
2069  embedding = nn.Embedding(10, 20, padding_idx=-2)
2070  input = Variable(torch.LongTensor([[0, 2, 8, 5], [4, 8, 0, 9]]))
2071  output = embedding(input)
2072  self.assertEqual(output[0][2].sum(), 0)
2073  self.assertEqual(output[1][1].sum(), 0)
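# A negative padding_idx is interpreted relative to num_embeddings, i.e.
# -2 -> 10 + (-2) = 8 here; the row at index 8 is zero-initialized, so every
# position holding 8 in the input yields an all-zero embedding, which is what
# the sum() == 0 checks above rely on.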
2074 
2075  embedding = nn.Embedding(10, 20, padding_idx=-2, sparse=True)
2076  input = Variable(torch.LongTensor([[0, 2, 8, 5], [4, 8, 0, 9]]))
2077  output = embedding(input)
2078  self.assertEqual(output[0][2].sum(), 0)
2079  self.assertEqual(output[1][1].sum(), 0)
2080 
2081  # out of bounds check for padding_idx
2082  self.assertRaises(AssertionError, nn.Embedding, num_embeddings=10, embedding_dim=20, padding_idx=25)
2083  self.assertRaises(AssertionError, nn.Embedding, num_embeddings=10, embedding_dim=20, padding_idx=-25)
2084 
2085  # test backward when input contains padding_idx
2086  padding_idx = 0
2087  embedding = nn.Embedding(5, 2, padding_idx=padding_idx)
2088  for n in (1, 2):
2089  for other_indices in ([], [1, 3], [2]):
2090  indices = torch.LongTensor(other_indices + [padding_idx] * n)
2091  pre = embedding.weight[padding_idx].clone()
2092  embedding(indices).sum().backward()
2093  after = (embedding.weight + embedding.weight.grad)[padding_idx]
2094  embedding.zero_grad()
2095  self.assertEqual(after, pre)
2096 
2097  def test_embedding_max_norm(self):
2098  embedding = nn.Embedding(22, 5, max_norm=1.0)
2099  input = Variable(torch.LongTensor([2, 8, 8, 6]))
2100  output = embedding(input)
2101  self.assertEqual(output[1], output[2])
2102  self.assertTrue(output.data.norm(p=2, dim=1).le(1).all())
2103 
2104  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2105  @repeat_test_for_types(ALL_TENSORTYPES)
2106  def test_embedding_max_norm_cuda(self, dtype=torch.float):
2107  embedding = nn.Embedding(22, 5, max_norm=1.0).to("cuda", dtype=dtype)
2108  # nn.Embedding only takes LongTensor as input
2109  input = torch.tensor([2, 8, 8, 6], device="cuda", dtype=torch.long)
2110  output = embedding(input)
2111  self.assertEqual(output[1], output[2])
2112  self.assertTrue(output.data.norm(p=2, dim=1).le(1).all())
2113 
2114  def test_embedding_from_pretrained(self):
2115  a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
2116  embedding = nn.Embedding.from_pretrained(a)
2117  self.assertEqual(a, embedding.weight.data)
2118 
2119  input = torch.LongTensor([0, 1])
2120  output = embedding(input)
2121  self.assertEqual(a, output)
2122 
2123  def test_embedding_from_pretrained_options(self):
2124  a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
2125  opts = {
2126  "max_norm": 2.,
2127  "norm_type": .5,
2128  "scale_grad_by_freq": False,
2129  "sparse": True
2130  }
2131  embedding = nn.Embedding.from_pretrained(a, **opts)
2132  input = torch.LongTensor([0, 1])
2133  output = embedding(input)
2134  # test output and that weight matrix was renormalized
2135  self.assertEqual(a, output)
2136  self.assertTrue(a.ne(torch.arange(1, 7, dtype=a.dtype).view(2, 3)).all())
2137  self.assertTrue(output.data.norm(p=opts["norm_type"], dim=1).le(opts["max_norm"]).all())
2138 
2139  def test_embedding_functional(self):
2140  a = torch.tensor([
2141  [1, 3, 2],
2142  [0, 2, 1]
2143  ], dtype=torch.long)
2144  embeddings = torch.rand(4, 3, requires_grad=True)
2145 
2146  embed_old = torch.nn.Embedding(4, 3)
2147  embed_old.weight.data = embeddings.data
2148  res_old = embed_old(a)
2149 
2150  res_F = F.embedding(a, embeddings)
2151  self.assertEqual(res_old, res_F)
2152 
2153  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2154  @repeat_test_for_types([torch.float, torch.half])
2155  @skipIfRocm
2156  def test_softmax_dtype(self, dtype=torch.float):
2157  input = torch.rand(32, 100, device="cuda", dtype=dtype, requires_grad=True)
2158  inputf = input.to(torch.float).detach().requires_grad_(True)
2159  out = F.softmax(input, dim=-1, dtype=torch.float)
2160  outf = F.softmax(inputf, dim=-1)
2161  # should be bitwise equal
2162  self.assertEqual(out, outf, prec=0)
2163  gO = torch.empty_like(outf).uniform_()
2164  out.backward(gO)
2165  outf.backward(gO)
2166  # should be bitwise equal
2167  self.assertEqual(input.grad, inputf.grad.to(dtype), prec=0)
2168 
2169  def _test_softmax_backward(self, device):
2170  if device.type == 'cuda':
2171  dtypes = [torch.float, torch.half]
2172  else:
2173  dtypes = [torch.float]
2174  # FIXME: add (10, 0) after https://github.com/pytorch/pytorch/issues/17262 is fixed
2175  sizes = [(0, 10), (32, 20)]
2176  for fn in [F.softmax, F.log_softmax]:
2177  for dtype in dtypes:
2178  for size in sizes:
2179  input = torch.rand(size, device=device, dtype=dtype, requires_grad=True)
2180  output = fn(input, dtype=torch.float, dim=1).sum()
2181  grad_input, = torch.autograd.grad(output, input, create_graph=True)
2182  grad_input.sum().backward()
2183 
2184  def test_softmax_backward(self):
2185  self._test_softmax_backward(torch.device('cpu'))
2186 
2187  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2188  @skipIfRocm
2189  def test_softmax_backward_cuda(self):
2190  self._test_softmax_backward(torch.device('cuda'))
2191 
2192  def _test_gumbel_softmax_st_shapes(self, cuda, dtype, shape, dim, count_expected):
2193  logits = torch.randn(shape, dtype=torch.float)
2194  logits = logits.to(dtype)
2195  if cuda:
2196  logits = logits.cuda()
2197 
2198  y_draw = F.gumbel_softmax(logits, hard=True, dim=dim)
2199 
2200  # All values positive
2201  self.assertGreaterEqual(y_draw.min(), 0)
2202  # Shape unchanged
2203  self.assertTrue(y_draw.shape == logits.shape)
2204  # One choice per draw
2205  self.assertEqual(y_draw.sum(), count_expected, prec=torch.finfo(y_draw.dtype).eps)
2206 
2207  def _test_gumbel_softmax_straight_through(self, cuda, dtype):
2208  num_draws = 100
2209 
2210  logits = torch.tensor([[0.2, 0.8, 0.1]])
2211  logits = logits.reshape([1, 3])
2212  logits = logits.to(dtype).requires_grad_()
2213  if cuda:
2214  logits = logits.cuda()
2215  probs = logits.softmax(dim=-1)
2216 
2217  counts = torch.zeros_like(logits)
2218  for _ in range(num_draws):
2219  y_draw = F.gumbel_softmax(logits, hard=True)
2220  counts = counts + y_draw
2221 
2222  # All values positive
2223  self.assertGreaterEqual(y_draw.min(), 0)
2224  # Each experiment should result in 1 draw.
2225  self.assertEqual(counts.sum(), num_draws, prec=torch.finfo(counts.dtype).eps)
2226 
2227  # check that the results are asymptotically as expected.
2228  expected = probs * num_draws
2229  # z is approximately N(0, 1) if the counts are unbiased
2230  z = (counts - expected) / (expected * (1 - probs)).sqrt()
2231  # A (lazy) approximate 99% two-sided test:
2232  # |z| > 2.58 occurs with probability alpha ~ 0.01 if the draws are unbiased
2233  self.assertLess(z.abs().max().item(), 2.58)
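# Worked example (generic): for a class with probability p = 0.5 and
# num_draws = 100, the expected count is 50 with std sqrt(100 * 0.5 * 0.5) = 5,
# so an observed count of 60 gives z = (60 - 50) / 5 = 2, comfortably under
# the 2.58 (~99% two-sided) cutoff.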
2234 
2235  def _test_gumbel_softmax_grad(self, cuda, dtype):
2236  # "hard" and "not hard" should propagate same gradient.
2237  device = torch.device("cuda") if cuda else torch.device("cpu")
2238  logits_soft = torch.zeros(10, 10, dtype=dtype, device=device, requires_grad=True)
2239  logits_hard = torch.zeros(10, 10, dtype=dtype, device=device, requires_grad=True)
2240 
2241  seed = torch.random.get_rng_state()
2242  y_soft = F.gumbel_softmax(logits_soft, hard=False)
2243  torch.random.set_rng_state(seed)
2244  y_hard = F.gumbel_softmax(logits_hard, hard=True)
2245 
2246  y_soft.sum().backward()
2247  y_hard.sum().backward()
2248 
2249  # 2eps = 1x addition + 1x subtraction.
2250  tol = 2 * torch.finfo(dtype).eps
2251  self.assertAlmostEqual(logits_soft.grad, logits_hard.grad, delta=tol)
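# For reference, torch.finfo(torch.float).eps ~= 1.19e-7 and
# torch.finfo(torch.double).eps ~= 2.22e-16, so tol works out to roughly
# 2.4e-7 and 4.4e-16 respectively -- slack for the one addition and one
# subtraction of rounding error mentioned above.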
2252 
2253  @repeat_test_for_types(NO_HALF_TENSORTYPES)
2254  def test_gumbel_softmax(self, dtype=torch.float):
2255  """
2256  NO_HALF_TENSORTYPES because many half ops don't work on CPU.
2257  """
2258  self._test_gumbel_softmax_st_shapes(cuda=False, dtype=dtype, shape=[5], dim=0, count_expected=1)
2259  self._test_gumbel_softmax_st_shapes(cuda=False, dtype=dtype, shape=[5], dim=-1, count_expected=1)
2260  self._test_gumbel_softmax_st_shapes(cuda=False, dtype=dtype, shape=[5, 4], dim=1, count_expected=5)
2261  self._test_gumbel_softmax_st_shapes(cuda=False, dtype=dtype, shape=[5, 4, 3], dim=1, count_expected=5 * 3)
2262  self._test_gumbel_softmax_st_shapes(cuda=False, dtype=dtype, shape=[5, 4, 3], dim=-1, count_expected=5 * 4)
2263  self._test_gumbel_softmax_straight_through(cuda=False, dtype=dtype)
2264  self._test_gumbel_softmax_grad(cuda=False, dtype=dtype)
2265 
2266  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2267  @repeat_test_for_types(ALL_TENSORTYPES)
2268  def test_gumbel_softmax_cuda(self, dtype=torch.float):
2269  self._test_gumbel_softmax_st_shapes(cuda=True, dtype=dtype, shape=[5], dim=0, count_expected=1)
2270  self._test_gumbel_softmax_st_shapes(cuda=True, dtype=dtype, shape=[5], dim=-1, count_expected=1)
2271  self._test_gumbel_softmax_st_shapes(cuda=True, dtype=dtype, shape=[5, 4], dim=1, count_expected=5)
2272  self._test_gumbel_softmax_st_shapes(cuda=True, dtype=dtype, shape=[5, 4, 3], dim=1, count_expected=5 * 3)
2273  self._test_gumbel_softmax_st_shapes(cuda=True, dtype=dtype, shape=[5, 4, 3], dim=-1, count_expected=5 * 4)
2274  self._test_gumbel_softmax_straight_through(cuda=True, dtype=dtype)
2275  self._test_gumbel_softmax_grad(cuda=True, dtype=dtype)
2276 
2277  def _test_EmbeddingBag(self, cuda, mode, sparse, dtype=torch.double):
2278  # check a known test example
2279  device = torch.device("cuda") if cuda else torch.device("cpu")
2280  es = nn.EmbeddingBag(5, 2, mode=mode, sparse=sparse).to(device, dtype)
2281  es.weight.data.copy_(torch.arange(1, 11, device=device, dtype=dtype).view_as(es.weight))
2282  input = torch.tensor([3, 1, 1, 1, 4, 0], device=device, dtype=torch.long)
2283  offsets = torch.tensor([0, 0, 3, 3, 6], device=device, dtype=torch.long)
2284 
2285  grad_output = torch.tensor(
2286  [1, 2,
2287  3, 4], device=device, dtype=dtype).view(2, 2)
2288  grad_output_with_empty = torch.tensor(
2289  [99, 99,
2290  1, 2,
2291  99, 99,
2292  3, 4,
2293  99, 99], device=device, dtype=dtype).view(5, 2)
2294 
2295  if mode == "sum" or mode == "mean":
2296  denominator = 1 if mode == "sum" else 3
2297  expected_output = torch.tensor(
2298  [[13, 16],
2299  [13, 16]], device=device, dtype=dtype) / denominator
2300 
2301  expected_output_with_empty = torch.tensor(
2302  [[0, 0],
2303  [13, 16],
2304  [0, 0],
2305  [13, 16],
2306  [0, 0]], device=device, dtype=dtype) / denominator
2307 
2308  expected_grad_weight = torch.tensor(
2309  [[3, 4],
2310  [5, 8],
2311  [0, 0],
2312  [1, 2],
2313  [3, 4]], device=device, dtype=dtype) / denominator
2314  elif mode == "max":
2315  expected_output = torch.tensor(
2316  [[7, 8],
2317  [9, 10]], device=device, dtype=dtype)
2318 
2319  expected_output_with_empty = torch.tensor(
2320  [[0, 0],
2321  [7, 8],
2322  [0, 0],
2323  [9, 10],
2324  [0, 0]], device=device, dtype=dtype)
2325 
2326  expected_grad_weight = torch.tensor(
2327  [[0, 0],
2328  [0, 0],
2329  [0, 0],
2330  [1, 2],
2331  [3, 4]], device=device, dtype=dtype)
2332 
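# Sanity check of the expected values: the weight rows are
# [1,2],[3,4],[5,6],[7,8],[9,10], and offsets [0, 0, 3, 3, 6] split the input
# [3, 1, 1, 1, 4, 0] into bags [], [3,1,1], [], [1,4,0], []. The two non-empty
# bags sum to [7,8]+[3,4]+[3,4] = [13,16] and [3,4]+[9,10]+[1,2] = [13,16]
# (divided by 3 for 'mean'), and their elementwise maxima are [7,8] and
# [9,10], matching the tensors above.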
2333  output = es(input, offsets)
2334  output.backward(grad_output_with_empty)
2335 
2336  es_weight_grad = es.weight.grad.data
2337  if sparse:
2338  es_weight_grad = es.weight.grad.to_dense()
2339  self.assertEqual(output, expected_output_with_empty)
2340  self.assertEqual(es_weight_grad, expected_grad_weight, dtype2prec[dtype])
2341 
2342  # check same example except as 2D (2 x 3)
2343  input = input.view(2, -1)
2344  es.zero_grad()
2345  output = es(input)
2346  output.backward(grad_output)
2347 
2348  es_weight_grad = es.weight.grad
2349  if sparse:
2350  es_weight_grad = es.weight.grad.to_dense()
2351  self.assertEqual(output, expected_output)
2352  self.assertEqual(es_weight_grad, expected_grad_weight, dtype2prec[dtype])
2353 
2354  # test all empty bags
2355  es.zero_grad()
2356  inputs = torch.tensor([], dtype=torch.long, device=device)
2357  offsets = torch.tensor([0, 0, 0, 0], device=device)
2358  es(inputs, offsets).sum().backward()
2359  dense_grad = es.weight.grad
2360  if dense_grad.is_sparse:
2361  dense_grad = dense_grad.to_dense()
2362  self.assertEqual(dense_grad, torch.zeros_like(es.weight))
2363 
2364  # now compare EmbeddingBag vs Embedding + Sum/Mean, for constant bag length
2365  def _test_vs_Embedding(N, D, B, L, max_norm=None):
2366  es = nn.EmbeddingBag(N, D, mode=mode, sparse=sparse, max_norm=max_norm).to(device, dtype)
2367  e = nn.Embedding(N, D, max_norm=max_norm).to(device, dtype)
2368  e.weight.data.copy_(es.weight)
2369  input = torch.randint(N, (B, L), device=device, dtype=torch.long)
2370  offsets = torch.arange(0, B, device=device, dtype=torch.long).mul_(L)
2371  grad_output = torch.rand(B, D, device=device, dtype=dtype)
2372 
2373  output = es(input.view(-1), offsets)
2374  if mode == 'sum':
2375  ref_output = e(input).sum(1)
2376  elif mode == 'mean':
2377  ref_output = e(input).mean(1)
2378  elif mode == 'max':
2379  ref_output = e(input).max(1)[0]
2380 
2381  self.assertEqual(output, ref_output, dtype2prec[dtype])
2382 
2383  output.backward(grad_output)
2384  ref_output.backward(grad_output)
2385  es_weight_grad = es.weight.grad.data
2386  if sparse:
2387  es_weight_grad = es.weight.grad.data.to_dense()
2388 
2389  # We have more floating point error here because we are dealing with larger numbers
2390  needed_prec = dtype2prec[dtype] * 2
2391  self.assertEqual(es_weight_grad, e.weight.grad, needed_prec)
2392 
2393  N, D, B, L = random.randint(1, 100), random.randint(1, 100), random.randint(1, 50), random.randint(1, 50)
2394  _test_vs_Embedding(N, D, B, L)
2395  for max_norm in (None, 3):
2396  for p in itertools.product([1, 2], repeat=4):
2397  _test_vs_Embedding(*p, max_norm=max_norm)
2398 
2399  # check that giving illegal input combos raises error
2400  es = nn.EmbeddingBag(10, 20, mode=mode, sparse=sparse)
2401  input = torch.ones(3, 4)
2402  offset = torch.arange(0, 3)
2403  self.assertRaises(ValueError, lambda: es(input, offset))
2404  self.assertRaises(ValueError, lambda: es(input.view(-1)))
2405  offset[0] = 1
2406  self.assertRaises(ValueError, lambda: es(input.view(-1), offset))
2407  offset[0] = 0
2408  offset[-1] = 100
2409  self.assertRaises(ValueError, lambda: es(input.view(-1), offset))
2410 
2411  def test_embeddingbag_from_pretrained(self):
2412  a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
2413  embeddingbag = nn.EmbeddingBag.from_pretrained(a)
2414  self.assertEqual(a, embeddingbag.weight.data)
2415 
2416  input = torch.LongTensor([[0, 1]])
2417  output = embeddingbag(input)
2418  self.assertEqual(a.mean(0, keepdim=True), output)
2419 
2420  def test_embeddingbag_from_pretrained_options(self):
2421  a = torch.Tensor([[1, 2, 3], [4, 5, 6]])
2422  opts = {
2423  "max_norm": 2.,
2424  "norm_type": .5,
2425  "scale_grad_by_freq": False,
2426  "mode": "max",
2427  "sparse": False
2428  }
2429  embeddingbag = nn.EmbeddingBag.from_pretrained(a, **opts)
2430 
2431  input = torch.LongTensor([[0, 1]])
2432  output = embeddingbag(input)
2433  self.assertEqual(a.max(0, keepdim=True)[0], output)
2434  self.assertTrue(a.ne(torch.arange(1, 7, dtype=a.dtype).view(2, 3)).all())
2435  self.assertTrue(a.norm(p=opts["norm_type"], dim=1).le(opts["max_norm"]).all())
2436 
2437  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2438  def test_pool3d_size_one_feature_dim(self):
2439  # Tests crazy strides for feature dim of size 1
2440  x = Variable(torch.randn(7, 1, 5, 3, 2, device="cuda"))
2441  strange_strides = [30, 1234, 6, 2, 1]
2442  y = x.as_strided(x.size(), strange_strides)
2443  x = x.cpu().as_strided(x.size(), strange_strides)
2444 
2445  to_test = {
2446  'max_pool3d': lambda t: F.max_pool3d(t, (5, 1, 1), stride=(5, 1, 1)),
2447  'avg_pool3d': lambda t: F.avg_pool3d(t, (5, 1, 1), stride=(5, 1, 1)),
2448  }
2449 
2450  for test, fn in to_test.items():
2451  # Should not crash
2452  out_y = fn(y)
2453  out_x = fn(x)
2454  self.assertEqual(out_y, out_x.cuda(), test)
2455 
2456  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2457  def test_AvgPool3d_backward_after_cat_dim1_cuda(self):
2458  # x has to have batch_size 1 to test contiguous checks
2459  x = torch.randn(1, 3, 4, 4, 4, device="cuda", requires_grad=True)
2460  y = F.avg_pool3d(x, kernel_size=3, padding=1, stride=2)
2461 
2462  grad = torch.randn(y.size(), device="cuda")
2463  # increase the stride in dimension 0. the tensor is still contiguous because size[0] is 1
2464  stride = list(grad.stride())
2465  stride[0] = stride[0] * 2
2466  grad.set_(grad.storage(), 0, grad.size(), stride)
2467  assert grad.is_contiguous()
2468 
2469  y.backward(grad)
2470 
2471  @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
2472  def test_contig_wrong_stride_cudnn(self):
2473  # x has to have batch_size 1 to test contiguous checks
2474  x = torch.randn(1, 16, 5, 5, device="cuda")
2475  stride = list(x.stride())
2476  stride[0] = 20
2477  # change the stride in dimension 0. the tensor is still contiguous because size[0] is 1
2478  x.set_(x.storage(), 0, x.size(), stride)
2479  self.assertTrue(x.is_contiguous())
2480  F.conv_transpose2d(x, torch.randn(16, 1, 1, 1, device="cuda"))
2481  F.conv2d(x, torch.randn(1, 16, 1, 1, device="cuda"))
2482 
2483  def test_embedding_bag(self):
2484  self._test_EmbeddingBag(False, 'sum', False)
2485  self._test_EmbeddingBag(False, 'mean', False)
2486  self._test_EmbeddingBag(False, 'max', False)
2487 
2488  self._test_EmbeddingBag(False, 'sum', True)
2489  self._test_EmbeddingBag(False, 'mean', True)
2490 
2491  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2492  @repeat_test_for_types(ALL_TENSORTYPES)
2493  def test_embedding_bag_cuda(self, dtype=torch.float):
2494  self._test_EmbeddingBag(True, 'sum', False, dtype)
2495  self._test_EmbeddingBag(True, 'mean', False, dtype)
2496  self._test_EmbeddingBag(True, 'max', False, dtype)
2497  if dtype != torch.half:
2498  # torch.cuda.sparse.HalfTensor is not enabled.
2499  self._test_EmbeddingBag(True, 'sum', True, dtype)
2500  self._test_EmbeddingBag(True, 'mean', True, dtype)
2501 
2502  def test_fractional_max_pool2d(self):
2503  x = torch.randn(1, 2, 7, 7, requires_grad=True)
2504  samples = x.new(1, 2, 2).uniform_()
2505 
2506  def func(x):
2507  return F.fractional_max_pool2d(
2508  x, (2, 2), output_size=(3, 3), _random_samples=samples)
2509 
2510  self.assertEqual(func(x).shape, (1, 2, 3, 3))
2511  gradcheck(func, [x])
2512  gradgradcheck(func, [x])
2513 
2514  x = torch.randn(2, 7, 7, requires_grad=True)
2515  samples = x.new(2, 2).uniform_()
2516  self.assertEqual(func(x).shape, (2, 3, 3))
2517  gradcheck(func, [x])
2518  gradgradcheck(func, [x])
2519 
2520  def test_Dropout(self):
2521  input = torch.Tensor(1000)
2522  self._test_dropout(nn.Dropout, False, input)
2523 
2524  def test_Dropout2d(self):
2525  b = random.randint(1, 5)
2526  w = random.randint(1, 5)
2527  h = random.randint(1, 5)
2528  num_features = 1000
2529  input = torch.Tensor(num_features, b, w, h)
2530  self._test_dropout(nn.Dropout2d, False, input)
2531 
2532  def test_Dropout3d(self):
2533  b = random.randint(1, 5)
2534  w = random.randint(1, 5)
2535  h = random.randint(1, 5)
2536  d = random.randint(1, 2)
2537  num_features = 1000
2538  input = torch.Tensor(num_features, b, d, w, h)
2539  self._test_dropout(nn.Dropout3d, False, input)
2540 
2541  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2542  def test_Dropout_cuda(self):
2543  input = torch.Tensor(1000)
2544  self._test_dropout(nn.Dropout, True, input)
2545 
2546  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2547  def test_Dropout2d_cuda(self):
2548  b = random.randint(1, 5)
2549  w = random.randint(1, 5)
2550  h = random.randint(1, 5)
2551  num_features = 1000
2552  input = torch.Tensor(num_features, b, w, h)
2553  self._test_dropout(nn.Dropout2d, True, input)
2554 
2555  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2556  def test_Dropout3d_cuda(self):
2557  b = random.randint(1, 5)
2558  w = random.randint(1, 5)
2559  h = random.randint(1, 5)
2560  d = random.randint(1, 2)
2561  num_features = 1000
2562  input = torch.Tensor(num_features, b, d, w, h)
2563  self._test_dropout(nn.Dropout3d, True, input)
2564 
2565  def test_AlphaDropout(self):
2566  # generate random tensor with zero mean and unit std
2567  input = torch.randn(5000)
2568  self._test_alpha_dropout(nn.AlphaDropout, input)
2569 
2570  def test_FeatureAlphaDropout(self):
2571  b = random.randint(1, 5)
2572  w = random.randint(1, 5)
2573  h = random.randint(1, 5)
2574  d = random.randint(1, 2)
2575  num_features = 1000
2576  input = torch.randn(num_features, b, d, w, h)
2577  self._test_alpha_dropout(nn.FeatureAlphaDropout, input)
2578 
2579  def _test_InstanceNorm_general(self, cls, input, device="cpu", dtype=torch.float):
2580  # default case track_running_stats=False
2581  b, c = input.size(0), input.size(1)
2582  input_var = input.to(device=device, dtype=dtype).requires_grad_()
2583 
2584  IN = cls(c, eps=0).to(device, dtype)
2585 
2586  output = IN(input_var)
2587  out_reshaped = output.view(b * c, -1)
2588 
2589  mean = out_reshaped.mean(1)
2590  var = out_reshaped.var(1, unbiased=False)
2591 
2592  self.assertAlmostEqual(torch.abs(mean.data).mean(), 0, delta=1e-5)
2593  self.assertAlmostEqual(torch.abs(var.data).mean(), 1, delta=1e-5)
2594 
2595  # check that eval mode doesn't change behavior
2596  grad_out = torch.randn_like(output)
2597  res1 = output.data.clone()
2598  output.backward(grad_out)
2599  grad1 = input_var.grad.data.clone()
2600 
2601  IN.eval()
2602  output = IN(input_var)
2603  input_var.grad = None
2604  output.backward(grad_out)
2605  res2 = output.data
2606  grad2 = input_var.grad.data
2607  self.assertEqual(res1, res2)
2608  self.assertEqual(grad1, grad2)
2609 
2610  # If track_running_stats=True and momentum=1, running_mean/var should be
2611  # equal to mean/var of the input (using the unbiased variance estimate)
2612  IN = cls(c, momentum=1, eps=0, track_running_stats=True).to(device, dtype)
2613 
2614  output = IN(input_var)
2615 
2616  input_reshaped = input_var.transpose(1, 0).reshape(c, -1)
2617  mean = input_reshaped.mean(1)
2618 
2619  input_reshaped = input_var.transpose(1, 0).reshape(c, b, -1)
2620  var = input_reshaped.var(2, unbiased=True)[:, :]
2621 
2622  self.assertAlmostEqual(torch.abs(mean.data - IN.running_mean).mean(), 0, delta=1e-5)
2623  self.assertAlmostEqual(torch.abs(var.data.mean(1) - IN.running_var).mean(), 0, delta=1e-5)
2624 
2625  # in eval mode, adding X * std to a channel in input should make the
2626  # corresponding channel in output have mean X
2627  IN.eval()
2628  delta = IN.running_var.sqrt() * torch.arange(c, device=device, dtype=dtype)
2629  delta = delta.view(-1, *[1 for _ in range(2, input.dim())])
2630  output = IN(input_var + delta)
2631  self.assertEqual(output.transpose(0, 1).reshape(c, -1).mean(1), torch.arange(c))
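# In eval mode with track_running_stats the output is
# (x - running_mean) / sqrt(running_var + eps) per channel (eps=0 here), so
# adding k * sqrt(running_var[k]) to channel k shifts that channel's
# normalized mean by k, which is what the arange(c) comparison checks.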
2632 
2633  def _test_InstanceNorm_cuda_half(self, cls, input):
2634  # THNN
2635  input = Variable(input.cuda().half().random_(1, 10), requires_grad=True)
2636  m = cls(input.size(1), affine=True, track_running_stats=True).to("cuda", torch.half)
2637  thnn_output = m(input)
2638  thnn_output.sum().backward()
2639  thnn_input_grad = input.grad.data.clone()
2640  self.assertEqual(thnn_output.type(), input.type())
2641  # cuDNN
2642  if TEST_CUDNN:
2643  input.grad = None
2644  m = m.float()
2645  cudnn_output = m(input)
2646  cudnn_output.sum().backward()
2647  cudnn_input_grad = input.grad.data.clone()
2648  self.assertEqual(cudnn_output.type(), input.type())
2649  self.assertAlmostEqual(cudnn_output, thnn_output, delta=1e-4)
2650  self.assertAlmostEqual(cudnn_input_grad, thnn_input_grad, delta=1e-3)
2651 
2652  def test_InstanceNorm1d_general(self):
2653  b = random.randint(3, 5)
2654  c = random.randint(3, 5)
2655  d = random.randint(8, 10)
2656 
2657  input = torch.rand(b, c, d)
2658  self._test_InstanceNorm_general(nn.InstanceNorm1d, input, dtype=torch.float)
2659 
2660  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2661  def test_InstanceNorm1d_general_cuda(self):
2662  b = random.randint(3, 5)
2663  c = random.randint(3, 5)
2664  d = random.randint(8, 10)
2665 
2666  input = torch.rand(b, c, d)
2667  self._test_InstanceNorm_general(nn.InstanceNorm1d, input, "cuda", torch.float)
2668  self._test_InstanceNorm_cuda_half(nn.InstanceNorm1d, input)
2669 
2670  def test_InstanceNorm2d_general(self):
2671  b = random.randint(3, 5)
2672  c = random.randint(3, 5)
2673  w = random.randint(3, 6)
2674  h = random.randint(6, 8)
2675 
2676  input = torch.rand(b, c, h, w)
2677  self._test_InstanceNorm_general(nn.InstanceNorm2d, input, dtype=torch.float)
2678 
2679  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2680  def test_InstanceNorm2d_general_cuda(self):
2681  b = random.randint(3, 5)
2682  c = random.randint(3, 5)
2683  w = random.randint(3, 6)
2684  h = random.randint(6, 8)
2685 
2686  input = torch.rand(b, c, h, w)
2687  self._test_InstanceNorm_general(nn.InstanceNorm2d, input, "cuda", torch.float)
2688  self._test_InstanceNorm_cuda_half(nn.InstanceNorm2d, input)
2689 
2690  def test_InstanceNorm3d_general(self):
2691  b = random.randint(3, 5)
2692  c = random.randint(3, 5)
2693  w = random.randint(2, 5)
2694  h = random.randint(2, 5)
2695  d = random.randint(2, 5)
2696 
2697  input = torch.rand(b, c, h, w, d)
2698  self._test_InstanceNorm_general(nn.InstanceNorm3d, input, dtype=torch.float)
2699 
2700  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2701  @skipIfRocm
2702  def test_InstanceNorm3d_general_cuda(self):
2703  b = random.randint(3, 5)
2704  c = random.randint(2, 5)
2705  w = random.randint(2, 5)
2706  h = random.randint(2, 5)
2707  d = random.randint(2, 5)
2708 
2709  input = torch.rand(b, c, h, w, d)
2710  self._test_InstanceNorm_general(nn.InstanceNorm3d, input, "cuda", torch.float)
2711  self._test_InstanceNorm_cuda_half(nn.InstanceNorm3d, input)
2712 
2713  def _test_LayerNorm_general(self, device="cpu", dtype=torch.float):
2714  for i in range(2, 6):
2715  shape = torch.randint(3, 6, (i,), dtype=torch.long).tolist()
2716  x = torch.empty(*shape, device=device, dtype=dtype).uniform_(0, 10)
2717  normalized_ndim = random.randint(1, i - 1) # inclusive
2718  normalized_shape = shape[-normalized_ndim:]
2719  unnormalized_shape = shape[:-normalized_ndim]
2720 
2721  # test that LN normalizes to mean 0 and stddev 1
2722  ln = nn.LayerNorm(normalized_shape, eps=0).to(device, dtype)
2723  ln.weight.data.fill_(1)
2724  ln.bias.data.fill_(0)
2725  output = ln(x)
2726  out_reshaped = output.view(*(unnormalized_shape + [-1]))
2727  mean = out_reshaped.mean(-1)
2728  var = out_reshaped.var(-1, unbiased=False)
2729  self.assertAlmostEqual(torch.abs(mean.data).mean(), 0, delta=1e-5)
2730  self.assertAlmostEqual(torch.abs(var.data).mean(), 1, delta=1e-5)
2731 
2732  # test that LN applies weight and bias correctly
2733  scale, bias = torch.empty(2).uniform_(0.2, 2).tolist()
2734  ln.weight.data.fill_(scale)
2735  ln.bias.data.fill_(bias)
2736  output = ln(x)
2737  out_reshaped = output.view(*(unnormalized_shape + [-1]))
2738  mean = out_reshaped.mean(-1)
2739  var = out_reshaped.var(-1, unbiased=False)
2740  self.assertAlmostEqual(torch.abs(mean.data).mean(), bias, delta=1e-5)
2741  self.assertAlmostEqual(torch.abs(var.data).mean(), scale ** 2, delta=1e-5)
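# Since the normalized activations have mean ~0 and variance ~1, applying
# weight=scale and bias=bias gives outputs with mean ~bias and variance
# ~scale**2 over each normalized slice; bias is drawn from U(0.2, 2), so
# taking abs() before averaging does not change the expected value.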
2742 
2743  bad_norm_shape_input_shape = {
2744  (): (),
2745  (2, 3): (3,),
2746  (2,): (1, 2, 3),
2747  (10,): (2, 3),
2748  10: (2, 3),
2749  }
2750  for norm_shape, input_shape in bad_norm_shape_input_shape.items():
2751  ln = nn.LayerNorm(norm_shape)
2752  input = torch.empty(input_shape, device=device, dtype=dtype).uniform_(0, 10)
2753  self.assertRaises(RuntimeError, lambda: ln(input))
2754 
2755  def _test_LayerNorm_cuda_half(self):
2756  input = Variable(torch.empty(2, 3, 3, 2).to("cuda", torch.half).random_(1, 10), requires_grad=True)
2757  m = nn.LayerNorm([3, 2]).to("cuda", torch.half)
2758  output = m(input)
2759  output.sum().backward()
2760  self.assertEqual(output.type(), input.type())
2761 
2762  def test_LayerNorm_general(self):
2763  self._test_LayerNorm_general()
2764 
2765  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2766  def test_LayerNorm_general_cuda(self):
2767  self._test_LayerNorm_general("cuda")
2768  self._test_LayerNorm_cuda_half()
2769 
2770  def _test_GroupNorm_general(self, device="cpu", dtype=torch.float):
2771  good_shape_g = {
2772  (1, 2, 3, 4): 2,
2773  (2, 3, 10): 3,
2774  (3, 1, 1, 1, 2): 1,
2775  (2, 6, 4, 2, 2): 3,
2776  }
2777  for shape, g in good_shape_g.items():
2778  x = torch.empty(*shape, device=device, dtype=dtype).uniform_(0, 10)
2779  b = shape[0]
2780  c = shape[1]
2781 
2782  # test that GN normalizes to mean 0 and stddev 1
2783  gn = nn.GroupNorm(g, c, eps=0).to(device, dtype)
2784  gn.weight.data.fill_(1)
2785  gn.bias.data.fill_(0)
2786  output = gn(x)
2787  out_reshaped = output.view(b, g, -1)
2788  mean = out_reshaped.mean(-1)
2789  var = out_reshaped.var(-1, unbiased=False)
2790  self.assertAlmostEqual(torch.abs(mean).mean(), 0, delta=1e-5)
2791  self.assertAlmostEqual(torch.abs(var).mean(), 1, delta=1e-5)
2792 
2793  # test that GN applies weight and bias correctly
2794  scale = torch.empty(c, device=device, dtype=dtype).uniform_(0.2, 2)
2795  bias = torch.empty(c, device=device, dtype=dtype).uniform_(0.2, 2)
2796  gn.weight.data.copy_(scale)
2797  gn.bias.data.copy_(bias)
2798  output = gn(x)
2799  out_reshaped = output.view(b, c, -1)
2800  out_normed = (out_reshaped - bias.view(c, 1)) / scale.view(c, 1)
2801  out_normed_reshaped = out_normed.view(b, g, -1)
2802  mean = out_normed_reshaped.mean(-1)
2803  var = out_normed_reshaped.var(-1, unbiased=False)
2804  self.assertAlmostEqual(torch.abs(mean).mean(), 0, delta=1e-5)
2805  self.assertAlmostEqual(torch.abs(var).mean(), 1, delta=1e-5)
2806 
2807  bad_shape_g = {
2808  (1, 2, 3, 4): 3,
2809  (2, 3, 10): 2,
2810  (3, 1, 1, 1, 2): 10,
2811  (2, 6, 4, 2, 2): 4,
2812  }
2813  for shape, g in bad_shape_g.items():
2814  gn = nn.GroupNorm(g, shape[1])
2815  input = torch.empty(*shape, device=device, dtype=dtype).uniform_(0, 10)
2816  self.assertRaises(RuntimeError, lambda: gn(input))
2817 
2818  def _test_GroupNorm_cuda_half(self):
2819  input = Variable(torch.empty(2, 3, 3, 2).to("cuda", torch.half).random_(1, 10), requires_grad=True)
2820  input = torch.zeros(2, 4, 3, 2, requires_grad=True).cuda().half().random_(1, 10)
2821  m = nn.GroupNorm(2, 4).to("cuda", torch.half)
2822  output = m(input)
2823  output.sum().backward()
2824  self.assertEqual(output.type(), input.type())
2825 
2826  def test_GroupNorm_general(self):
2827  self._test_GroupNorm_general(dtype=torch.float)
2828 
2829  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2830  def test_GroupNorm_general_cuda(self):
2831  self._test_GroupNorm_general("cuda", torch.float)
2832  self._test_GroupNorm_cuda_half()
2833 
2834  def test_pad(self):
2835  inputs = torch.randn(1, 3, 4, 4, requires_grad=True)
2836  _assertGradAndGradgradChecks(self, lambda x: F.pad(x, (1, 1, 1, 1)), (inputs,))
2837  _assertGradAndGradgradChecks(self, lambda x: F.pad(x, (-1, 1, -2, 1)), (inputs,))
2838  _assertGradAndGradgradChecks(self, lambda x: F.pad(x, (-1, 1, -2, 1), value=2), (inputs,))
2839  self.assertTrue(gradcheck(lambda x: F.pad(x, (-1, 1, -2, 1), mode='replicate'), (inputs,)))
2840  self.assertTrue(gradcheck(lambda x: F.pad(x, (-1, 1, -2, 1), mode='reflect'), (inputs,)))
2841 
2842  inputs = torch.randn(1, 2, 3, 4, 4, requires_grad=True)
2843  self.assertTrue(gradcheck(lambda x: F.pad(x, (1, 1, 1, 1, 1, 1), mode='replicate'), (inputs,)))
2844 
2845  # assert that reflection padding errors when pad >= input size
2846  expected_err_msg = r"Padding size should be less than the corresponding input dimension"
2847  self.assertRaisesRegex(RuntimeError, expected_err_msg,
2848  lambda: F.pad(torch.randn(1, 1, 2, 3), (1, 1, 3, 0), mode='reflect'))
2849  self.assertRaisesRegex(RuntimeError, expected_err_msg,
2850  lambda: F.pad(torch.randn(1, 1, 2), (2, 1), mode='reflect'))
2851 
2852  @staticmethod
2853  def _test_one_hot(self, use_cuda=False):
2854  device = torch.device('cuda' if use_cuda else 'cpu')
2855  with self.assertRaises(RuntimeError):
2856  torch.nn.functional.one_hot(torch.tensor([3, 4, -1, 0], device=device), -1)
2857 
2858  with self.assertRaises(RuntimeError):
2859  torch.nn.functional.one_hot(torch.tensor([3, 4, 1, 0], device=device), 3)
2860 
2861  t = torch.nn.functional.one_hot(torch.tensor([3, 4, 1, 0], device=device))
2862  expected = torch.tensor([[0, 0, 0, 1, 0],
2863  [0, 0, 0, 0, 1],
2864  [0, 1, 0, 0, 0],
2865  [1, 0, 0, 0, 0]], device=device)
2866  self.assertEqual(t, expected)
2867 
2868  t = torch.nn.functional.one_hot(torch.tensor([3, 4, 1, 0], device=device), -1)
2869  expected = torch.tensor([[0, 0, 0, 1, 0],
2870  [0, 0, 0, 0, 1],
2871  [0, 1, 0, 0, 0],
2872  [1, 0, 0, 0, 0]], device=device)
2873  self.assertEqual(t, expected)
2874 
2875  t = torch.nn.functional.one_hot(torch.tensor([3, 4, 1, 0], device=device), 6)
2876  expected = torch.tensor([[0, 0, 0, 1, 0, 0],
2877  [0, 0, 0, 0, 1, 0],
2878  [0, 1, 0, 0, 0, 0],
2879  [1, 0, 0, 0, 0, 0]], device=device)
2880  self.assertEqual(t, expected)
2881 
2882  t = torch.nn.functional.one_hot(torch.tensor([[3, 4], [1, 0]], device=device))
2883  expected = torch.tensor([[[0, 0, 0, 1, 0],
2884  [0, 0, 0, 0, 1]],
2885  [[0, 1, 0, 0, 0],
2886  [1, 0, 0, 0, 0]]], device=device)
2887  self.assertEqual(t, expected)
2888 
2889  t = torch.nn.functional.one_hot(torch.tensor(4, device=device))
2890  expected = torch.tensor([0, 0, 0, 0, 1], device=device)
2891  self.assertEqual(t, expected)
2892 
2893  t = torch.nn.functional.one_hot(torch.empty([4, 0], dtype=torch.long, device=device), 100)
2894  expected = torch.empty([4, 0, 100])
2895  self.assertEqual(t, expected)
2896 
2897  with self.assertRaises(RuntimeError):
2898  torch.nn.functional.one_hot(torch.empty([4, 0], dtype=torch.long, device=device))
2899 
2900  with self.assertRaises(RuntimeError):
2901  torch.nn.functional.one_hot(torch.tensor([3, 4, 1, 0], device=device), -2)
2902 
2903  def test_one_hot(self):
2904  self._test_one_hot(self)
2905 
2906  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
2907  def test_one_hot_cuda(self):
2908  self._test_one_hot(self, use_cuda=True)
2909 
2910  def test_pad_scalar_error(self):
2911  inputs = torch.tensor(0., requires_grad=True)
2912  self.assertRaises(AssertionError, lambda: F.pad(inputs, (1, 1)))
2913  self.assertRaises(AssertionError, lambda: F.pad(inputs, (1,)))
2914 
2915  def test_nn_scalars(self):
2916  # One-off tests to ensure scalars from nn.yaml are properly applied
2917  def verify_scalars(input, output):
2918  if input.dim() == 0:
2919  self.assertEqual((), output.shape)
2920  else:
2921  self.assertNotEqual((), output.shape)
2922  output.sum().backward()
2923  self.assertEqual(input.shape, input.grad.shape)
2924 
2925  devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
2926  for device in devices:
2927  for input_shape in [(5, 6), ()]:
2928  for module in [torch.nn.ELU, torch.nn.Hardtanh, torch.nn.LeakyReLU, torch.nn.LogSigmoid,
2929  torch.nn.RReLU, torch.nn.Softshrink, torch.nn.Softplus, torch.nn.Sigmoid,
2930  torch.nn.Tanh]:
2931  input = torch.randn(input_shape, device=device, requires_grad=True)
2932  m = module()
2933  output = m(input)
2934  verify_scalars(input, output)
2935 
2936  def test_nn_scalars_reductions(self):
2937  # One-off tests to ensure scalars from nn.yaml are properly applied
2938  def verify_reduction_scalars(input, reduction, output):
2939  if reduction != 'none' or input.dim() == 0:
2940  self.assertEqual((), output.shape)
2941  else:
2942  self.assertNotEqual((), output.shape)
2943  output.sum().backward()
2944  self.assertEqual(input.shape, input.grad.shape)
2945 
2946  devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
2947  for device in devices:
2948  for input_shape in [(5, 6), ()]:
2949  for reduction in ['none', 'mean', 'sum']:
2950  for module in [torch.nn.BCELoss, torch.nn.L1Loss, torch.nn.MSELoss,
2951  torch.nn.SmoothL1Loss, torch.nn.SoftMarginLoss]:
2952  input = torch.randn(input_shape, device=device, requires_grad=True)
2953  target = torch.empty(input_shape, device=device).random_(2)
2954  sigmoid = nn.Sigmoid()
2955 
2956  input = torch.randn(input_shape, device=device, requires_grad=True)
2957  m = module(reduction=reduction)
2958  output = m(sigmoid(input), target)
2959  verify_reduction_scalars(input, reduction, output)
2960 
2961  def test_normalize(self):
2962  inputs = torch.randn(1, 3, 4, 4, requires_grad=True)
2963  self.assertTrue(gradcheck(lambda x: F.normalize(x, p=1, dim=-1), (inputs,)))
2964  self.assertTrue(gradcheck(lambda x: F.normalize(x, p=2, dim=-2), (inputs,)))
2965 
2966  inputs = torch.randn((), requires_grad=True)
2967  self.assertTrue(gradcheck(lambda x: F.normalize(x, p=1, dim=-1), (inputs,)))
2968 
2969  def _test_maxpool_indices(self, num_dim, adaptive=False, device="cpu", dtype=torch.float):
2970  def expected_indices(dim):
2971  if dim == 1:
2972  return torch.tensor([1, 3], dtype=torch.double).repeat(2, 2, 1)
2973  if dim == 2:
2974  return torch.tensor([[5, 7], [13, 15]], dtype=torch.double).repeat(2, 2, 1, 1)
2975 
2976  def expected_grad(dim):
2977  if dim == 1:
2978  return torch.tensor([0, 1, 0, 1], dtype=torch.double).repeat(2, 2, 1)
2979  grad = expected_grad(dim - 1)
2980  zero = torch.zeros(grad.size())
2981  return torch.stack((zero, grad, zero, grad), 2)
2982 
2983  def expected_output(dim):
2984  if dim == 1:
2985  return torch.arange(2, 17, 2).view(2, 2, 2)
2986  if dim == 2:
2987  col = torch.arange(6, 63, 8)
2988  return torch.stack([col, col + 2], 1).view(2, 2, 2, 2)
2989 
2990  if adaptive:
2991  cls_name = 'AdaptiveMaxPool{}d'.format(num_dim)
2992  else:
2993  cls_name = 'MaxPool{}d'.format(num_dim)
2994  module_cls = getattr(nn, cls_name)
2995  module = module_cls(2, return_indices=True).to(device, dtype=dtype)
2996  numel = 4 ** (num_dim + 1)
2997  input = torch.arange(1, numel + 1).view(2, 2, *repeat(4, num_dim)).to(device, dtype=dtype)
2998  input_var = input.clone().detach().requires_grad_()
2999 
3000  # Check forward
3001  output, indices = module(input_var)
3002  if num_dim != 3:
3003  expected_indices = expected_indices(num_dim)
3004  expected_output = expected_output(num_dim)
3005  self.assertEqual(indices.dim(), input.dim())
3006  self.assertEqual(indices.data.squeeze(), expected_indices)
3007  self.assertEqual(output.data.squeeze(), expected_output)
3008  self.assertTrue(output.requires_grad)
3009  self.assertFalse(indices.requires_grad)
3010 
3011  # Make sure backward works
3012  grad_output = torch.ones(output.size(), device=device, dtype=dtype)
3013  output.backward(grad_output, retain_graph=True)
3014  expected_grad = expected_grad(num_dim)
3015  self.assertEqual(input_var.grad.data, expected_grad.view_as(input))
3016 
3017  # Make sure backward after changing indices will result in an error
3018  indices.add_(1)
3019  self.assertRaises(RuntimeError, lambda: output.backward(grad_output))
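# The in-place add_ bumps the version counter of `indices`, which the pooling
# backward saved for its gradient computation, so autograd is expected to
# detect the modification and raise a RuntimeError here.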
3020 
3021  def test_adaptive_pooling_input_size(self):
3022  for numel in (2, 3):
3023  for pool_type in ('Max', 'Avg'):
3024  cls_name = 'Adaptive{}Pool{}d'.format(pool_type, numel)
3025  module_cls = getattr(nn, cls_name)
3026  output_size = (2,) * numel
3027  module = module_cls(output_size)
3028 
3029  input = torch.randn(output_size)
3030  self.assertRaises(ValueError, lambda: module(input))
3031 
3032  def test_adaptive_pooling_size_none(self):
3033  for numel in (2, 3):
3034  for pool_type in ('Max', 'Avg'):
3035  cls_name = 'Adaptive{}Pool{}d'.format(pool_type, numel)
3036  module_cls = getattr(nn, cls_name)
3037  output_size = (2,) * (numel - 1) + (None,)
3038  module = module_cls(output_size)
3039 
3040  input = torch.randn((4,) * (numel + 1))
3041  output = module(input)
3042  self.assertEqual(output.size(), (4,) + (2,) * (numel - 1) + (4,))
3043 
3044  def test_Conv2d_naive_groups(self):
3045  self._test_Conv2d_naive_groups()
3046 
3047  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3048  @repeat_test_for_types(ALL_TENSORTYPES)
3049  @skipIfRocm
3050  def test_Conv2d_naive_groups_cuda(self, dtype=torch.float):
3051  self._test_Conv2d_naive_groups("cuda", dtype)
3052 
3053  def test_batchnorm_grad(self):
3054  self._test_batchnorm_grad()
3055 
3056  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3057  @skipIfRocm
3058  def test_batchnorm_grad_cuda(self):
3059  self._test_batchnorm_grad("cuda")
3060  if TEST_CUDNN:
3061  with torch.backends.cudnn.flags(enabled=False):
3062  self._test_batchnorm_grad("cuda")
3063 
3064  def test_batchnorm_eval(self):
3065  self._test_batchnorm_eval()
3066 
3067  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3068  def test_batchnorm_eval_cuda(self, dtype=torch.float):
3069  self._test_batchnorm_eval("cuda", dtype)
3070  if TEST_CUDNN:
3071  with torch.backends.cudnn.flags(enabled=False):
3072  self._test_batchnorm_eval("cuda", dtype)
3073 
3074  def test_batchnorm_simple_average(self):
3075  self._test_batchnorm_simple_average()
3076 
3077  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3078  def test_batchnorm_simple_average_cuda(self):
3079  self._test_batchnorm_simple_average(torch.cuda.FloatTensor)
3080  if TEST_CUDNN:
3081  with torch.backends.cudnn.flags(enabled=False):
3082  self._test_batchnorm_simple_average(torch.cuda.FloatTensor)
3083 
3084  def test_MaxPool1d_indices(self):
3085  self._test_maxpool_indices(1)
3086 
3087  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3088  @repeat_test_for_types(ALL_TENSORTYPES)
3089  def test_MaxPool1d_indices_cuda(self, dtype=torch.float):
3090  self._test_maxpool_indices(1, device="cuda", dtype=dtype)
3091 
3092  def test_MaxPool2d_indices(self):
3093  self._test_maxpool_indices(2)
3094 
3095  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3096  @repeat_test_for_types(ALL_TENSORTYPES)
3097  def test_MaxPool2d_indices_cuda(self, dtype=torch.float):
3098  self._test_maxpool_indices(2, device="cuda", dtype=dtype)
3099 
3100  def test_MaxPool3d_indices(self):
3101  self._test_maxpool_indices(3)
3102 
3103  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3104  @repeat_test_for_types(ALL_TENSORTYPES)
3105  def test_MaxPool3d_indices_cuda(self, dtype=torch.float):
3106  self._test_maxpool_indices(3, device="cuda", dtype=dtype)
3107 
3108  def test_AdaptiveMaxPool1d_indices(self):
3109  self._test_maxpool_indices(1, adaptive=True)
3110 
3111  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3112  @repeat_test_for_types(ALL_TENSORTYPES)
3113  def test_AdaptiveMaxPool1d_indices_cuda(self, dtype=torch.float):
3114  self._test_maxpool_indices(1, adaptive=True, device="cuda", dtype=dtype)
3115 
3116  def test_AdaptiveMaxPool2d_indices(self):
3117  self._test_maxpool_indices(2, adaptive=True)
3118 
3119  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3120  @repeat_test_for_types(ALL_TENSORTYPES)
3121  def test_AdaptiveMaxPool2d_indices_cuda(self, dtype=torch.float):
3122  self._test_maxpool_indices(2, adaptive=True, device="cuda", dtype=dtype)
3123 
3124  def test_AdaptiveMaxPool3d_indices(self):
3125  self._test_maxpool_indices(3, adaptive=True)
3126 
3127  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3128  @repeat_test_for_types(ALL_TENSORTYPES)
3129  def test_AdaptiveMaxPool3d_indices_cuda(self, dtype=torch.float):
3130  self._test_maxpool_indices(3, adaptive=True, device="cuda", dtype=dtype)
3131 
3132  @staticmethod
3133  def _test_max_pool_nan(self, device, dtype=torch.float):
3134  for adaptive in ['', 'adaptive_']:
3135  for num_dim in [1, 2, 3]:
3136  fn_name = '{}max_pool{}d'.format(adaptive, num_dim)
3137  fn = getattr(F, fn_name)
3138  x = torch.full([1, 1] + num_dim * [3], nan, device=device, dtype=dtype)
3139  res = fn(x, 1 if adaptive else 3)
3140  self.assertTrue(math.isnan(res.item()))
3141 
3142  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3143  @repeat_test_for_types(ALL_TENSORTYPES)
3144  def test_max_pool_nan_cuda(self, dtype=torch.float):
3145  self._test_max_pool_nan(self, device="cuda", dtype=dtype)
3146 
3147  def test_max_pool_nan(self, dtype=torch.float):
3148  self._test_max_pool_nan(self, device="cpu")
3149 
3150  @staticmethod
3151  def _test_pool_large_size(self, device, dtype=torch.float):
3152  for op in ('max', 'avg'):
3153  for num_dim in [1, 2, 3]:
3154  fn_name = '{}_pool{}d'.format(op, num_dim)
3155  fn = getattr(F, fn_name)
3156  # 16777217 is the smallest positive integer not exactly representable in float32
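# (float32 has a 24-bit significand, so every integer up to 2**24 = 16777216 is exact,
# while 2**24 + 1 rounds back down; the shape check below guards against the output
# length being computed in float32.)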
3157  x = torch.ones([1, 1, 16777217] + (num_dim - 1) * [1],
3158  device=device, dtype=dtype)
3159  res = fn(x, 1, stride=1, padding=0)
3160  # check if the output shape was still computed correctly
3161  self.assertEqual(x.shape[2], res.shape[2])
3162 
3163  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3164  @repeat_test_for_types(ALL_TENSORTYPES)
3165  def test_pool_large_size_cuda(self, dtype=torch.float):
3166  self._test_pool_large_size(self, device="cuda", dtype=dtype)
3167 
3168  def test_pool_large_size(self, dtype=torch.float):
3169  self._test_pool_large_size(self, device="cpu")
3170 
3171  def _test_scatter(self, tensor):
3172  x = tensor.detach().requires_grad_()
3173  result = dp.scatter(x, (0, 1))
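# scatter chunks the tensor along dim 0, one chunk per listed device:
# rows 0-1 should land on cuda:0 and rows 2-3 on cuda:1, as asserted below.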
3174  self.assertEqual(len(result), 2)
3175  self.assertEqual(result[0], x[:2])
3176  self.assertEqual(result[0].get_device(), 0)
3177  self.assertEqual(result[1], x[2:])
3178  self.assertEqual(result[1].get_device(), 1)
3179  grad = result[0].data.clone().fill_(2)
3180  result[0].backward(grad)
3181  self.assertEqual(x.grad.data[:2], grad)
3182  self.assertEqual(x.grad.data[2:], grad.clone().zero_())
3183  _assertGradAndGradgradChecks(self, lambda y: dp.scatter(y, (0, 1)), (x,))
3184 
3185  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3186  @skipIfRocm
3187  def test_scatter_cpu(self):
3188  self._test_scatter(torch.randn(4, 4))
3189 
3190  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3191  def test_scatter_gpu(self):
3192  self._test_scatter(torch.randn(4, 4).cuda())
3193 
3194  def _test_gather(self, output_device):
3195  inputs = (
3196  torch.randn(2, 4, device='cuda:0', requires_grad=True),
3197  torch.randn(2, 4, device='cuda:1', requires_grad=True),
3198  )
3199  result = dp.gather(inputs, output_device)
3200  self.assertEqual(result.size(), torch.Size([4, 4]))
3201  self.assertEqual(result[:2], inputs[0])
3202  self.assertEqual(result[2:], inputs[1])
3203  if output_device != -1:
3204  self.assertEqual(result.get_device(), output_device)
3205  else:
3206  self.assertFalse(result.is_cuda)
3207  grad = torch.randn(4, 4)
3208  if output_device != -1:
3209  grad = grad.cuda(output_device)
3210  result.backward(grad)
3211  self.assertEqual(inputs[0].grad.data, grad[:2])
3212  self.assertEqual(inputs[1].grad.data, grad[2:])
3213  _assertGradAndGradgradChecks(self, lambda x, y: dp.gather((x, y), output_device), inputs)
3214 
3215  # test scalar inputs, should stack into a vector in this case
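# 0-dim tensors have no dim to concatenate along, so gather stacks them into a
# length-2 vector on the output device (index 0 from cuda:0, index 1 from cuda:1).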
3216  inputs = (
3217  torch.randn((), device='cuda:0', requires_grad=True),
3218  torch.randn((), device='cuda:1', requires_grad=True),
3219  )
3220  result = dp.gather(inputs, output_device)
3221  self.assertEqual(result.size(), torch.Size([2]))
3222  self.assertEqual(result[0], inputs[0])
3223  self.assertEqual(result[1], inputs[1])
3224  if output_device != -1:
3225  self.assertEqual(result.get_device(), output_device)
3226  else:
3227  self.assertFalse(result.is_cuda)
3228  grad = torch.randn(2)
3229  if output_device != -1:
3230  grad = grad.cuda(output_device)
3231  result.backward(grad)
3232  self.assertEqual(inputs[0].grad, grad[0])
3233  self.assertEqual(inputs[1].grad, grad[1])
3234  _assertGradAndGradgradChecks(self, lambda x, y: dp.gather((x, y), output_device), inputs)
3235 
3236  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3237  @skipIfRocm
3238  def test_gather_cpu(self):
3239  self._test_gather(-1)
3240 
3241  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3242  def test_gather_gpu(self):
3243  self._test_gather(0)
3244 
3245  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3246  def test_gather_different_len_dicts(self):
3247  inputs = (
3248  {'a': Variable(torch.randn(1, 2).cuda(0), requires_grad=True)},
3249  {
3250  'b': Variable(torch.randn(1, 2).cuda(1), requires_grad=True),
3251  'a': Variable(torch.randn(1, 2).cuda(1), requires_grad=True)
3252  }
3253  )
3254  with self.assertRaises(ValueError):
3255  _ = dp.gather(inputs, target_device=0)
3256 
3257  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3258  @skipIfRocm
3259  def test_broadcast_double_backwards_gpu(self):
3260  tensors = (torch.randn(4, 4, device='cuda', requires_grad=True),
3261  torch.randn(4, 4, device='cuda', requires_grad=True),
3262  torch.randn(4, 4, device='cuda', requires_grad=True))
3263  _assertGradAndGradgradChecks(self, lambda *i: Broadcast.apply((0, 1), *i), tensors)
3264 
3265  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3266  def test_broadcast_not_requiring_grad(self):
3267  variables = [
3268  torch.randn(1, 2, device='cuda', requires_grad=True),
3269  torch.randn(1, 2, device='cuda', requires_grad=False),
3270  torch.randn(1, 2, device='cuda', requires_grad=False),
3271  torch.randn(1, 2, device='cuda', requires_grad=True),
3272  torch.randn(1, 2, device='cuda', requires_grad=True),
3273  ]
3274  broadcasted_variables = Broadcast.apply((0, 1), *variables)
3275  for output_idx, broadcasted_var in enumerate(broadcasted_variables):
3276  input_var = variables[output_idx % len(variables)]
3277  self.assertEqual(input_var.requires_grad, broadcasted_var.requires_grad)
3278 
3279  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3280  def test_broadcast_no_grad(self):
3281  x = torch.randn(1, 2, dtype=torch.float32, requires_grad=True, device='cuda')
3282  with torch.no_grad():
3283  broadcasted = Broadcast.apply((0, 1), x)
3284  self.assertTrue(x.requires_grad)
3285  for output in broadcasted:
3286  self.assertFalse(output.requires_grad)
3287 
3288  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3289  def test_replicate(self):
3290  module = nn.Linear(10, 5).float().cuda()
3291  input = Variable(torch.randn(2, 10).float().cuda())
3292  expected_output = module(input).data
3293  replicas = dp.replicate(module, (0, 1))
3294  for i, replica in enumerate(replicas):
3295  for p in replica.parameters():
3296  self.assertEqual(p.get_device(), i)
3297  replica_input = input.cuda(i)
3298  self.assertEqual(replica(replica_input).data, expected_output)
3299 
3300  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3301  def test_replicate_buffers(self):
3302  net = nn.Module()
3303  net.bn = nn.BatchNorm2d(10)
3304  net.cuda()
3305  replicas = dp.replicate(net, (0, 1))
3306  for i, replica in enumerate(replicas):
3307  self.assertEqual(replica.bn.running_mean.get_device(), i, 'buffer on wrong device')
3308  self.assertEqual(replica.bn.running_var.get_device(), i, 'buffer on wrong device')
3309  self.assertEqual(replica.bn.num_batches_tracked.get_device(), i, 'buffer on wrong device')
3310 
3311  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3312  @skipIfRocm
3313  def test_data_parallel_buffers_requiring_grad(self):
3314  class TestModule(nn.Module):
3315  def __init__(self, t):
3316  super(TestModule, self).__init__()
3317  self.register_buffer('t_rg', t)
3318  self.register_buffer('t_not_rg', t.clone().detach())
3319 
3320  def forward(self, x):
3321  return x * self.t_rg + self.t_not_rg
3322 
3323  m = TestModule(torch.randn(100, device='cuda', requires_grad=True))
3324  self.assertTrue(m.t_rg.requires_grad)
3325 
3326  dpm = nn.DataParallel(m, [0, 1])
3327  inp = torch.randn(2, 100, device='cuda')
3328 
3329  def fn(t):
3330  return dpm(inp)
3331 
3332  torch.autograd.gradcheck(fn, (m.t_rg,))
3333 
3334  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3335  def test_parallel_apply(self):
3336  l1 = nn.Linear(10, 5).to("cuda:0", torch.float)
3337  l2 = nn.Linear(10, 5).to("cuda:1", torch.float)
3338  i1 = torch.randn(2, 10, device="cuda:0", dtype=torch.float)
3339  i2 = torch.randn(2, 10, device="cuda:1", dtype=torch.float)
3340  expected1 = l1(i1).data
3341  expected2 = l2(i2).data
3342  modules = (l1, l2)
3343  expected_outputs = (expected1, expected2)
3344 
3345  # each input can be either a collection of positional arguments
3346  # or an object representing the single argument
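# ((i1,), (i2,)) wraps each tensor in a tuple of positional args, while (i1, i2)
# passes each tensor directly; in both forms l1 runs on i1 (cuda:0) and l2 on i2
# (cuda:1), so the outputs should match the serial results computed above.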
3347  for inputs in [((i1,), (i2,)), (i1, i2)]:
3348  outputs = dp.parallel_apply(modules, inputs, None)
3349  for out, expected in zip(outputs, expected_outputs):
3350  self.assertEqual(out.data, expected)
3351 
3352  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3353  @skipIfRocm
3354  def test_data_parallel_multiple_input(self):
3355  class TestModule(nn.Module):
3356 
3357  def forward(self, var1, var2, float1, var3=None):
3358  if var3 is None:
3359  return float1 * (var1 * var2)
3360  else:
3361  return float1 * (var1 * var2 + var3)
3362 
3363  m = TestModule()
3364  var1 = torch.randn(5, 5, dtype=torch.float, requires_grad=True)
3365  var2 = torch.randn(5, 5, dtype=torch.float, requires_grad=True)
3366  var3 = torch.randn(5, 5, dtype=torch.float, requires_grad=False)
3367 
3368  float1 = torch.randn(1).item()
3369 
3370  expected = m(var1, var2, float1)
3371  loss = expected.sum()
3372  loss.backward()
3373  gvar1_exp = var1.grad.clone()
3374  gvar2_exp = var2.grad.clone()
3375 
3376  def local_test(out):
3377  var1.grad.data.fill_(0.0)
3378  var2.grad.data.fill_(0.0)
3379  loss = out.sum()
3380  loss.backward()
3381  self.assertEqual(out, expected)
3382  self.assertEqual(gvar1_exp, var1.grad)
3383  self.assertEqual(gvar2_exp, var2.grad)
3384 
3385  out = dp.data_parallel(m, (var1, var2, float1), (0, 1))
3386  local_test(out)
3387 
3388  out = dp.data_parallel(m, (var1, var2, float1), (1, 0))
3389  local_test(out)
3390 
3391  out = dp.data_parallel(m, (var1, var2, float1), (0,))
3392  local_test(out)
3393 
3394  var1.grad.data.fill_(0.0)
3395  var2.grad.data.fill_(0.0)
3396  expected = m(var1, var2, float1, var3=var3)
3397  loss = expected.sum()
3398  loss.backward()
3399  gvar1_exp = var1.grad.clone()
3400  gvar2_exp = var2.grad.clone()
3401 
3402  dpm = nn.DataParallel(TestModule())
3403  out = dpm(var1, var2, float1, var3=var3)
3404  local_test(out)
3405 
3406  dpm = nn.DataParallel(TestModule(), device_ids=[0])
3407  out = dpm(var1, var2, float1, var3=var3)
3408  local_test(out)
3409 
3410  kwarg_wrap = {'var3': var3}
3411  out = dp.data_parallel(
3412  m, (var1, var2, float1), (0, 1), module_kwargs=kwarg_wrap)
3413  local_test(out)
3414 
3415  out = dp.data_parallel(
3416  m, (var1, var2, float1), (0,), module_kwargs=kwarg_wrap)
3417  local_test(out)
3418 
3419  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3420  def test_data_parallel_small_back(self):
3421  l = nn.Linear(10, 5).float().cuda()
3422  i = Variable(torch.randn(20, 10).float().cuda())
3423  out = dp.data_parallel(l, i, (0, 1))
3424  self.assertEqual(out, l(i))
3425 
3426  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3427  @skipIfRocm
3428  def test_data_parallel_model_device(self):
3429  r"""Test device[0] check at forward time.
3430  """
3431  l = nn.Linear(2, 2)
3432  inp = torch.randn(2, 2)
3433  inp_cuda0 = inp.cuda(0)
3434  inp_cuda1 = inp.cuda(1)
3435 
3436  error_msg = "module must have its parameters and buffers on device {}"
3437 
3438  @contextlib.contextmanager
3439  def dummy_ctx_manager():
3440  yield
3441 
3442  def test(inner_m, dp_device, inp, device_ids, should_fail):
3443  if device_ids is None:
3444  device_ids = list(range(torch.cuda.device_count()))
3445 
3446  if isinstance(device_ids[0], torch.device):
3447  expect_device = device_ids[0]
3448  else:
3449  expect_device = torch.device("cuda:{}".format(device_ids[0]))
3450 
3451  if should_fail:
3452  def assert_correct():
3453  return self.assertRaisesRegex(RuntimeError, error_msg.format(expect_device))
3454  else:
3455  assert_correct = dummy_ctx_manager
3456 
3457  # test DataParallel module
3458  dpm = nn.DataParallel(inner_m, device_ids)
3459  if dp_device is not None:
3460  dpm = dpm.to(dp_device)
3461 
3462  with assert_correct():
3463  dpm(inp)
3464 
3465  # test functional
3466  with assert_correct():
3467  nn.parallel.data_parallel(inner_m.to(dp_device), inp, device_ids)
3468 
3469  test(l.to('cpu'), None, inp, None, should_fail=True)
3470  test(l.cuda(1), None, inp_cuda0, None, should_fail=True)
3471  test(l.cuda(), None, inp_cuda0, [1, 0], should_fail=True)
3472 
3473  test(l.cuda(), None, inp_cuda0, None, should_fail=False)
3474  test(l.cpu(), 'cuda', inp_cuda0, None, should_fail=False)
3475  test(l.cuda(1), None, inp_cuda1, [1, 0], should_fail=False)
3476  test(l.cpu(), 'cuda:1', inp_cuda1, [1, 0], should_fail=False)
3477 
3478  s = nn.Sequential(l.cpu())
3479  test(s, None, inp, None, should_fail=True)
3480  test(s, None, inp, [0, 1], should_fail=True)
3481  test(s, None, inp, [1, 0], should_fail=True)
3482 
3483  s = nn.Sequential(deepcopy(l).cpu(), l.cuda())
3484  test(s, None, inp, None, should_fail=True)
3485  test(s, None, inp, [0, 1], should_fail=True)
3486  test(s, None, inp, [1, 0], should_fail=True)
3487 
3488  s = nn.Sequential(l.cuda(), deepcopy(l).cuda(1))
3489  test(s, None, inp, None, should_fail=True)
3490  test(s, None, inp, [0, 1], should_fail=True)
3491  test(s, None, inp, [1, 0], should_fail=True)
3492 
3493  s = nn.Sequential(l.cuda(), deepcopy(l).cuda())
3494  test(s, None, inp, None, should_fail=False)
3495  test(s, None, inp, [0, 1], should_fail=False)
3496  test(s, None, inp, [1, 0], should_fail=True)
3497  test(s.cpu(), None, inp, [1, 0], should_fail=True)
3498  test(s.cuda(1), None, inp, [1, 0], should_fail=False)
3499 
3500  @unittest.skipIf(not TEST_MULTIGPU or not PY3, "multi-GPU not supported")
3501  @skipIfRocm
3502  def test_data_parallel_model_no_refcycles(self):
3503  # Python 2.7 will create reference cycles with the following
3504  # Module on multiple GPUs, but Python 3 shouldn't unless
3505  # there are refcycles on the PyTorch side (or the defined module)
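# gc.collect() returns the number of unreachable objects it found, so asserting
# that it is 0 after the forward pass means no reference cycles were created.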
3506  import gc
3507 
3508  class Model(nn.Module):
3509  def __init__(self):
3510  super(Model, self).__init__()
3511  self.linear = nn.Linear(1, 1)
3512 
3513  def forward(self, x):
3514  return self.linear(x)
3515 
3516  gc.collect()
3517  model = nn.DataParallel(Model().cuda())
3518  data = Variable(torch.randn(1).cuda())
3519  model(data)
3520 
3521  refcycles = gc.collect()
3522  self.assertEqual(refcycles, 0)
3523 
3524  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3525  def test_data_parallel_no_grad(self):
3526  test = self
3527 
3528  class Layer(nn.Module):
3529  def forward(self, x):
3530  test.assertFalse(torch.is_grad_enabled())
3531  return x
3532 
3533  l = Layer()
3534  i = Variable(torch.randn(20, 10).float().cuda())
3535  with torch.no_grad():
3536  dp.data_parallel(l, i, (0, 1))
3537  self.assertRaises(AssertionError, lambda: dp.data_parallel(l, i, (0, 1)))
3538 
3539  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3540  def test_data_parallel(self):
3541  l = nn.Linear(10, 5).float().cuda()
3542  i = Variable(torch.randn(20, 10).float().cuda(1))
3543  l.cuda(1)
3544  expected_out = l(i)
3545  loss = expected_out.sum()
3546  loss.backward()
3547  expected_grads = []
3548  for param in l.parameters():
3549  expected_grads.append(param.grad.clone())
3550  dev_ids_list = [(0, 1), (1, 0)]
3551  for dev_id in dev_ids_list:
3552  with torch.cuda.device(dev_id[0]):
3553  l.cuda()
3554  l.zero_grad()
3555  out = dp.data_parallel(l, i, dev_id)
3556  loss = out.sum()
3557  loss.backward()
3558  self.assertEqual(out.get_device(), dev_id[0])
3559  self.assertEqual(out.data, expected_out.data)
3560  for expected, param in zip(expected_grads, l.parameters()):
3561  self.assertEqual(param.grad.data, expected.data)
3562 
3563  # Check for None device_ids
3564  l = l.cuda()
3565  out = dp.data_parallel(l, i)
3566 
3567  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3568  @skipIfRocm
3569  def test_data_parallel_sparse(self):
3570  l = nn.Embedding(10, 5, sparse=True).to("cuda:1")
3571  i = torch.randint(10, (20, 5), device="cuda:1", dtype=torch.long)
3572  expected_out = l(i)
3573  loss = expected_out.sum()
3574  loss.backward()
3575  expected_grads = []
3576  for param in l.parameters():
3577  expected_grads.append(param.grad.clone())
3578  dev_ids_list = [(0, 1), (1, 0)]
3579  for dev_id in dev_ids_list:
3580  with torch.cuda.device(dev_id[0]):
3581  l.cuda()
3582  l.zero_grad()
3583  out = dp.data_parallel(l, i, dev_id)
3584  loss = out.sum()
3585  loss.backward()
3586  self.assertEqual(out.get_device(), dev_id[0])
3587  self.assertEqual(out.data, expected_out.data)
3588  for expected, param in zip(expected_grads, l.parameters()):
3589  self.assertEqual(param.grad.data, expected.data)
3590 
3591  # Check for None device_ids
3592  l = l.cuda()
3593  out = dp.data_parallel(l, i)
3594 
3595  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3596  def test_data_parallel_nested_output(self):
3597  def fn(input):
3598  return [
3599  input, (input.sin(), input.cos(), [input.add(1)]), input,
3600  OrderedDict(a=input, b=[input.sin()])
3601  ]
3602 
3603  class Net(nn.Module):
3604  def forward(self, input):
3605  return fn(input)
3606 
3607  i = torch.randn(2, 2).float().cuda(1)
3608  gpus = range(torch.cuda.device_count())
3609  output = dp.data_parallel(Net(), i, gpus)
3610  self.assertEqual(output, fn(i))
3611  self.assertIsInstance(output[0], torch.Tensor)
3612  self.assertIsInstance(output[1], tuple)
3613  self.assertIsInstance(output[1][0], torch.Tensor)
3614  self.assertIsInstance(output[1][1], torch.Tensor)
3615  self.assertIsInstance(output[1][2], list)
3616  self.assertIsInstance(output[1][2][0], torch.Tensor)
3617  self.assertIsInstance(output[2], torch.Tensor)
3618  self.assertIsInstance(output[3], dict)
3619  self.assertEqual(len(output[3]), 2)
3620  self.assertIn('a', output[3])
3621  self.assertIn('b', output[3])
3622  self.assertIsInstance(output[3]['a'], torch.Tensor)
3623  self.assertIsInstance(output[3]['b'], list)
3624  self.assertIsInstance(output[3]['b'][0], torch.Tensor)
3625 
3626  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3627  def test_data_parallel_nested_input(self):
3628  def fn(input):
3629  return input[1][0]
3630 
3631  class Net(nn.Module):
3632  def forward(self, *input):
3633  return fn(input)
3634 
3635  i = Variable(torch.randn(20, 3).float().cuda(1))
3636  input = (i.cos(), (i.sin(), i), i.sin())
3637  gpus = range(torch.cuda.device_count())
3638  output = dp.data_parallel(Net(), input, gpus)
3639  self.assertEqual(output, fn(input))
3640 
3641  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3642  @repeat_test_for_types(ALL_TENSORTYPES)
3643  def test_data_parallel_module(self, dtype=torch.float):
3644  l = nn.Linear(10, 5).to("cuda", dtype)
3645  i = torch.randn(20, 10, device="cuda", dtype=dtype)
3646  expected_out = l(i).data
3647  net = nn.DataParallel(l)
3648  out = net(i)
3649  self.assertEqual(out.get_device(), 0)
3650  self.assertEqual(out.data, expected_out, dtype2prec[dtype])
3651 
3652  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3653  @repeat_test_for_types(ALL_TENSORTYPES)
3654  def test_data_parallel_module_kwargs_only(self, dtype=torch.float):
3655  class Net(nn.Module):
3656  def __init__(self):
3657  super(Net, self).__init__()
3658  self.l = l
3659 
3660  def forward(self, input):
3661  return self.l(input)
3662 
3663  l = nn.Linear(10, 5).to("cuda", dtype)
3664  i = torch.randn(20, 10, device="cuda", dtype=dtype)
3665  expected_out = l(i).data
3666  n = nn.DataParallel(Net())
3667  out = n(input=i)
3668  self.assertEqual(out.get_device(), 0)
3669  self.assertEqual(out.data, expected_out, dtype2prec[dtype])
3670 
3671  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3672  @repeat_test_for_types(ALL_TENSORTYPES)
3673  def test_data_parallel_module_kwargs_only_empty_list(self, dtype=torch.float):
3674  class Net(nn.Module):
3675  def __init__(self):
3676  super(Net, self).__init__()
3677  self.l = l
3678 
3679  def forward(self, input):
3680  return self.l(input['data'])
3681 
3682  l = nn.Linear(10, 5).to("cuda", dtype)
3683  i = torch.randn(20, 10, device="cuda", dtype=dtype)
3684  expected_out = l(i).data
3685  n = nn.DataParallel(Net())
3686  out = n(input={'data': i, 'unused': []})
3687  self.assertEqual(out.get_device(), 0)
3688  self.assertEqual(out.data, expected_out, dtype2prec[dtype])
3689 
3690  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3691  @repeat_test_for_types(ALL_TENSORTYPES)
3692  def test_data_parallel_module_kwargs_only_empty_dict(self, dtype=torch.float):
3693  class Net(nn.Module):
3694  def __init__(self):
3695  super(Net, self).__init__()
3696  self.l = l
3697 
3698  def forward(self, input):
3699  return self.l(input['data'])
3700 
3701  l = nn.Linear(10, 5).to("cuda", dtype)
3702  i = torch.randn(20, 10, device="cuda", dtype=dtype)
3703  expected_out = l(i).data
3704  n = nn.DataParallel(Net())
3705  out = n(input={'data': i, 'unused': {}})
3706  self.assertEqual(out.get_device(), 0)
3707  self.assertEqual(out.data, expected_out, dtype2prec[dtype])
3708 
3709  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
3710  @repeat_test_for_types(ALL_TENSORTYPES)
3711  def test_data_parallel_module_kwargs_only_empty_tuple(self, dtype=torch.float):
3712  class Net(nn.Module):
3713  def __init__(self):
3714  super(Net, self).__init__()
3715  self.l = l
3716 
3717  def forward(self, input):
3718  return self.l(input['data'])
3719 
3720  l = nn.Linear(10, 5).to("cuda", dtype)
3721  i = torch.randn(20, 10, device="cuda", dtype=dtype)
3722  expected_out = l(i).data
3723  n = nn.DataParallel(Net())
3724  out = n(input={'data': i, 'unused': ()})
3725  self.assertEqual(out.get_device(), 0)
3726  self.assertEqual(out.data, expected_out, dtype2prec[dtype])
3727 
3728  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
3729  def test_data_parallel_device_args(self):
3730  cuda0 = torch.device('cuda:0')
3731  cuda1 = torch.device('cuda:1')
3732 
3733  # test output_device
3734  l = nn.Linear(10, 5).to(cuda0, torch.float)
3735  i = torch.randn(20, 10, dtype=torch.float, device=cuda0, requires_grad=True)
3736  out = dp.data_parallel(l, i, device_ids=(0, 1), output_device=cuda0)
3737  self.assertEqual(out, l(i))
3738 
3739  # test device_ids
3740  l = nn.Linear(10, 5).to(cuda0, torch.float)
3741  i = torch.randn(20, 10, dtype=torch.float, device=cuda0, requires_grad=True)
3742  out = dp.data_parallel(l, i, device_ids=(cuda0, cuda1), output_device=cuda0)
3743  self.assertEqual(out, l(i))
3744 
3745  def test_state_dict(self):
3746  l = nn.Linear(5, 5)
3747  block = nn.Module()
3748  block.conv = nn.Conv2d(3, 3, 3, bias=False)
3749  net = nn.Module()
3750  net.linear1 = l
3751  net.linear2 = l
3752  net.bn = nn.BatchNorm2d(2)
3753  net.block = block
3754  net.add_module('empty', None)
3755 
3756  state_dict = net.state_dict()
3757  self.assertEqual(len(state_dict), 10)
3758  self.assertEqual(len(state_dict._metadata), 6)
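# 10 tensors: weight/bias for linear1 and linear2 (the same Linear registered twice),
# weight/bias/running_mean/running_var/num_batches_tracked for bn, and block.conv.weight
# (the conv was created with bias=False). 6 metadata scopes: '', linear1, linear2, bn,
# block and block.conv; the None 'empty' module contributes nothing.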
3759  self.assertIn('', state_dict._metadata)
3760  self.assertIn('linear1', state_dict._metadata)
3761  self.assertIn('linear1.weight', state_dict)
3762  self.assertIn('linear1.bias', state_dict)
3763  self.assertIn('linear2', state_dict._metadata)
3764  self.assertIn('linear2.weight', state_dict)
3765  self.assertIn('linear2.bias', state_dict)
3766  self.assertIn('block', state_dict._metadata)
3767  self.assertIn('block.conv', state_dict._metadata)
3768  self.assertIn('block.conv.weight', state_dict)
3769  self.assertIn('block.conv.weight', state_dict)
3770  self.assertNotIn('block.conv.bias', state_dict)
3771  self.assertIn('bn', state_dict._metadata)
3772  self.assertIn('bn.weight', state_dict)
3773  self.assertIn('bn.bias', state_dict)
3774  self.assertIn('bn.running_var', state_dict)
3775  self.assertIn('bn.running_mean', state_dict)
3776  self.assertIn('bn.num_batches_tracked', state_dict)
3777  self.assertFalse(any(map(lambda k: k.startswith('empty'), state_dict.keys())))
3778  for k, v in state_dict.items():
3779  param = net
3780  for component in k.split('.'):
3781  param = getattr(param, component)
3782  if isinstance(param, Parameter):
3783  param = param.data
3784  self.assertEqual(v.data_ptr(), param.data_ptr())
3785 
3786  l = nn.Linear(5, 5)
3787  state_dict = l.state_dict()
3788  self.assertEqual(len(state_dict), 2)
3789  self.assertEqual(len(state_dict._metadata), 1)
3790  self.assertIn('', state_dict._metadata)
3791  self.assertTrue(state_dict._metadata['']['version'] >= 0)
3792  self.assertEqual(state_dict['weight'].data_ptr(), l.weight.data_ptr())
3793  self.assertEqual(state_dict['bias'].data_ptr(), l.bias.data_ptr())
3794 
3795  def test_load_state_dict(self):
3796  l = nn.Linear(5, 5)
3797  block = nn.Module()
3798  block.conv1 = nn.Conv2d(3, 3, 3, bias=True)
3799  block.conv2 = nn.Conv2d(3, 3, 3, bias=False)
3800  net = nn.Module()
3801  net.linear1 = l
3802  net.linear2 = l
3803  net.bn = nn.BatchNorm2d(2)
3804  net.block = block
3805  net.add_module('empty', None)
3806 
3807  state_dict = net.state_dict()
3808  state_dict.update({
3809  'linear1.weight': torch.ones(5, 5),
3810  'block.conv1.bias': torch.arange(1, 4),
3811  'bn.running_mean': torch.randn(2),
3812  })
3813  net.load_state_dict(state_dict)
3814  self.assertEqual(net.linear1.weight.data, state_dict['linear1.weight'])
3815  self.assertEqual(net.block.conv1.bias.data, state_dict['block.conv1.bias'])
3816  self.assertEqual(net.bn.running_mean, state_dict['bn.running_mean'])
3817 
3818  state_dict = net.state_dict()
3819  state_dict.update({'extra': torch.ones(5)})
3820  self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))
3821 
3822  state_dict = net.state_dict()
3823  state_dict.update({'extra.param': torch.ones(5)})
3824  self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))
3825 
3826  state_dict = net.state_dict()
3827  del state_dict['linear1.weight']
3828  self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))
3829 
3830  state_dict = net.state_dict()
3831  state_dict.update({'bn.running_mean': torch.rand(14, 4)}) # wrong size
3832  self.assertRaises(RuntimeError, lambda: net.load_state_dict(state_dict))
3833 
3834  state_dict = net.state_dict()
3835  old_state_dict = deepcopy(state_dict)
3836  state_dict = {
3837  'linear1.weight': torch.ones(5, 5),
3838  'block.conv1.bias': torch.arange(1, 4),
3839  'bn.running_mean': torch.randn(2),
3840  'nonexistent_key': torch.rand(3)
3841  }
3842  net.load_state_dict(state_dict, strict=False)
3843  self.assertEqual(net.linear1.weight.data, state_dict['linear1.weight'])
3844  self.assertEqual(net.block.conv1.bias.data, state_dict['block.conv1.bias'])
3845  self.assertEqual(net.bn.running_mean, state_dict['bn.running_mean'])
3846  new_state_dict = net.state_dict()
3847  del old_state_dict['linear1.weight']
3848  del old_state_dict['block.conv1.bias']
3849  del old_state_dict['bn.running_mean']
3850  for k, v in old_state_dict.items():
3851  self.assertTrue(v.equal(new_state_dict[k]))
3852 
3853  def test_load_state_dict_BC(self):
3854  # BatchNormNd
3855  # Added num_batches_tracked buffer at version 2. For state dicts from
3856  # earlier versions, or with no version at all, loading should provide a default value of 0.
3857  bn = nn.BatchNorm2d(3)
3858  state_dict = bn.state_dict()
3859  del state_dict['num_batches_tracked']
3860  state_dict._metadata['']['version'] = 1 # version 1
3861  bn.load_state_dict(state_dict)
3862  self.assertEqual(bn.num_batches_tracked.dtype, torch.long)
3863  self.assertEqual(bn.num_batches_tracked.item(), 0)
3864  del state_dict._metadata['']['version'] # no version
3865  bn.load_state_dict(state_dict)
3866  self.assertEqual(bn.num_batches_tracked.dtype, torch.long)
3867  self.assertEqual(bn.num_batches_tracked.item(), 0)
3868 
3869  def test_parameter_assignment(self):
3870  l = nn.Linear(5, 5)
3871 
3872  def num_params():
3873  return len(list(l.parameters()))
3874 
3875  self.assertEqual(num_params(), 2)
3876 
3877  new_param = Parameter(torch.randn(5, 5))
3878  l.param_name = new_param
3879  self.assertEqual(num_params(), 3)
3880  self.assertObjectIn(new_param, l.parameters())
3881 
3882  var = torch.randn(5, 5)
3883  l.var_name = var
3884  self.assertEqual(num_params(), 3)
3885  self.assertNotIn(id(var), map(id, l.parameters()))
3886 
3887  # Make sure Variables are not saved as parameters
3888  l.variable_attr = torch.empty(5, 5)
3889  self.assertEqual(num_params(), 3)
3890  l.param_attr = Parameter(torch.empty(5, 5))
3891  self.assertEqual(num_params(), 4)
3892 
3893  # It shouldn't be possible to replace a parameter with a Variable
3894  def assign_var():
3895  l.param_attr = torch.empty(5, 5)
3896 
3897  self.assertRaises(TypeError, assign_var)
3898  # But replacing it with None should be fine
3899  l.param_attr = None
3900  self.assertEqual(num_params(), 3)
3901 
3902  def test_assignment(self):
3903  l = nn.Module()
3904  a = nn.Parameter(torch.randn(2))
3905  b = nn.Parameter(torch.randn(3))
3906  c = nn.Parameter(torch.randn(4))
3907  q = nn.Linear(4, 4)
3908  r = nn.Linear(5, 5)
3909  w = nn.Linear(6, 6)
3910 
3911  def test_assignments(get_list, a, b, c):
3912  # Check that None can be shadowed
3913  l.a = None
3914  self.assertIsNone(l.a)
3915  self.assertIn('a', l.__dict__)
3916  l.a = a
3917  self.assertIs(l.a, a)
3918  self.assertEqual(get_list(), [a])
3919  self.assertNotIn('a', l.__dict__)
3920 
3921  # Assign second object
3922  l.b = None
3923  self.assertIsNone(l.b)
3924  self.assertIn('b', l.__dict__)
3925  l.b = b
3926  self.assertIs(l.b, b)
3927  self.assertEqual(get_list(), [a, b])
3928  self.assertNotIn('b', l.__dict__)
3929 
3930  # Remove and add the object back. Order should be unchanged.
3931  l.a = None
3932  self.assertIsNone(l.a)
3933  self.assertEqual(get_list(), [b])
3934  l.a = a
3935  self.assertIs(l.a, a)
3936  self.assertEqual(get_list(), [a, b])
3937 
3938  # Replace object with another one. Order should be unchanged.
3939  l.a = c
3940  self.assertIs(l.a, c)
3941  self.assertEqual(get_list(), [c, b])
3942 
3943  # Remove and reassign an attribute. It should appear at the end of the list now.
3944  del l.a
3945  self.assertFalse(hasattr(l, 'a'))
3946  l.a = a
3947  self.assertIs(l.a, a)
3948  self.assertEqual(get_list(), [b, a])
3949 
3950  test_assignments(lambda: list(l.parameters()), a, b, c)
3951  del l.a, l.b
3952  self.assertEqual(list(l.parameters()), [])
3953 
3954  test_assignments(lambda: list(l.children()), q, r, w)
3955  del l.a, l.b
3956  self.assertEqual(list(l.children()), [])
3957 
3958  buf = torch.randn(10)
3959  l.register_buffer('buf', buf)
3960  self.assertIs(l.buf, buf)
3961  l.buf = None
3962  self.assertIs(l.buf, None)
3963  self.assertNotIn('buf', l.__dict__) # should be stored in l._buffers
3964  l.buf = buf
3965  self.assertIn('buf', l.state_dict())
3966  self.assertEqual(l.state_dict()['buf'], buf)
3967 
3968  def test_Conv2d_inconsistent_types(self):
3969  inputs = Variable(torch.randn(4, 1, 7, 7).float())
3970  weights = Variable(torch.randn(1, 1, 3, 3).double())
3971  # inconsistent types should raise an exception
3972  self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
3973  # but it should work with the same type
3974  nn.functional.conv2d(inputs.float(), weights.float())
3975 
3976  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
3977  def test_Conv2d_inconsistent_types_on_GPU_without_cudnn(self):
3978  inputs = Variable(torch.randn(4, 1, 7, 7).float().cuda())
3979  weights = Variable(torch.randn(1, 1, 3, 3).double().cuda())
3980  bias = Variable(torch.randn(1).double().cuda())
3981 
3982  with torch.backends.cudnn.flags(enabled=False):
3983  # inconsistent types should raise an exception
3984  self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
3985  self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias))
3986 
3987  # but it should work with the same type
3988  nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
3989 
3990  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
3991  @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
3992  def test_Conv2d_inconsistent_types_on_GPU_with_cudnn(self):
3993  inputs = Variable(torch.randn(4, 1, 7, 7).float().cuda())
3994  weights = Variable(torch.randn(1, 1, 3, 3).double().cuda())
3995  bias = Variable(torch.randn(1).double().cuda())
3996 
3997  with torch.backends.cudnn.flags(enabled=True):
3998  # inconsistent types should raise an exception
3999  self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights))
4000  self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias))
4001 
4002  # but it should work with the same type
4003  nn.functional.conv2d(inputs.float(), weights.float(), bias.float())
4004 
4005  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4006  @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
4007  @skipIfRocm
4008  def test_cudnn_multiple_threads_same_device(self):
4009  # This function is intended to test the lazy creation and reuse of per-thread
4010  # cudnn handles on each device in aten/src/ATen/cudnn/Handles.cpp.
4011  # Failure here likely indicates something wrong with that logic.
4012  weight = torch.ones((1, 1, 2, 2), device='cuda')
4013 
4014  results = {}
4015 
4016  num_threads = 2
4017  trials = 2
4018  test_iters = 100
4019 
4020  with torch.backends.cudnn.flags(enabled=True):
4021  def _worker(t, input):
4022  my_stream = torch.cuda.Stream()
4023  results[t] = input
4024  with torch.cuda.stream(my_stream):
4025  for _ in range(test_iters):
4026  # If all threads are sharing the same cudnn handle,
4027  # the following sequence may occur:
4028  # thread 0 calls setCuDNNStreamToCurrent()
4029  # thread 1 calls setCuDNNStreamToCurrent()
4030  # thread 0 launches its raw convolution, which it thinks is in
4031  # its own stream, but is actually in thread 1's stream.
4032  # thread 0 enqueues its div_, which IS in its own stream,
4033  # but now races with its convolution.
4034  results[t] = torch.nn.functional.conv2d(results[t], weight, padding=0)
4035  results[t].div_(4.0)
4036  torch.cuda.current_stream().wait_stream(my_stream)
4037 
4038  for _ in range(trials):
4039  for t in range(num_threads):
4040  results[t] = torch.ones((1, 1, 2048, 2048), device='cuda')
4041 
4042  threads = [threading.Thread(target=_worker,
4043  args=(t, results[t])) for t in range(num_threads)]
4044 
4045  for thread in threads:
4046  thread.start()
4047  for thread in threads:
4048  thread.join()
4049 
4050  for t in range(num_threads):
4051  self.assertEqual(results[t].sum().item(),
4052  (2048 - test_iters) * (2048 - test_iters))
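# Each pass convolves an all-ones map with the 2x2 all-ones kernel (no padding),
# producing all 4s and shrinking each spatial dim by 1; div_(4.0) restores ones, so
# after test_iters passes the (2048 - test_iters)^2 surviving elements sum to exactly
# that square. A different sum would indicate the handle/stream race described above.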
4053 
4054  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4055  @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
4056  @repeat_test_for_types(ALL_TENSORTYPES)
4057  def test_Conv2d_deterministic_cudnn(self, dtype=torch.float):
4058  inputs = torch.randn(2, 3, 5, 5, device="cuda", dtype=dtype, requires_grad=True)
4059  with cudnn.flags(enabled=True, benchmark=True, deterministic=True):
4060  conv1 = torch.nn.Conv2d(3, 3, 3).to("cuda", dtype)
4061  conv2 = torch.nn.Conv2d(3, 3, 3).to("cuda", dtype)
4062  conv2.bias.data.copy_(conv1.bias.data)
4063  conv2.weight.data.copy_(conv1.weight.data)
4064  out1 = conv1(inputs)
4065  out2 = conv2(inputs)
4066  self.assertEqual(out1, out2, prec=0.0)
4067  y = torch.randn(out1.size(), device="cuda", dtype=dtype)
4068  out1.backward(y)
4069  out2.backward(y)
4070  self.assertEqual(conv1.bias.grad.data, conv2.bias.grad.data, prec=0.0)
4071  self.assertEqual(conv1.weight.grad.data, conv2.weight.grad.data, prec=0.0)
4072 
4073  def test_Conv2d_missing_argument(self):
4074  c = nn.Conv2d(3, 3, 3)
4075  self.assertRaises(TypeError, lambda: c(None))
4076 
4077  def test_Conv2d_backward_twice(self):
4078  input = torch.randn(2, 3, 5, 5)
4079  c = nn.Conv2d(3, 3, 3)
4080  o1 = c(input)
4081  o1.sum().backward()
4082  self.assertRaisesRegex(RuntimeError, 'Specify retain_graph=True',
4083  lambda: o1.sum().backward())
4084 
4085  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4086  @repeat_test_for_types(ALL_TENSORTYPES)
4087  def test_Conv2d_large_workspace(self, dtype=torch.float):
4088  # These sizes require huge cuDNN workspaces. Make sure we choose a
4089  # reasonable algorithm that does not run out of memory
4090  sizes = [
4091  (1, 256, 109, 175),
4092  (1, 256, 80, 128),
4093  (1, 256, 120, 192),
4094  ]
4095 
4096  def run_test(benchmark):
4097  with torch.backends.cudnn.flags(benchmark=benchmark):
4098  conv = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1).to("cuda", dtype)
4099  for size in sizes:
4100  x = torch.randn(size, device="cuda", dtype=dtype)
4101  out = conv(x.detach().clone().requires_grad_())
4102  out.backward(torch.ones_like(out))
4103 
4104  run_test(benchmark=False)
4105  run_test(benchmark=True)
4106 
4107  def test_conv_modules_raise_error_on_incorrect_input_size(self):
4108  modules = [nn.Conv1d(3, 8, 3), nn.ConvTranspose1d(3, 8, 3),
4109  nn.Conv2d(3, 8, 3), nn.ConvTranspose2d(3, 8, 3),
4110  nn.Conv3d(3, 8, 3), nn.ConvTranspose3d(3, 8, 3)]
4111 
4112  invalid_input_dims = [(2, 4), (2, 4),
4113  (3, 5), (3, 5),
4114  (4, 6), (4, 6)]
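# Conv1d/ConvTranspose1d expect 3-d input (N, C, L), Conv2d 4-d and Conv3d 5-d;
# each pair above is one dimension short and one dimension over, so every call
# below should raise.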
4115 
4116  for invalid_dims, module in zip(invalid_input_dims, modules):
4117  for dims in invalid_dims:
4118  input = torch.empty(torch.Size((3, ) * dims))
4119  self.assertRaises(RuntimeError, lambda: module(input))
4120 
4121  def test_conv_shapecheck(self):
4122  def test(should_raise, module, input_size):
4123  input = torch.empty(3, *input_size)
4124  if should_raise:
4125  self.assertRaises(RuntimeError, lambda: module(input))
4126  else:
4127  # just run it to ensure no exception is raised.
4128  module(input)
4129 
4130  # Conv1d
4131  test(True, nn.Conv1d(1, 1, 3), (1, 2))
4132  test(True, nn.Conv1d(1, 1, 3, stride=2), (1, 2))
4133  test(False, nn.Conv1d(1, 1, 2), (1, 2))
4134  test(False, nn.Conv1d(1, 1, 2, stride=2), (1, 2))
4135  test(False, nn.Conv1d(1, 1, 3, stride=2, padding=1), (1, 2))
4136 
4137  # Conv2d
4138  test(True, nn.Conv2d(1, 1, (3, 3)), (1, 2, 2))
4139  test(False, nn.Conv2d(1, 1, (3, 3)), (1, 3, 3))
4140  test(False, nn.Conv2d(1, 1, (3, 3), padding=1), (1, 2, 2))
4141 
4142  # Conv3D
4143  test(True, nn.Conv3d(1, 1, (3, 3, 3)), (1, 2, 2, 2))
4144  test(False, nn.Conv3d(1, 1, (3, 3, 3)), (1, 3, 3, 3))
4145  test(False, nn.Conv3d(1, 1, (3, 3, 3), padding=1), (1, 2, 2, 2))
4146 
4147  def test_ConvTranspose2d_output_size(self):
4148  m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2)
4149  i = torch.randn(2, 3, 6, 6)
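# With a 6x6 input, kernel 3, stride 3 and no padding, the smallest producible
# output is (6 - 1) * 3 + 3 = 18 per dimension, and output_size may exceed that by
# at most stride - 1 = 2, so only sizes 18..20 are expected to be accepted below.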
4150  for h in range(15, 22):
4151  for w in range(15, 22):
4152  if 18 <= h <= 20 and 18 <= w <= 20:
4153  output = m(i, output_size=(h, w))
4154  self.assertEqual(output.size()[2:], (h, w))
4155  else:
4156  self.assertRaises(ValueError, lambda: m(i, (h, w)))
4157 
4158  def test_ConvTranspose3d_correct_output_size(self):
4159  # Check that ConvTranspose3d can take a 5d output_size.
4160  m = nn.ConvTranspose3d(2, 2, 2)
4161  i = torch.rand(1, 2, 1, 1, 1)
4162  out = m(i, output_size=(1, 2, 2, 2, 2))
4163 
4164  def _test_Conv2d_naive_groups(self, device="cpu", dtype=torch.float):
4165  # Check that grouped convolutions match two half convolutions
4166  m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype)
4167  i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
4168  output = m(i)
4169  grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype)
4170  output.backward(grad_output)
4171 
4172  m1 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype)
4173  m1.weight.data.copy_(m.weight.data[:2])
4174  m1.bias.data.copy_(m.bias.data[:2])
4175  i1 = Variable(i.data[:, :2].contiguous(), requires_grad=True)
4176  output1 = m1(i1)
4177  output1.backward(grad_output[:, :2].contiguous())
4178 
4179  m2 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype)
4180  m2.weight.data.copy_(m.weight.data[2:])
4181  m2.bias.data.copy_(m.bias.data[2:])
4182  i2 = Variable(i.data[:, 2:].contiguous(), requires_grad=True)
4183  output2 = m2(i2)
4184  output2.backward(grad_output[:, 2:].contiguous())
4185 
4186  self.assertEqual(output, torch.cat([output1, output2], 1))
4187  self.assertEqual(i.grad.data,
4188  torch.cat([i1.grad.data, i2.grad.data], 1),
4189  prec=dtype2prec[dtype])
4190  self.assertEqual(m.bias.grad.data,
4191  torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0),
4192  prec=dtype2prec[dtype])
4193  self.assertEqual(m.weight.grad.data,
4194  torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
4195  prec=dtype2prec[dtype])
4196 
4197  # For https://github.com/pytorch/pytorch/pull/1273
4198  # Almost identical to the above `test_Conv2d_naive_groups`
4199  def test_Conv2d_groups_nobias(self):
4200  dev_dtypes = [("cpu", torch.float)]
4201  if TEST_CUDA:
4202  dev_dtypes += [("cuda", torch.float), ("cuda", torch.half)]
4203  for device, dtype in dev_dtypes:
4204  m = nn.Conv2d(4, 4, kernel_size=3, groups=2, bias=False).to(device, dtype)
4205  i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True)
4206  output = m(i)
4207  grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype)
4208  output.backward(grad_output)
4209 
4210  m1 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype)
4211  m1.weight.data.copy_(m.weight.data[:2])
4212  i1 = Variable(i.data[:, :2].contiguous(), requires_grad=True)
4213  output1 = m1(i1)
4214  output1.backward(grad_output[:, :2].contiguous())
4215 
4216  m2 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype)
4217  m2.weight.data.copy_(m.weight.data[2:])
4218  i2 = Variable(i.data[:, 2:].contiguous(), requires_grad=True)
4219  output2 = m2(i2)
4220  output2.backward(grad_output[:, 2:].contiguous())
4221 
4222  self.assertEqual(output, torch.cat([output1, output2], 1))
4223  self.assertEqual(i.grad.data,
4224  torch.cat([i1.grad.data, i2.grad.data], 1),
4225  dtype2prec[dtype])
4226  self.assertEqual(m.weight.grad.data,
4227  torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0),
4228  1e-1 if dtype == torch.half else dtype2prec[dtype])
4229 
4230  # Very similar to test_Conv2d_naive_groups but with special care to handle
4231  # the number of groups == number of input channels
4232  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4233  @skipIfRocm
4234  @repeat_test_for_types(ALL_TENSORTYPES)
4235  def test_Conv2d_depthwise_naive_groups_cuda(self, dtype=torch.float):
4236  for depth_multiplier in [1, 2]:
4237  m = nn.Conv2d(2, 2 * depth_multiplier, kernel_size=3, groups=2).to("cuda", dtype)
4238  i = torch.randn(2, 2, 6, 6, device="cuda", dtype=dtype).div_(2).requires_grad_()
4239  output = m(i)
4240  grad_output = torch.randn(2, 2 * depth_multiplier, 4, 4, device="cuda", dtype=dtype) / 2
4241  output.backward(grad_output)
4242 
4243  offset = 1 * depth_multiplier
4244 
4245  m1 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to("cuda", dtype)
4246  m1.weight.data = m.weight.data[:offset].clone()
4247  m1.bias.data = m.bias.data[:offset].clone()
4248  i1 = i.detach()[:, :1].clone().requires_grad_()
4249  output1 = m1(i1)
4250  output1.backward(grad_output[:, :offset].contiguous())
4251 
4252  m2 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to("cuda", dtype)
4253  m2.weight.data.copy_(m.weight.data[offset:])
4254  m2.bias.data.copy_(m.bias.data[offset:])
4255  i2 = i.detach()[:, 1:].clone().requires_grad_()
4256  output2 = m2(i2)
4257  output2.backward(grad_output[:, offset:].contiguous())
4258 
4259  self.assertEqual(output, torch.cat([output1, output2], 1),
4260  prec=dtype2prec[dtype])
4261  self.assertEqual(i.grad.data,
4262  torch.cat([i1.grad.data, i2.grad.data], 1),
4263  prec=dtype2prec[dtype])
4264  self.assertEqual(m.bias.grad.data,
4265  torch.cat([m1.bias.grad.data,
4266  m2.bias.grad.data], 0),
4267  prec=dtype2prec[dtype])
4268  self.assertEqual(m.weight.grad.data,
4269  torch.cat([m1.weight.grad.data,
4270  m2.weight.grad.data], 0),
4271  prec=dtype2prec[dtype])
4272 
4273  def test_MaxUnpool2d_output_size(self):
4274  m = nn.MaxPool2d(3, stride=2, return_indices=True)
4275  mu = nn.MaxUnpool2d(3, stride=2)
4276  big_t = torch.rand(1, 1, 6, 6)
4277  big_t[0][0][4][4] = 100
4278  output_big, indices_big = m(big_t)
4279  self.assertRaises(RuntimeError, lambda: mu(output_big, indices_big))
4280 
4281  small_t = torch.rand(1, 1, 5, 5)
4282  for i in range(0, 4, 2):
4283  for j in range(0, 4, 2):
4284  small_t[:, :, i, j] = 100
4285  output_small, indices_small = m(Variable(small_t))
4286  for h in range(3, 10):
4287  for w in range(3, 10):
4288  if 4 <= h <= 6 and 4 <= w <= 6:
4289  size = (h, w)
4290  if h == 6:
4291  size = (1, 1) + size
4292 
4293  mu(output_small, indices_small, output_size=size)
4294  else:
4295  self.assertRaises(ValueError, lambda: mu(output_small, indices_small, (h, w)))
4296 
4297  def test_container_copy(self):
4298  class Model(nn.Module):
4299  def __init__(self):
4300  super(Model, self).__init__()
4301  self.linear = nn.Linear(4, 5)
4302 
4303  def forward(self, input):
4304  return self.linear(input)
4305 
4306  input = torch.randn(2, 4)
4307 
4308  model = Model()
4309  model_cp = deepcopy(model)
4310  self.assertEqual(model(input).data, model_cp(input).data)
4311 
4312  model_cp.linear.weight.data[:] = 2
4313  self.assertNotEqual(model(input).data, model_cp(input).data)
4314 
4315  def test_RNN_cell(self):
4316  # this is just a smoke test; these modules are implemented through
4317  # autograd so no Jacobian test is needed
4318  for module in (nn.RNNCell, nn.GRUCell):
4319  for bias in (True, False):
4320  input = torch.randn(3, 10)
4321  hx = torch.randn(3, 20)
4322  cell = module(10, 20, bias=bias)
4323  for _ in range(6):
4324  hx = cell(input, hx)
4325 
4326  hx.sum().backward()
4327 
4328  def _test_loss_equal_input_target_shape(self, cast):
4329  # Tests losses whose inputs should have the same size.
4330  losses = {
4331  'mse_loss': lambda x, y: F.mse_loss(x, y),
4332  'l1_loss': lambda x, y: F.l1_loss(x, y),
4333  'smooth_l1_loss': lambda x, y: F.smooth_l1_loss(x, y),
4334  'kl_div': lambda x, y: F.kl_div(x, y),
4335  'poisson_nll_loss': lambda x, y: F.poisson_nll_loss(x, y),
4336  }
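# The (3, 5) input and (5, 3) target below are deliberately mismatched; each loss
# above is expected to raise (checked loosely as Exception, since the exact error
# type may differ between losses).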
4337 
4338  input = Variable(cast(torch.randn(3, 5)))
4339  target = Variable(cast(torch.randn(5, 3)))
4340  for _name, fn in losses.items():
4341  self.assertRaises(Exception, lambda: fn(input, target))
4342 
4343  def test_loss_equal_input_target_shape(self):
4344  self._test_loss_equal_input_target_shape(lambda x: x)
4345 
4346  def test_nll_loss_mismatched_batch(self):
4347  x = torch.randn((10, 3), requires_grad=True)
4348  # t should have size (10,)
4349  t = torch.zeros((3,), dtype=torch.int64)
4350  with self.assertRaisesRegex(ValueError, 'Expected.*batch_size'):
4351  F.nll_loss(x, t)
4352 
4353  def test_nll_loss_out_of_bounds_ignore_index(self):
4354  x = torch.randn(6, 3, requires_grad=True)
4355  t = torch.tensor([0, 1, 255, 0, 1, 2], dtype=torch.int64)
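# Class index 255 is out of range for the 3 classes, but ignore_index=255 marks
# those entries to be skipped, so both forward and backward should succeed for
# either reduction.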
4356  for reduction in ['mean', 'none']:
4357  F.nll_loss(x, t, ignore_index=255, reduction=reduction).sum().backward()
4358 
4359  def test_poisson_nll_loss_reduction_modes(self):
4360  input = torch.tensor([0.5, 1.5, 2.5])
4361  target = torch.tensor([1., 2., 3.])
4362  component_wise_loss = torch.exp(input) - target * input
4363  self.assertEqual(component_wise_loss,
4364  F.poisson_nll_loss(input, target, reduction='none'))
4365  self.assertEqual(torch.sum(component_wise_loss),
4366  F.poisson_nll_loss(input, target, reduction='sum'))
4367  self.assertEqual(torch.mean(component_wise_loss),
4368  F.poisson_nll_loss(input, target, reduction='mean'))
4369  with self.assertRaisesRegex(ValueError, 'is not valid'):
4370  F.poisson_nll_loss(input, target, reduction='total')
4371 
4372  def test_KLDivLoss_batch_mean(self):
4373  input_shape = (2, 5)
4374  log_prob1 = F.log_softmax(torch.randn(input_shape), 1)
4375  prob2 = F.softmax(torch.randn(input_shape), 1)
4376 
4377  loss = nn.KLDivLoss(reduction='batchmean')
4378  l = loss(log_prob1, prob2)
4379 
4380  loss_none_reduce = nn.KLDivLoss(reduction='sum')(log_prob1, prob2)
4381  expected = loss_none_reduce / input_shape[0]
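# 'batchmean' should equal the 'sum' reduction divided by the batch size (dim 0),
# which is exactly what `expected` computes here.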
4382 
4383  self.assertEqual(l, expected)
4384 
4385  @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 7000), "needs cudnn >= 7.0")
4386  def test_CTCLoss_cudnn(self):
4387  target_lengths = [30, 25, 20]
4388  input_lengths = [50, 50, 50]
4389  targets = torch.randint(1, 15, (sum(target_lengths),), dtype=torch.int)
4390  log_probs = torch.randn(50, 3, 15, dtype=torch.float, device='cuda').log_softmax(2)
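# With float CUDA log_probs and int32 targets kept on the CPU, ctc_loss is expected
# to take the cuDNN path here (an assumption about the dispatch conditions); res2
# below forces the native kernel by disabling cuDNN and using int64 CUDA targets,
# so the two implementations can be compared against the reference.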
4391  res = torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
4392  expected = ctcloss_reference(log_probs, targets.cuda(), input_lengths, target_lengths).float()
4393  with torch.backends.cudnn.flags(enabled=False):
4394  res2 = torch.nn.functional.ctc_loss(log_probs, targets.cuda().long(), input_lengths, target_lengths)
4395  self.assertEqual(res, expected)
4396  self.assertEqual(res2, res)
4397 
4398  def test_CTCLoss_typechecks(self):
4399  target_lengths = torch.tensor([30, 25, 20])
4400  input_lengths = torch.tensor([50, 50, 50])
4401  targets = torch.randint(1, 15, (sum(target_lengths),), dtype=torch.int)
4402  log_probs = torch.randn(50, 3, 15, dtype=torch.float).log_softmax(2)
4403  with self.assertRaises(RuntimeError):
4404  _input_lengths = input_lengths.to(dtype=torch.float)
4405  torch.nn.functional.ctc_loss(log_probs, targets, _input_lengths, target_lengths)
4406  with self.assertRaises(RuntimeError):
4407  target_lengths = target_lengths.to(dtype=torch.float)
4408  torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
4409 
4410  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4411  def test_CTCLoss_lengthchecks_cuda(self):
4412  target_lengths = [30, 25, 20]
4413  input_lengths = [50, 50, 50]
4414  targets = torch.randint(1, 15, (3, 29), dtype=torch.long, device='cuda')
4415  log_probs = torch.randn(50, 3, 15, dtype=torch.float, device='cuda').log_softmax(2)
4416  with self.assertRaises(RuntimeError):
4417  torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
4418 
4419  def test_CTCLoss_lengthchecks_cpu(self):
4420  target_lengths = [30, 25, 20]
4421  input_lengths = [50, 50, 50]
4422  targets = torch.randint(1, 15, (3, 29), dtype=torch.int)
4423  log_probs = torch.randn(50, 3, 15, dtype=torch.float).log_softmax(2)
4424  with self.assertRaises(RuntimeError):
4425  torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths)
4426 
4427  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4428  def test_CTCLoss_zero_infinity(self):
4429  target_lengths = [60, 25, 20]
4430  input_lengths = [50, 50, 50]
4431  targets = torch.randint(1, 15, (sum(target_lengths),), dtype=torch.int)
4432  log_probs = torch.randn(50, 3, 15, dtype=torch.float, device='cuda').log_softmax(2).requires_grad_()
4433  res = torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths,
4434  reduction='sum', zero_infinity=True)
4435  with torch.backends.cudnn.flags(enabled=False):
4436  res2 = torch.nn.functional.ctc_loss(log_probs, targets.cuda().long(), input_lengths, target_lengths,
4437  reduction='sum', zero_infinity=True)
4438  res_cpu = torch.nn.functional.ctc_loss(log_probs.cpu(), targets.cpu(), input_lengths, target_lengths,
4439  reduction='sum', zero_infinity=True)
4440 
4441  self.assertAlmostEqual(res2, res, delta=1e-4)
4442  self.assertAlmostEqual(res_cpu, res.cpu(), delta=1e-4)
4443  g1, = torch.autograd.grad(res, log_probs)
4444  g2, = torch.autograd.grad(res2, log_probs)
4445  g3, = torch.autograd.grad(res_cpu, log_probs)
4446  self.assertAlmostEqual(g2, g3, delta=1e-4)
4447  self.assertAlmostEqual(g1, g2, delta=1e-4)
4448  self.assertTrue((g1 == g1).all().item()) # check that we don't have NaN
4449 
4450  def test_RNN_cell_no_broadcasting(self):
4451  def test(cell_module, input, hx, input_size, hidden_size):
4452  cell = cell_module(input_size, hidden_size)
4453  self.assertRaises(RuntimeError, lambda: cell(input, hx))
4454 
4455  def test_all(hidden_size, bad_hx, good_hx, input_size, input):
4456  test(nn.RNNCell, input, bad_hx, input_size, hidden_size)
4457  test(nn.GRUCell, input, bad_hx, input_size, hidden_size)
4458  test(nn.LSTMCell, input, (bad_hx, good_hx), input_size, hidden_size)
4459  test(nn.LSTMCell, input, (good_hx, bad_hx), input_size, hidden_size)
4460 
4461  hidden_size = 20
4462  input_size = 10
4463  input = torch.randn(3, input_size)
4464  bad_hx = torch.randn(1, hidden_size)
4465  good_hx = torch.randn(3, hidden_size)
4466 
4467  # Test hidden/input batch size broadcasting
4468  test_all(hidden_size, bad_hx, good_hx, input_size, input)
4469 
4470  # Test hx's hidden_size vs module's hidden_size broadcasting
4471  bad_hx = torch.randn(3, 1)
4472  test_all(hidden_size, bad_hx, good_hx, input_size, input)
4473 
4474  # Test input's input_size vs module's input_size broadcasting
4475  bad_input = torch.randn(3, 1)
4476  test_all(hidden_size, good_hx, good_hx, input_size, bad_input)
4477 
4478  def test_invalid_dropout_p(self):
4479  v = torch.ones(1)
4480  self.assertRaises(ValueError, lambda: nn.Dropout(-0.1))
4481  self.assertRaises(ValueError, lambda: nn.Dropout(1.1))
4482  self.assertRaises(ValueError, lambda: nn.Dropout2d(-0.1))
4483  self.assertRaises(ValueError, lambda: nn.Dropout2d(1.1))
4484  self.assertRaises(ValueError, lambda: nn.Dropout3d(-0.1))
4485  self.assertRaises(ValueError, lambda: nn.Dropout3d(1.1))
4486  self.assertRaises(ValueError, lambda: F.dropout(v, -0.1))
4487  self.assertRaises(ValueError, lambda: F.dropout(v, 1.1))
4488 
4489  def test_pad_sequence(self):
4490  def pad(tensor, length):
4491  return torch.cat(
4492  [tensor.data, tensor.data.new(
4493  length - tensor.size(0), *tensor.size()[1:]).zero_()])
4494 
4495  # single dimensional
4496  a = torch.tensor([1, 2, 3])
4497  b = torch.tensor([4, 5])
4498  c = torch.tensor([6])
4499 
4500  # batch_first = true
4501  expected = torch.tensor([[4, 5, 0], [1, 2, 3], [6, 0, 0]])
4502  padded = rnn_utils.pad_sequence([b, a, c], True)
4503  self.assertEqual(padded, expected)
4504 
4505  # batch_first = false
4506  padded = rnn_utils.pad_sequence([b, a, c])
4507  self.assertEqual(padded, expected.transpose(0, 1))
4508 
4509  # pad with non-zero value
4510  expected = torch.tensor([[4, 5, 1], [1, 2, 3], [6, 1, 1]])
4511  padded = rnn_utils.pad_sequence([b, a, c], True, 1)
4512  self.assertEqual(padded, expected)
4513 
4514  # Test pad sorted sequence
4515  expected = torch.tensor([[1, 2, 3], [4, 5, 0], [6, 0, 0]])
4516  padded = rnn_utils.pad_sequence([a, b, c], True)
4517  self.assertEqual(padded, expected)
4518 
4519  # more dimensions
4520  maxlen = 9
4521  for num_dim in (0, 1, 2, 3):
4522  sequences = []
4523  trailing_dims = [4] * num_dim
4524  for i in range(1, maxlen + 1):
4525  seq_len = i * i
4526  sequences.append(torch.rand(seq_len, 5, *trailing_dims))
4527  random.shuffle(sequences)
4528  expected = []
4529  for seq in sequences:
4530  expected.append(pad(seq, maxlen * maxlen))
4531  # batch first = true
4532  expected = torch.stack(expected)
4533  padded = rnn_utils.pad_sequence(sequences, True)
4534  self.assertEqual(padded, expected)
4535 
4536  # batch first = false
4537  padded = rnn_utils.pad_sequence(sequences)
4538  self.assertEqual(padded, expected.transpose(0, 1))
4539 
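# [Editor's note] A hedged, minimal sketch (not in the original suite) of what
# pad_sequence does: shorter sequences are right-padded with padding_value, and
# batch_first chooses between (B, T, *) and (T, B, *) layouts. Names and values
# below are illustrative.
def _editor_example_pad_sequence():
    import torch
    import torch.nn.utils.rnn as rnn_utils
    a = torch.tensor([1, 2, 3])
    b = torch.tensor([4, 5])
    padded = rnn_utils.pad_sequence([a, b], batch_first=True, padding_value=-1)
    # padded == tensor([[ 1,  2,  3],
    #                   [ 4,  5, -1]])
    return padded
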
4540  def test_pack_sequence(self):
4541  def _compatibility_test(sequences, lengths, batch_first, enforce_sorted=False):
4542  padded = rnn_utils.pad_sequence(sequences, batch_first)
4543  packed = rnn_utils.pack_sequence(sequences, enforce_sorted)
4544  unpacked = rnn_utils.pad_packed_sequence(packed, batch_first)
4545  self.assertEqual(padded, unpacked[0])
4546  pack_padded = rnn_utils.pack_padded_sequence(
4547  padded, lengths, batch_first, enforce_sorted)
4548  self.assertEqual(packed, pack_padded)
4549 
4550  # single dimensional
4551  a = torch.tensor([1, 2, 3])
4552  b = torch.tensor([4, 5])
4553  c = torch.tensor([6])
4554  packed = rnn_utils.pack_sequence([a, b, c], enforce_sorted=False)
4555  expected = torch.tensor([1, 4, 6, 2, 5, 3])
4556  self.assertEqual(packed.batch_sizes, [3, 2, 1])
4557  self.assertEqual(packed.data.data, expected)
4558  self.assertEqual(packed.sorted_indices, [0, 1, 2])
4559  self.assertEqual(packed.unsorted_indices, [0, 1, 2])
4560 
4561  packed_unsorted = rnn_utils.pack_sequence([b, c, a], enforce_sorted=False)
4562  self.assertEqual(packed_unsorted.batch_sizes, [3, 2, 1])
4563  self.assertEqual(packed_unsorted.data.data, expected)
4564  self.assertEqual(packed_unsorted.sorted_indices, [2, 0, 1])
4565  self.assertEqual(packed_unsorted.unsorted_indices, [1, 2, 0])
4566 
4567  # single dimensional, enforce_sorted = True
4568  packed_enforce_sorted = rnn_utils.pack_sequence([a, b, c], enforce_sorted=True)
4569  self.assertEqual(packed_enforce_sorted.batch_sizes, [3, 2, 1])
4570  self.assertEqual(packed_enforce_sorted.data.data, expected)
4571  self.assertTrue(packed_enforce_sorted.sorted_indices is None)
4572  self.assertTrue(packed_enforce_sorted.unsorted_indices is None)
4573 
4574  with self.assertRaisesRegex(RuntimeError, 'must be sorted in decreasing order'):
4575  rnn_utils.pack_sequence([b, c, a], enforce_sorted=True)
4576 
4577  with self.assertRaisesRegex(RuntimeError, 'You can pass `enforce_sorted=False`'):
4578  rnn_utils.pack_sequence([b, c, a], enforce_sorted=True)
4579 
4580  # more dimensions
4581  maxlen = 9
4582  for num_dim in (0, 1, 2, 3):
4583  sequences = []
4584  lengths = []
4585  trailing_dims = [4] * num_dim
4586  for i in range(maxlen, 0, -1):
4587  seq_len = i * i
4588  lengths.append(seq_len)
4589  sequences.append(torch.rand(seq_len, 5, *trailing_dims))
4590  unsorted_sequences = [s.clone() for s in sequences]
4591  random.shuffle(unsorted_sequences)
4592  unsorted_sequences_lengths = [t.size(0) for t in unsorted_sequences]
4593 
4594  # compatibility with other utilities
4595  for batch_first in (True, False):
4596  for enforce_sorted in (True, False):
4597  _compatibility_test(sequences, lengths, batch_first, enforce_sorted)
4598  _compatibility_test(unsorted_sequences, unsorted_sequences_lengths,
4599  batch_first)
4600 
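# [Editor's note] An illustrative round trip (not part of the original tests):
# pack_sequence interleaves time steps across the batch, longest sequence first,
# and pad_packed_sequence inverts it; enforce_sorted=False records the sort
# permutation so the original batch order is restored.
def _editor_example_pack_sequence_roundtrip():
    import torch
    import torch.nn.utils.rnn as rnn_utils
    seqs = [torch.tensor([1, 2, 3]), torch.tensor([4, 5]), torch.tensor([6])]
    packed = rnn_utils.pack_sequence(seqs, enforce_sorted=False)
    # packed.data == tensor([1, 4, 6, 2, 5, 3]), packed.batch_sizes == tensor([3, 2, 1])
    padded, lengths = rnn_utils.pad_packed_sequence(packed, batch_first=True)
    # padded == tensor([[1, 2, 3], [4, 5, 0], [6, 0, 0]]), lengths == tensor([3, 2, 1])
    return padded, lengths
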
4601  def test_pack_padded_sequence(self):
4602  def generate_test_case(sorted_lengths, should_shuffle):
4603  def pad(tensor, length):
4604  return torch.cat([tensor, tensor.new(length - tensor.size(0), *tensor.size()[1:]).zero_()])
4605 
4606  max_length = sorted_lengths[0]
4607  batch_sizes = [sum(map(bool, filter(lambda x: x >= i, sorted_lengths)))
4608  for i in range(1, max_length + 1)]
4609  offset = 0
4610  padded = torch.cat([pad(i * 100 + torch.arange(1., 5 * l + 1).view(l, 1, 5), max_length)
4611  for i, l in enumerate(sorted_lengths, 1)], 1)
4612  expected_data = [[torch.arange(1., 6) + (i + 1) * 100 + 5 * n for i in range(batch_size)]
4613  for n, batch_size in enumerate(batch_sizes)]
4614  expected_data = list(itertools.chain.from_iterable(expected_data))
4615  expected_data = torch.stack(expected_data, dim=0)
4616 
4617  if should_shuffle:
4618  # Shuffle the padded sequence to create an unsorted sequence
4619  permutation = list(range(len(sorted_lengths)))
4620  random.shuffle(permutation)
4621 
4622  unsorted_indices = torch.tensor(permutation)
4623  padded = padded.index_select(1, unsorted_indices)
4624  lengths = torch.tensor(sorted_lengths).index_select(0, unsorted_indices)
4625  else:
4626  unsorted_indices = None
4627  lengths = sorted_lengths
4628 
4629  return padded.requires_grad_(), lengths, expected_data, batch_sizes, unsorted_indices
4630 
4631  test_cases = [
4632  # sorted_lengths, should_shuffle
4633  [[10, 8, 4, 2, 2, 2, 1], False],
4634  [[11, 10, 8, 6, 4, 3, 1], False],
4635  [[11, 10, 8, 6, 4, 3, 1], True],
4636  ]
4637 
4638  for test_case, batch_first in itertools.product(test_cases, (True, False)):
4639  sorted_lengths, should_shuffle = test_case
4640  padded, lengths, expected_data, batch_sizes, unsorted_indices = generate_test_case(
4641  sorted_lengths, should_shuffle)
4642 
4643  src = padded
4644  if batch_first:
4645  src = src.transpose(0, 1)
4646 
4647  # check output
4648  packed = rnn_utils.pack_padded_sequence(src, lengths, batch_first=batch_first,
4649  enforce_sorted=not should_shuffle)
4650  self.assertEqual(packed.data.data, expected_data)
4651  self.assertEqual(packed.batch_sizes, batch_sizes)
4652  self.assertEqual(packed.unsorted_indices, unsorted_indices)
4653 
4654  # test inverse
4655  unpacked, unpacked_len = rnn_utils.pad_packed_sequence(packed, batch_first=batch_first)
4656  self.assertEqual(unpacked, src)
4657  self.assertEqual(unpacked_len, lengths)
4658 
4659  # check grad
4660  if padded.grad is not None:
4661  padded.grad.data.zero_()
4662  grad_output = unpacked.data.clone().normal_()
4663  unpacked.backward(grad_output)
4664  if batch_first:
4665  grad_output.transpose_(0, 1)
4666  for i, l in enumerate(lengths):
4667  self.assertEqual(padded.grad.data[:l, i], grad_output[:l, i])
4668  if l < 10:
4669  self.assertEqual(padded.grad.data[l:, i].abs().sum(), 0)
4670 
4671  # test error message
4672  with self.assertRaisesRegex(RuntimeError, 'You can pass `enforce_sorted=False`'):
4673  packed = rnn_utils.pack_padded_sequence(torch.randn(3, 3), [1, 3, 2])
4674 
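# [Editor's note] A small sketch, not in the original file, of the inverse path
# exercised above: pack_padded_sequence takes an already padded batch plus the
# true lengths, sorts internally when enforce_sorted=False, and keeps the
# indices needed to restore the caller's batch order.
def _editor_example_pack_padded_sequence():
    import torch
    import torch.nn.utils.rnn as rnn_utils
    padded = torch.tensor([[1, 2, 0], [3, 4, 5]])        # (batch=2, max_len=3)
    lengths = torch.tensor([2, 3])
    packed = rnn_utils.pack_padded_sequence(padded, lengths,
                                            batch_first=True, enforce_sorted=False)
    # packed.data == tensor([3, 1, 4, 2, 5]), packed.batch_sizes == tensor([2, 2, 1])
    return packed
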
4675  def _test_variable_sequence(self, device="cpu", dtype=torch.float):
4676  def pad(var, length):
4677  if var.size(0) == length:
4678  return var
4679  return torch.cat([var, var.new_zeros(length - var.size(0), *var.size()[1:])])
4680 
4681  def maybe_index_tuple(maybe_tuple_of_tensors, index):
4682  if maybe_tuple_of_tensors is None:
4683  return None
4684  return tuple(maybe_tuple_of_tensors[j][:, index:index + 1, :].contiguous()
4685  for j in range(2))
4686 
4687  def check_lengths(lengths, enforce_sorted, use_default_hiddens):
4688  input_size = 3
4689  hidden_size = 4
4690  num_layers = 2
4691  bidirectional = True
4692 
4693  max_length = max(lengths)
4694  x_leaf = torch.randn(max_length, len(lengths), input_size, device=device,
4695  dtype=dtype, requires_grad=True)
4696  num_directions = 2 if bidirectional else 1
4697  lstm = nn.LSTM(input_size, hidden_size, bidirectional=bidirectional,
4698  num_layers=num_layers).to(device, dtype)
4699  lstm2 = deepcopy(lstm).to(device, dtype)
4700  x = x_leaf
4701 
4702  hidden0 = None
4703  if not use_default_hiddens:
4704  hidden0 = tuple(torch.randn(num_directions * num_layers, len(lengths), hidden_size,
4705  device=device, dtype=dtype)
4706  for _ in range(2))
4707 
4708  # Compute sequences separately
4709  seq_outs = []
4710  seq_hiddens = []
4711  for i, l in enumerate(lengths):
4712  hidden_i = maybe_index_tuple(hidden0, i)
4713  out, hid = lstm2(x[:l, i:i + 1], hidden_i)
4714  out_pad = pad(out, max_length)
4715  seq_outs.append(out_pad)
4716  seq_hiddens.append(hid)
4717  seq_out = torch.cat(seq_outs, 1)
4718  seq_hidden = tuple(torch.cat(hids, 1) for hids in zip(*seq_hiddens))
4719 
4720  # Use packed format
4721  packed = rnn_utils.pack_padded_sequence(x, lengths, enforce_sorted=enforce_sorted)
4722  packed_out, packed_hidden = lstm(packed, hidden0)
4723  unpacked, unpacked_len = rnn_utils.pad_packed_sequence(packed_out)
4724 
4725  # Check forward
4726  prec = dtype2prec[dtype]
4727  self.assertEqual(packed_hidden, seq_hidden, prec)
4728  self.assertEqual(unpacked, seq_out, prec)
4729  self.assertEqual(unpacked_len, lengths, prec)
4730 
4731  # Check backward
4732  seq_out.sum().backward()
4733  grad_x = x_leaf.grad.data.clone()
4734  x_leaf.grad.data.zero_()
4735  unpacked.sum().backward()
4736 
4737  self.assertEqual(x_leaf.grad, grad_x, dtype2prec[dtype])
4738  for p1, p2 in zip(lstm.parameters(), lstm2.parameters()):
4739  prec = dtype2prec[dtype]
4740  if dtype == torch.float16:
4741  prec = 2e-2
4742  self.assertEqual(p1.grad, p2.grad, prec)
4743 
4744  tests = [
4745  # enforce_sorted, lengths
4746  [True, [5]],
4747  [False, [5]],
4748  [True, [10, 10, 6, 2, 2, 1, 1]],
4749  [False, [10, 10, 6, 2, 2, 1, 1]],
4750  [False, [2, 1, 3, 2, 10, 5, 3]],
4751  ]
4752 
4753  for enforce_sorted, seq_lens, in tests:
4754  for use_default_hiddens in (True, False):
4755  check_lengths(seq_lens, enforce_sorted, use_default_hiddens)
4756 
4757  def test_variable_sequence(self):
4758  self._test_variable_sequence()
4759 
4760  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4761  @repeat_test_for_types(ALL_TENSORTYPES)
4762  def test_variable_sequence_cuda(self, dtype=torch.float):
4763  self._test_variable_sequence("cuda", dtype)
4764 
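# [Editor's note] A hedged usage sketch (not part of the original suite) of the
# pattern _test_variable_sequence verifies: feed a PackedSequence through an
# nn.LSTM so that padding never contributes to outputs or final hidden states.
# All sizes are illustrative assumptions.
def _editor_example_lstm_over_packed_batch():
    import torch
    import torch.nn as nn
    import torch.nn.utils.rnn as rnn_utils
    lstm = nn.LSTM(input_size=3, hidden_size=4)
    x = torch.randn(5, 2, 3)                  # (max_len, batch, input_size)
    lengths = [5, 2]                          # true length of each sequence
    packed = rnn_utils.pack_padded_sequence(x, lengths)
    packed_out, (h_n, c_n) = lstm(packed)
    out, out_lengths = rnn_utils.pad_packed_sequence(packed_out)
    # out: (5, 2, 4); positions past each sequence's true length are zero-padded
    return out, out_lengths
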
4765  def test_LSTM_cell(self):
4766  # this is just a smoke test; these modules are implemented through
4767  # autograd so no Jacobian test is needed
4768  for bias in (True, False):
4769  input = torch.randn(3, 10)
4770  hx = torch.randn(3, 20)
4771  cx = torch.randn(3, 20)
4772  lstm = nn.LSTMCell(10, 20, bias=bias)
4773  for _ in range(6):
4774  hx, cx = lstm(input, (hx, cx))
4775 
4776  (hx + cx).sum().backward()
4777 
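# [Editor's note] A minimal sketch (not in the original file) of the manual
# unrolling that the LSTMCell smoke test performs: the cell consumes one time
# step at a time and the (hx, cx) pair is threaded through the loop.
def _editor_example_unroll_lstm_cell():
    import torch
    import torch.nn as nn
    cell = nn.LSTMCell(input_size=10, hidden_size=20)
    hx = torch.zeros(3, 20)                   # (batch, hidden_size)
    cx = torch.zeros(3, 20)
    for _ in range(6):
        step = torch.randn(3, 10)             # one time step of input
        hx, cx = cell(step, (hx, cx))
    return hx, cx
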
4778  @unittest.skipIf(not (TEST_CUDNN and TEST_MULTIGPU), 'CUDNN or multi-gpu not available')
4779  def test_cudnn_rnn_dropout_states_device(self):
4780  rnn = nn.RNN(10, 20, num_layers=2, dropout=.5)
4781  device = 1
4782  input = torch.randn(5, 4, 10).cuda(device)
4783  rnn.cuda(device)
4784  hx = torch.randn(2, 4, 20).cuda(device)
4785  output = rnn(input, hx)
4786 
4787  @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
4788  @skipIfRocm
4789  def test_cudnn_weight_format(self):
4790  rnns = [
4791  nn.LSTM(10, 20, batch_first=True),
4792  nn.GRU(10, 20, batch_first=True),
4793  nn.RNN(10, 20, batch_first=True)
4794  ]
4795  first_warn = True
4796  for rnn in rnns:
4797  rnn.cuda()
4798  input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True)
4799  hx = Variable(torch.randn(1, 5, 20).cuda(), requires_grad=True)
4800  all_vars = [input, hx] + list(rnn.parameters())
4801  if isinstance(rnn, nn.LSTM):
4802  cx = Variable(torch.randn(1, 5, 20).cuda(), requires_grad=True)
4803  all_vars[2:2] = [cx]
4804  hx = (hx, cx)
4805 
4806  output = rnn(input, hx)
4807  output[0].sum().backward()
4808  grads = [v.grad.data.clone() for v in all_vars]
4809  for v in all_vars:
4810  v.grad.data.zero_()
4811 
4812  # Weights will no longer be views into the same chunk of memory
4813  weight = all_vars[4]
4814  weight_data = weight.data.clone()
4815  with torch.no_grad():
4816  weight.set_(weight_data)
4817 
4818  for _ in range(2):
4819  with warnings.catch_warnings(record=True) as w:
4820  output_noncontig = rnn(input, hx)
4821  if first_warn:
4822  self.assertEqual(len(w), 1)
4823  self.assertIn('weights are not part of single contiguous chunk of memory', w[0].message.args[0])
4824  first_warn = False
4825  warnings.resetwarnings()
4826  output_noncontig[0].sum().backward()
4827  grads_noncontig = [v.grad.data.clone() for v in all_vars]
4828  for v in all_vars:
4829  v.grad.data.zero_()
4830  self.assertEqual(output, output_noncontig)
4831  self.assertEqual(grads_noncontig, grads)
4832 
4833  # Make sure these still share storage
4834  weight_data[:] = 4
4835  self.assertEqual(weight_data, all_vars[4].data)
4836 
4837  @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available')
4838  def test_cudnn_weight_tying(self):
4839  rnns = [
4840  nn.LSTM(10, 20, batch_first=True, bidirectional=True),
4841  nn.GRU(10, 20, batch_first=True, bidirectional=True),
4842  nn.RNN(10, 20, batch_first=True, bidirectional=True)
4843  ]
4844  for rnn in rnns:
4845  rnn.bias_ih_l0_reverse = rnn.bias_ih_l0
4846  rnn.cuda()
4847  input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True)
4848  hx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
4849  all_vars = [input, hx] + list(rnn.parameters())
4850  opt = torch.optim.SGD(rnn.parameters(), lr=0.1)
4851  opt.zero_grad()
4852  if isinstance(rnn, nn.LSTM):
4853  cx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True)
4854  all_vars[2:2] = [cx]
4855  hx = (hx, cx)
4856 
4857  with warnings.catch_warnings(record=True) as w:
4858  output = rnn(input, hx)
4859  output[0].sum().backward()
4860 
4861  opt.step()
4862  with warnings.catch_warnings(record=True) as w:
4863  output_cuda = rnn(input, hx)
4864  rnn.cpu()
4865  hx = (hx[0].cpu(), hx[1].cpu()) if isinstance(rnn, nn.LSTM) else hx.cpu()
4866  output_cpu = rnn(input.cpu(), hx)
4867  self.assertEqual(output_cuda, output_cpu)
4868 
4869  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
4870  @repeat_test_for_types(NO_HALF_TENSORTYPES)
4871  def test_cuda_rnn_fused(self, dtype=torch.float):
4872 
4873  def copy_rnn(rnn1, rnn2):
4874  for x_layer, y_layer in zip(rnn1.all_weights, rnn2.all_weights):
4875  for x, y in zip(x_layer, y_layer):
4876  x.data.copy_(y.data)
4877 
4878  def check_rnn_grads(rnn1, rnn2):
4879  for x_layer, y_layer in zip(rnn1.all_weights, rnn2.all_weights):
4880  for x, y in zip(x_layer, y_layer):
4881  self.assertEqual(x.grad, y.grad, prec=5e-5)
4882 
4883  input_size = 10
4884  hidden_size = 6
4885  num_layers = 2
4886  seq_length = 7
4887  batch = 6
4888  input_val = torch.randn(seq_length, batch, input_size, dtype=dtype)
4889  grad_output = torch.randn(seq_length, batch, hidden_size, dtype=dtype)
4890  hx_val = torch.randn(num_layers, batch, hidden_size, dtype=dtype)
4891  grad_hy = torch.randn(num_layers, batch, hidden_size, dtype=dtype)
4892  with torch.backends.cudnn.flags(enabled=False):
4893  for module in (nn.GRU, nn.LSTM):
4894  for bias in (True, False):
4895  rnn = module(input_size, hidden_size, num_layers, bias=bias).to(dtype)
4896  rnn_cuda = module(input_size, hidden_size, num_layers, bias=bias).to("cuda", dtype)
4897  copy_rnn(rnn, rnn_cuda)
4898 
4899  is_lstm = isinstance(rnn, nn.LSTM)
4900  if is_lstm:
4901  hx = (Variable(hx_val.clone(), requires_grad=True),
4902  Variable(hx_val.clone().add(1), requires_grad=True))
4903  hx_cuda = (Variable(hx_val.clone().cuda(), requires_grad=True),
4904  Variable(hx_val.clone().cuda().add(1), requires_grad=True))
4905  else:
4906  hx = Variable(hx_val.clone(), requires_grad=True)
4907  hx_cuda = Variable(hx_val.clone().cuda(), requires_grad=True)
4908 
4909  inp = Variable(input_val.clone(), requires_grad=True)
4910  inp_cu = Variable(input_val.clone().cuda(), requires_grad=True)
4911  output1, hy1 = rnn(inp, hx)
4912  output2, hy2 = rnn_cuda(inp_cu, hx_cuda)
4913  if is_lstm:
4914  torch.autograd.backward(
4915  [output1, hy1[0], hy1[1]], [grad_output, grad_hy, grad_hy + 1]
4916  )
4917  torch.autograd.backward(
4918  [output2, hy2[0], hy2[1]],
4919  [grad_output.cuda(), grad_hy.cuda(), (grad_hy + 1).cuda()]
4920  )
4921  else:
4922  torch.autograd.backward([output1, hy1], [grad_output, grad_hy])
4923  torch.autograd.backward([output2, hy2], [grad_output.cuda(), grad_hy.cuda()])
4924 
4925  self.assertEqual(output1, output2)
4926  self.assertEqual(hy1, hy2)
4927 
4928  check_rnn_grads(rnn, rnn_cuda)
4929  self.assertEqual(inp.grad.data, inp_cu.grad.data)
4930  if is_lstm:
4931  self.assertEqual(hx[0].grad.data, hx_cuda[0].grad.data)
4932  self.assertEqual(hx[1].grad.data, hx_cuda[1].grad.data)
4933  else:
4934  self.assertEqual(hx.grad.data, hx_cuda.grad.data)
4935 
4936  def test_rnn_args_check(self):
4937  input_size = 3
4938  hidden_size = 5
4939  num_layers = 2
4940  batch_size = 4
4941  seq_len = 6
4942  num_directions = 1
4943  bad_size = 7 # prime number so that no size can divide it.
4944 
4945  def test(input_shape, hidden_shape, mode):
4946  for input, hidden in get_inputs(input_shape, hidden_shape, mode):
4947  model = getattr(nn, mode)(input_size, hidden_size, num_layers)
4948  self.assertRaises(RuntimeError, lambda: model(input, hidden))
4949 
4950  correct_input_shape = (seq_len, batch_size, input_size)
4951  correct_hidden_shape = (num_layers * num_directions, batch_size, hidden_size)
4952 
4953  def update_shape(shape, dim, new_dim_size):
4954  new_shape = list(shape)
4955  new_shape[dim] = new_dim_size
4956  return tuple(new_shape)
4957 
4958  def get_inputs(input_shape, hidden_shape, mode):
4959  '''returns list( tuple(input, hidden) )
4960  where input, hidden are inputs to a model'''
4961  input = torch.randn(input_shape)
4962  hidden = torch.randn(hidden_shape)
4963  if mode != 'LSTM':
4964  return [(input, hidden)]
4965  if hidden_shape == correct_hidden_shape:
4966  return [(input, (hidden, hidden))]
4967  good_hidden = torch.randn(correct_hidden_shape)
4968  return [
4969  (input, (hidden, good_hidden)),
4970  (input, (good_hidden, hidden)),
4971  ]
4972 
4973  rnn_modes = ['RNN', 'GRU', 'LSTM']
4974  for mode in rnn_modes:
4975  # Incorrect input batch size
4976  input_shape = update_shape(correct_input_shape, 1, bad_size)
4977  hidden_shape = correct_hidden_shape
4978  test(input_shape, hidden_shape, mode)
4979 
4980  # Incorrect hidden batch size
4981  input_shape = correct_input_shape
4982  hidden_shape = update_shape(correct_hidden_shape, 1, bad_size)
4983  test(input_shape, hidden_shape, mode)
4984 
4985  # Incorrect input size
4986  input_shape = update_shape(correct_input_shape, 2, bad_size)
4987  hidden_shape = correct_hidden_shape
4988  test(input_shape, hidden_shape, mode)
4989 
4990  # Incorrect hidden size
4991  input_shape = correct_input_shape
4992  hidden_shape = update_shape(correct_hidden_shape, 2, bad_size)
4993  test(input_shape, hidden_shape, mode)
4994 
4995  # Incorrect hidden[0]
4996  input_shape = correct_input_shape
4997  hidden_shape = update_shape(correct_hidden_shape, 0, bad_size)
4998  test(input_shape, hidden_shape, mode)
4999 
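# [Editor's note] For reference, a sketch (not part of the original tests) of the
# shape contract the checks above enforce for a unidirectional RNN/GRU/LSTM:
# input is (seq_len, batch, input_size) and the hidden state is
# (num_layers * num_directions, batch, hidden_size). Sizes are illustrative.
def _editor_example_rnn_expected_shapes():
    import torch
    import torch.nn as nn
    rnn = nn.GRU(input_size=3, hidden_size=5, num_layers=2)
    x = torch.randn(6, 4, 3)                  # (seq_len, batch, input_size)
    h0 = torch.randn(2, 4, 5)                 # (num_layers, batch, hidden_size)
    out, h_n = rnn(x, h0)
    assert out.shape == (6, 4, 5) and h_n.shape == (2, 4, 5)
    return out, h_n
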
5000  @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
5001  def test_rnn_check_device(self):
5002  input_size = 3
5003  hidden_size = 5
5004  num_layers = 2
5005  batch_size = 4
5006  seq_len = 6
5007  num_directions = 1
5008 
5009  correct_input_shape = (seq_len, batch_size, input_size)
5010  correct_hidden_shape = (num_layers * num_directions, batch_size, hidden_size)
5011  rnn_modes = ['RNN', 'GRU', 'LSTM']
5012 
5013  for mode in rnn_modes:
5014  model = getattr(nn, mode)(input_size, hidden_size, num_layers)
5015  input = torch.randn(correct_input_shape)
5016  hidden = torch.randn(correct_hidden_shape)
5017 
5018  # input and weights are not at the same device
5019  with self.assertRaisesRegex(RuntimeError,
5020  "Input and parameter tensors are not at the same device"):
5021  model(input.to('cuda:0'))
5022 
5023  # input and hiddens are not at the same device
5024  with self.assertRaisesRegex(RuntimeError,
5025  r"Input and hidden tensors are not at the same device"):
5026  if mode == 'LSTM':
5027  model(input, (hidden.to('cuda:0'), hidden.to('cuda:0')))
5028  else:
5029  model(input, (hidden.to('cuda:0')))
5030 
5031  # hidden tensors are not at the same CUDA device
5032  if mode == 'LSTM':
5033  with self.assertRaisesRegex(RuntimeError,
5034  "Input and hidden tensors are not at the same device"):
5035  model(input.to('cuda:0'), (hidden.to('cuda:0'), hidden.to('cuda:1')))
5036 
5037  def test_rnn_initial_hidden_state(self):
5038  rnn_modes = ['RNN', 'GRU', 'LSTM']
5039  for mode in rnn_modes:
5040  rnn = getattr(nn, mode)(30, 20, 2)
5041  input = torch.randn(10, 32, 30)
5042  hidden = torch.zeros(2, 32, 20)
5043 
5044  if mode == 'LSTM':
5045  hidden = (hidden, hidden)
5046  output1, hidden1 = rnn(input, hidden)
5047  output2, hidden2 = rnn(input)
5048  self.assertEqual(output1, output2)
5049  self.assertEqual(hidden1, hidden2)
5050 
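# [Editor's note] A hedged sketch (not in the original suite) of the property
# test_rnn_initial_hidden_state checks: omitting the hidden state is equivalent
# to passing explicit zeros of the right shape.
def _editor_example_rnn_default_hidden():
    import torch
    import torch.nn as nn
    rnn = nn.GRU(input_size=30, hidden_size=20, num_layers=2)
    x = torch.randn(10, 32, 30)               # (seq_len, batch, input_size)
    h0 = torch.zeros(2, 32, 20)
    out_explicit, _ = rnn(x, h0)
    out_default, _ = rnn(x)                   # hx defaults to zeros
    assert torch.equal(out_explicit, out_default)
    return out_default
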
5051  def _test_rnn_retain_variables(self, device="cpu", dtype=torch.double):
5052  rnns = [nn.LSTM(10, 20, num_layers=2).to(device, dtype),
5053  nn.GRU(10, 20, num_layers=2).to(device, dtype),
5054  nn.RNN(10, 20, num_layers=2).to(device, dtype)]
5055  for rnn in rnns:
5056  input = torch.randn(5, 6, 10, device=device, dtype=dtype, requires_grad=True)
5057  output = rnn(input)
5058  output[0].sum().backward(retain_graph=True)
5059  grads = [input.grad.data.clone()] + [p.grad.data.clone() for p in rnn.parameters()]
5060  for _ in range(4):
5061  rnn.zero_grad()
5062  input.grad.data.zero_()
5063  output[0].sum().backward(retain_graph=True)
5064  grads2 = [input.grad.data] + [p.grad.data for p in rnn.parameters()]
5065  self.assertEqual(grads, grads2)
5066 
5067  def test_rnn_retain_variables(self):
5068  self._test_rnn_retain_variables()
5069 
5070  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
5071  @repeat_test_for_types(ALL_TENSORTYPES)
5072  def test_rnn_retain_variables_cuda(self, dtype=torch.float):
5073  with torch.backends.cudnn.flags(enabled=False):
5074  self._test_rnn_retain_variables("cuda", dtype)
5075  self._test_rnn_retain_variables("cuda", dtype)
5076 
5077  def _test_RNN_cpu_vs_cudnn(self, dropout):
5078 
5079  def forward_backward(cuda, rnn, input_val, hx_val, grad_output, grad_hy, weights_val):
5080  is_lstm = isinstance(rnn, nn.LSTM)
5081 
5082  for x_layer, y_layer in zip(rnn.all_weights, weights_val):
5083  for x, y in zip(x_layer, y_layer):
5084  x.data.copy_(y.data)
5085 
5086  if isinstance(input_val, rnn_utils.PackedSequence):
5087  input = rnn_utils.PackedSequence(
5088  Variable(input_val.data.data, requires_grad=True), input_val.batch_sizes)
5089  input_var = input.data
5090  else:
5091  input = Variable(input_val.clone(), requires_grad=True)
5092  input_var = input
5093  if is_lstm:
5094  hx = (Variable(hx_val.clone(), requires_grad=True),
5095  Variable(hx_val.add(1), requires_grad=True))
5096  else:
5097  hx = Variable(hx_val.clone(), requires_grad=True)
5098 
5099  if cuda:
5100  rnn.cuda()
5101  input_var.data = input_var.data.cuda()
5102  if is_lstm:
5103  hx[0].data = hx[0].data.cuda()
5104  hx[1].data = hx[1].data.cuda()
5105  else:
5106  hx.data = hx.data.cuda()
5107  grad_hy = grad_hy.cuda()
5108  grad_output = grad_output.cuda()
5109 
5110  output, hy = rnn(input, hx)
5111 
5112  if isinstance(output, rnn_utils.PackedSequence):
5113  output = output.data
5114 
5115  if is_lstm:
5116  torch.autograd.backward([output, hy[0], hy[1]], [grad_output, grad_hy, grad_hy + 1])
5117  else:
5118  torch.autograd.backward([output, hy], [grad_output, grad_hy])
5119 
5120  return {'output': output.data,
5121  'hy': hy[0].data if is_lstm else hy.data,
5122  'weights': rnn.all_weights,
5123  'grad_input': input_var.grad.data,
5124  'grad_hx': hx[0].grad.data if is_lstm else hx.grad.data,
5125  'cy': hy[1].data if is_lstm else None,
5126  'grad_cx': hx[1].grad.data if is_lstm else None}
5127 
5128  input_size = 10
5129  hidden_size = 6
5130  num_layers = 2
5131  seq_length = 7
5132  batch = 6
5133 
5134  def make_noncontig(tensor):
5135  ndim = tensor.dim()
5136  return torch.stack([tensor.clone().zero_(), tensor], ndim).select(ndim, 1)
5137 
5138  def compare_cpu_gpu(outputs_cpu, outputs_gpu):
5139  self.assertEqual(list(outputs_cpu.keys()), list(outputs_gpu.keys()))
5140  for key in outputs_cpu.keys():
5141  if key != 'weights':
5142  self.assertEqual(outputs_cpu[key], outputs_gpu[key], prec=5e-5, message=key)
5143 
5144  # check grad weights separately, since rnn.all_weights is a nested list
5145  for cpu_layer_weight, gpu_layer_weight in zip(outputs_cpu['weights'], outputs_gpu['weights']):
5146  for (cpu_weight, gpu_weight) in zip(cpu_layer_weight, gpu_layer_weight):
5147  self.assertEqual(cpu_weight.grad.data, gpu_weight.grad.data, prec=5e-5)
5148 
5149  for module in (nn.RNN, nn.LSTM, nn.GRU):
5150  for bias, bidirectional, batch_first, contig, variable_len, lens_as_tensor \
5151  in product((True, False), repeat=6):
5152 
5153  num_directions = 2 if bidirectional else 1
5154  if batch_first:
5155  input_val = torch.randn(batch, seq_length, input_size)
5156  grad_output = torch.randn(batch, seq_length, hidden_size * num_directions)
5157  else:
5158  input_val = torch.randn(seq_length, batch, input_size)
5159  grad_output = torch.randn(seq_length, batch, hidden_size * num_directions)
5160 
5161  hx_val = torch.randn(num_layers * num_directions, batch, hidden_size)
5162  grad_hy = torch.randn(num_layers * num_directions, batch, hidden_size)
5163 
5164  if not contig:
5165  grad_output = make_noncontig(grad_output)
5166  grad_hy = make_noncontig(grad_hy)
5167  input_var = make_noncontig(input_val)
5168  hx_val = make_noncontig(hx_val)
5169 
5170  if variable_len:
5171  lengths = [7, 5, 5, 2, 1, 1]
5172  if lens_as_tensor:
5173  lengths = torch.tensor(lengths, dtype=torch.long)
5174  input_val = rnn_utils.pack_padded_sequence(input_val, lengths, batch_first=batch_first)
5175  grad_output = rnn_utils.pack_padded_sequence(grad_output, lengths, batch_first=batch_first).data
5176 
5177  rnn = module(input_size,
5178  hidden_size,
5179  num_layers,
5180  bias=bias,
5181  dropout=dropout,
5182  bidirectional=bidirectional,
5183  batch_first=batch_first)
5184 
5185  outputs_cpu = forward_backward(
5186  False, rnn, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
5187 
5188  rnn_gpu = module(input_size,
5189  hidden_size,
5190  num_layers,
5191  bias=bias,
5192  dropout=dropout,
5193  bidirectional=bidirectional,
5194  batch_first=batch_first)
5195 
5196  outputs_gpu = forward_backward(
5197  True, rnn_gpu, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
5198 
5199  compare_cpu_gpu(outputs_cpu, outputs_gpu)
5200 
5201  for nonlinearity in ('tanh', 'relu'):
5202  hx_val = torch.randn(num_layers, batch, hidden_size)
5203  input_val = torch.randn(seq_length, batch, input_size)
5204  grad_output = torch.randn(
5205  seq_length, batch, hidden_size * num_directions)
5206  grad_hy = torch.randn(
5207  num_layers * num_directions, batch, hidden_size)
5208 
5209  rnn = nn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
5210  outputs_cpu = forward_backward(False, rnn, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
5211 
5212  rnn_gpu = nn.RNN(input_size, hidden_size, num_layers, bias=bias, nonlinearity=nonlinearity)
5213  outputs_gpu = forward_backward(True, rnn_gpu, input_val, hx_val, grad_output, grad_hy, rnn.all_weights)
5214 
5215  compare_cpu_gpu(outputs_cpu, outputs_gpu)
5216 
5217  @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
5218  @default_tensor_type(torch.FloatTensor) # FIXME: just until torch.cuda.DoubleTensor.sum() implemented
5219  def test_RNN_cpu_vs_cudnn_no_dropout(self):
5220  self._test_RNN_cpu_vs_cudnn(0)
5221 
5222  @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
5223  def test_RNN_cudnn_weight_norm(self):
5224  input_size = 10
5225  hidden_size = 6
5226  num_layers = 2
5227  seq_length = 7
5228  batch = 6
5229  m = nn.LSTM(input_size, hidden_size, num_layers).cuda()
5230  input = torch.randn(seq_length, batch, input_size).cuda()
5231  expected_output = m(input)
5232  # add weight normalization
5233  name = 'weight_hh_l0'
5234  m = torch.nn.utils.weight_norm(m, name=name)
5235  # otherwise, subsequent warnings will be hidden, and further tests rely on them
5236  warnings.simplefilter("always")
5237  self.assertEqual(m(input), expected_output)
5238 
5239  # remove weight norm
5240  m = torch.nn.utils.remove_weight_norm(m, name=name)
5241  self.assertEqual(m(input), expected_output)
5242 
5243  @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 5103), "needs cudnn >= 5.1")
5244  @default_tensor_type(torch.FloatTensor) # FIXME: just until torch.cuda.DoubleTensor.sum() implemented
5245  def test_RNN_cpu_vs_cudnn_with_dropout(self):
5246  # Because of dropout randomness, can only compare dropout=0 and dropout=1
5247  self._test_RNN_cpu_vs_cudnn(1)
5248 
5249  @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 5103), "needs cudnn >= 5.1")
5250  def test_RNN_dropout(self):
5251  # checking the assumption that cuDNN sticks dropout in between
5252  # RNN layers
5253  for p in (0, 0.276, 0.731, 1):
5254  for train in (True, False):
5255  for cuda in (True, False):
5256  rnn = nn.RNN(10, 1000, 2, bias=False, dropout=p, nonlinearity='relu')
5257  if cuda:
5258  rnn.cuda()
5259 
5260  if train:
5261  rnn.train()
5262  else:
5263  rnn.eval()
5264  rnn.weight_ih_l0.data.fill_(1)
5265  rnn.weight_hh_l0.data.fill_(1)
5266  rnn.weight_ih_l1.data.fill_(1)
5267  rnn.weight_hh_l1.data.fill_(1)
5268  input = torch.ones(1, 1, 10)
5269  hx = torch.zeros(2, 1, 1000)
5270  if cuda:
5271  input = input.cuda()
5272  hx = hx.cuda()
5273 
5274  output, hy = rnn(input, hx)
5275  self.assertEqual(output.data.min(), output.data.max())
5276  output_val = output.data[0][0][0]
5277  if p == 0 or not train:
5278  self.assertEqual(output_val, 10000)
5279  elif p == 1:
5280  self.assertEqual(output_val, 0)
5281  else:
5282  self.assertGreater(output_val, 8000)
5283  self.assertLess(output_val, 12000)
5284  denorm_mod = (output_val * (1 - p)) % 10
5285  self.assertLess(min(denorm_mod, 10 - denorm_mod), 1e-2)
5286 
5287  self.assertEqual(hy[0].data.min(), hy[0].data.max())
5288  self.assertEqual(hy[1].data.min(), hy[1].data.max())
5289  self.assertEqual(hy.data[0][0][0], 10)
5290  self.assertEqual(hy.data[1][0][0], output_val)
5291 
5292  @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 5103), "needs cudnn >= 5.1")
5293  def test_RNN_dropout_state(self):
5294  import sys
5295  if sys.version_info[0] == 2:
5296  import cPickle as pickle
5297  else:
5298  import pickle
5299  for p in (0, 0.1234):
5300  for train in (True, False):
5301  for cuda in (True, False):
5302  rnn = nn.RNN(100, 100, 2, bias=False, dropout=p, nonlinearity='relu')
5303  if cuda:
5304  rnn.cuda()
5305 
5306  if train:
5307  rnn.train()
5308  else:
5309  rnn.eval()
5310  input = torch.rand(1, 1, 100)
5311  hx = torch.rand(2, 1, 100)
5312  if cuda:
5313  input = input.cuda()
5314  hx = hx.cuda()
5315 
5316  output1, hy1 = rnn(input, hx)
5317  output2, hy2 = rnn(input, hx)
5318 
5319  rnn_pickle = pickle.dumps(rnn)
5320  rnn2 = pickle.loads(rnn_pickle)
5321  rnn2.flatten_parameters()
5322  output3, hy3 = rnn2(input, hx)
5323 
5324  if p == 0 or not train:
5325  self.assertEqual(output1, output2)
5326  self.assertEqual(output1, output3)
5327  self.assertEqual(hy1, hy2)
5328  self.assertEqual(hy1, hy3)
5329  else:
5330  self.assertNotEqual(output1, output2)
5331  self.assertNotEqual(output1, output3)
5332  self.assertNotEqual(hy1, hy2)
5333  self.assertNotEqual(hy1, hy3)
5334 
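# [Editor's note] A small illustration, not part of the original file, of the
# round trip the dropout-state test relies on: an nn.RNN survives pickling, and
# with dropout disabled (or in eval mode) the reloaded module reproduces the
# original outputs exactly.
def _editor_example_pickle_rnn_roundtrip():
    import pickle
    import torch
    import torch.nn as nn
    rnn = nn.RNN(10, 10, num_layers=2, nonlinearity='relu')
    rnn.eval()
    x = torch.rand(3, 2, 10)
    rnn2 = pickle.loads(pickle.dumps(rnn))
    out1, _ = rnn(x)
    out2, _ = rnn2(x)
    assert torch.equal(out1, out2)
    return out1
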
5335  @unittest.skipIf(not (TEST_CUDNN and TEST_CUDNN_VERSION >= 5103), "needs cudnn >= 5.1")
5336  def test_RNN_change_dropout(self):
5337  for train, cuda in product((True, False), repeat=2):
5338  rnn = nn.RNN(100, 100, 2, dropout=0, nonlinearity='relu')
5339  input = torch.rand(3, 2, 100)
5340  if cuda:
5341  input.data = input.data.cuda()
5342  rnn.cuda()
5343 
5344  if train:
5345  rnn.train()
5346  else:
5347  rnn.eval()
5348 
5349  prev_output = None
5350  for p in (0, 0.5, 0, 0.7, 0.2, 1, 0.2, 0):
5351  rnn.dropout = p
5352  output1, hy1 = rnn(input)
5353  output2, hy2 = rnn(input)
5354 
5355  if p == 0 or p == 1 or not train:
5356  self.assertEqual(output1, output2)
5357  self.assertEqual(hy1, hy2)
5358  else:
5359  self.assertNotEqual(output1, output2)
5360  self.assertNotEqual(hy1, hy2)
5361 
5362  if prev_output is not None:
5363  if not train:
5364  self.assertEqual(output1.data, prev_output)
5365  self.assertEqual(output2.data, prev_output)
5366  else:
5367  self.assertNotEqual(output1.data, prev_output)
5368  self.assertNotEqual(output2.data, prev_output)
5369  prev_output = output1.data
5370 
5371  def _verify_pixel_shuffle(self, input, output, upscale_factor):
5372  for c in range(output.size(1)):
5373  for h in range(output.size(2)):
5374  for w in range(output.size(3)):
5375  height_idx = h // upscale_factor
5376  width_idx = w // upscale_factor
5377  channel_idx = (upscale_factor * (h % upscale_factor)) + (w % upscale_factor) + \
5378  (c * upscale_factor ** 2)
5379  self.assertEqual(output[:, c, h, w], input[:, channel_idx, height_idx, width_idx])
5380 
5381  def test_inplace_thnn(self):
5382  modules = [nn.ReLU, nn.ELU, nn.SELU, nn.CELU, nn.RReLU]
5383  for mod in modules:
5384  r = mod(inplace=True)
5385  input = torch.randn(5, 5, requires_grad=True)
5386  output = r(input + 0)
5387  grad_output = torch.randn(5, 5)
5388  grad_output_clone = grad_output.clone()
5389  output.backward(grad_output)
5390  self.assertEqual(grad_output, grad_output_clone)
5391 
5392  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
5393  @repeat_test_for_types(ALL_TENSORTYPES)
5394  def test_noncontig_conv_grad_cuda(self, dtype=torch.float):
5395  # FIXME: remove after adding non-contiguous grad tests for all modules
5396  module = nn.Conv2d(3, 5, kernel_size=3, padding=1).to("cuda", dtype)
5397  input = torch.randn(2, 3, 10, 10, dtype=dtype, device="cuda", requires_grad=True)
5398  output = module(input)
5399 
5400  grad = torch.randn(2, 2, 5, 10, 10, dtype=dtype, device="cuda")[:, 1]
5401  assert not grad.is_contiguous()
5402  output.backward(grad, retain_graph=True)
5403  self.assertIsNotNone(input.grad)
5404  result = input.grad.data.clone()
5405  input.grad.data.zero_()
5406 
5407  output.backward(grad.contiguous())
5408  self.assertEqual(result, input.grad.data, dtype2prec[dtype])
5409 
5410  def test_pixel_shuffle(self):
5411  batch_size = random.randint(1, 3)
5412  upscale_factor = random.randint(2, 5)
5413  channels = random.randint(1, 4) * upscale_factor ** 2
5414  height = random.randint(5, 10)
5415  width = random.randint(5, 10)
5416 
5417  input = torch.rand(batch_size, channels, height, width, requires_grad=True)
5418  ps = nn.PixelShuffle(upscale_factor)
5419  output = ps(input)
5420  self._verify_pixel_shuffle(input.data, output.data, upscale_factor)
5421  output.backward(output.data)
5422  self.assertEqual(input.data, input.grad.data)
5423 
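# [Editor's note] The shape relation that _verify_pixel_shuffle spells out per
# element, shown here as a hedged standalone sketch (not in the original file):
# nn.PixelShuffle(r) maps (N, C*r*r, H, W) to (N, C, H*r, W*r).
def _editor_example_pixel_shuffle_shapes():
    import torch
    import torch.nn as nn
    ps = nn.PixelShuffle(upscale_factor=2)
    x = torch.randn(1, 3 * 2 * 2, 5, 7)       # (N, C*r*r, H, W)
    y = ps(x)
    assert y.shape == (1, 3, 10, 14)          # (N, C, H*r, W*r)
    return y
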
5424  def test_elu_inplace_view(self):
5425  v = torch.tensor([1.0, -1.0, 1.0, -1.0], requires_grad=True)
5426 
5427  def func(root):
5428  x = root.clone()
5429  view = x.narrow(0, 1, 2)
5430  res = F.elu(view, inplace=True)
5431  self.assertIs(res, view)
5432  return x
5433 
5434  gradcheck(func, [v])
5435  gradgradcheck(func, [v])
5436 
5437  def test_relu_inplace_view(self):
5438  v = torch.tensor([1.0, -1.0, 1.0, -1.0], requires_grad=True)
5439 
5440  def func(root):
5441  x = root.clone()
5442  view = x.narrow(0, 1, 2)
5443  res = F.relu(view, inplace=True)
5444  self.assertIs(res, view)
5445  return x
5446 
5447  gradcheck(func, [v])
5448  gradgradcheck(func, [v])
5449 
5450  @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
5451  def test_PReLU_backward_requires_grad_false(self):
5452  m = nn.PReLU().to('cuda')
5453  x = torch.randn(2, 3, 4, 5, requires_grad=False, device='cuda')
5454  y = m(x)
5455  y.mean().backward()
5456  self.assertEqual(x.grad, None)
5457 
5458  def test_bce_loss_always_nonnegative(self):
5459  target = torch.ones(5)
5460  input = torch.ones(5)
5461  self.assertEqual((nn.BCELoss()(input, target) < 0).sum(), 0)
5462 
5463  target = torch.zeros(5)
5464  input = torch.zeros(5)
5465  self.assertEqual((nn.BCELoss()(input, target) < 0).sum(), 0)
5466 
5467  def test_bce_with_logits_raises_if_target_and_input_are_different_size(self):
5468  target = torch.rand(5)
5469  input = torch.rand(5, 1)
5470  with self.assertRaises(ValueError):
5471  nn.BCEWithLogitsLoss()(input, target)
5472 
5473  target = torch.rand(5, 1)
5474  input = torch.rand(5)
5475  with self.assertRaises(ValueError):
5476  nn.BCEWithLogitsLoss()(input, target)
5477 
5478  def test_bce_with_logits_gives_same_result_as_sigmoid_and_bce_loss(self):
5479  sigmoid = nn.Sigmoid()
5480 
5481  target = torch.rand(64, 4)
5482  output = torch.rand(64, 4) - 0.5
5483 
5484  self.assertEqual(nn.BCEWithLogitsLoss()(output, target), nn.BCELoss()(sigmoid(output), target))
5485 
5486  weight = torch.rand(4)
5487  self.assertEqual(nn.BCEWithLogitsLoss(weight)(output, target), nn.BCELoss(weight)(sigmoid(output), target))
5488 
5489  target = torch.zeros(4, 1, dtype=torch.float)
5490  output = torch.empty(4, 1, dtype=torch.float).fill_(-100)
5491 
5492  self.assertEqual(nn.BCEWithLogitsLoss()(output, target), nn.BCELoss()(sigmoid(output), target))
5493 
5494  self.assertEqual(nn.BCEWithLogitsLoss(reduction='none')(output, target),
5495  nn.BCELoss(reduction='none')(sigmoid(output), target))
5496 
5497  weight = torch.rand(1, dtype=torch.float)
5498  self.assertEqual(nn.BCEWithLogitsLoss(weight)(output, target), nn.BCELoss(weight)(sigmoid(output), target))
5499 
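# [Editor's note] A hedged sketch (not part of the original tests) of the
# identity the comparison above relies on: BCEWithLogitsLoss fuses the sigmoid
# into the loss for numerical stability, so on moderate logits it agrees with
# Sigmoid followed by BCELoss up to floating-point error.
def _editor_example_bce_with_logits_equivalence():
    import torch
    import torch.nn as nn
    logits = torch.randn(8, 4)
    target = torch.rand(8, 4)
    fused = nn.BCEWithLogitsLoss()(logits, target)
    unfused = nn.BCELoss()(torch.sigmoid(logits), target)
    assert torch.allclose(fused, unfused, atol=1e-6)
    return fused
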
5500  def test_bce_with_logits_has_correct_grad_at_zero(self):
5501  output = torch.zeros(3, 1, requires_grad=True)
5502  target = torch.zeros(3, 1)
5503  nn.BCEWithLogitsLoss(reduction='sum')(output, target).backward()
5504  expected_grad = torch.empty(3, 1).fill_(0.5)
5505  self.assertEqual(output.grad, expected_grad)
5506 
5507  def test_bce_with_logits_broadcasts_weights(self):
5508  target = torch.rand(16, 4)
5509  output = torch.rand(16, 4) - 0.5
5510 
5511  weight = torch.rand(4)
5512  out1 = nn.BCEWithLogitsLoss(weight)(output, target)
5513 
5514  weight = weight.expand(16, 4).contiguous()
5515  out2 = nn.BCEWithLogitsLoss(weight)(output, target)
5516 
5517  self.assertEqual(out1, out2)
5518 
5519  weight = torch.rand(16, 1)
5520  out1 = nn.BCEWithLogitsLoss(weight)(output, target)
5521 
5522  weight = weight.expand(16, 4).contiguous()
5523  out2 = nn.BCEWithLogitsLoss(weight)(output, target)
5524 
5525  self.assertEqual(out1, out2)
5526 
5527  def test_bce_with_logits_ones_in_pos_weights_are_the_same_as_none(self):
5528  target = torch.rand(64, 4)
5529  output = torch.rand(64, 4) - 0.5
5530  pos_weight = torch.ones(64, 4)
5531 
5532  self.assertEqual(nn.BCEWithLogitsLoss()(output, target),
5533  nn.BCEWithLogitsLoss(pos_weight=pos_weight)(output, target))
5534 
5535  def test_bce_with_logits_broadcasts_pos_weights(self):
5536  target = torch.rand(64, 4)
5537  output = torch.rand(64, 4) - 0.5
5538  pos_weight = torch.rand(4)
5539  out1 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(output, target)
5540 
5541  pos_weight1 = pos_weight.expand(1, 4)
5542  out2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight1)(output, target)
5543 
5544  pos_weight2 = pos_weight.expand(64, 4)
5545  out3 = nn.BCEWithLogitsLoss(pos_weight=pos_weight2)(output, target)
5546 
5547  self.assertEqual(out1, out2)
5548  self.assertEqual(out1, out3)
5549 
5550  def test_bce_with_logits_with_pos_weight_has_correct_grad_at_zero(self):
5551  output = torch.zeros(3, 1, requires_grad=True)
5552  target = torch.zeros(3, 1)
5553  pos_weight = torch.ones(3, 1)
5554  nn.BCEWithLogitsLoss(pos_weight=pos_weight, reduction='sum')(output, target).backward()
5555  expected_grad = torch.empty(3, 1).fill_(0.5)
5556  grad = output.grad
5557  self.assertEqual(grad, expected_grad)
5558 
5559  def test_bce_with_logits_stability(self):
5560  output = torch.tensor([0., -120.])
5561  target = torch.tensor([0., 1.])
5562  pos_weight = torch.tensor([1., 1.])
5563 
5564  out1 = nn.BCEWithLogitsLoss()(output, target)
5565  self.assertTrue(torch.isfinite(out1).all().item())
5566 
5567  out2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)(output, target)
5568  self.assertTrue(torch.isfinite(out2).all().item())
5569 
5570  def test_bce_loss_broadcasts_weights(self):
5571  sigmoid = nn.Sigmoid()
5572  target = torch.rand(16, 4)
5573  output = torch.rand(16, 4) - 0.5
5574 
5575  weight = torch.rand(4)
5576  out1 = nn.BCELoss(weight)(sigmoid(output), target)
5577 
5578  weight = weight.expand(16, 4).contiguous()
5579  out2 = nn.BCELoss(weight)(sigmoid(output), target)
5580 
5581  self.assertEqual(out1, out2)
5582 
5583  weight = torch.rand(16, 1)
5584  out1 = nn.BCELoss(weight)(sigmoid(output), target)
5585 
5586  weight = weight.expand(16, 4).contiguous()
5587  out2 = nn.BCELoss(weight)(sigmoid(output), target)
5588 
5589  self.assertEqual(out1, out2)
5590 
5591  def test_elu_inplace_gradgrad(self):
5592  v = torch.randn(8, requires_grad=True)
5593 
5594  def func(root):
5595  x = root.clone()
5596  return F.elu(x, inplace=True)
5597 
5598  gradcheck(func, [v])
5599  gradgradcheck(func, [v])
5600 
5601  def test_hardtanh_inplace_gradgrad(self):
5602  v = torch.randn(8, requires_grad=True)
5603 
5604  def func(root):
5605  x = root.clone()
5606  return F.hardtanh(x, inplace=True)
5607 
5608  gradcheck(func, [v])
5609  gradgradcheck(func, [v])
5610 
5611  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
5612  def test_batchnorm_cudnn_half(self):
5613  # THNN
5614  input = torch.randint(1, 10, (2, 3, 2, 2), dtype=torch.half, device="cuda", requires_grad=True)
5615  m = nn.BatchNorm2d(3).half().cuda()
5616  thnn_output = m(input)
5617  thnn_output.sum().backward()
5618  thnn_input_grad = input.grad.data.clone()
5619  self.assertEqual(thnn_output.type(), input.type())
5620  # cuDNN
5621  if TEST_CUDNN:
5622  input.grad = None
5623  m = m.float()
5624  cudnn_output = m(input)
5625  cudnn_output.sum().backward()
5626  cudnn_input_grad = input.grad.data.clone()
5627  self.assertEqual(cudnn_output.type(), input.type())
5628  self.assertEqual(cudnn_output, thnn_output)
5629  self.assertAlmostEqual(cudnn_input_grad, thnn_input_grad, delta=1e-3)
5630 
5631  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
5632  @repeat_test_for_types([torch.float, torch.half])
5633  def test_batchnorm_large_batch(self, dtype=torch.float):
5634  bn = nn.BatchNorm1d(1).to('cuda', dtype)
5635  data = torch.rand(131072, 1, device="cuda", dtype=dtype)
5636  out = bn(data).sum().backward()
5637 
5638  def _test_batchnorm_update_stats(self, device="cpu", dtype=torch.float):
5639  module = nn.BatchNorm1d(3).to(device, dtype)
5640 
5641  data = torch.rand(4, 3, device=device, dtype=dtype)
5642 
5643  # training pass
5644  old_running_mean = module.running_mean.clone()
5645  old_running_var = module.running_var.clone()
5646  old_num_batches_tracked = module.num_batches_tracked.clone()
5647  module(data)
5648  self.assertNotEqual(old_running_mean, module.running_mean)
5649  self.assertNotEqual(old_running_var, module.running_var)
5650  self.assertEqual(old_num_batches_tracked + 1, module.num_batches_tracked)
5651 
5652  # eval pass
5653  module.eval()
5654  old_running_mean = module.running_mean.clone()
5655  old_running_var = module.running_var.clone()
5656  old_num_batches_tracked = module.num_batches_tracked.clone()
5657  module(data)
5658  self.assertEqual(old_running_mean, module.running_mean)
5659  self.assertEqual(old_running_var, module.running_var)
5660  self.assertEqual(old_num_batches_tracked, module.num_batches_tracked)
5661 
5662  def test_batchnorm_update_stats(self):
5663  self._test_batchnorm_update_stats()
5664 
5665  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
5666  def test_batchnorm_update_stats_cuda(self):
5667  self._test_batchnorm_update_stats("cuda", torch.float)
5668  if TEST_CUDNN:
5669  with torch.backends.cudnn.flags(enabled=False):
5670  self._test_batchnorm_update_stats("cuda", torch.float)
5671 
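# [Editor's note] A compact sketch, not in the original file, of the buffer
# semantics _test_batchnorm_update_stats checks: training-mode forwards update
# running_mean, running_var and num_batches_tracked; eval-mode forwards only
# read them.
def _editor_example_batchnorm_running_stats():
    import torch
    import torch.nn as nn
    bn = nn.BatchNorm1d(3)
    x = torch.rand(4, 3)
    bn(x)                                     # training pass updates the buffers
    assert bn.num_batches_tracked.item() == 1
    bn.eval()
    bn(x)                                     # eval pass leaves them untouched
    assert bn.num_batches_tracked.item() == 1
    return bn.running_mean, bn.running_var
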
5672  def test_batchnorm_raises_error_if_running_mean_is_not_same_size_as_input(self):
5673  input = torch.rand(2, 10)
5674  running_var = torch.rand(10)
5675  wrong_sizes = [9, 11]
5676  for size in wrong_sizes:
5677  with self.assertRaises(RuntimeError):
5678  F.batch_norm(input, torch.rand(size), running_var)
5679 
5680  def test_batchnorm_raises_error_if_running_var_is_not_same_size_as_input(self):
5681  input = torch.rand(2, 10)
5682  running_mean = torch.rand(10)
5683  wrong_sizes = [9, 11]
5684  for size in wrong_sizes:
5685  with self.assertRaises(RuntimeError):
5686  F.batch_norm(input, running_mean, torch.rand(size))
5687 
5688  def test_batchnorm_raises_error_if_weight_is_not_same_size_as_input(self):
5689  input = torch.rand(2, 10)
5690  running_mean = torch.rand(10)
5691  running_var = torch.rand(10)
5692  wrong_sizes = [9, 11]
5693  for size in wrong_sizes:
5694  with self.assertRaises(RuntimeError):
5695  F.batch_norm(input, running_mean, running_var, weight=Parameter(torch.rand(size)))
5696 
5697  def test_batchnorm_raises_error_if_bias_is_not_same_size_as_input(self):
5698  input = torch.rand(2, 10)
5699  running_mean = torch.rand(10)
5700  running_var = torch.rand(10)
5701  wrong_sizes = [9, 11]
5702  for size in wrong_sizes:
5703  with self.assertRaises(RuntimeError):
5704  F.batch_norm(input, running_mean, running_var, bias=Parameter(torch.rand(size)))
5705 
5706  def _test_batchnorm_grad(self, device="cpu", dtype=torch.double):
5707  bs, n_feat, size_feat = 4, 5, 6
5708  input = torch.arange(bs * n_feat * size_feat, device=device,
5709  requires_grad=True, dtype=dtype).view(bs, n_feat, size_feat)
5710  weight = torch.arange(1, n_feat + 1, device=device, requires_grad=True, dtype=dtype)
5711  bias = torch.arange(n_feat, device=device, requires_grad=True, dtype=dtype)
5712  running_mean = 1 - torch.arange(n_feat, device=device, dtype=dtype)
5713  running_var = 2 * torch.arange(n_feat, device=device, dtype=dtype)
5714  for training in [False, True]:
5715  _assertGradAndGradgradChecks(self, F.batch_norm, (input, running_mean, running_var, weight, bias,
5716  training, 0.1, 0.0001))
5717 
5718  def _test_batchnorm_eval(self, device="cpu", dtype=torch.float):
5719  module = nn.BatchNorm1d(3).to(device, dtype)
5720  module.eval()
5721 
5722  data = torch.rand(4, 3, device=device, dtype=dtype, requires_grad=True)
5723  grad = torch.rand(4, 3, device=device, dtype=dtype)
5724 
5725  # 1st pass
5726  res1 = module(data)
5727  res1.backward(grad)
5728  grad1 = data.grad.clone()
5729 
5730  # 2nd pass
5731  if data.grad is not None:
5732  data.grad.data.zero_()
5733 
5734  res2 = module(data)
5735  res2.backward(grad)
5736  grad2 = data.grad.clone()
5737  self.assertEqual(res1, res2)
5738  self.assertEqual(grad1, grad2)
5739 
5740  # track_running_stats=False
5741  module = nn.BatchNorm1d(3, track_running_stats=False).to(device, dtype)
5742 
5743  data = torch.rand(4, 3, device=device, dtype=dtype, requires_grad=True)
5744  grad = torch.rand(4, 3, device=device, dtype=dtype)
5745 
5746  # 1st pass
5747  res1 = module(data)
5748  res1.backward(grad)
5749  grad1 = data.grad.clone()
5750 
5751  # set eval
5752  module.eval()
5753 
5754  # 2nd pass
5755  if data.grad is not None:
5756  data.grad.data.zero_()
5757 
5758  res2 = module(data)
5759  res2.backward(grad)
5760  grad2 = data.grad.clone()
5761  self.assertEqual(res1, res2)
5762  self.assertEqual(grad1, grad2)
5763 
5764  def _test_batchnorm_simple_average(self, test_type=torch.FloatTensor):
5765  module = nn.BatchNorm1d(3, momentum=None).type(test_type)
5766  zeros = torch.zeros(3).type(test_type)
5767  ones = torch.ones(3).type(test_type)
5768  self.assertEqual(module.running_mean, zeros)
5769  self.assertEqual(module.running_var, ones)
5770 
5771  data1 = torch.rand(4, 3).type(test_type)
5772  data2 = torch.rand(4, 3).type(test_type)
5773 
5774  # 1st pass
5775  res1 = module(data1)
5776  running_mean1 = module.running_mean.clone()
5777  running_var1 = module.running_var.clone()
5778  self.assertNotEqual(running_mean1, zeros)
5779  self.assertNotEqual(running_var1, ones)
5780 
5781  # reset stats
5782  module.reset_running_stats()
5783  self.assertEqual(module.running_mean, zeros)
5784  self.assertEqual(module.running_var, ones)
5785 
5786  # 2nd pass
5787  res2 = module(data2)
5788  running_mean2 = module.running_mean.clone()
5789  running_var2 = module.running_var.clone()
5790  self.assertNotEqual(running_mean2, zeros)
5791  self.assertNotEqual(running_var2, ones)
5792 
5793  # reset stats
5794  module.reset_running_stats()
5795  self.assertEqual(module.running_mean, zeros)
5796  self.assertEqual(module.running_var, ones)
5797 
5798  # 3rd (combined) pass
5799  res3 = module(data1)
5800  res4 = module(data2)
5801  self.assertEqual(res3, res1)
5802  self.assertEqual(res4, res2)
5803  self.assertAlmostEqual(module.running_mean, (running_mean1 + running_mean2) / 2)
5804  self.assertAlmostEqual(module.running_var, (running_var1 + running_var2) / 2)
5805 
5806  def test_pairwise_distance(self):
5807  input1 = torch.randn(4, 4, requires_grad=True)
5808  input2 = torch.randn(4, 4, requires_grad=True)
5809  self.assertTrue(gradcheck(lambda x, y: F.pairwise_distance(x, y), (input1, input2)))
5810 
5811  @skipIfRocm
5812  def test_pdist(self):
5813  for device, trans in itertools.product(device_(), [False, True]):
5814  inp = torch.randn(4, 5, dtype=torch.double, device=device, requires_grad=True)
5815  if trans:
5816  inp = inp.transpose(0, 1)
5817  for p in [0, 1, 2, 0.5, 1.5, 2.5, float('inf')]:
5818  self.assertTrue(gradcheck(lambda x: F.pdist(x, p), (inp,)))
5819 
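# [Editor's note] A hedged reminder (not part of the original tests) of what
# F.pdist returns: the flattened upper triangle of the pairwise distance matrix,
# i.e. N*(N-1)/2 values for an (N, D) input.
def _editor_example_pdist_output_size():
    import torch
    import torch.nn.functional as F
    x = torch.randn(4, 5)
    d = F.pdist(x, p=2)
    assert d.shape == (4 * 3 // 2,)           # 6 pairwise distances
    return d
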
5820  def test_pdist_zeros(self):
5821  """Test that grad is still valid when dist is 0"""
5822  for device in device_():
5823  inp = torch.randn(1, 3, dtype=torch.double, device=device, requires_grad=True).repeat([2, 1])
5824  for p in [0, 1, 2, 0.5, 1.5, 2.5, float('inf')]:
5825  self.assertTrue(gradcheck(lambda x: F.pdist(x, p), (inp,)))
5826 
5827  def test_pdist_empty_row(self):
5828  for device in device_():
5829  inp = torch.randn(1, 3, dtype=torch.double, device=device, requires_grad=True)
5830  self.assertTrue(gradcheck(F.pdist, (inp,)))
5831 
5832  def test_pdist_empty_col(self):
5833  for device in device_():
5834  inp = torch.randn(4, 0, dtype=torch.double, device=device, requires_grad=True)
5835  self.assertTrue(gradcheck(F.pdist, (inp,)))
5836 
5837  @unittest.expectedFailure
5838  def test_pdist_cpu_gradgrad_unimplemented(self):
5839  inp = torch.randn(4, 5, requires_grad=True)
5840  gradgradcheck(F.pdist, (inp,))
5841 
5842  @skipIfRocm
5843  @unittest.expectedFailure
5844  def test_pdist_cuda_gradgrad_unimplemented(self):
5845  inp = torch.randn(4, 5, device='cuda', requires_grad=True)
5846  gradgradcheck(F.pdist, (inp,))
5847 
5848  def test_cosine_embedding_loss_no_reduce(self):
5849  input1 = torch.randn(15, 10, requires_grad=True)
5850  input2 = torch.randn(15, 10, requires_grad=True)
5851  target = torch.randn(15).sign()
5852  self.assertTrue(gradcheck(lambda x, y, z: F.cosine_embedding_loss(
5853  x, y, z, reduction='none'), (input1, input2, target)))
5854  self.assertEqual(F.cosine_embedding_loss(input1, input2, target, reduction='none'),
5855  loss_reference_fns['CosineEmbeddingLoss'](input1, input2, target, reduction='none'))
5856 
5857  def test_cosine_embedding_loss_margin_no_reduce(self):
5858  input1 = torch.randn(15, 10, requires_grad=True)
5859  input2 = torch.randn(15, 10, requires_grad=True)
5860  target = torch.randn(15).sign()
5861  self.assertTrue(gradcheck(lambda x, y, z: F.cosine_embedding_loss(
5862  x, y, z, margin=0.5, reduction='none'), (input1, input2, target)))
5863  self.assertEqual(F.cosine_embedding_loss(input1, input2, target, margin=0.5, reduction='none'),
5864  loss_reference_fns['CosineEmbeddingLoss'](input1, input2, target,
5865  margin=0.5, reduction='none'))
5866 
5867  def test_margin_ranking_loss_no_reduce(self):
5868  input1 = torch.randn(15).mul_(10).requires_grad_()
5869  input2 = torch.randn(15).mul_(10).requires_grad_()
5870  target = torch.randn(15).sign()
5871  self.assertTrue(gradcheck(lambda x, y, z: F.margin_ranking_loss(
5872  x, y, z, reduction='none'), (input1, input2, target)))
5873  self.assertEqual(F.margin_ranking_loss(input1, input2, target, reduction='none'),
5874  loss_reference_fns['MarginRankingLoss'](input1, input2, target, reduction='none'))
5875 
5876  def test_margin_ranking_loss_margin_no_reduce(self):
5877  input1 = torch.randn(15).mul_(10).requires_grad_()
5878  input2 = torch.randn(15).mul_(10).requires_grad_()
5879  target = torch.randn(15).sign()
5880  self.assertTrue(gradcheck(lambda x, y, z: F.margin_ranking_loss(
5881  x, y, z, margin=0.5, reduction='none'), (input1, input2, target)))
5882  self.assertEqual(F.margin_ranking_loss(input1, input2, target, margin=0.5, reduction='none'),
5883  loss_reference_fns['MarginRankingLoss'](input1, input2, target, margin=0.5, reduction='none'))
5884 
5885  def test_triplet_margin_loss(self):
5886  input1 = torch.randn(5, 10, requires_grad=True)
5887  input2 = torch.randn(5, 10, requires_grad=True)
5888  input3 = torch.randn(5, 10, requires_grad=True)
5889  self.assertTrue(gradcheck(lambda x1, x2, x3: F.triplet_margin_loss(
5890  x1, x2, x3), (input1, input2, input3)))
5891  self.assertEqual(F.triplet_margin_loss(input1, input2, input3),
5892  loss_reference_fns['TripletMarginLoss'](input1, input2, input3))
5893 
5894  def test_triplet_margin_loss_swap(self):
5895  input1 = torch.randn(5, 10, requires_grad=True)
5896  input2 = torch.randn(5, 10, requires_grad=True)
5897  input3 = torch.randn(5, 10, requires_grad=True)
5898  self.assertTrue(gradcheck(lambda x1, x2, x3: F.triplet_margin_loss(
5899  x1, x2, x3, swap=True), (input1, input2, input3)))
5900  self.assertEqual(F.triplet_margin_loss(input1, input2, input3, swap=True),
5901  loss_reference_fns['TripletMarginLoss'](input1, input2, input3, swap=True))
5902 
5903  def test_triplet_margin_loss_no_reduce(self):
5904  input1 = torch.randn(5, 10, requires_grad=True)
5905  input2 = torch.randn(5, 10, requires_grad=True)
5906  input3 = torch.randn(5, 10, requires_grad=True)
5907  self.assertTrue(gradcheck(lambda x1, x2, x3: F.triplet_margin_loss(
5908  x1, x2, x3, reduction='none'), (input1, input2, input3)))
5909  self.assertEqual(F.triplet_margin_loss(input1, input2, input3, reduction='none'),
5910  loss_reference_fns['TripletMarginLoss'](input1, input2, input3, reduction='none'))
5911 
5912  def test_triplet_margin_loss_swap_no_reduce(self):
5913  input1 = torch.randn(5, 10, requires_grad=True)
5914  input2 = torch.randn(5, 10, requires_grad=True)
5915  input3 = torch.randn(5, 10, requires_grad=True)
5916  self.assertTrue(gradcheck(lambda x1, x2, x3: F.triplet_margin_loss(
5917  x1, x2, x3, swap=True, reduction='none'), (input1, input2, input3)))
5918  self.assertEqual(F.triplet_margin_loss(input1, input2, input3, swap=True, reduction='none'),
5919  loss_reference_fns['TripletMarginLoss'](input1, input2, input3, swap=True, reduction='none'))
5920 
5921  def test_pointwise_loss_target_grad_none_reduction(self):
5922  i = torch.randn(5, 10)
5923  t = torch.randn(5, 10, requires_grad=True)
5924  self.assertEqual(F.mse_loss(i, t, reduction='none').size(), t.size())
5925  self.assertEqual(F.l1_loss(i, t, reduction='none').size(), t.size())
5926 
5927  def test_pointwise_loss_broadcast(self):
5928  losses = {
5929  'mse_loss': lambda x, y, r: F.mse_loss(x, y, reduction=r),
5930  'l1_loss': lambda x, y, r: F.l1_loss(x, y, reduction=r),
5931  'smooth_l1_loss': lambda x, y, r: F.smooth_l1_loss(x, y, reduction=r),
5932  }
5933 
5934  input = torch.randn(2, 1, requires_grad=True)
5935  for _name, fn in losses.items():
5936  for requires_grad in [True, False]:
5937  # When target.requires_grad=True, the loss is implemented in Python; otherwise it is in TH.
5938  target = torch.randn(2, 10, requires_grad=requires_grad)
5939  for reduction in ['none', 'mean', 'sum']:
5940  l = fn(input, target, reduction)
5941  if reduction == 'none':
5942  self.assertEqual(l.size(), target.size())
5943  self.assertTrue(gradcheck(fn, (input, target, reduction)))
5944 
5945  def test_cosine_similarity(self):
5946  input1 = torch.randn(4, 4, requires_grad=True)
5947  input2 = torch.randn(4, 4, requires_grad=True)
5948  self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y), (input1, input2)))
5949 
5950  input1 = torch.randn(4, 5, 6, requires_grad=True)
5951  input2 = torch.randn(4, 5, 6, requires_grad=True)
5952  self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=0), (input1, input2)))
5953  self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2)))
5954 
5955  input1 = torch.randn((), requires_grad=True)
5956  input2 = torch.randn((), requires_grad=True)
5957  self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=0), (input1, input2)))
5958  self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2)))
5959 
5960  # Check cosine_similarity input/output shapes
5961  input_size = (1, 3, 2, 1)
5962  expected_size = (1, 2, 1)
5963  input1 = torch.randn(input_size, requires_grad=True)
5964  input2 = torch.randn(input_size, requires_grad=True)
5965  self.assertEqual(F.cosine_similarity(input1, input2, dim=1).size(), expected_size)
5966 
5967  # Check numerical precision, issue #18057
5968  vv1 = torch.tensor([float(i) for i in range(84)]).unsqueeze(0)
5969  vv2 = torch.tensor([float(i) for i in range(84)]).unsqueeze(0)
5970  out = F.cosine_similarity(vv1, vv2)
5971  self.assertLessEqual(out, 1.0)
5972 
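# Note: F.cosine_similarity is documented as x1 . x2 / max(||x1|| * ||x2||, eps)
# along `dim`; the final check above guards against the result drifting
# slightly above 1.0 for (near-)identical inputs due to floating point
# rounding (issue #18057).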
5973  def test_grid_sample_error_checking(self):
5974  input = torch.empty(1, 1, 2, 2)
5975  grid = torch.empty(1, 1, 1, 2)
5976 
5977  # assert no error
5978  F.grid_sample(input, grid)
5979 
5980  with self.assertRaisesRegex(ValueError, "but got: 'garbage'"):
5981  F.grid_sample(input, grid, mode='garbage')
5982 
5983  with self.assertRaisesRegex(ValueError, "but got: 'garbage'"):
5984  F.grid_sample(input, grid, padding_mode='garbage')
5985 
5986  with self.assertRaisesRegex(RuntimeError, "expected input and grid to have same dtype"):
5987  F.grid_sample(input.float(), grid.double())
5988 
5989  with self.assertRaisesRegex(RuntimeError, "expected 4D or 5D input"):
5990  F.grid_sample(input[0], grid)
5991 
5992  with self.assertRaisesRegex(RuntimeError, "grid with same number of dimensions"):
5993  F.grid_sample(input, torch.empty(1, 1, 1, 1, 3))
5994 
5995  with self.assertRaisesRegex(RuntimeError, "expected grid and input to have same batch size"):
5996  F.grid_sample(input, torch.empty(2, 1, 1, 2))
5997 
5998  with self.assertRaisesRegex(RuntimeError, "expected grid to have size 2 in last dimension"):
5999  F.grid_sample(input, torch.empty(1, 1, 1, 3))
6000 
6001  with self.assertRaisesRegex(RuntimeError, "expected input to have non-empty spatial dimensions"):
6002  F.grid_sample(torch.empty(1, 1, 0, 2), grid)
6003 
6004  if TEST_CUDA:
6005  with self.assertRaisesRegex(RuntimeError, "expected input and grid to be on same device"):
6006  F.grid_sample(input.cuda(), grid)
6007 
6008  def test_grid_sample(self):
6009  def test(N, C, H, W, mode, padding_mode):
6010  def test_shape(N, C, IH, IW, H, W, mode, padding_mode):
6011  for grid_dim_contig_order in [(0, 1, 2, 3), (0, 3, 1, 2), (3, 0, 1, 2), (0, 2, 1, 3)]:
6012  # grid_dim_contig_order specifies the dimension order that makes the
6013  # grid contiguous, i.e., grid.permute(grid_dim_contig_order) is
6014  # contiguous.
6015  # e.g., with grid_dim_contig_order=[0, 3, 1, 2], the grid should be
6016  # initialized as a contiguous tensor of shape [N, 2, H, W]
6017  # and then permuted to [N, H, W, 2].
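# Worked example: with grid_dim_contig_order=(3, 0, 1, 2) the grid data is
# created with shape [2, N, H, W]; grid_fwd_permute then works out to
# (1, 2, 3, 0), so data.permute(1, 2, 3, 0) has the expected shape
# [N, H, W, 2], while grid.permute(3, 0, 1, 2) recovers the contiguous layout.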
6018  grid_shape = [N, H, W, 2]
6019  grid_init_shape = [grid_shape[d] for d in grid_dim_contig_order]
6020  grid_fwd_permute = [None, None, None, None]
6021  for i, d in enumerate(grid_dim_contig_order):
6022  grid_fwd_permute[d] = i
6023 
6024  def get_grid(device='cpu', data=None):
6025  if data is not None:
6026  assert list(data.shape) == grid_shape
6027  data = data.permute(grid_dim_contig_order).to(device)
6028  else:
6029  data = torch.randn(grid_init_shape, device=device)
6030  grid = data.permute(grid_fwd_permute)
6031  assert grid.permute(grid_dim_contig_order).is_contiguous()
6032  return grid
6033 
6034  input_cpu = torch.randn(C, N, IH, IW).transpose(0, 1).requires_grad_()
6035  grid_cpu = get_grid().requires_grad_()
6036  out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6037  self.assertTrue(out_cpu.size() == torch.Size([N, C, H, W]))
6038 
6039  gradients = torch.randn_like(out_cpu)
6040  out_cpu.backward(gradients)
6041 
6042  if TEST_CUDA:
6043  input_cuda = input_cpu.detach().transpose(0, 1).cuda().transpose(0, 1).requires_grad_()
6044  grid_cuda = get_grid('cuda', grid_cpu.detach()).requires_grad_()
6045  out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6046  self.assertEqual(out_cpu, out_cuda)
6047 
6048  out_cuda.backward(gradients.cuda())
6049  self.assertEqual(input_cpu.grad, input_cuda.grad)
6050  self.assertEqual(grid_cpu.grad, grid_cuda.grad, prec=5e-5)
6051 
6052  # check that inputs with zero strides (from expand) don't error out
6053  base_input = torch.randn(N, C, 1, IW)
6054  input_cpu = base_input.expand_as(input_cuda).requires_grad_()
6055  out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6056 
6057  input_cuda = base_input.cuda().expand_as(input_cuda).requires_grad_()
6058  out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6059  self.assertEqual(out_cpu, out_cuda)
6060 
6061  # test same size output
6062  test_shape(N, C, H, W, H, W, mode, padding_mode)
6063 
6064  # test larger output
6065  N = random.randint(2, 8)
6066  C = random.randint(2, 8)
6067  IH = random.randint(2, 8)
6068  IW = random.randint(2, 8)
6069  H = random.randint(IH + 1, 12)
6070  W = random.randint(IW + 1, 12)
6071  test_shape(N, C, IH, IW, H, W, mode, padding_mode)
6072 
6073  # test smaller output
6074  N = random.randint(2, 8)
6075  C = random.randint(2, 8)
6076  IH = random.randint(2, 8)
6077  IW = random.randint(2, 8)
6078  H = random.randint(2, IH)
6079  W = random.randint(2, IW)
6080  test_shape(N, C, IH, IW, H, W, mode, padding_mode)
6081 
6082  # test 1x1 input
6083  N = random.randint(2, 8)
6084  C = random.randint(2, 8)
6085  IH = 1
6086  IW = 1
6087  H = random.randint(2, 5)
6088  W = random.randint(2, 5)
6089  test_shape(N, C, IH, IW, H, W, mode, padding_mode)
6090 
6091  # testing empty grid
6092  N = random.randint(2, 8)
6093  C = random.randint(2, 8)
6094  IH = random.randint(2, 8)
6095  IW = random.randint(2, 8)
6096  W = random.randint(3, IW + 2)
6097  test_shape(N, C, IH, IW, 0, W, mode, padding_mode)
6098 
6099  # testing empty channel
6100  N = random.randint(2, 8)
6101  IH = random.randint(2, 8)
6102  IW = random.randint(2, 8)
6103  H = random.randint(3, IH + 2)
6104  W = random.randint(3, IW + 2)
6105  test_shape(N, 0, IH, IW, H, W, mode, padding_mode)
6106 
6107  # testing empty batch
6108  C = random.randint(2, 8)
6109  IH = random.randint(2, 8)
6110  IW = random.randint(2, 8)
6111  H = random.randint(3, IH + 2)
6112  W = random.randint(3, IW + 2)
6113  test_shape(0, C, IH, IW, H, W, mode, padding_mode)
6114 
6115  for mode in ('bilinear', 'nearest'):
6116  for padding_mode in ('zeros', 'border', 'reflection'):
6117  # test known input on CPU
6118  input = torch.arange(1., 11).view(1, 1, 2, 5)
6119  grid = torch.tensor(
6120  [[[-0.9, -4.1], [0, 0.2000], [1, -1], [-0.333, 1e-10], [0.5, 1.0]],
6121  [[-1.0, -0.5], [0, 0.3333], [1, -1], [-0.200, 1e-10], [1.5, 0.5]]]).view(1, 2, 5, 2)
6122  if mode == 'bilinear':
6123  if padding_mode == 'zeros':
6124  groundtruth = torch.tensor(
6125  [[0.0000, 6.0000000000, 5.0000, 4.8340, 9.0000],
6126  [2.2500, 6.3332500450, 5.0000, 5.1000, 0.0000]]).view(1, 1, 2, 5)
6127  elif padding_mode == 'border':
6128  groundtruth = torch.tensor(
6129  [[1.2000, 6.0000000000, 5.0000, 4.8340, 9.0000],
6130  [2.2500, 6.3332500450, 5.0000, 5.1000, 8.7500]]).view(1, 1, 2, 5)
6131  elif padding_mode == 'reflection':
6132  groundtruth = torch.tensor(
6133  [[3.4500, 6.0000000000, 5.0000, 4.8340, 9.0000],
6134  [2.2500, 6.3332500450, 5.0000, 5.1000, 7.7500]]).view(1, 1, 2, 5)
6135  else:
6136  assert False, "missing groundtruth test for padding mode '{}'".format(padding_mode)
6137  elif mode == 'nearest':
6138  if padding_mode == 'zeros':
6139  groundtruth = torch.tensor(
6140  [[0., 8., 5., 7., 9.],
6141  [1., 8., 5., 8., 0.]]).view(1, 1, 2, 5)
6142  elif padding_mode == 'border':
6143  groundtruth = torch.tensor(
6144  [[1., 8., 5., 7., 9.],
6145  [1., 8., 5., 8., 10.]]).view(1, 1, 2, 5)
6146  elif padding_mode == 'reflection':
6147  groundtruth = torch.tensor(
6148  [[1., 8., 5., 7., 9.],
6149  [1., 8., 5., 8., 9.]]).view(1, 1, 2, 5)
6150  else:
6151  assert False, "missing groundtruth test for padding mode '{}'".format(padding_mode)
6152  else:
6153  assert False, "missing groundtruth test for interpolation mode '{}'".format(mode)
6154  output = F.grid_sample(input, grid, mode=mode, padding_mode=padding_mode)
6155  self.assertEqual(output, groundtruth,
6156  "groundtruth comparison failed for mode={}, "
6157  "padding_mode={}".format(mode, padding_mode))
6158 
6159  # do gradcheck
6160  N = random.randint(2, 8)
6161  C = random.randint(2, 6)
6162  H = random.randint(2, 8)
6163  W = random.randint(2, 8)
6164  input = torch.randn(N, C, H, W, requires_grad=True)
6165  grid = torch.randn(N, H, W, 2, requires_grad=True)
6166  self.assertTrue(gradcheck(
6167  lambda inp, grid: F.grid_sample(inp, grid, mode=mode, padding_mode=padding_mode),
6168  (input, grid)))
6169 
6170  test(N, C, H, W, mode, padding_mode)
6171  if TEST_CUDNN:
6172  with cudnn.flags(enabled=False):
6173  test(N, C, H, W, mode, padding_mode)
6174 
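# Note: the grid passed to F.grid_sample holds normalized sampling locations
# in [-1, 1] per spatial dimension, with (-1, -1) at one input corner and
# (1, 1) at the opposite corner; the hand-computed groundtruth tables above
# are consistent with this convention. Locations outside [-1, 1] are handled
# according to padding_mode ('zeros', 'border' or 'reflection').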
6175  def test_grid_sample_3d(self):
6176  def test(N, C, D, H, W, mode, padding_mode):
6177  def test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode):
6178  input_cpu = torch.randn(C, N, ID, IH, IW).transpose(0, 1).requires_grad_()
6179  grid_cpu = torch.randn(D, N, H, W, 3).transpose(0, 1).requires_grad_()
6180  out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6181  self.assertTrue(out_cpu.size() == torch.Size([N, C, D, H, W]))
6182 
6183  gradients = torch.randn_like(out_cpu)
6184  out_cpu.backward(gradients)
6185 
6186  if TEST_CUDA:
6187  input_cuda = input_cpu.detach().transpose(0, 1).cuda().transpose(0, 1).requires_grad_()
6188  grid_cuda = grid_cpu.detach().transpose(0, 1).cuda().transpose(0, 1).requires_grad_()
6189  out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6190  self.assertEqual(out_cpu, out_cuda)
6191 
6192  out_cuda.backward(gradients.cuda())
6193  self.assertEqual(input_cpu.grad, input_cuda.grad)
6194  self.assertEqual(grid_cpu.grad, grid_cuda.grad, prec=5e-5)
6195 
6196  # check that inputs with zero strides (from expand) don't error out
6197  base_input = torch.randn(N, C, 1, IH, IW)
6198  input_cpu = base_input.expand_as(input_cuda).requires_grad_()
6199  grid_cpu = torch.randn(N, D, H, W, 3, requires_grad=True)
6200  out_cpu = F.grid_sample(input_cpu, grid_cpu, mode=mode, padding_mode=padding_mode)
6201 
6202  input_cuda = base_input.cuda().expand_as(input_cuda).requires_grad_()
6203  grid_cuda = grid_cpu.detach().cuda().requires_grad_()
6204  out_cuda = F.grid_sample(input_cuda, grid_cuda, mode=mode, padding_mode=padding_mode)
6205  self.assertEqual(out_cpu, out_cuda)
6206 
6207  # test same size output
6208  test_shape(N, C, D, H, W, D, H, W, mode, padding_mode)
6209 
6210  # test larger output
6211  N = random.randint(2, 7)
6212  C = random.randint(2, 5)
6213  ID = random.randint(2, 7)
6214  IH = random.randint(2, 7)
6215  IW = random.randint(2, 7)
6216  D = random.randint(ID + 1, 10)
6217  H = random.randint(IH + 1, 10)
6218  W = random.randint(IW + 1, 10)
6219  test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode)
6220 
6221  # test smaller output
6222  N = random.randint(2, 7)
6223  C = random.randint(2, 5)
6224  ID = random.randint(2, 7)
6225  IH = random.randint(2, 7)
6226  IW = random.randint(2, 7)
6227  D = random.randint(2, ID)
6228  H = random.randint(2, IH)
6229  W = random.randint(2, IW)
6230  test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode)
6231 
6232  # test 1x1 input
6233  N = random.randint(2, 7)
6234  C = random.randint(2, 7)
6235  ID = 1
6236  IH = 1
6237  IW = 1
6238  H = random.randint(2, 5)
6239  W = random.randint(2, 5)
6240  test_shape(N, C, ID, IH, IW, D, H, W, mode, padding_mode)
6241 
6242  # testing empty grid
6243  N = random.randint(2, 7)
6244  C = random.randint(2, 5)
6245  ID = random.randint(2, 7)
6246  IH = random.randint(2, 7)
6247  IW = random.randint(2, 7)
6248  D = random.randint(3, ID + 2)
6249  W = random.randint(3, IW + 2)
6250  test_shape(N, C, ID, IH, IW, D, 0, W, mode, padding_mode)
6251 
6252  # testing empty channel
6253  N = random.randint(2, 7)
6254  ID = random.randint(2, 5)
6255  IH = random.randint(2, 7)
6256  IW = random.randint(2, 7)
6257  D = random.randint(3, ID + 2)
6258  H = random.randint(3, IH + 2)
6259  W = random.randint(3, IW + 2)
6260  test_shape(N, 0, ID, IH, IW, D, H, W, mode, padding_mode)
6261 
6262  # testing empty batch
6263  C = random.randint(2, 5)
6264  ID = random.randint(2, 7)
6265  IH = random.randint(2, 7)
6266  IW = random.randint(2, 7)
6267  D = random.randint(3, ID + 2)
6268  H = random.randint(3, IH + 2)
6269  W = random.randint(3, IW + 2)
6270  test_shape(0, C, ID, IH, IW, D, H, W, mode, padding_mode)
6271 
6272  for mode in ('bilinear', 'nearest'):
6273  for padding_mode in ('zeros', 'border', 'reflection'):
6274  # do gradcheck
6275  N = random.randint(2, 5)
6276  C = random.randint(2, 4)
6277  D = random.randint(2, 5)
6278  H = random.randint(2, 5)
6279  W = random.randint(2, 5)
6280  input = torch.randn(N, C, D, H, W, requires_grad=True)
6281  grid = torch.randn(N, D, H, W, 3, requires_grad=True)
6282  self.assertTrue(gradcheck(
6283  lambda inp, grid: F.grid_sample(inp, grid, mode=mode, padding_mode=padding_mode),
6284  (input, grid)))
6285 
6286  test(N, C, D, H, W, mode, padding_mode)
6287 
6288  def test_affine_grid(self):
6289  # test known input on CPU
6290  input = torch.arange(1., 7).view(1, 2, 3)
6291  output = F.affine_grid(input, torch.Size([1, 1, 2, 2]))
6292  groundtruth = torch.Tensor(
6293  [[[0, -3], [2, 5]], [[4, 7], [6, 15]]]).view(1, 2, 2, 2)
6294  self.assertEqual(output, groundtruth)
6295 
6296  # do gradcheck
6297  N = random.randint(1, 8)
6298  C = random.randint(1, 8)
6299  H = random.randint(1, 8)
6300  W = random.randint(1, 8)
6301  sz = torch.Size([N, C, H, W])
6302  inp = torch.randn(N, 2, 3, requires_grad=True)
6303  self.assertTrue(gradcheck(lambda inp: F.affine_grid(inp, sz), (inp,)))
6304 
6305  # test CPU against CUDA
6306  if TEST_CUDNN:
6307  input_cpu = torch.randn(N, 2, 3, requires_grad=True)
6308  out_cpu = F.affine_grid(input_cpu, sz)
6309  gradients = torch.randn(out_cpu.size())
6310  out_cpu.backward(gradients)
6311  input_gpu = input_cpu.detach().cuda().requires_grad_()
6312  out_cuda = F.affine_grid(input_gpu, sz)
6313  out_cuda.backward(gradients.cuda())
6314  self.assertEqual(out_cpu, out_cuda)
6315  self.assertEqual(input_cpu.grad, input_gpu.grad)
6316 
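# Illustrative sketch (not executed here): with an identity transform,
# F.affine_grid returns the normalized base grid itself; for a 1x1x2x2 output
# that is just the four corners of [-1, 1] x [-1, 1], consistent with the
# hand-computed groundtruth above.
#     theta = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]])
#     F.affine_grid(theta, torch.Size([1, 1, 2, 2]))
#     # -> shape (1, 2, 2, 2): [[[[-1., -1.], [1., -1.]], [[-1., 1.], [1., 1.]]]]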
6317  @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
6318  "Scipy v1.0 and/or numpy not found")
6319  @skipIfRocm
6320  def test_affine_2d_rotate0(self):
6321  # scipy versions before 1.0.0 do not support homogeneous coordinates in
6322  # scipy.ndimage.affine_transform, so we need to skip.
6323  for device in device_():
6324  input_size = [1, 1, 3, 3]
6325  input_ary = np.array(np.random.random(input_size), dtype=np.float32)
6326  output_size = [1, 1, 5, 5]
6327  angle_rad = 0.
6328 
6329  transform_tensor, transform_ary, offset = \
6330  _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6331 
6332  scipy_ary = scipy.ndimage.affine_transform(
6333  input_ary[0, 0],
6334  transform_ary,
6335  offset=offset,
6336  output_shape=output_size[2:],
6337  order=1,
6338  mode='nearest',
6339  prefilter=False)
6340 
6341  affine_tensor = torch.nn.functional.affine_grid(
6342  transform_tensor,
6343  torch.Size(output_size)
6344  )
6345 
6346  gridsample_ary = torch.nn.functional.grid_sample(
6347  torch.tensor(input_ary, device=device).to(device),
6348  affine_tensor,
6349  padding_mode='border'
6350  ).to('cpu').numpy()
6351 
6352  assert np.abs(scipy_ary.mean() - gridsample_ary.mean()) < 1e-6
6353  assert np.abs(scipy_ary - gridsample_ary).max() < 1e-6
6354 
6355  @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
6356  "Scipy v1.0 and/or numpy not found")
6357  @skipIfRocm
6358  def test_affine_2d_rotate90(self):
6359  # scipy versions before 1.0.0 do not support homogeneous coordinates in
6360  # scipy.ndimage.affine_transform, so we need to skip.
6361  for device, input_size2dsq, output_size2dsq in \
6362  itertools.product(device_(), input_size2dsq_(), output_size2dsq_()):
6363  input_size = input_size2dsq
6364  input_ary = np.array(np.random.random(input_size), dtype=np.float32)
6365  output_size = output_size2dsq
6366  angle_rad = 0.25 * math.pi * 2
6367 
6368  transform_tensor, transform_ary, offset = \
6369  _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6370 
6371  scipy_ary = scipy.ndimage.affine_transform(
6372  input_ary[0, 0],
6373  transform_ary,
6374  offset=offset,
6375  output_shape=output_size[2:],
6376  order=1,
6377  mode='nearest',
6378  prefilter=True)
6379 
6380  if input_size2dsq == output_size2dsq:
6381  assert np.abs(scipy_ary.mean() - input_ary.mean()) < 1e-6
6382  assert np.abs(scipy_ary[0, 0] - input_ary[0, 0, 0, -1]).max() < 1e-6
6383  assert np.abs(scipy_ary[0, -1] - input_ary[0, 0, -1, -1]).max() < 1e-6
6384  assert np.abs(scipy_ary[-1, -1] - input_ary[0, 0, -1, 0]).max() < 1e-6
6385  assert np.abs(scipy_ary[-1, 0] - input_ary[0, 0, 0, 0]).max() < 1e-6
6386 
6387  affine_tensor = torch.nn.functional.affine_grid(
6388  transform_tensor,
6389  torch.Size(output_size)
6390  )
6391 
6392  gridsample_ary = torch.nn.functional.grid_sample(
6393  torch.tensor(input_ary, device=device).to(device),
6394  affine_tensor,
6395  padding_mode='border'
6396  ).to('cpu').numpy()
6397 
6398  assert np.abs(scipy_ary.mean() - gridsample_ary.mean()) < 1e-6
6399  assert np.abs(scipy_ary - gridsample_ary).max() < 1e-6
6400 
6401  @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
6402  "Scipy v1.0 and/or numpy not found")
6403  @skipIfRocm
6404  def test_affine_2d_rotate45(self):
6405  # scipy versions before 1.0.0 do not support homogeneous coordinates in
6406  # scipy.ndimage.affine_transform, so we need to skip.
6407  for device in device_():
6408  input_size = [1, 1, 3, 3]
6409  input_ary = np.array(np.zeros(input_size), dtype=np.float32)
6410  input_ary[0, 0, 0, :] = 0.5
6411  input_ary[0, 0, 2, 2] = 1.0
6412  output_size = [1, 1, 3, 3]
6413  angle_rad = 0.125 * math.pi * 2
6414 
6415  transform_tensor, transform_ary, offset = \
6416  _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6417 
6418  scipy_ary = scipy.ndimage.affine_transform(
6419  input_ary[0, 0],
6420  transform_ary,
6421  offset=offset,
6422  output_shape=output_size[2:],
6423  order=1,
6424  mode='nearest',
6425  prefilter=False)
6426 
6427  affine_tensor = torch.nn.functional.affine_grid(
6428  transform_tensor,
6429  torch.Size(output_size)
6430  )
6431 
6432  gridsample_ary = torch.nn.functional.grid_sample(
6433  torch.tensor(input_ary, device=device).to(device),
6434  affine_tensor,
6435  padding_mode='border'
6436  ).to('cpu').numpy()
6437 
6438  assert np.abs(scipy_ary - gridsample_ary).max() < 1e-6
6439 
6440  @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
6441  "Scipy v1.0 and/or numpy not found")
6442  @skipIfRocm
6443  def test_affine_2d_rotateRandom(self):
6444  # scipy versions before 1.0.0 do not support homogeneous coordinates in
6445  # scipy.ndimage.affine_transform, so we need to skip.
6446  for device, angle_rad, input_size2d, output_size2d in \
6447  itertools.product(device_(), angle_rad_(), input_size2d_(), output_size2d_()):
6448 
6449  input_size = input_size2d
6450  input_ary = np.array(np.random.random(input_size), dtype=np.float32).round(3)
6451  output_size = output_size2d
6452 
6453  input_ary[0, 0, 0, 0] = 2
6454  input_ary[0, 0, 0, -1] = 4
6455  input_ary[0, 0, -1, 0] = 6
6456  input_ary[0, 0, -1, -1] = 8
6457 
6458  transform_tensor, transform_ary, grid_ary = \
6459  _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad)
6460 
6461  scipy_ary = scipy.ndimage.affine_transform(
6462  input_ary[0, 0],
6463  transform_ary,
6464  output_shape=output_size[2:],
6465  order=1,
6466  mode='nearest',
6467  prefilter=False)
6468 
6469  affine_tensor = torch.nn.functional.affine_grid(
6470  transform_tensor,
6471  torch.Size(output_size)
6472  )
6473 
6474  gridsample_ary = torch.nn.functional.grid_sample(
6475  torch.tensor(input_ary, device=device).to(device),
6476  affine_tensor,
6477  padding_mode='border'
6478  ).to('cpu').numpy()
6479 
6480  affine_tensor = affine_tensor.to('cpu')
6481 
6482  for r in range(affine_tensor.size(1)):
6483  for c in range(affine_tensor.size(2)):
6484  grid_out = np.dot(grid_ary, [r, c, 1])
6485  assert np.allclose(affine_tensor[0, r, c], grid_out[:2], atol=1e-5)
6486 
6487  assert np.abs(scipy_ary - gridsample_ary).max() < 1e-5
6488 
6489  @unittest.skipIf((not TEST_NUMPY) or (not TEST_SCIPY) or (scipy.__version__ < '1.0.0'),
6490  "Scipy v1.0 and/or numpy not found")
6491  @skipIfRocm
6492  def test_affine_3d_rotateRandom(self):
6493  # scipy versions before 1.0.0 do not support homogeneous coordinates in
6494  # scipy.ndimage.affine_transform, so we need to skip.
6495  for device, angle_rad, axis_vector, input_size3d, output_size3d in \
6496  itertools.product(device_(), angle_rad_(), axis_vector_(), input_size3d_(), output_size3d_()):
6497  input_size = input_size3d
6498  input_ary = np.array(np.random.random(input_size), dtype=np.float32)
6499  output_size = output_size3d
6500 
6501  input_ary[0, 0, 0, 0, 0] = 2
6502  input_ary[0, 0, 0, 0, -1] = 3
6503  input_ary[0, 0, 0, -1, 0] = 4
6504  input_ary[0, 0, 0, -1, -1] = 5
6505  input_ary[0, 0, -1, 0, 0] = 6
6506  input_ary[0, 0, -1, 0, -1] = 7
6507  input_ary[0, 0, -1, -1, 0] = 8
6508  input_ary[0, 0, -1, -1, -1] = 9
6509 
6510  transform_tensor, transform_ary, grid_ary = \
6511  _buildEquivalentAffineTransforms3d(device, input_size, output_size, angle_rad, axis_vector)
6512 
6513  scipy_ary = scipy.ndimage.affine_transform(
6514  input_ary[0, 0],
6515  transform_ary,
6516  output_shape=output_size[2:],
6517  order=1,
6518  mode='nearest',
6519  prefilter=False)
6520 
6521  affine_tensor = torch.nn.functional.affine_grid(
6522  transform_tensor,
6523  torch.Size(output_size)
6524  )
6525 
6526  gridsample_ary = torch.nn.functional.grid_sample(
6527  torch.tensor(input_ary, device=device).to(device),
6528  affine_tensor,
6529  padding_mode='border'
6530  ).to('cpu').numpy()
6531 
6532  affine_tensor = affine_tensor.to('cpu')
6533 
6534  for i in range(affine_tensor.size(1)):
6535  for r in range(affine_tensor.size(2)):
6536  for c in range(affine_tensor.size(3)):
6537  grid_out = np.dot(grid_ary, [i, r, c, 1])
6538  assert np.allclose(affine_tensor[0, i, r, c], grid_out[:3], atol=1e-5)
6539 
6540  assert np.abs(scipy_ary - gridsample_ary).max() < 1e-5
6541 
6542  def test_upsamplingNearest1d(self):
6543  m = nn.Upsample(size=4, mode='nearest')
6544  in_t = torch.ones(1, 1, 2)
6545  with warnings.catch_warnings(record=True) as w:
6546  out_t = m(in_t)
6547  self.assertEqual(torch.ones(1, 1, 4), out_t.data)
6548 
6549  input = torch.randn(1, 1, 2, requires_grad=True)
6550  gradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
6551 
6552  def test_upsamplingLinear1d(self):
6553  for align_corners in [True, False]:
6554  kwargs = dict(mode='linear', align_corners=align_corners)
6555 
6556  # test float scale factor up & downsampling
6557  for scale_factor in [0.5, 1.5, 2]:
6558  m = nn.Upsample(scale_factor=scale_factor, **kwargs)
6559  in_t = torch.ones(1, 1, 2)
6560  out_size = int(math.floor(in_t.shape[-1] * scale_factor))
6561  with warnings.catch_warnings(record=True) as w:
6562  out_t = m(in_t)
6563  self.assertEqual(torch.ones(1, 1, out_size), out_t.data)
6564 
6565  input = torch.randn(1, 1, 2, requires_grad=True)
6566  gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), (input,))
6567 
6568  def test_upsamplingLinear1d_spatial_invariance(self):
6569  m = nn.Upsample(scale_factor=3, mode='linear', align_corners=False)
6570  in_t_9 = torch.zeros(1, 1, 9)
6571  in_t_9[:, :, :4].normal_()
6572  with warnings.catch_warnings(record=True) as w:
6573  out_t_9 = m(in_t_9)
6574  out_t_5 = m(in_t_9[:, :, :5])
6575  self.assertEqual(out_t_9[:, :, :15], out_t_5)
6576 
6577  def test_upsamplingNearest2d(self):
6578  m = nn.Upsample(size=4, mode='nearest')
6579  in_t = torch.ones(1, 1, 2, 2)
6580  with warnings.catch_warnings(record=True) as w:
6581  out_t = m(Variable(in_t))
6582  self.assertEqual(torch.ones(1, 1, 4, 4), out_t.data)
6583 
6584  input = torch.randn(1, 1, 2, 2, requires_grad=True)
6585  self.assertEqual(
6586  F.interpolate(input, 4, mode='nearest'),
6587  F.interpolate(input, scale_factor=2, mode='nearest'))
6588  gradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
6589  gradgradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
6590 
6591  def test_upsamplingBilinear2d(self):
6592  for align_corners in [True, False]:
6593  kwargs = dict(mode='bilinear', align_corners=align_corners)
6594 
6595  # test float scale factor up & downsampling
6596  for scale_factor in [0.5, 1.5, 2]:
6597  m = nn.Upsample(scale_factor=scale_factor, **kwargs)
6598  in_t = torch.ones(1, 1, 2, 2)
6599  out_size = int(math.floor(in_t.shape[-1] * scale_factor))
6600  with warnings.catch_warnings(record=True) as w:
6601  out_t = m(in_t)
6602  self.assertEqual(torch.ones(1, 1, out_size, out_size), out_t.data)
6603 
6604  input = torch.randn(1, 1, 2, 2, requires_grad=True)
6605  gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
6606 
6607  def test_upsamplingBicubic2d(self):
6608  # test output against known input
6609  in_t = torch.arange(4).view(1, 1, 2, 2).type(torch.FloatTensor)
6610  expected_out_t = torch.Tensor(
6611  [[[[0.00000, 0.31481, 0.68519, 1.00000],
6612  [0.62963, 0.94444, 1.31481, 1.62963],
6613  [1.37037, 1.68518, 2.05556, 2.37037],
6614  [2.00000, 2.31481, 2.68519, 3.00000]]]])
6615  out_t = F.interpolate(in_t, scale_factor=2, mode='bicubic', align_corners=True)
6616  torch.set_printoptions(precision=5)
6617  self.assertEqual(out_t, expected_out_t)
6618 
6619  for align_corners in [True, False]:
6620  kwargs = dict(mode='bicubic', align_corners=align_corners)
6621 
6622  # test float scale factor up & downsampling
6623  for scale_factor in [0.5, 1.5, 2]:
6624  in_t = torch.ones(2, 2, 2, 2)
6625  out_t = F.interpolate(in_t, scale_factor=scale_factor, **kwargs)
6626  out_size = int(math.floor(in_t.shape[-1] * scale_factor))
6627  self.assertEqual(torch.ones(2, 2, out_size, out_size), out_t.data)
6628 
6629  input = torch.randn(2, 2, 2, 2, requires_grad=True)
6630  gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
6631 
6632  def test_upsamplingBilinear2d_spatial_invariance(self):
6633  m = nn.Upsample(scale_factor=3, mode='bilinear', align_corners=False)
6634  in_t_9 = torch.zeros(1, 1, 9, 9)
6635  in_t_9[:, :, :4, :4].normal_()
6636  with warnings.catch_warnings(record=True) as w:
6637  out_t_9 = m(in_t_9)
6638  out_t_5 = m(in_t_9[:, :, :5, :5])
6639  self.assertEqual(out_t_9[:, :, :15, :15], out_t_5)
6640 
6641  def test_upsamplingNearest3d(self):
6642  m = nn.Upsample(size=4, mode='nearest')
6643  in_t = torch.ones(1, 1, 2, 2, 2)
6644  with warnings.catch_warnings(record=True) as w:
6645  out_t = m(Variable(in_t))
6646  self.assertEqual(torch.ones(1, 1, 4, 4, 4), out_t.data)
6647 
6648  input = torch.randn(1, 1, 2, 2, 2, requires_grad=True)
6649  gradcheck(lambda x: F.interpolate(x, 4, mode='nearest'), [input])
6650 
6651  def test_upsamplingTrilinear3d(self):
6652  for align_corners in [True, False]:
6653  kwargs = dict(mode='trilinear', align_corners=align_corners)
6654 
6655  # test float scale factor up & downsampling
6656  for scale_factor in [0.5, 1.5, 2]:
6657  m = nn.Upsample(scale_factor=scale_factor, **kwargs)
6658  in_t = torch.ones(1, 1, 2, 2, 2)
6659  out_size = int(math.floor(in_t.shape[-1] * scale_factor))
6660  with warnings.catch_warnings(record=True) as w:
6661  out_t = m(in_t)
6662  self.assertEqual(torch.ones(1, 1, out_size, out_size, out_size), out_t.data)
6663 
6664  input = torch.randn(1, 1, 2, 2, 2, requires_grad=True)
6665  self.assertEqual(
6666  F.interpolate(input, (out_size, out_size, out_size), **kwargs),
6667  F.interpolate(input, scale_factor=scale_factor, **kwargs))
6668  gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
6669  gradgradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [input])
6670 
6671  def test_upsamplingTrilinear3d_spatial_invariance(self):
6672  m = nn.Upsample(scale_factor=3, mode='trilinear', align_corners=False)
6673  in_t_9 = torch.zeros(1, 1, 9, 9, 9)
6674  in_t_9[:, :, :4, :4, :4].normal_()
6675  with warnings.catch_warnings(record=True) as w:
6676  out_t_9 = m(in_t_9)
6677  out_t_5 = m(in_t_9[:, :, :5, :5, :5])
6678  self.assertEqual(out_t_9[:, :, :15, :15, :15], out_t_5)
6679 
6680  def test_interpolate(self):
6681  def _test_interpolate_helper(in_t, scale_factor, layer):
6682  out_size = int(math.floor(in_t.shape[-1] * scale_factor))
6683  dim = len(in_t.shape) - 2
6684  out_shape = [1, 1] + [out_size] * dim
6685  with warnings.catch_warnings(record=True) as w:
6686  out_t = m(in_t)
6687  self.assertEqual(torch.ones(out_shape), out_t)
6688 
6689  self.assertEqual(
6690  F.interpolate(in_t, (out_size,) * dim, **kwargs),
6691  F.interpolate(in_t, scale_factor=scale_factor, **kwargs))
6692  gradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [in_t])
6693  gradgradcheck(lambda x: F.interpolate(x, out_size, **kwargs), [in_t])
6694 
6695  def _make_input(dim):
6696  size = [1, 1]
6697  size += [2] * dim
6698  return torch.ones(size, requires_grad=True)
6699 
6700  device_list = ['cpu']
6701  if TEST_CUDA:
6702  device_list.append('cuda')
6703 
6704  for device in device_list:
6705  for scale_factor in [0.5, 1.5, 2]:
6706  for mode in ['nearest', 'area']:
6707  kwargs = dict(mode=mode)
6708  m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
6709  for input in [_make_input(1), _make_input(2), _make_input(3)]:
6710  _test_interpolate_helper(input, scale_factor, m)
6711 
6712  for align_corners in [True, False]:
6713  kwargs = dict(mode='linear', align_corners=align_corners)
6714  m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
6715  _test_interpolate_helper(_make_input(1), scale_factor, m)
6716 
6717  kwargs = dict(mode='bilinear', align_corners=align_corners)
6718  m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
6719  _test_interpolate_helper(_make_input(2), scale_factor, m)
6720 
6721  kwargs = dict(mode='bicubic', align_corners=align_corners)
6722 
6723  def m(t):
6724  return F.interpolate(t, scale_factor=scale_factor, **kwargs).to(device)
6725  _test_interpolate_helper(_make_input(2), scale_factor, m)
6726 
6727  kwargs = dict(mode='trilinear', align_corners=align_corners)
6728  m = nn.Upsample(scale_factor=scale_factor, **kwargs).to(device)
6729  _test_interpolate_helper(_make_input(3), scale_factor, m)
6730 
6731  def test_linear_broadcasting(self):
6732  m = nn.Linear(5, 8)
6733  inp = torch.randn(2, 3, 5)
6734  expected = m(inp.view(6, 5)).view(2, 3, 8)
6735  self.assertEqual(expected, m(inp))
6736 
6737  def test_bilinear(self):
6738  module = nn.Bilinear(10, 10, 8)
6739  input1 = torch.randn(4, 10, requires_grad=True)
6740  input2 = torch.randn(4, 10, requires_grad=True)
6741  grad_output = torch.randn(4, 8)
6742 
6743  res = module(input1, input2)
6744  expected = (torch.einsum("bi,kij,bj->bk", input1, module.weight, input2) +
6745  module.bias)
6746  self.assertEqual(res, expected)
6747  grads = torch.autograd.grad(res, [module.weight, module.bias, input1, input2], grad_output)
6748  grads_expected = torch.autograd.grad(expected, [module.weight, module.bias, input1, input2], grad_output)
6749  for g, ge in zip(grads, grads_expected):
6750  self.assertEqual(g, ge)
6751 
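# Note: the einsum reference above spells out what nn.Bilinear computes,
#     y[b, k] = bias[k] + sum_{i, j} input1[b, i] * weight[k, i, j] * input2[b, j]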
6752  def test_bilinear_no_bias(self):
6753  module = nn.Bilinear(10, 10, 8)
6754  module_no_bias = nn.Bilinear(10, 10, 8, False)
6755 
6756  module.bias.data.zero_()
6757  module.weight.data.copy_(module_no_bias.weight)
6758 
6759  input1 = torch.randn(4, 10, requires_grad=True)
6760  input2 = torch.randn(4, 10, requires_grad=True)
6761  grad_output = torch.randn(4, 8)
6762 
6763  def run(net):
6764  input1.grad = input2.grad = None
6765  output = net(input1, input2)
6766  output.backward(grad_output)
6767 
6768  return output.data, input1.grad.data, input2.grad.data
6769 
6770  out, g1, g2 = run(module)
6771  out_nb, g1_nb, g2_nb = run(module_no_bias)
6772 
6773  self.assertEqual(out, out_nb)
6774  self.assertEqual(g1, g1_nb)
6775  self.assertEqual(g2, g2_nb)
6776 
6777  _assertGradAndGradgradChecks(self,
6778  lambda x1, x2: F.bilinear(x1, x2, module_no_bias.weight, module_no_bias.bias),
6779  (input1, input2))
6780 
6781  def test_bilinear_broadcasting(self):
6782  m = nn.Bilinear(5, 6, 8)
6783  input1 = torch.randn(2, 3, 5)
6784  input2 = torch.randn(2, 3, 6)
6785  expected = m(input1.view(6, 5), input2.view(6, 6)).view(2, 3, 8)
6786  self.assertEqual(expected, m(input1, input2))
6787 
6788  def test_conv_tbc(self):
6789  inp = torch.randn(9, 4, 5, requires_grad=True)
6790  weight = torch.randn(3, 5, 6, requires_grad=True)
6791  bias = torch.randn(6, requires_grad=True)
6792 
6793  gradcheck(lambda i, w, b, pad: F.conv_tbc(i, w, b, pad), (inp, weight, bias, 3))
6794 
6795  @staticmethod
6796  def _test_conv_noncontig_weights(self, device):
6797  for dim in (1, 2, 3):
6798  for grouped in (False, True):
6799  nc = 3
6800  groups = 3 if grouped else 1
6801  w = torch.randn([3] * dim, device=device)
6802  w = w.expand([nc, int(nc / groups)] + list(w.shape))
6803  w = w.detach().requires_grad_()
6804  x = torch.randn([1, nc] + ([5] * dim), device=device, requires_grad=True)
6805  y = getattr(F, 'conv{}d'.format(dim))(x, w, groups=groups)
6806  y.sum().backward()
6807  y = getattr(F, 'conv_transpose{}d'.format(dim))(x, w, groups=groups)
6808  y.sum().backward()
6809 
6810  def test_conv_noncontig_weights(self):
6811  self._test_conv_noncontig_weights(self, torch.device('cpu'))
6812 
6813  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
6814  def test_conv_noncontig_weights_cuda(self):
6815  self._test_conv_noncontig_weights(self, torch.device('cuda'))
6816 
6817  @staticmethod
6818  def _test_conv_noncontig_weights_and_bias(self, device):
6819  # need floats to exercise https://github.com/pytorch/pytorch/issues/16018
6820  for bias in [True, False]:
6821  conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
6822  bias=bias).to(device, torch.float)
6823 
6824  input_nc = torch.randn((1, 3, 224, 224, 2), device=device, dtype=torch.float)[:, :, :, :, 1]
6825  input_c = input_nc.contiguous()
6826 
6827  weight_nc = torch.randn((64, 3, 7, 7, 2), device=device, dtype=torch.float)[:, :, :, :, 1]
6828  conv1.weight = nn.Parameter(weight_nc)
6829  weight_c = conv1.weight.contiguous()
6830 
6831  if bias:
6832  bias_nc = torch.randn((64, 2), device=device, dtype=torch.float)[:, 1]
6833  conv1.bias = nn.Parameter(bias_nc)
6834  bias_c = conv1.bias.contiguous()
6835 
6836  out1 = conv1(input_nc)
6837  conv1.weight = nn.Parameter(weight_c)
6838  if bias:
6839  conv1.bias = nn.Parameter(bias_c)
6840  out2 = conv1(input_c)
6841  self.assertEqual(out1, out2)
6842 
6843  def test_conv_noncontig_weights_and_bias(self):
6844  self._test_conv_noncontig_weights_and_bias(self, torch.device('cpu'))
6845 
6846  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
6847  def test_conv_noncontig_weights_and_bias_cuda(self):
6848  self._test_conv_noncontig_weights_and_bias(self, torch.device('cuda'))
6849 
6850  def run_conv_double_back_test(self, kern, stride, padding, chan_in, chan_out, batch_size,
6851  inp_size, dilation, no_weight, groups=1, use_cuda=False,
6852  use_bias=True, dtype=torch.double):
6853  if use_cuda:
6854  device = torch.device("cuda")
6855  else:
6856  device = torch.device("cpu")
6857 
6858  x = torch.randn(batch_size, chan_in, inp_size, inp_size, device=device,
6859  dtype=dtype, requires_grad=True)
6860  weight = torch.randn(chan_out, chan_in // groups, kern, kern, device=device,
6861  dtype=dtype, requires_grad=not no_weight)
6862  if use_bias:
6863  bias = torch.randn(chan_out, device=device, dtype=dtype, requires_grad=True)
6864  else:
6865  bias = None
6866 
6867  def func(*inputs):
6868  if use_bias:
6869  lx, lweight, lbias = inputs
6870  else:
6871  lx, lweight = inputs
6872  lbias = None
6873  # We disable cudnn during forward to avoid finite difference imprecision issues
6874  with cudnn.flags(enabled=False):
6875  out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups)
6876  return out
6877 
6878  if use_bias:
6879  inputs = x, weight, bias
6880  else:
6881  inputs = x, weight
6882 
6883  dummy_out = func(*inputs)
6884  grad_y = torch.randn_like(dummy_out, device=device, dtype=dtype, requires_grad=True)
6885 
6886  # Issue #15353: test mkldnn double backward, don't run gradgradcheck due
6887  # to imprecision issues
6888  if dtype == torch.float:
6889  g, = torch.autograd.grad(dummy_out.sum(), x, create_graph=True)
6890  return g.requires_grad
6891 
6892  return gradgradcheck(func, inputs, (grad_y,))
6893 
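# Note: run_conv_double_back_test checks second-order gradients of F.conv2d.
# gradgradcheck compares the derivatives of the analytic first-order
# gradients w.r.t. (x, weight, bias) against finite differences, using the
# random grad_y as the incoming gradient. For torch.float the helper only
# checks that the computed input gradient itself requires grad (i.e. double
# backward is available), per the issue #15353 comment above, instead of
# running the full gradgradcheck.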
6894  def test_conv_double_backward(self):
6895  batch_size = 2
6896  for kern, inp_size, dilations in [(3, 6, [1, 2]), (3, 7, [1]), (4, 9, [1])]:
6897  for stride, padding, chan_in, chan_out, dilation in \
6898  product([1, 2], [0, 1, 2], [2], [3], dilations):
6899  for no_weight in (True, False):
6900  for dtype in (torch.float, torch.double):
6901  result = self.run_conv_double_back_test(kern, stride,
6902  padding, chan_in, chan_out,
6903  batch_size, inp_size, dilation,
6904  no_weight, dtype=dtype)
6905  self.assertTrue(result,
6906  "Conv double backward test failed with parameters:" +
6907  "\nkern: " + str(kern) +
6908  "\nstride: " + str(stride) +
6909  "\npadding: " + str(padding) +
6910  "\nchan_in: " + str(chan_in) +
6911  "\nchan_out: " + str(chan_out) +
6912  "\nbatch_size: " + str(batch_size) +
6913  "\ninp_size: " + str(inp_size) +
6914  "\ndilation: " + str(dilation) +
6915  "\ndtype: " + str(dtype))
6916 
6917  def test_conv_double_backward_no_bias(self):
6918  kern = 3
6919  stride = 2
6920  chan_in, chan_out = 2, 4
6921  batch_size = 2
6922  inp_size = 5
6923  padding = 1
6924  dilation = 1
6925  no_weight = False
6926  use_bias = True
6927  result = self.run_conv_double_back_test(kern, stride,
6928  padding, chan_in, chan_out,
6929  batch_size, inp_size, dilation,
6930  no_weight, use_bias=use_bias)
6931  self.assertTrue(result,
6932  "Conv double backward test failed with parameters:" +
6933  "\nkern: " + str(kern) +
6934  "\nstride: " + str(stride) +
6935  "\npadding: " + str(padding) +
6936  "\nchan_in: " + str(chan_in) +
6937  "\nchan_out: " + str(chan_out) +
6938  "\nbatch_size: " + str(batch_size) +
6939  "\ninp_size: " + str(inp_size) +
6940  "\ndilation: " + str(dilation))
6941 
6942  def test_conv_double_backward_groups(self):
6943  kern = 3
6944  stride = 1
6945  padding = 2
6946  chan_in, chan_out = 2, 4
6947  batch_size = 2
6948  inp_size = 6
6949  dilation = 1
6950  no_weight = False
6951  groups = 2
6952  result = self.run_conv_double_back_test(kern, stride,
6953  padding, chan_in * groups, chan_out * groups,
6954  batch_size, inp_size, dilation,
6955  no_weight, groups=groups)
6956  self.assertTrue(result,
6957  "Conv double backward test failed with parameters:" +
6958  "\nkern: " + str(kern) +
6959  "\nstride: " + str(stride) +
6960  "\npadding: " + str(padding) +
6961  "\nchan_in: " + str(chan_in) +
6962  "\nchan_out: " + str(chan_out) +
6963  "\nbatch_size: " + str(batch_size) +
6964  "\ninp_size: " + str(inp_size) +
6965  "\ndilation: " + str(dilation) +
6966  "\ngroups: " + str(groups))
6967 
6968  def test_conv_double_backward_stride(self):
6969  batch_size = 2
6970 
6971  # Cannot provide ggW when stride is > 1
6972  for kern, inp_size, dilations in [(3, 5, [1, 2]), (3, 7, [1])]:
6973  for stride, padding, chan_in, chan_out, dilation in product([2], [0, 1], [1], [2], dilations):
6974  no_weight = False
6975  self.run_conv_double_back_test(kern, stride,
6976  padding, chan_in, chan_out,
6977  batch_size, inp_size, dilation,
6978  no_weight)
6979 
6980  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
6981  def test_cudnn_noncontiguous_weight(self):
6982  # Noncontiguous weights must be contiguous() before being
6983  # passed to cuDNN
6984  input = Variable(torch.cuda.DoubleTensor([1, 1, 1]).view(1, 1, 3))
6985  weights1 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2))
6986  weights2 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2)).contiguous()
6987  self.assertEqual(F.conv1d(input, weights1, bias=None, stride=2, dilation=2),
6988  F.conv1d(input, weights2, bias=None, stride=2, dilation=2))
6989 
6990  @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
6991  @repeat_test_for_types(DOUBLE_TENSORTYPES)
6992  def test_conv_double_backward_cuda(self, dtype=torch.double):
6993  # Double backward only runs with DoubleTensor due to precision reasons
6994  batch_size = 1
6995  for kern, inp_size, dilations in [(3, 5, [1, 2]), (4, 9, [1])]:
6996  for stride, padding, chan_in, chan_out, dilation in product([1], [2], [2], [3], dilations):
6997  no_weight = stride == 2
6998  result = self.run_conv_double_back_test(kern, stride,
6999  padding, chan_in, chan_out,
7000  batch_size, inp_size, dilation,
7001  no_weight, use_cuda=True, dtype=dtype)
7002  self.assertTrue(result,
7003  "Conv double backward test failed with parameters:" +
7004  "\nkern: " + str(kern) +
7005  "\nstride: " + str(stride) +
7006  "\npadding: " + str(padding) +
7007  "\nchan_in: " + str(chan_in) +
7008  "\nchan_out: " + str(chan_out) +
7009  "\nbatch_size: " + str(batch_size) +
7010  "\ninp_size: " + str(inp_size) +
7011  "\ndilation: " + str(dilation))
7012 
7013  def run_grad_conv_test(self, func_forward, func_backward, dim=1, gradient='input'):
7014  for kern, inp_size in [(3, 6), (3, 7), (4, 9)]:
7015  for batch, stride, padding, chan_in, chan_out, dilation in \
7016  product([1, 2], [1, 2], [0, 1, 2], [2], [3], [1]):
7017 
7018  for has_bias in [True, False]:
7019  input_shape = [batch, chan_in]
7020  weight_shape = [chan_out, chan_in]
7021  for _ in range(dim):
7022  input_shape.append(inp_size)
7023  weight_shape.append(kern)
7024 
7025  input = torch.randn(input_shape, requires_grad=True)
7026  weight = torch.randn(weight_shape, requires_grad=True)
7027  if has_bias:
7028  bias = torch.randn([chan_out], requires_grad=True)
7029  output = func_forward(input, weight, stride=stride, padding=padding, dilation=dilation, bias=bias)
7030 
7031  gradient_o = torch.randn(output.shape)
7032  gradient_w = torch.autograd.grad(output, input if (gradient == 'input') else weight, gradient_o)
7033 
7034  self.assertAlmostEqual(gradient_w[0],
7035  func_backward(
7036  input_shape if (gradient == 'input') else input,
7037  weight_shape if (gradient == 'weight') else weight,
7038  gradient_o,
7039  stride=stride,
7040  padding=padding,
7041  dilation=dilation))
7042 
7043  def test_grad_conv1d_input(self):
7044  self.run_grad_conv_test(F.conv1d, F.grad.conv1d_input, 1, 'input')
7045 
7046  def test_grad_conv1d_weight(self):
7047  self.run_grad_conv_test(F.conv1d, F.grad.conv1d_weight, 1, 'weight')
7048 
7049  def test_grad_conv2d_input(self):
7050  self.run_grad_conv_test(F.conv2d, F.grad.conv2d_input, 2, 'input')
7051 
7052  def test_grad_conv2d_weight(self):
7053  self.run_grad_conv_test(F.conv2d, F.grad.conv2d_weight, 2, 'weight')
7054 
7055  def test_grad_conv3d_input(self):
7056  self.run_grad_conv_test(F.conv3d, F.grad.conv3d_input, 3, 'input')
7057 
7058  def test_grad_conv3d_weight(self):
7059  self.run_grad_conv_test(F.conv3d, F.grad.conv3d_weight, 3, 'weight')
7060 
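# Note: the torch.nn.grad.convNd_input / convNd_weight helpers exercised
# above compute the convolution gradients explicitly. A minimal sketch
# matching the calls made in run_grad_conv_test:
#     grad_input = F.grad.conv2d_input(input_shape, weight, gradient_o,
#                                      stride=stride, padding=padding,
#                                      dilation=dilation)
# which should agree with torch.autograd.grad(output, input, gradient_o).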
7061  @unittest.skipIf(not torch._nnpack_available(), "NNPACK unavailable")
7062  def test_nnpack_conv(self):
7063  for kern, inp_size in [(3, 6), (3, 7), (4, 9)]:
7064  for batch, padding, chan_in, chan_out in \
7065  product([1, 2], [0, 1, 2], [2], [3]):
7066 
7067  for has_bias in [True, False]:
7068  input_shape = [batch, chan_in]
7069  weight_shape = [chan_out, chan_in]
7070  for _ in range(2):
7071  input_shape.append(inp_size)
7072  weight_shape.append(kern)
7073 
7074  input = torch.randn(input_shape, requires_grad=True, dtype=torch.float)
7075  weight = torch.randn(weight_shape, requires_grad=True, dtype=torch.float)
7076  if has_bias:
7077  bias = torch.randn([chan_out], requires_grad=True, dtype=torch.float)
7078  output = torch._nnpack_spatial_convolution(input, weight, padding=padding, bias=bias)
7079  output_expected = torch.nn.functional.conv2d(input, weight, padding=padding, bias=bias)
7080  self.assertAlmostEqual(output, output_expected, delta=3e-4)
7081 
7082  gradient_o = torch.randn(output.shape, dtype=torch.float)
7083 
7084  grads = torch.autograd.grad(output, [input, weight], gradient_o)
7085  grads_expected = torch.autograd.grad(output_expected, [input, weight], gradient_o)
7086  for gr, gr_expected in zip(grads, grads_expected):
7087  self.assertAlmostEqual(gr, gr_expected, delta=3e-4)
7088 
7089  def test_fold_invalid_arg(self):
7090  # input wrong dimension
7091 
7092  fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3))
7093  with self.assertRaisesRegex(NotImplementedError, r"Only 3D input Tensors are supported"):
7094  fold(torch.randn(1, 5))
7095 
7096  # input.size(1) not divisible by \prod(kernel_size)
7097 
7098  fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3))
7099  with self.assertRaisesRegex(RuntimeError, r"be divisible by the product of kernel_size"):
7100  fold(torch.randn(1, 5, 9))
7101 
7102  with self.assertRaisesRegex(RuntimeError, r"be divisible by the product of kernel_size"):
7103  fold(torch.randn(1, 19, 9))
7104 
7105  # input.size(2) not matching the total number of sliding blocks
7106 
7107  with self.assertRaisesRegex(RuntimeError, r"match the calculated number of sliding blocks"):
7108  fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3))
7109  fold(torch.randn(1, 6, 10))
7110 
7111  with self.assertRaisesRegex(RuntimeError, r"match the calculated number of sliding blocks"):
7112  fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3), stride=(2, 2))
7113  fold(torch.randn(1, 6, 5))
7114 
7115  with self.assertRaisesRegex(RuntimeError, r"match the calculated number of sliding blocks"):
7116  fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 3), stride=(2, 2), dilation=(1, 2), padding=(2, 0))
7117  fold(torch.randn(1, 6, 5)) # should be 4 * 1 = 4 sliding blocks
7118 
7119  def test_unfold_invalid_arg(self):
7120  # input wrong dimension
7121 
7122  unfold = nn.Unfold(kernel_size=(2, 3))
7123  with self.assertRaisesRegex(NotImplementedError, r"Only 4D input Tensors are supported"):
7124  unfold(torch.randn(1, 5, 2))
7125 
7126  # calculated output shape is too small
7127 
7128  with self.assertRaisesRegex(RuntimeError, r"too small \(non-positive\)"):
7129  unfold = nn.Unfold(kernel_size=(2, 3))
7130  unfold(torch.randn(1, 2, 2, 2))
7131 
7132  with self.assertRaisesRegex(RuntimeError, r"too small \(non-positive\)"):
7133  unfold = nn.Unfold(kernel_size=(5, 3), padding=(1, 1))
7134  unfold(torch.randn(1, 2, 2, 3))
7135 
7136  with self.assertRaisesRegex(RuntimeError, r"too small \(non-positive\)"):
7137  unfold = nn.Unfold(kernel_size=(1, 3), padding=(1, 1), dilation=(1, 2))
7138  unfold(torch.randn(1, 2, 2, 2))
7139 
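# Note: for an input of shape (N, C, H, W), nn.Unfold with
# kernel_size=(kh, kw) returns a tensor of shape (N, C * kh * kw, L), where L
# is the number of sliding blocks, i.e. the product over spatial dims of
#     floor((size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1).
# nn.Fold expects the inverse layout, size(1) divisible by kh * kw and
# size(2) equal to L, which is exactly what the error checks above exercise.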
7140  def test_softmin(self):
7141  x = torch.randn(2, 16)
7142  self.assertEqual(F.softmin(x, 1), F.softmax(-x, 1))
7143  self.assertEqual(F.softmin(x, 0), F.softmax(-x, 0))
7144 
7145  def test_adaptive_log_softmax(self):
7146  # args validation
7147  with self.assertRaises(ValueError):
7148  _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 15, 15], div_value=2.)
7149 
7150  with self.assertRaises(ValueError):
7151  _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 15, 10], div_value=2.)
7152 
7153  with self.assertRaises(ValueError):
7154  _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 25], div_value=2.)
7155 
7156  with self.assertRaisesRegex(ValueError, "cutoffs should be a sequence of unique,"):
7157  _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 20], div_value=2.)
7158 
7159  # should not raise
7160  _ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 19], div_value=2.)
7161 
7162  # input shapes
7163  with self.assertRaisesRegex(RuntimeError, r"Input and target should have the same size"):
7164  asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
7165  x = torch.randn(2, 16)
7166  y = torch.tensor([0, 5, 10])
7167  asfm(x, y)
7168 
7169  # out-of-bound targets
7170  with self.assertRaisesRegex(RuntimeError, r"Target values should be in"):
7171  asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
7172  x = torch.randn(2, 16)
7173  y = torch.tensor([0, 20])
7174  asfm(x, y)
7175 
7176  # cluster sizes
7177  asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
7178  x = torch.randn(2, 16)
7179  y = torch.tensor([0, 17])
7180 
7181  self.assertEqual(asfm.head.weight.size(), (5 + 3, 16)) # 5 targets in head, 3 clusters, dimensionality 16
7182  self.assertEqual(asfm.tail[0][1].weight.size(), (5, 8)) # 5 targets in this cluster, dimensionality 8
7183  self.assertEqual(asfm.tail[1][1].weight.size(), (5, 4))
7184  self.assertEqual(asfm.tail[2][1].weight.size(), (5, 2))
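# Note: with div_value=2., tail cluster i projects the 16-dimensional input
# down to 16 / 2 ** (i + 1), hence the 8-, 4- and 2-dimensional projections
# checked above; the head covers the cutoffs[0] = 5 shortlist targets plus
# one logit per cluster (5 + 3 rows).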
7185 
7186  self.assertEqual(asfm(x, y).output.size(), (2, ))
7187 
7188  # log_prob actually returns log-probabilities
7189  asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 4, [2], div_value=2.)
7190  x = torch.randn(4, 8)
7191  logprob_out = asfm.log_prob(x)
7192 
7193  self.assertEqual(torch.exp(logprob_out).data.sum(1), torch.ones(4))
7194 
7195  # forward returns the same thing as log_prob
7196  for v in [0, 1, 2, 3]:
7197  y = torch.full((4,), v, dtype=torch.long)
7198  out, loss = asfm(x, y)
7199 
7200  self.assertEqual(out, logprob_out.gather(1, y.unsqueeze(1)).squeeze())
7201  self.assertEqual(loss, F.nll_loss(logprob_out, y))
7202 
7203  # predict
7204  x = torch.randn(64, 8).abs_()
7205 
7206  # argmax in shortlist
7207  asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
7208  asfm.head.weight.data.abs_()
7209  asfm.head.bias.data.abs_()
7210  asfm.head.weight.data[asfm.shortlist_size:, :].zero_()
7211 
7212  out = asfm.predict(x)
7213  self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))
7214 
7215  # argmax outside of shortlist
7216  asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
7217  asfm.head.weight.data.abs_()
7218  asfm.head.bias.data.abs_()
7219  asfm.head.weight.data[:asfm.shortlist_size, :].zero_()
7220 
7221  out = asfm.predict(x)
7222  self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))
7223 
7224  # half of the argmax in shortlist, half in clusters
7225  asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
7226  asfm.head.weight.data.abs_()
7227  asfm.head.bias.data.abs_()
7228 
7229  x[:32, :asfm.shortlist_size].zero_()
7230  x[32:, asfm.shortlist_size:].zero_()
7231 
7232  asfm.head.weight.data[:asfm.shortlist_size, asfm.shortlist_size:].zero_()
7233  asfm.head.weight.data[asfm.shortlist_size:, :asfm.shortlist_size].zero_()
7234 
7235  out = asfm.predict(x)
7236  self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))
7237 
7238 
7239 class TestNNInit(TestCase):
7240  def setUp(self):
7241  super(TestNNInit, self).setUp()
7242  random.seed(123)
7243 
7244  def _is_normal(self, tensor, mean, std):
7245  samples = tensor.view(-1).tolist()
7246  p_value = stats.kstest(samples, 'norm', args=(mean, std))[1]
7247  return p_value > 0.0001
7248 
7249  def _is_uniform(self, tensor, a, b):
7250  samples = tensor.view(-1).tolist()
7251  p_value = stats.kstest(samples, 'uniform', args=(a, (b - a)))[1]
7252  return p_value > 0.0001
7253 
7254  def _create_random_nd_tensor(self, dims, size_min, size_max):
7255  size = [random.randint(size_min, size_max) for _ in range(dims)]
7256  tensor = torch.zeros(size)
7257  return tensor
7258 
7259  def _random_float(self, a, b):
7260  return (b - a) * random.random() + a
7261 
7262  def test_calculate_gain_linear(self):
7263  for fn in ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']:
7264  gain = init.calculate_gain(fn)
7265  self.assertEqual(gain, 1)
7266 
7267  def test_calculate_gain_nonlinear(self):
7268  for fn in ['sigmoid', 'tanh', 'relu', 'leaky_relu']:
7269  gain = init.calculate_gain(fn)
7270  if fn == 'sigmoid':
7271  self.assertEqual(gain, 1)
7272  elif fn == 'tanh': # 5 / 3
7273  self.assertEqual(gain, 1.6666666666666667)
7274  elif fn == 'relu': # sqrt(2)
7275  self.assertEqual(gain, 1.4142135623730951)
7276  elif fn == 'leaky_relu': # sqrt(2 / (1 + slope^2))
7277  self.assertEqual(gain, 1.4141428569978354)
7278 
7279  def test_calculate_gain_leaky_relu(self):
7280  for param in [None, 0, 0.01, 10]:
7281  gain = init.calculate_gain('leaky_relu', param)
7282  if param is None: # Default slope is 0.01
7283  self.assertEqual(gain, 1.4141428569978354)
7284  elif param == 0: # No slope = same gain as normal ReLU
7285  self.assertEqual(gain, 1.4142135623730951)
7286  elif param == 0.01:
7287  self.assertEqual(gain, 1.4141428569978354)
7288  elif param == 10:
7289  self.assertEqual(gain, 0.14071950894605836)
7290 
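# Worked examples for the gain formula above, sqrt(2 / (1 + negative_slope ** 2)):
#     slope = 0.01 (default): sqrt(2 / 1.0001) ~= 1.4141428569978354
#     slope = 0:              sqrt(2)          ~= 1.4142135623730951
#     slope = 10:             sqrt(2 / 101)    ~= 0.14071950894605836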
7291  def test_calculate_gain_leaky_relu_only_accepts_numbers(self):
7292  for param in [True, [1], {'a': 'b'}]:
7293  with self.assertRaises(ValueError):
7294  init.calculate_gain('leaky_relu', param)
7295 
7296  def test_calculate_gain_only_accepts_valid_nonlinearities(self):
7297  for n in [2, 5, 25]:
7298  # Generate random strings of lengths that definitely aren't supported
7299  random_string = ''.join([random.choice(string.ascii_lowercase) for i in range(n)])
7300  with self.assertRaises(ValueError):
7301  init.calculate_gain(random_string)
7302 
7303  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7304  def test_uniform(self):
7305  for dims in [1, 2, 4]:
7306  input_tensor = self._create_random_nd_tensor(dims, size_min=30, size_max=50)
7307  a = self._random_float(-3, 3)
7308  b = a + self._random_float(1, 5)
7309  init.uniform_(input_tensor, a=a, b=b)
7310  assert self._is_uniform(input_tensor, a, b)
7311 
7312  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7313  def test_normal(self):
7314  for dims in [1, 2, 4]:
7315  input_tensor = self._create_random_nd_tensor(dims, size_min=30, size_max=50)
7316  mean = self._random_float(-3, 3)
7317  std = self._random_float(1, 5)
7318  init.normal_(input_tensor, mean=mean, std=std)
7319 
7320  assert self._is_normal(input_tensor, mean, std)
7321 
7322  def test_constant(self):
7323  for dims in [1, 2, 4]:
7324  input_tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=5)
7325  val = self._random_float(1, 10)
7326  init.constant_(input_tensor, val)
7327 
7328  self.assertEqual(input_tensor, input_tensor.clone().fill_(val))
7329 
7330  def test_ones_and_zeros(self):
7331  for init_fn_, val in zip([init.ones_, init.zeros_], [1, 0]):
7332  for dims in [1, 2, 4]:
7333  input_tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=5)
7334  init_fn_(input_tensor)
7335 
7336  self.assertEqual(input_tensor, input_tensor.clone().fill_(val))
7337 
7338  def test_eye(self):
7339  input_tensor = self._create_random_nd_tensor(2, size_min=1, size_max=5)
7340  init.eye_(input_tensor)
7341 
7342  # Check every single element
7343  for i in range(input_tensor.size(0)):
7344  for j in range(input_tensor.size(1)):
7345  if i == j:
7346  assert input_tensor[i][j] == 1
7347  else:
7348  assert input_tensor[i][j] == 0
7349 
7350  def test_eye_only_works_on_2d_inputs(self):
7351  for dims in [1, 3]:
7352  with self.assertRaises(ValueError):
7353  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=3)
7354  init.eye_(tensor)
7355 
7356  def test_max_unpool(self):
7357  # Test 1D
7358  output, indices = F.max_pool1d(torch.randn([1, 1, 4]), 2, stride=2, return_indices=True)
7359  self.assertEqual(F.max_unpool1d(output, indices, 2), F.max_unpool1d(output, indices, 2, stride=2))
7360 
7361  # Test list / tuple passed as argument to max_unpool1d
7362  input = torch.randn([1, 1, 5])
7363  output, indices = F.max_pool1d(input, 2, stride=2, return_indices=True)
7364  self.assertEqual(F.max_unpool1d(output, indices, 2, stride=2, output_size=input.shape),
7365  F.max_unpool1d(output, indices, 2, stride=2, output_size=input.size()))
7366 
7367  # Test 2D
7368  output, indices = F.max_pool2d(torch.randn([1, 1, 4, 4]), 2, stride=2, return_indices=True)
7369  self.assertEqual(F.max_unpool2d(output, indices, 2), F.max_unpool2d(output, indices, 2, stride=2))
7370 
7371  # Test 3D
7372  output, indices = F.max_pool3d(torch.randn([4, 4, 4, 4, 4]), 2, stride=2, return_indices=True)
7373  self.assertEqual(F.max_unpool3d(output, indices, 2), F.max_unpool3d(output, indices, 2, stride=2))
7374 
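For reference, max_unpool scatters the pooled maxima back to the positions recorded in the returned indices and fills every other position with zero, which is why passing the stride explicitly (or an output_size) must not change the result. A tiny standalone round trip:

import torch
import torch.nn.functional as F

x = torch.tensor([[[1.0, 3.0, 2.0, 4.0]]])
pooled, idx = F.max_pool1d(x, 2, stride=2, return_indices=True)   # [[[3., 4.]]], [[[1, 3]]]
restored = F.max_unpool1d(pooled, idx, 2, stride=2)
assert torch.equal(restored, torch.tensor([[[0.0, 3.0, 0.0, 4.0]]]))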
7375  def test_dirac_properties(self):
7376  for dims in [3, 4, 5]:
7377  input_tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=5)
7378  init.dirac_(input_tensor)
7379 
7380  c_out, c_in = input_tensor.size(0), input_tensor.size(1)
7381  min_d = min(c_out, c_in)
7382  # Check that the number of nonzeros equals the smallest dim
7383  assert torch.nonzero(input_tensor).size(0) == min_d
7384  # Check that the sum of the values (which can have precision issues, hence assertEqual) also equals it
7385  self.assertEqual(input_tensor.sum(), min_d)
7386 
7387  def test_dirac_identity(self):
7388  batch, in_c, out_c, size, kernel_size = 8, 3, 4, 5, 3
7389  # Test 1D
7390  input_var = torch.randn(batch, in_c, size)
7391  filter_var = torch.zeros(out_c, in_c, kernel_size)
7392  init.dirac_(filter_var)
7393  output_var = F.conv1d(input_var, filter_var)
7394  input_tensor, output_tensor = input_var.data, output_var.data # Variables do not support nonzero
7395  self.assertEqual(input_tensor[:, :, 1:-1], output_tensor[:, :in_c, :]) # Assert in_c outputs are preserved
7396  assert torch.nonzero(output_tensor[:, in_c:, :]).numel() == 0 # Assert extra outputs are 0
7397 
7398  # Test 2D
7399  input_var = torch.randn(batch, in_c, size, size)
7400  filter_var = torch.zeros(out_c, in_c, kernel_size, kernel_size)
7401  init.dirac_(filter_var)
7402  output_var = F.conv2d(input_var, filter_var)
7403  input_tensor, output_tensor = input_var.data, output_var.data
7404  self.assertEqual(input_tensor[:, :, 1:-1, 1:-1], output_tensor[:, :in_c, :, :])
7405  assert torch.nonzero(output_tensor[:, in_c:, :, :]).numel() == 0
7406 
7407  # Test 3D
7408  input_var = torch.randn(batch, in_c, size, size, size)
7409  filter_var = torch.zeros(out_c, in_c, kernel_size, kernel_size, kernel_size)
7410  init.dirac_(filter_var)
7411  output_var = F.conv3d(input_var, filter_var)
7412  input_tensor, output_tensor = input_var.data, output_var.data
7413  self.assertEqual(input_tensor[:, :, 1:-1, 1:-1, 1:-1], output_tensor[:, :in_c, :, :, :])
7414  assert torch.nonzero(output_tensor[:, in_c:, :, :, :]).numel() == 0
7415 
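The identity behaviour verified above comes from dirac_ writing a single 1 at the spatial centre of the kernel for each of the first min(out_channels, in_channels) channel pairs, so a valid convolution copies the input channels (cropped by the kernel's receptive field) and leaves the extra output channels at zero. A short 1D sketch:

import torch
import torch.nn.functional as F
import torch.nn.init as init

x = torch.randn(2, 3, 8)                 # batch=2, in_c=3, length=8
w = init.dirac_(torch.zeros(5, 3, 3))    # out_c=5, in_c=3, kernel=3
y = F.conv1d(x, w)                       # valid conv trims one element per side
assert torch.allclose(y[:, :3, :], x[:, :, 1:-1])
assert y[:, 3:, :].abs().sum().item() == 0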
7416  def test_dirac_only_works_on_3_4_5d_inputs(self):
7417  for dims in [1, 2, 6]:
7418  with self.assertRaises(ValueError):
7419  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=3)
7420  init.dirac_(tensor)
7421 
7422  def test_xavier_uniform_errors_on_inputs_smaller_than_2d(self):
7423  for dims in [0, 1]:
7424  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=1)
7425  with self.assertRaises(ValueError):
7426  init.xavier_uniform_(tensor)
7427 
7428  def test_xavier_normal_errors_on_inputs_smaller_than_2d(self):
7429  for dims in [0, 1]:
7430  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=1)
7431  with self.assertRaises(ValueError):
7432  init.xavier_normal_(tensor)
7433 
7434  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7435  def test_xavier_uniform(self):
7436  for use_gain in [True, False]:
7437  for dims in [2, 4]:
7438  input_tensor = self._create_random_nd_tensor(dims, size_min=20, size_max=25)
7439  gain = 1
7440 
7441  if use_gain:
7442  gain = self._random_float(0.1, 2)
7443  init.xavier_uniform_(input_tensor, gain=gain)
7444  else:
7445  init.xavier_uniform_(input_tensor)
7446 
7447  fan_in = input_tensor.size(1)
7448  fan_out = input_tensor.size(0)
7449  if input_tensor.dim() > 2:
7450  fan_in *= input_tensor[0, 0].numel()
7451  fan_out *= input_tensor[0, 0].numel()
7452 
7453  expected_std = gain * math.sqrt(2.0 / (fan_in + fan_out))
7454  bounds = expected_std * math.sqrt(3)
7455  assert self._is_uniform(input_tensor, -bounds, bounds)
7456 
7457  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7458  def test_xavier_normal(self):
7459  for use_gain in [True, False]:
7460  for dims in [2, 4]:
7461  input_tensor = self._create_random_nd_tensor(dims, size_min=20, size_max=25)
7462  gain = 1
7463 
7464  if use_gain:
7465  gain = self._random_float(0.1, 2)
7466  init.xavier_normal_(input_tensor, gain=gain)
7467  else:
7468  init.xavier_normal_(input_tensor)
7469 
7470  fan_in = input_tensor.size(1)
7471  fan_out = input_tensor.size(0)
7472  if input_tensor.dim() > 2:
7473  fan_in *= input_tensor[0, 0].numel()
7474  fan_out *= input_tensor[0, 0].numel()
7475 
7476  expected_std = gain * math.sqrt(2.0 / (fan_in + fan_out))
7477  assert self._is_normal(input_tensor, 0, expected_std)
7478 
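Both Xavier tests share the same bookkeeping: for a weight of shape (out, in, *kernel), fan_in = in * prod(kernel) and fan_out = out * prod(kernel), the target standard deviation is gain * sqrt(2 / (fan_in + fan_out)), and the uniform variant uses the bound std * sqrt(3) because U(-b, b) has standard deviation b / sqrt(3). Worked numbers (sizes here are illustrative only):

import math

out_c, in_c, kh, kw, gain = 24, 20, 3, 3, 1.5
fan_in, fan_out = in_c * kh * kw, out_c * kh * kw      # 180, 216
std = gain * math.sqrt(2.0 / (fan_in + fan_out))       # ~0.1066
bound = std * math.sqrt(3.0)                           # uniform bound with that std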
7479  def test_kaiming_uniform_errors_on_inputs_smaller_than_2d(self):
7480  for dims in [0, 1]:
7481  with self.assertRaises(ValueError):
7482  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=1)
7483  init.kaiming_uniform_(tensor)
7484 
7485  def test_kaiming_normal_errors_on_inputs_smaller_than_2d(self):
7486  for dims in [0, 1]:
7487  with self.assertRaises(ValueError):
7488  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=1)
7489  init.kaiming_normal_(tensor)
7490 
7491  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7492  def test_kaiming_uniform(self):
7493  for use_a in [True, False]:
7494  for dims in [2, 4]:
7495  for mode in ['fan_in', 'fan_out']:
7496  input_tensor = self._create_random_nd_tensor(dims, size_min=20, size_max=25)
7497  if use_a:
7498  a = self._random_float(0.1, 2)
7499  init.kaiming_uniform_(input_tensor, a=a, mode=mode)
7500  else:
7501  a = 0
7502  init.kaiming_uniform_(input_tensor, mode=mode)
7503 
7504  fan_in = input_tensor.size(1)
7505  fan_out = input_tensor.size(0)
7506  if input_tensor.dim() > 2:
7507  fan_in *= input_tensor[0, 0].numel()
7508  fan_out *= input_tensor[0, 0].numel()
7509 
7510  if mode == 'fan_in':
7511  n = fan_in
7512  else:
7513  n = fan_out
7514 
7515  expected_std = math.sqrt(2.0 / ((1 + a**2) * n))
7516  bounds = expected_std * math.sqrt(3.0)
7517  assert self._is_uniform(input_tensor, -bounds, bounds)
7518 
7519  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7520  def test_kaiming_normal(self):
7521  for use_a in [True, False]:
7522  for dims in [2, 4]:
7523  for mode in ['fan_in', 'fan_out']:
7524  input_tensor = self._create_random_nd_tensor(dims, size_min=20, size_max=25)
7525  if use_a:
7526  a = self._random_float(0.1, 2)
7527  init.kaiming_normal_(input_tensor, a=a, mode=mode)
7528  else:
7529  a = 0
7530  init.kaiming_normal_(input_tensor, mode=mode)
7531 
7532  fan_in = input_tensor.size(1)
7533  fan_out = input_tensor.size(0)
7534  if input_tensor.dim() > 2:
7535  fan_in *= input_tensor[0, 0].numel()
7536  fan_out *= input_tensor[0, 0].numel()
7537 
7538  if mode == 'fan_in':
7539  n = fan_in
7540  else:
7541  n = fan_out
7542 
7543  expected_std = math.sqrt(2.0 / ((1 + a**2) * n))
7544  assert self._is_normal(input_tensor, 0, expected_std)
7545 
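The Kaiming tests use the same fan computation but the He formula std = sqrt(2 / ((1 + a^2) * fan)), where fan is fan_in or fan_out depending on mode and a is the leaky_relu negative slope (0 for a plain ReLU). For example (illustrative numbers):

import math

fan_in, a = 180, 0.0
std = math.sqrt(2.0 / ((1 + a ** 2) * fan_in))   # sqrt(2 / 180) ~ 0.1054
bound = std * math.sqrt(3.0)                     # bound used by the uniform variant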
7546  def test_sparse_only_works_on_2d_inputs(self):
7547  for dims in [1, 3]:
7548  with self.assertRaises(ValueError):
7549  sparsity = self._random_float(0.1, 0.9)
7550  tensor = self._create_random_nd_tensor(dims, size_min=1, size_max=3)
7551  init.sparse_(tensor, sparsity)
7552 
7553  @unittest.skipIf(not TEST_SCIPY, "Scipy not found.")
7554  def test_sparse_default_std(self):
7555  for use_random_std in [True, False]:
7556  input_tensor = self._create_random_nd_tensor(2, size_min=30, size_max=35)
7557  rows, cols = input_tensor.size(0), input_tensor.size(1)
7558  sparsity = self._random_float(0.1, 0.2)
7559 
7560  std = 0.01 # default std
7561  if use_random_std:
7562  std = self._random_float(0.01, 0.2)
7563  init.sparse_(input_tensor, sparsity=sparsity, std=std)
7564  else:
7565  init.sparse_(input_tensor, sparsity=sparsity)
7566 
7567  for col_idx in range(input_tensor.size(1)):
7568  column = input_tensor[:, col_idx]
7569  assert column[column == 0].nelement() >= math.ceil(sparsity * rows)
7570 
7571  assert self._is_normal(input_tensor[input_tensor != 0], 0, std)
7572 
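init.sparse_ draws the whole matrix from N(0, std) and then zeroes out a sparsity fraction of the entries in each column, which is exactly what the per-column zero count and the normality check on the nonzero entries verify. A rough sketch of that contract (not the implementation):

import math
import torch
import torch.nn.init as init

w = init.sparse_(torch.empty(40, 30), sparsity=0.15, std=0.05)
zeros_per_col = (w == 0).sum(dim=0)
assert (zeros_per_col >= math.ceil(0.15 * 40)).all()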
7573  @skipIfNoLapack
7574  def test_orthogonal(self):
7575  for use_gain in [True, False]:
7576  for tensor_size in [[3, 4], [4, 3], [20, 2, 3, 4], [2, 3, 4, 5]]:
7577  input_tensor = torch.zeros(tensor_size)
7578  gain = 1.0
7579 
7580  if use_gain:
7581  gain = self._random_float(0.1, 2)
7582  init.orthogonal_(input_tensor, gain=gain)
7583  else:
7584  init.orthogonal_(input_tensor)
7585 
7586  rows, cols = tensor_size[0], reduce(mul, tensor_size[1:])
7587  flattened_tensor = input_tensor.view(rows, cols)
7588  if rows > cols:
7589  self.assertEqual(torch.mm(flattened_tensor.t(), flattened_tensor),
7590  torch.eye(cols) * gain ** 2, prec=1e-6)
7591  else:
7592  self.assertEqual(torch.mm(flattened_tensor, flattened_tensor.t()),
7593  torch.eye(rows) * gain ** 2, prec=1e-6)
7594 
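The orthogonality assertion above is just the definition of a (semi-)orthogonal matrix scaled by gain: after flattening to rows x cols, the Gram matrix on the smaller side equals gain^2 times the identity. Standalone:

import torch
import torch.nn.init as init

q = init.orthogonal_(torch.empty(3, 7), gain=2.0)    # rows < cols: orthonormal rows scaled by gain
assert torch.allclose(q @ q.t(), 4.0 * torch.eye(3), atol=1e-5)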
7595  def test_deprecation(self):
7596  x = torch.randn(3, 3)
7597 
7598  def fn():
7599  init.normal(x)
7600  self.assertWarnsRegex(fn, 'deprecated', 'methods not suffixed with underscore should be deprecated')
7601 
7602 
7603 def add_test(test, decorator=None):
7604  def add(test_name, fn):
7605  if hasattr(TestNN, test_name):
7606  raise RuntimeError('Found two tests with the same name: ' + test_name)
7607  if decorator is not None:
7608  fn = decorator(fn)
7609  setattr(TestNN, test_name, fn)
7610 
7611  test_name = test.get_name()
7612  add(test_name, lambda self, test=test: test(self))
7613  cuda_test_name = test_name + '_cuda'
7614  # With dtype enabled, it's sufficient to test against the three floating-point types
7615  kwargs = {}
7616  if 'extra_args' in get_function_arglist(test.test_cuda):
7617  kwargs['extra_args'] = test.extra_args
7618 
7619  if 'dtype' in get_function_arglist(test.test_cuda):
7620  add(cuda_test_name + '_float', lambda self,
7621  test=test, kwargs=kwargs: test.test_cuda(self, dtype=torch.float, **kwargs))
7622  add(cuda_test_name + '_double', lambda self,
7623  test=test, kwargs=kwargs: test.test_cuda(self, dtype=torch.double, **kwargs))
7624 
7625  def test_half(self, test=test, kwargs=kwargs):
7626  test.test_cuda(self, dtype=torch.half, **kwargs)
7627  if getattr(test, 'check_half', True):
7628  add(cuda_test_name + '_half', test_half)
7629  else:
7630  add(cuda_test_name, lambda self, test=test, kwargs=kwargs: test.test_cuda(self, **kwargs))
7631 
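add_test attaches each generated function to TestNN at import time, producing a CPU variant plus float/double/half CUDA variants when test_cuda accepts a dtype (half is skipped when check_half is False). The same dynamic-registration pattern in miniature, with purely illustrative names:

import unittest

class _DemoTests(unittest.TestCase):
    pass

def _make_test(value):
    def test(self):
        self.assertGreaterEqual(value, 0)
    return test

for i, v in enumerate([0, 1, 4]):
    name = 'test_generated_{}'.format(i)
    if hasattr(_DemoTests, name):
        raise RuntimeError('Found two tests with the same name: ' + name)
    setattr(_DemoTests, name, _make_test(v))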
7632 
7633 new_criterion_tests = [
7634  dict(
7635  module_name='BCEWithLogitsLoss',
7636  input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
7637  target_fn=lambda: torch.randn(15, 10).gt(0).double(),
7638  ),
7639  dict(
7640  module_name='BCEWithLogitsLoss',
7641  constructor_args=(torch.rand(10),),
7642  input_fn=lambda: torch.rand(15, 10).clamp_(1e-2, 1 - 1e-2),
7643  target_fn=lambda: torch.randn(15, 10).gt(0).double(),
7644  desc='weights',
7645  ),
7646  dict(
7647  module_name='BCEWithLogitsLoss',
7648  constructor_args=(torch.rand(()),),
7649  input_fn=lambda: torch.rand(()).clamp_(1e-2, 1 - 1e-2),
7650  target_fn=lambda: torch.randn(()).gt(0).double(),
7651  desc='scalar_weights'
7652  ),
7653  dict(
7654  module_name='NLLLoss',
7655  input_size=(2, 3, 5, 5),
7656  target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
7657  reference_fn=lambda i, t, m:
7658  loss_reference_fns['NLLLossNd'](i, t, reduction=get_reduction(m)),
7659  check_sum_reduction=True,
7660  desc='2d',
7661  ),
7662  dict(
7663  module_name='NLLLoss',
7664  constructor_args_fn=lambda: (torch.rand(3),),
7665  input_size=(2, 3, 5, 5),
7666  target=torch.rand(2, 5, 5).mul(3).floor().long(),
7667  reference_fn=lambda i, t, m:
7668  loss_reference_fns['NLLLossNd'](i, t, weight=get_weight(m)),
7669  desc='2d_weights',
7670  ),
7671  dict(
7672  module_name='NLLLoss',
7673  constructor_args=(None, None, 1),
7674  input_size=(2, 3, 5, 5),
7675  target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(),
7676  reference_fn=lambda i, t, m:
7677  loss_reference_fns['NLLLossNd'](i, t, ignore_index=1),
7678  desc='2d_ignore_index',
7679  ),
7680  dict(
7681  module_name='NLLLoss',
7682  input_size=(2, 3, 5, 5, 2, 2),
7683  target_fn=lambda: torch.rand(2, 5, 5, 2, 2).mul(3).floor().long(),
7684  reference_fn=lambda i, t, m:
7685  loss_reference_fns['NLLLossNd'](i, t, reduction=get_reduction(m)),
7686  check_sum_reduction=True,
7687  desc='higher_dim',
7688  ),
7689  dict(
7690  module_name='NLLLoss',
7691  input_size=(2, 3, 5),
7692  target_fn=lambda: torch.rand(2, 5).mul(3).floor().long(),
7693  reference_fn=lambda i, t, m:
7694  loss_reference_fns['NLLLossNd'](i, t, reduction=get_reduction(m)),
7695  check_sum_reduction=True,
7696  desc='dim_is_3',
7697  ),
7698  dict(
7699  module_name='PoissonNLLLoss',
7700  input_size=(2, 3, 4, 5),
7701  target_fn=lambda: torch.randn(2, 3, 4, 5).floor_().abs_(),
7702  desc='no_full_loss', # without Stirling approximation
7703  ),
7704  dict(
7705  module_name='PoissonNLLLoss',
7706  constructor_args=(False,),
7707  input_fn=lambda: torch.randn(2, 3, 4, 5).abs_().add_(0.001),
7708  target_fn=lambda: torch.randn(2, 3, 4, 5).floor_().abs_(),
7709  desc='full_loss', # with Stirling approximation
7710  ),
7711  dict(
7712  module_name='L1Loss',
7713  input_size=(),
7714  target_size=(),
7715  reference_fn=lambda i, t, _: 1. / i.numel() * (i - t).abs().sum(),
7716  desc='scalar',
7717  ),
7718  dict(
7719  module_name='KLDivLoss',
7720  input_fn=lambda: torch.rand(()).log(),
7721  target_fn=lambda: torch.rand(()),
7722  reference_fn=lambda i, t, m:
7723  kldivloss_reference(i, t, get_reduction(m)),
7724  check_sum_reduction=True,
7725  desc='scalar',
7726  ),
7727  dict(
7728  module_name='MSELoss',
7729  input_size=(),
7730  target_size=(),
7731  reference_fn=lambda i, t, m: ((i - t).abs().pow(2).sum() /
7732  (i.numel() if get_reduction(m) == 'mean' else 1)),
7733  check_sum_reduction=True,
7734  desc='scalar'
7735  ),
7736  dict(
7737  module_name='MSELoss',
7738  input_fn=lambda: torch.ones(5, 68, 64, 64, dtype=torch.float) / 10,
7739  target_fn=lambda: torch.zeros(5, 68, 64, 64, dtype=torch.float),
7740  reference_fn=lambda i, t, m: ((i - t).abs().pow(2).sum() /
7741  (i.numel() if get_reduction(m) == 'mean' else 1)),
7742  check_forward_only=True,
7743  desc='prec',
7744  ),
7745  dict(
7746  module_name='BCELoss',
7747  constructor_args_fn=lambda: (torch.rand(()),),
7748  input_fn=lambda: torch.rand(()).clamp_(1e-2, 1 - 1e-2),
7749  target_fn=lambda: torch.rand(()).gt(0).double(),
7750  reference_fn=lambda i, t, m: -((t * i.log() + (1 - t) * (1 - i).log()) * get_weight(m)).sum() /
7751  (i.numel() if get_reduction(m) == 'mean' else 1),
7752  desc='scalar_weights',
7753  check_gradgrad=False,
7754  ),
7755  dict(
7756  module_name='HingeEmbeddingLoss',
7757  constructor_args=(0.5,),
7758  input_size=(),
7759  target_fn=lambda: torch.randn(()).gt(0).double().mul_(2).sub(1),
7760  desc='scalar_margin',
7761  check_sum_reduction=True,
7762  ),
7763  dict(
7764  module_name='SmoothL1Loss',
7765  input_size=(),
7766  target_size=(),
7767  check_sum_reduction=True,
7768  reference_fn=lambda i, t, m:
7769  smoothl1loss_reference(i, t, reduction=get_reduction(m)),
7770  desc='scalar',
7771  ),
7772  dict(
7773  module_name='MultiLabelSoftMarginLoss',
7774  constructor_args=(torch.rand(10),),
7775  input_fn=lambda: torch.randn(5, 10),
7776  target_fn=lambda: torch.rand(5, 10).mul(2).floor(),
7777  reference_fn=lambda i, t, m: -((t * i.sigmoid().log() + (1 - t) * (-i).sigmoid().log()) * get_weight(m)).sum() /
7778  (i.numel() if get_reduction(m) == 'mean' else i.size(1) if get_reduction(m) == 'sum' else 1),
7779  desc='weights',
7780  check_sum_reduction=True,
7781  check_gradgrad=False,
7782  ),
7783  dict(
7784  module_name='CTCLoss',
7785  constructor_args=(14,), # blank=14
7786  extra_args=([50, 50, 50], [30, 25, 20]), # input_lengths, target_lengths
7787  input_fn=lambda: torch.randn(50, 3, 15).log_softmax(2),
7788  target_fn=lambda: torch.randint(0, 14, (3, 30), dtype=torch.long),
7789  reference_fn=lambda i, t, il, tl, m:
7790  ctcloss_reference(i, t, il, tl, blank=14, reduction=get_reduction(m)),
7791  check_sum_reduction=True,
7792  check_gradgrad=False,
7793  check_half=False,
7794  ),
7795  dict(
7796  module_name='CTCLoss',
7797  desc='1d_target',
7798  constructor_args=(14,), # blank=14
7799  extra_args=([50, 50, 50], [30, 25, 20]), # input_lengths, target_lengths
7800  input_fn=lambda: torch.randn(50, 3, 15).log_softmax(2),
7801  target_fn=lambda: torch.randint(0, 14, (3, 30), dtype=torch.long),
7802  reference_fn=lambda i, t, il, tl, m:
7803  ctcloss_reference(i, t, il, tl, blank=14, reduction=get_reduction(m)),
7804  check_sum_reduction=True,
7805  check_gradgrad=False,
7806  check_half=False,
7807  ),
7808  dict(
7809  module_name='CTCLoss',
7810  desc='2d_int_target',
7811  constructor_args=(0,), # blank=0
7812  extra_args=([50, 50, 50], [30, 25, 20]), # input_lengths, target_lengths
7813  input_fn=lambda: torch.randn(50, 3, 15).log_softmax(2),
7814  target_fn=lambda: torch.randint(1, 15, (3, 30), dtype=torch.int),
7815  reference_fn=lambda i, t, il, tl, m:
7816  ctcloss_reference(i, t, il, tl, blank=0, reduction=get_reduction(m)),
7817  check_sum_reduction=True,
7818  check_gradgrad=False,
7819  check_half=False,
7820  convert_target=False,
7821  ),
7822  dict(
7823  module_name='CTCLoss',
7824  desc='2d_lengths_tensors',
7825  constructor_args=(0,), # blank=0
7826  extra_args=(torch.tensor([50, 50, 50]), torch.tensor([30, 25, 20])), # input_lengths, target_lengths
7827  input_fn=lambda: torch.randn(50, 3, 15).log_softmax(2),
7828  target_fn=lambda: torch.randint(1, 15, (3, 30), dtype=torch.int),
7829  reference_fn=lambda i, t, il, tl, m:
7830  ctcloss_reference(i, t, il, tl, blank=0, reduction=get_reduction(m)),
7831  check_sum_reduction=True,
7832  check_gradgrad=False,
7833  check_half=False,
7834  convert_target=False,
7835  ),
7836 ]
7837 
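Each dict above becomes a NewCriterionTest against the corresponding nn module. As a hand-written sanity check of the first entry, BCEWithLogitsLoss should agree with an explicit sigmoid followed by binary cross-entropy (a sketch, not part of the generated tests):

import torch
import torch.nn.functional as F

x = torch.randn(15, 10)
t = torch.rand(15, 10).gt(0.5).float()
lhs = F.binary_cross_entropy_with_logits(x, t)
rhs = F.binary_cross_entropy(torch.sigmoid(x), t)
assert torch.allclose(lhs, rhs, atol=1e-6)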
7838 
7839 for test_params in module_tests + new_module_tests:
7840  # TODO: CUDA is not implemented yet
7841  if 'constructor' not in test_params:
7842  name = test_params.pop('module_name')
7843  test_params['constructor'] = getattr(nn, name)
7844  decorator = test_params.pop('decorator', None)
7845  test = NewModuleTest(**test_params)
7846  add_test(test, decorator)
7847  if 'check_eval' in test_params:
7848  # create a new test that is identical but that sets module.training to False
7849  desc = test_params.get('desc', None)
7850  test_params['desc'] = 'eval' if desc is None else desc + '_eval'
7851 
7852  def gen_eval_constructor(constructor):
7853  def eval_constructor(*args, **kwargs):
7854  cons = constructor(*args, **kwargs)
7855  cons.training = False
7856  return cons
7857  eval_constructor.__name__ = constructor.__name__
7858  return eval_constructor
7859 
7860  test_params['constructor'] = gen_eval_constructor(test_params['constructor'])
7861  test = NewModuleTest(**test_params)
7862  add_test(test, decorator)
7863 
7864 for test_params in criterion_tests + new_criterion_tests:
7865  name = test_params.pop('module_name')
7866  test_params['constructor'] = getattr(nn, name)
7867  test = NewCriterionTest(**test_params)
7868  decorator = test_params.pop('decorator', None)
7869  add_test(test, decorator)
7870  if 'check_sum_reduction' in test_params:
7871  desc = test_params.get('desc', None)
7872  test_params['desc'] = 'sum_reduction' if desc is None else desc + '_sum_reduction'
7873 
7874  def gen_sum_reduction_constructor(constructor):
7875  def sum_reduction_constructor(*args, **kwargs):
7876  cons = constructor(*args, reduction='sum', **kwargs)
7877  return cons
7878  sum_reduction_constructor.__name__ = constructor.__name__
7879  return sum_reduction_constructor
7880 
7881  test_params['constructor'] = gen_sum_reduction_constructor(test_params['constructor'])
7882  test = NewCriterionTest(**test_params)
7883  add_test(test, decorator)
7884 
7885 
7886 class UnpoolingNet(nn.Module):
7887  def __init__(self, pool, unpool):
7888  super(UnpoolingNet, self).__init__()
7889  self.pool = pool
7890  self.unpool = unpool
7891 
7892  def forward(self, input):
7893  return self.unpool(*self.pool(input))
7894 
7895 
7896 add_test(NewModuleTest(
7897  constructor=lambda: UnpoolingNet(
7898  nn.MaxPool1d(2, return_indices=True),
7899  nn.MaxUnpool1d(2)),
7900  input_size=(1, 1, 4),
7901  fullname='MaxUnpool1d_net',))
7902 add_test(NewModuleTest(
7903  constructor=lambda: UnpoolingNet(
7904  nn.MaxPool2d(2, return_indices=True),
7905  nn.MaxUnpool2d(2)),
7906  input_size=(1, 1, 2, 4),
7907  fullname='MaxUnpool2d_net',))
7908 add_test(NewModuleTest(
7909  constructor=lambda: UnpoolingNet(
7910  nn.MaxPool3d(2, return_indices=True),
7911  nn.MaxUnpool3d(2)),
7912  input_size=(1, 1, 2, 4, 6),
7913  fullname='MaxUnpool3d_net',
7914  check_gradgrad=False,))
7915 
7916 
7917 class _AdaptiveLogSoftmaxWithLoss(nn.AdaptiveLogSoftmaxWithLoss):
7918  def __call__(self, input):
7919  t = torch.tensor([0, 1, 4, 8]).to(input.device)
7920  return nn.AdaptiveLogSoftmaxWithLoss.__call__(self, input, t).output
7921 
7922 add_test(NewModuleTest(
7923  constructor=lambda: _AdaptiveLogSoftmaxWithLoss(16, 10, [2, 6]),
7924  input_size=(4, 16),
7925  fullname='AdaptiveLogSoftmax'))
7926 
7927 
7928 # The following are helpers for TestNN.test_affine_*
7929 if torch.cuda.is_available():
7930  def device_():
7931  return ['cpu', 'cuda']
7932 else:
7933  def device_():
7934  return ['cpu']
7935 
7936 
7937 def angle_rad_():
7938  return [r * math.pi * 2 for r in [0.0, 0.5, 0.25, 0.125, random.random()]]
7939 
7940 
7941 def axis_vector_():
7942  t = (random.random(), random.random(), random.random())
7943  l = sum(x ** 2 for x in t) ** 0.5
7944 
7945  return [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0), tuple(x / l for x in t)]
7946 
7947 
7948 def input_size2d_():
7949  return [[1, 1, 3, 5], [1, 1, 3, 3], [1, 1, 4, 4], [1, 1, 3, 4]]
7950 
7951 
7952 def output_size2d_():
7953  return [[1, 1, 5, 3], [1, 1, 3, 5], [1, 1, 4, 3], [1, 1, 5, 5], [1, 1, 6, 6]]
7954 
7955 
7956 def input_size2dsq_():
7957  return [[1, 1, 2, 2], [1, 1, 3, 3], [1, 1, 4, 4], [1, 1, 6, 6]]
7958 
7959 
7960 def output_size2dsq_():
7961  return [[1, 1, 2, 2], [1, 1, 3, 3], [1, 1, 4, 4], [1, 1, 5, 5], [1, 1, 6, 6]]
7962 
7963 
7964 def input_size3d_():
7965  return [[1, 1, 2, 2, 2], [1, 1, 2, 3, 4], [1, 1, 3, 3, 3], [1, 1, 4, 4, 4], [1, 1, 3, 4, 5]]
7966 
7967 
7968 def input_size3dsq_():
7969  return [[1, 1, 2, 2, 2], [1, 1, 3, 3, 3], [1, 1, 4, 4, 4], [1, 1, 6, 6, 6]]
7970 
7971 
7972 def output_size3dsq_():
7973  return [[1, 1, 2, 2, 2], [1, 1, 3, 3, 3], [1, 1, 4, 4, 4], [1, 1, 5, 5, 5], [1, 1, 6, 6, 6]]
7974 
7975 
7976 def output_size3d_():
7977  return [[1, 1, 2, 2, 2], [1, 1, 3, 3, 3], [1, 1, 3, 4, 5], [1, 1, 4, 3, 2], [1, 1, 5, 5, 5], [1, 1, 6, 6, 6]]
7978 
7979 
7980 def _buildEquivalentAffineTransforms2d(device, input_size, output_size, angle_rad):
7981  input_center = [(x - 1) / 2.0 for x in input_size]
7982  output_center = [(x - 1) / 2.0 for x in output_size]
7983 
7984  s = math.sin(angle_rad)
7985  c = math.cos(angle_rad)
7986 
7987  intrans_ary = np.array([
7988  [1, 0, input_center[2]],
7989  [0, 1, input_center[3]],
7990  [0, 0, 1],
7991  ], dtype=np.float64)
7992 
7993  inscale_ary = np.array([
7994  [input_center[2], 0, 0],
7995  [0, input_center[3], 0],
7996  [0, 0, 1],
7997  ], dtype=np.float64)
7998 
7999  rotation_ary = np.array([
8000  [c, -s, 0],
8001  [s, c, 0],
8002  [0, 0, 1],
8003  ], dtype=np.float64)
8004 
8005  outscale_ary = np.array([
8006  [1.0 / output_center[2], 0, 0],
8007  [0, 1.0 / output_center[3], 0],
8008  [0, 0, 1],
8009  ], dtype=np.float64)
8010 
8011  outtrans_ary = np.array([
8012  [1, 0, -output_center[2]],
8013  [0, 1, -output_center[3]],
8014  [0, 0, 1],
8015  ], dtype=np.float64)
8016 
8017  reorder_ary = np.array([
8018  [0, 1, 0],
8019  [1, 0, 0],
8020  [0, 0, 1],
8021  ], dtype=np.float64)
8022 
8023  transform_ary = np.dot(np.dot(np.dot(np.dot(
8024  intrans_ary,
8025  inscale_ary),
8026  rotation_ary.T),
8027  outscale_ary),
8028  outtrans_ary)
8029  grid_ary = np.dot(np.dot(np.dot(reorder_ary, rotation_ary.T), outscale_ary), outtrans_ary)
8030 
8031  transform_tensor = torch.from_numpy(rotation_ary).to(device, torch.float32)
8032  transform_tensor = transform_tensor[:2].unsqueeze(0)
8033 
8034  return transform_tensor, transform_ary, grid_ary
8035 
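The composition above (translate to centre, scale to pixels, rotate, rescale, translate back) maps output pixel coordinates to input pixel coordinates in homogeneous form; for a square image with equal input and output sizes the two scale matrices are scalar multiples of the identity and cancel, leaving T_in @ R.T @ T_out. A small numeric check of that reduced case (values are illustrative):

import math
import numpy as np

angle = 0.5 * math.pi
c, s = math.cos(angle), math.sin(angle)
center = 2.0                                    # (5 - 1) / 2 for a 5x5 image
T_in = np.array([[1, 0, center], [0, 1, center], [0, 0, 1.0]])
R = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1.0]])
T_out = np.array([[1, 0, -center], [0, 1, -center], [0, 0, 1.0]])
M = T_in @ R.T @ T_out                          # output pixel -> input pixel
assert np.allclose(M @ np.array([center, center, 1.0]), [center, center, 1.0])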
8036 
8037 def _buildEquivalentAffineTransforms3d(device, input_size, output_size, angle_rad, axis_vector):
8038  input_center = [(x - 1) / 2.0 for x in input_size]
8039  output_center = [(x - 1) / 2.0 for x in output_size]
8040 
8041  s = math.sin(angle_rad)
8042  c = math.cos(angle_rad)
8043  c1 = 1 - c
8044 
8045  intrans_ary = np.array([
8046  [1, 0, 0, input_center[2]],
8047  [0, 1, 0, input_center[3]],
8048  [0, 0, 1, input_center[4]],
8049  [0, 0, 0, 1],
8050  ], dtype=np.float64)
8051 
8052  inscale_ary = np.array([
8053  [input_center[2], 0, 0, 0],
8054  [0, input_center[3], 0, 0],
8055  [0, 0, input_center[4], 0],
8056  [0, 0, 0, 1],
8057  ], dtype=np.float64)
8058 
8059  l, m, n = axis_vector
8060  scipyRotation_ary = np.array([
8061  [l * l * c1 + c, m * l * c1 - n * s, n * l * c1 + m * s, 0],
8062  [l * m * c1 + n * s, m * m * c1 + c, n * m * c1 - l * s, 0],
8063  [l * n * c1 - m * s, m * n * c1 + l * s, n * n * c1 + c, 0],
8064  [0, 0, 0, 1],
8065  ], dtype=np.float64)
8066 
8067  z, y, x = axis_vector
8068  torchRotation_ary = np.array([
8069  [x * x * c1 + c, y * x * c1 - z * s, z * x * c1 + y * s, 0],
8070  [x * y * c1 + z * s, y * y * c1 + c, z * y * c1 - x * s, 0],
8071  [x * z * c1 - y * s, y * z * c1 + x * s, z * z * c1 + c, 0],
8072  [0, 0, 0, 1],
8073  ], dtype=np.float64)
8074 
8075  outscale_ary = np.array([
8076  [1.0 / output_center[2], 0, 0, 0],
8077  [0, 1.0 / output_center[3], 0, 0],
8078  [0, 0, 1.0 / output_center[4], 0],
8079  [0, 0, 0, 1],
8080  ], dtype=np.float64)
8081 
8082  outtrans_ary = np.array([
8083  [1, 0, 0, -output_center[2]],
8084  [0, 1, 0, -output_center[3]],
8085  [0, 0, 1, -output_center[4]],
8086  [0, 0, 0, 1],
8087  ], dtype=np.float64)
8088 
8089  reorder_ary = np.array([
8090  [0, 0, 1, 0],
8091  [0, 1, 0, 0],
8092  [1, 0, 0, 0],
8093  [0, 0, 0, 1],
8094  ], dtype=np.float64)
8095 
8096  transform_ary = np.dot(np.dot(np.dot(np.dot(
8097  intrans_ary,
8098  inscale_ary),
8099  np.linalg.inv(scipyRotation_ary)),
8100  outscale_ary),
8101  outtrans_ary)
8102  grid_ary = np.dot(np.dot(np.dot(reorder_ary, np.linalg.inv(scipyRotation_ary)), outscale_ary), outtrans_ary)
8103 
8104  transform_tensor = torch.from_numpy(torchRotation_ary).to(device, torch.float32)
8105  transform_tensor = transform_tensor[:3].unsqueeze(0)
8106 
8107  return transform_tensor, transform_ary, grid_ary
8108 # end TestNN.test_affine_* helpers
8109 
8110 
8111 if __name__ == '__main__':
8112  run_tests()