Caffe2 - Python API
A deep learning, cross platform ML framework
test_utils.py
1 from __future__ import print_function
2 import sys
3 import os
4 import re
5 import math
6 import shutil
7 import random
8 import tempfile
9 import unittest
10 import traceback
11 import torch
12 import torch.nn as nn
13 import torch.utils.data
14 import torch.cuda
15 import warnings
16 from torch.utils.checkpoint import checkpoint, checkpoint_sequential
17 import torch.hub as hub
18 from torch.autograd._functions.utils import prepare_onnx_paddings
19 from torch.autograd._functions.utils import check_onnx_broadcast
20 from common_utils import IS_WINDOWS, IS_PPC, skipIfRocm, load_tests
21 
# Re-binding load_tests (imported from common_utils) silences flake
# warnings about an unused name; it is used to automatically filter tests
# for sharding on sandcastle.
load_tests = load_tests

# torchvision is optional: remember whether the import worked so tests
# that need it can be skipped.
try:
    import torchvision.models as models
except ImportError:
    HAS_TORCHVISION = False
else:
    HAS_TORCHVISION = True


skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")

HAS_CUDA = torch.cuda.is_available()
36 
37 from common_utils import TestCase, run_tests, download_file
38 
39 
class RandomDatasetMock(object):
    """Dataset stub that returns a freshly drawn random pair on every lookup.

    Each item is a 2-element tensor: the first value is drawn with
    ``torch.rand``, the second with ``random.uniform``.
    """

    def __getitem__(self, index):
        # ``index`` is not used; every call draws new values.
        torch_value = torch.rand(1).item()
        python_value = random.uniform(0, 1)
        return torch.tensor([torch_value, python_value])

    def __len__(self):
        return 1000
47 
48 
50 
def _check_checkpoint_sequential(
    self,
    model,
    module_lists_to_compare,
    num_chunks,
    *inputs
):
    """Compare checkpoint_sequential runs against the uncheckpointed model.

    Runs ``model`` directly on ``inputs`` to get reference values, then runs
    ``checkpoint_sequential`` on each entry of ``module_lists_to_compare``
    and asserts that the output, the input gradients and the parameter
    gradients all equal the reference.

    Args:
        model: module called directly for the reference pass; its
            ``named_parameters()`` provide the parameter gradients that
            are compared.
        module_lists_to_compare: iterable of objects passed as the first
            argument of ``checkpoint_sequential``.
        num_chunks: chunk count forwarded to ``checkpoint_sequential``.
        *inputs: tensors fed to the model. Their ``.grad`` is read after
            backward, so it must be populated by the backward pass.
    """
    # Reference pass, not checkpointed. ``inputs`` is always a tuple here
    # because it is collected through ``*inputs``.
    out = model(*inputs)
    out_not_checkpointed = out.data.clone()
    model.zero_grad()
    out.sum().backward()
    grad_not_checkpointed = {
        name: param.grad.data.clone()
        for name, param in model.named_parameters()
    }
    input_grad_not_checkpointed = [i.grad.data.clone() for i in inputs]

    for model_to_compare in module_lists_to_compare:
        # Fresh leaf tensors so gradients of this run are not mixed with
        # the gradients accumulated on the original inputs.
        detached_inputs = [i.detach() for i in inputs]
        for detached in detached_inputs:
            detached.requires_grad = True

        # pass list of modules to checkpoint
        out = checkpoint_sequential(model_to_compare, num_chunks, *detached_inputs)
        out_checkpointed = out.data.clone()
        # Clear parameter gradients left over from the previous backward.
        model.zero_grad()
        out.sum().backward()
        grad_checkpointed = {
            name: param.grad.data.clone()
            for name, param in model.named_parameters()
        }
        input_grad_checkpointed = [d.grad.data.clone() for d in detached_inputs]

        # compare outputs as well as the gradients of input and parameters
        self.assertEqual(out_checkpointed, out_not_checkpointed)
        for i, j in zip(input_grad_not_checkpointed, input_grad_checkpointed):
            self.assertEqual(i, j)
        for name in grad_checkpointed:
            self.assertEqual(grad_checkpointed[name], grad_not_checkpointed[name])
96 
def test_checkpoint_trigger(self):
    """Check that checkpointing is triggered by counting forward passes.

    Ten counting pass-through modules are run through
    ``checkpoint_sequential`` with 2 chunks. Every module must have run
    forward once after the call; after backward, the first half must have
    run forward a second time while the second half must not.
    """

    class CountingIdentity(nn.Module):
        """Returns its input unchanged and counts forward calls."""

        def __init__(self):
            super(CountingIdentity, self).__init__()
            self.counter = 0

        def forward(self, input_var):
            self.counter += 1
            return input_var

    # checkpointed
    layers = [CountingIdentity() for _ in range(10)]
    for layer in layers:
        self.assertEqual(layer.counter, 0)

    input_var = torch.randn(3, 4, requires_grad=True)
    out = checkpoint_sequential(layers, 2, input_var)
    for layer in layers:
        self.assertEqual(layer.counter, 1)

    out.sum().backward()
    half = len(layers) // 2
    for layer in layers[:half]:
        self.assertEqual(layer.counter, 2)
    for layer in layers[half:]:
        self.assertEqual(layer.counter, 1)
124 
def test_checkpoint_valid(self):
    """Check that differentiating a checkpointed output via autograd.grad raises.

    The output of ``checkpoint_sequential`` is handed to
    ``torch.autograd.grad`` with ``create_graph=True``; the call is expected
    to raise a RuntimeError matching "Checkpointing is not compatible".
    """
    model = nn.Sequential(
        nn.Linear(100, 50),
        nn.ReLU(),
        nn.Linear(50, 20),
        nn.ReLU(),
        nn.Linear(20, 5),
        nn.ReLU()
    )

    input_var = torch.randn(1, 100, requires_grad=True)

    # checkpointed
    chunks = 2
    modules = list(model.children())
    out = checkpoint_sequential(modules, chunks, input_var)
    with self.assertRaisesRegex(RuntimeError, "Checkpointing is not compatible"):
        torch.autograd.grad(
            outputs=[out], grad_outputs=[torch.ones(1, 5)], inputs=[input_var], create_graph=True
        )
145 
def test_checkpoint(self):
    """Compare an nn.Sequential model with its checkpointed counterparts.

    ``checkpoint_sequential`` is run both on the list of child modules and
    on the ``nn.Sequential`` instance itself, using 2 chunks and a single
    ``(1, 100)`` random input that requires grad.
    """
    model = nn.Sequential(
        nn.Linear(100, 50),
        nn.ReLU(),
        nn.Linear(50, 20),
        nn.ReLU(),
        nn.Linear(20, 5),
        nn.ReLU()
    )

    # Compare uncheckpointed model with its checkpointed counterparts
    # In addition to running checkpoint_sequential on the nn.Sequential
    # instance, we also run the function on the list of functions within
    # the module.
    self._check_checkpoint_sequential(
        model,
        [list(model.children()), model],
        2,
        torch.randn(1, 100, requires_grad=True)
    )
166 
def test_checkpoint_module_list_multiple_args(self):
    """Compare an nn.ModuleList-based model taking two inputs with its
    checkpointed counterparts.

    ``checkpoint_sequential`` is run both on the list of modules inside the
    ``nn.ModuleList`` and on the ``nn.ModuleList`` itself, with 2 chunks.
    """

    class ModuleListNet(nn.Module):
        """Applies the layers of an nn.ModuleList one after another."""

        def __init__(self):
            super(ModuleListNet, self).__init__()
            module_list = [
                nn.Bilinear(100, 60, 50),
                nn.ReLU(),
                nn.Linear(50, 20),
                nn.ReLU(),
                nn.Linear(20, 5),
                nn.ReLU(),
            ]
            self.module_list = nn.ModuleList(module_list)

        def forward(self, *inputs):
            for layer in self.module_list:
                # A tuple is unpacked into positional arguments; anything
                # else (a previous layer's output) is passed as one argument.
                if isinstance(inputs, tuple):
                    inputs = layer(*inputs)
                else:
                    inputs = layer(inputs)
            return inputs

    model = ModuleListNet()

    # Compare uncheckpointed model with its checkpointed counterparts
    # In addition to running checkpoint_sequential on the nn.ModuleList
    # instance, we also run the function on the list of functions within
    # the ModuleList.
    self._check_checkpoint_sequential(
        model,
        [list(model.module_list.children()), model.module_list],
        2,
        torch.randn(1, 100, requires_grad=True),
        torch.randn(1, 60, requires_grad=True)
    )
202 
def test_checkpoint_rng_cpu(self):
    """Checkpointed and plain dropout runs must give equal input gradients
    when both start from the same CPU RNG state.

    Repeated 5 times with a fresh 20000-element CPU input.
    """
    for _ in range(5):
        inp = torch.randn(20000, device='cpu').requires_grad_()
        first_dropout = torch.nn.Dropout()
        second_dropout = torch.nn.Dropout()

        def run_fn(tensor):
            return second_dropout(tensor)

        # Snapshot the RNG so the second run can replay the same draws.
        saved_state = torch.get_rng_state()

        # Run 1: second dropout goes through checkpoint().
        result = checkpoint(run_fn, first_dropout(inp))
        result.sum().backward()
        grad_with_checkpointing = inp.grad

        torch.set_rng_state(saved_state)

        # Drop the accumulated gradient before the second backward.
        inp.grad = None

        # Run 2: same computation without checkpoint().
        result = run_fn(first_dropout(inp))
        result.sum().backward()
        grad_no_checkpointing = inp.grad

        self.assertEqual(grad_with_checkpointing, grad_no_checkpointing)
229 
@unittest.skipIf(not HAS_CUDA, 'No CUDA')
def test_checkpoint_rng_cuda(self):
    """Checkpointed and plain dropout runs must give equal input gradients
    when both start from the same CUDA RNG state.

    Repeated 5 times with a fresh 20000-element CUDA input.
    """
    for _ in range(5):
        inp = torch.randn(20000, device='cuda').requires_grad_()
        first_dropout = torch.nn.Dropout()
        second_dropout = torch.nn.Dropout()

        def run_fn(tensor):
            return second_dropout(tensor)

        # Snapshot the CUDA RNG so the second run can replay the same draws.
        saved_state = torch.cuda.get_rng_state()

        # Run 1: second dropout goes through checkpoint().
        result = checkpoint(run_fn, first_dropout(inp))
        result.sum().backward()
        grad_with_checkpointing = inp.grad

        torch.cuda.set_rng_state(saved_state)

        # Drop the accumulated gradient before the second backward.
        inp.grad = None

        # Run 2: same computation without checkpoint().
        result = run_fn(first_dropout(inp))
        result.sum().backward()
        grad_no_checkpointing = inp.grad

        self.assertEqual(grad_with_checkpointing, grad_no_checkpointing)
257 
def test_checkpoint_non_tensor(self):
    """Run checkpoint() with a ``None`` argument next to a tensor and
    backpropagate through the result."""

    def run_fn(tensor1, tensor2):
        # With no second operand the first tensor is returned as is.
        return tensor1 if tensor2 is None else tensor1 + tensor2

    input_var = torch.randn(1, 100, requires_grad=True)
    result = checkpoint(run_fn, input_var, None)
    result.sum().backward()
268 
269 
def setUp(self):
    """Create the fixture: a random (5, 3, 3, 2) tensor used as the dataset
    and a batch size of 3."""
    self.batch_size = 3
    self.dataset = torch.randn(5, 3, 3, 2)
274 
def test_random_seed(self):
    """Seeding torch with the same value must give the same first batch
    from a shuffled, 4-worker DataLoader over RandomDatasetMock."""

    def first_batch():
        loader = torch.utils.data.DataLoader(
            RandomDatasetMock(),
            batch_size=2,
            num_workers=4,
            shuffle=True)
        return next(iter(loader))

    batches = []
    for _ in range(2):
        # Re-seed before every run so both start from the same state.
        torch.manual_seed(2018)
        batches.append(first_batch())

    self.assertEqual(batches[0], batches[1])
288 
def test_single_keep(self):
    """With num_workers=0 and drop_last=False the fixture dataset must
    yield 2 batches."""
    loader = torch.utils.data.DataLoader(
        self.dataset,
        batch_size=self.batch_size,
        num_workers=0,
        drop_last=False)
    batches = list(iter(loader))
    self.assertEqual(len(batches), 2)
296 
def test_single_drop(self):
    """With num_workers=0 and drop_last=True the fixture dataset must
    yield 1 batch."""
    loader = torch.utils.data.DataLoader(
        self.dataset,
        batch_size=self.batch_size,
        num_workers=0,
        drop_last=True)
    batches = list(iter(loader))
    self.assertEqual(len(batches), 1)
304 
@unittest.skip("FIXME: Intermittent CUDA out-of-memory error on Windows and time-out under ASAN")
def test_multi_keep(self):
    """With num_workers=2 and drop_last=False the fixture dataset must
    yield 2 batches (currently skipped, see the decorator)."""
    loader = torch.utils.data.DataLoader(
        self.dataset,
        batch_size=self.batch_size,
        num_workers=2,
        drop_last=False)
    batches = list(iter(loader))
    self.assertEqual(len(batches), 2)
313 
def test_multi_drop(self):
    """With num_workers=2 and drop_last=True the fixture dataset must
    yield 1 batch."""
    loader = torch.utils.data.DataLoader(
        self.dataset,
        batch_size=self.batch_size,
        num_workers=2,
        drop_last=True)
    batches = list(iter(loader))
    self.assertEqual(len(batches), 1)
321 
322 
# Absolute path of the directory that contains this file.
test_dir = os.path.abspath(
    os.path.dirname(str(__file__))
)
324 
325 
def test_deprecated(self):
    """Importing from torch.utils.ffi must raise the deprecation ImportError."""
    expected_message = "torch.utils.ffi is deprecated. Please use cpp extensions instead."
    with self.assertRaisesRegex(ImportError, expected_message):
        # The import itself is the operation under test.
        from torch.utils.ffi import create_extension  # noqa: F401
330 
331 
332 @unittest.skipIf('SKIP_TEST_BOTTLENECK' in os.environ.keys(), 'SKIP_TEST_BOTTLENECK is set')
def _run(self, command):
    """Returns (return-code, stdout, stderr)"""
    import subprocess
    from common_utils import PY3

    # ``command`` is a single string executed through the shell.
    proc = subprocess.Popen(  # noqa
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if PY3:
        # Both streams are decoded as ASCII when PY3 is set.
        stdout, stderr = stdout.decode("ascii"), stderr.decode("ascii")
    return (proc.returncode, stdout, stderr)
347 
def _run_bottleneck(self, test_file, scriptargs=''):
    """Run ``torch.utils.bottleneck`` on a script located next to this file.

    Args:
        test_file: script path relative to this file's directory.
        scriptargs: extra command-line text appended after the script path.

    Returns:
        The (return-code, stdout, stderr) tuple produced by ``self._run``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    script_path = '{}/{}'.format(here, test_file)
    # Only non-empty arguments get a separating space in front of them.
    arg_suffix = ' {}'.format(scriptargs) if scriptargs != '' else scriptargs
    command = '{} -m torch.utils.bottleneck {}{}'.format(
        sys.executable, script_path, arg_suffix)
    rc, out, err = self._run(command)
    return rc, out, err
356 
def _check_run_args(self):
    """Check that script arguments reach ``bottleneck/test_args.py``.

    Running it without arguments must exit with code 2; running it with
    ``--foo foo --bar bar`` must exit with code 0.
    """
    # (script arguments, expected return code, failure message)
    scenarios = (
        # Check that this fails due to missing args
        ('', 2, 'Missing args should error'),
        # This should succeed
        ('--foo foo --bar bar', 0, 'Should pass args to script'),
    )
    for scriptargs, expected_rc, description in scenarios:
        rc, out, err = self._run_bottleneck('bottleneck/test_args.py', scriptargs)
        self.assertEqual(rc, expected_rc, None, self._fail_msg(description, out + err))
365 
366  def _fail_msg(self, msg, output):
367  return '{}, output was:\n{}'.format(msg, output)
368 
def _check_environment_summary(self, output):
    """Assert that ``output`` has an 'Environment Summary' heading followed,
    within a few lines, by a ``PyTorch <major>.<minor>`` version line.

    Args:
        output: text captured from the bottleneck run.
    """
    results = re.search('Environment Summary', output)
    self.assertIsNotNone(results, self._fail_msg('Should have Environment Summary', output))

    # Up to five lines away from the heading, there should be the version number
    results = re.search(r'Environment Summary.*(\n.*){,5}\nPyTorch \d+\.\d+', output)
    self.assertIsNotNone(results, self._fail_msg('Should have PyTorch version', output))
376 
def _check_cprof_summary(self, output):
    """Assert that ``output`` has a 'cProfile output' section followed,
    6 to 50 lines later, by the 'autograd profiler output' heading.

    Args:
        output: text captured from the bottleneck run.
    """
    heading = re.search('cProfile output', output)
    self.assertIsNotNone(heading, self._fail_msg('Should have cProfile output', output))

    # This assumes that after the cProfile output section we have
    # the autograd profiler output
    section_pattern = r'cProfile output.*(\n.*){6,50}\n.*autograd profiler output'
    section = re.search(section_pattern, output)
    failure_text = self._fail_msg(
        'Distance between cProfile and autograd prof out not in [6, 50] lines', output)
    self.assertIsNotNone(section, failure_text)
386 
def _check_autograd_summary(self, output):
    """Assert that ``output`` has an 'autograd profiler output' section with
    at least 6 lines after the heading.

    Args:
        output: text captured from the bottleneck run.
    """
    heading = re.search('autograd profiler output', output)
    self.assertIsNotNone(heading, self._fail_msg('Should have autograd profiler output', output))

    # This assumes that after the autograd profiler output is the end of the
    # output.
    # NOTE: the pattern is not anchored at the end of the text, so only the
    # lower bound of 6 lines is actually enforced.
    section_pattern = r'autograd profiler output.*(\n.*){6,100}'
    section = re.search(section_pattern, output)
    failure_text = self._fail_msg(
        'Distance between autograd prof output and end of output not in [6, 100] lines', output)
    self.assertIsNotNone(section, failure_text)
396 
def _check_cuda(self, output):
    """Assert that 'CUDA mode' appears in ``output`` exactly when HAS_CUDA
    is true.

    Args:
        output: text captured from the bottleneck run.
    """
    cuda_mention = re.search('CUDA mode', output)
    if not HAS_CUDA:
        self.assertIsNone(cuda_mention, self._fail_msg('Should not tell users about CUDA', output))
    else:
        self.assertIsNotNone(cuda_mention, self._fail_msg('Should tell users CUDA', output))
404 
@unittest.skipIf(HAS_CUDA, 'CPU-only test')
def test_bottleneck_cpu_only(self):
    """Run bottleneck on ``bottleneck/test.py`` and check every section of
    its report (skipped when CUDA is available)."""
    rc, out, err = self._run_bottleneck('bottleneck/test.py')
    # Pass the text as ``message`` so it is not taken for the precision
    # argument that comes third in this assertEqual.
    self.assertEqual(rc, 0, message='Run failed with\n{}'.format(err))

    self._check_run_args()
    self._check_environment_summary(out)
    self._check_autograd_summary(out)
    self._check_cprof_summary(out)
    self._check_cuda(out)
415 
@unittest.skipIf(not HAS_CUDA, 'No CUDA')
@skipIfRocm
def test_bottleneck_cuda(self):
    """Run bottleneck on ``bottleneck/test_cuda.py`` and check every section
    of its report (skipped without CUDA)."""
    rc, out, err = self._run_bottleneck('bottleneck/test_cuda.py')
    # Pass the text as ``message`` so it is not taken for the precision
    # argument that comes third in this assertEqual.
    self.assertEqual(rc, 0, message='Run failed with\n{}'.format(err))

    self._check_run_args()
    self._check_environment_summary(out)
    self._check_autograd_summary(out)
    self._check_cprof_summary(out)
    self._check_cuda(out)
427 
428 
429 from torch.utils.collect_env import get_pretty_env_info
430 
431 
def test_smoke(self):
    """The pretty environment report must contain at least 17 newlines."""
    report = get_pretty_env_info()
    newline_count = report.count('\n')
    self.assertTrue(newline_count >= 17)
436 
437 
def test_prepare_onnx_paddings(self):
    """prepare_onnx_paddings for 3 dimensions and pad [1, 2, 3, 4] must
    return [0, 3, 1, 0, 4, 2]."""
    sizes = [2, 3, 4]
    pad = [1, 2, 3, 4]
    expected = [0, 3, 1, 0, 4, 2]
    self.assertEqual(prepare_onnx_paddings(len(sizes), pad), expected)
444 
def test_check_onnx_broadcast(self):
    """Exercise check_onnx_broadcast over eight shape combinations.

    Every case states whether a broadcast is expected and whether the call
    is expected to raise ValueError.
    """

    def try_check_onnx_broadcast(dims1, dims2, expect_broadcast, expect_fail):
        # A ValueError counts as a failure and leaves ``broadcast`` True.
        try:
            broadcast = check_onnx_broadcast(dims1, dims2)
            fail = False
        except ValueError:
            broadcast = True
            fail = True
        self.assertEqual(broadcast, expect_broadcast)
        self.assertEqual(fail, expect_fail)

    # (dims1, dims2, expect_broadcast, expect_fail)
    cases = [
        # Case 1, check the case when len(dims1) < len(dims2) and numel(dims2) > 1
        ([3, 4], [2, 3, 4], True, True),
        # Case 2, check the case when len(dims1) < len(dims2) and numel(dims2) == 1
        ([3, 4], [1, 1, 1], True, False),
        # Case 3, check the case when len(dims1) > len(dims2) and numel(dims2) == 1
        ([1, 1], [1], True, False),
        # Case 4, check the case when len(dims1) > len(dims2) and dims1[x:] == dims2
        ([2, 3, 4], [3, 4], True, False),
        # Case 5, check the case when len(dims1) > len(dims2), but dims1[x:] != dims2
        ([2, 3, 4], [1, 4], True, True),
        # Case 6, check the equal case, no broadcast
        ([3, 4], [3, 4], False, False),
        # Case 7, check the case when len(dims1) == len(dims2), but dims1 != dims2
        ([3, 4], [1, 4], True, True),
        # Case 8, check the case when len(dims1) == len(dims2) and numel(s2) == 1
        ([3, 4], [1, 1], True, False),
    ]
    for dims1, dims2, expect_broadcast, expect_fail in cases:
        try_check_onnx_broadcast(dims1, dims2, expect_broadcast, expect_fail)
496 
497 
@classmethod
@skipIfNoTorchVision
def setUpClass(cls):
    """Store the state dict of torchvision's pretrained resnet18 on the
    class as ``resnet18_pretrained``."""
    build_resnet18 = models.__dict__['resnet18']
    reference_model = build_resnet18(pretrained=True)
    cls.resnet18_pretrained = reference_model.state_dict()
503 
@skipIfNoTorchVision
def test_load_from_github(self):
    """A resnet18 loaded through hub from 'pytorch/vision' must have the
    same state dict as the reference stored on the class."""
    loaded = hub.load('pytorch/vision', 'resnet18', pretrained=True)
    loaded_state = loaded.state_dict()
    self.assertEqual(self.resnet18_pretrained, loaded_state)
511 
@skipIfNoTorchVision
def test_set_dir(self):
    """After hub.set_dir(<temp dir>), loading resnet18 must match the
    reference state dict and leave a ``vision_master`` directory inside
    the temp dir. That directory is removed afterwards."""
    temp_dir = tempfile.gettempdir()
    checkout_dir = os.path.join(temp_dir, 'vision_master')
    hub.set_dir(temp_dir)
    try:
        hub_model = hub.load(
            'pytorch/vision',
            'resnet18',
            pretrained=True)
        self.assertEqual(self.resnet18_pretrained, hub_model.state_dict())
        # assertTrue instead of a bare assert: the check must not vanish
        # when Python runs with -O.
        self.assertTrue(os.path.exists(checkout_dir))
    finally:
        # Clean up even when an assertion above fails, so the checkout
        # does not linger in the shared temp directory.
        if os.path.exists(checkout_dir):
            shutil.rmtree(checkout_dir)
523 
524 
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    run_tests()
def assertEqual(self, x, y, prec=None, message='', allow_inf=False)
def _check_environment_summary(self, output)
Definition: test_utils.py:369
Module caffe2.python.checkpoint.
def _check_autograd_summary(self, output)
Definition: test_utils.py:387
def _check_checkpoint_sequential(self, model, module_lists_to_compare, num_chunks, inputs)
Definition: test_utils.py:60
def is_available()
Definition: __init__.py:45
Definition: model.py:1
def _fail_msg(self, msg, output)
Definition: test_utils.py:366
Definition: hub.py:1
def _run(self, command)
Definition: test_utils.py:334
def _run_bottleneck(self, test_file, scriptargs='')
Definition: test_utils.py:348
def _check_cuda(self, output)
Definition: test_utils.py:397
def _check_cprof_summary(self, output)
Definition: test_utils.py:377
def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False, only_inputs=True, allow_unused=False)
Definition: __init__.py:97