Caffe2 - Python API
A deep learning, cross-platform ML framework
test_optim.py
import math
import unittest
import functools
from copy import deepcopy
from bisect import bisect_right
import torch
from torch._six import inf
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import SGD
from torch.autograd import Variable
from torch import sparse
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, \
    ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau, _LRScheduler
from common_utils import TestCase, run_tests, TEST_WITH_UBSAN, load_tests

# load_tests from common_utils is used to automatically filter tests for
# sharding on sandcastle. This line silences flake warnings
load_tests = load_tests


def rosenbrock(tensor):
    x, y = tensor
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2


def drosenbrock(tensor):
    x, y = tensor
    return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))

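# For reference, drosenbrock is the analytic gradient of rosenbrock:
#     d/dx = -400 * x * (y - x ** 2) - 2 * (1 - x)
#     d/dy =  200 * (y - x ** 2)
# A quick hand check at the minimum (x, y) = (1, 1) gives (0, 0), which is why the
# sparse tests below measure progress toward the solution torch.Tensor([1, 1]).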

class TestOptim(TestCase):
    def _test_rosenbrock_sparse(self, constructor, scheduler_constructors=None,
                                sparse_only=False):
        if scheduler_constructors is None:
            scheduler_constructors = []
        params_t = torch.Tensor([1.5, 1.5])

        params = Variable(params_t, requires_grad=True)
        optimizer = constructor([params])
        schedulers = []
        for scheduler_constructor in scheduler_constructors:
            schedulers.append(scheduler_constructor(optimizer))

        if not sparse_only:
            params_c = Variable(params_t.clone(), requires_grad=True)
            optimizer_c = constructor([params_c])

        solution = torch.Tensor([1, 1])
        initial_dist = params.data.dist(solution)

        def eval(params, sparse_grad, w):
            # Depending on w, provide only the x or y gradient
            optimizer.zero_grad()
            loss = rosenbrock(params)
            loss.backward()
            grad = drosenbrock(params.data)
            # NB: We torture test the optimizer by returning an
            # uncoalesced sparse tensor
            if w:
                i = torch.LongTensor([[0, 0]])
                x = grad[0]
                v = torch.DoubleTensor([x / 4., x - x / 4.])
            else:
                i = torch.LongTensor([[1, 1]])
                y = grad[1]
                v = torch.DoubleTensor([y - y / 4., y / 4.])
            x = sparse.DoubleTensor(i, v, torch.Size([2]))
            if sparse_grad:
                params.grad.data = x
            else:
                params.grad.data = x.to_dense()
            return loss

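        # For context: "uncoalesced" means the index tensor repeats an entry and the
        # duplicate values are summed when the tensor is coalesced or densified.
        # A hypothetical sketch with the same legacy constructor:
        #     i = torch.LongTensor([[0, 0]])
        #     v = torch.DoubleTensor([1., 2.])
        #     sparse.DoubleTensor(i, v, torch.Size([2])).to_dense()  # -> [3., 0.]
        # so the two halves built in eval() always sum back to the full gradient entry.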
        for i in range(2000):
            # Do cyclic coordinate descent
            w = i % 2
            optimizer.step(functools.partial(eval, params, True, w))
            for scheduler in schedulers:
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(rosenbrock(params))
                else:
                    scheduler.step()
            if not sparse_only:
                optimizer_c.step(functools.partial(eval, params_c, False, w))
                self.assertEqual(params.data, params_c.data)

        self.assertLessEqual(params.data.dist(solution), initial_dist)

    def _test_basic_cases_template(self, weight, bias, input, constructor, scheduler_constructors):
        weight = Variable(weight, requires_grad=True)
        bias = Variable(bias, requires_grad=True)
        input = Variable(input)
        optimizer = constructor(weight, bias)
        schedulers = []
        for scheduler_constructor in scheduler_constructors:
            schedulers.append(scheduler_constructor(optimizer))

        # to check if the optimizer can be printed as a string
        optimizer.__repr__()

        def fn():
            optimizer.zero_grad()
            y = weight.mv(input)
            if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
                y = y.cuda(bias.get_device())
            loss = (y + bias).pow(2).sum()
            loss.backward()
            return loss

        initial_value = fn().item()
        for _i in range(200):
            for scheduler in schedulers:
                if isinstance(scheduler, ReduceLROnPlateau):
                    val_loss = fn()
                    scheduler.step(val_loss)
                else:
                    scheduler.step()
            optimizer.step(fn)
        self.assertLess(fn().item(), initial_value)

    def _test_state_dict(self, weight, bias, input, constructor):
        weight = Variable(weight, requires_grad=True)
        bias = Variable(bias, requires_grad=True)
        input = Variable(input)

        def fn_base(optimizer, weight, bias):
            optimizer.zero_grad()
            i = input_cuda if weight.is_cuda else input
            loss = (weight.mv(i) + bias).pow(2).sum()
            loss.backward()
            return loss

        optimizer = constructor(weight, bias)
        fn = functools.partial(fn_base, optimizer, weight, bias)

        # Prime the optimizer
        for _i in range(20):
            optimizer.step(fn)
        # Clone the weights and construct new optimizer for them
        weight_c = Variable(weight.data.clone(), requires_grad=True)
        bias_c = Variable(bias.data.clone(), requires_grad=True)
        optimizer_c = constructor(weight_c, bias_c)
        fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c)
        # Load state dict
        state_dict = deepcopy(optimizer.state_dict())
        state_dict_c = deepcopy(optimizer.state_dict())
        optimizer_c.load_state_dict(state_dict_c)
        # Run both optimizations in parallel
        for _i in range(20):
            optimizer.step(fn)
            optimizer_c.step(fn_c)
            self.assertEqual(weight, weight_c)
            self.assertEqual(bias, bias_c)
        # Make sure state dict wasn't modified
        self.assertEqual(state_dict, state_dict_c)

        # Check that state dict can be loaded even when we cast parameters
        # to a different type and move to a different device.
        if not torch.cuda.is_available():
            return

        input_cuda = Variable(input.data.float().cuda())
        weight_cuda = Variable(weight.data.float().cuda(), requires_grad=True)
        bias_cuda = Variable(bias.data.float().cuda(), requires_grad=True)
        optimizer_cuda = constructor(weight_cuda, bias_cuda)
        fn_cuda = functools.partial(fn_base, optimizer_cuda, weight_cuda, bias_cuda)

        state_dict = deepcopy(optimizer.state_dict())
        state_dict_c = deepcopy(optimizer.state_dict())
        optimizer_cuda.load_state_dict(state_dict_c)

        # Make sure state dict wasn't modified
        self.assertEqual(state_dict, state_dict_c)

        for _i in range(20):
            optimizer.step(fn)
            optimizer_cuda.step(fn_cuda)
            self.assertEqual(weight, weight_cuda)
            self.assertEqual(bias, bias_cuda)

    def _test_basic_cases(self, constructor, scheduler_constructors=None,
                          ignore_multidevice=False):
        if scheduler_constructors is None:
            scheduler_constructors = []
        self._test_state_dict(
            torch.randn(10, 5),
            torch.randn(10),
            torch.randn(5),
            constructor
        )
        self._test_basic_cases_template(
            torch.randn(10, 5),
            torch.randn(10),
            torch.randn(5),
            constructor,
            scheduler_constructors
        )
        # non-contiguous parameters
        self._test_basic_cases_template(
            torch.randn(10, 5, 2)[..., 0],
            torch.randn(10, 2)[..., 0],
            torch.randn(5),
            constructor,
            scheduler_constructors
        )
        # CUDA
        if not torch.cuda.is_available():
            return
        self._test_basic_cases_template(
            torch.randn(10, 5).cuda(),
            torch.randn(10).cuda(),
            torch.randn(5).cuda(),
            constructor,
            scheduler_constructors
        )
        # Multi-GPU
        if not torch.cuda.device_count() > 1 or ignore_multidevice:
            return
        self._test_basic_cases_template(
            torch.randn(10, 5).cuda(0),
            torch.randn(10).cuda(1),
            torch.randn(5).cuda(0),
            constructor,
            scheduler_constructors
        )

    def _build_params_dict(self, weight, bias, **kwargs):
        return [{'params': [weight]}, dict(params=[bias], **kwargs)]

    def _build_params_dict_single(self, weight, bias, **kwargs):
        return [dict(params=bias, **kwargs)]

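    # For context, a per-group dict entry such as {'params': [bias], 'lr': 1e-2}
    # overrides the optimizer-wide default for that group only; a minimal sketch:
    #     optim.SGD([{'params': [weight]}, {'params': [bias], 'lr': 1e-2}], lr=1e-3)
    # steps `weight` with lr=1e-3 and `bias` with lr=1e-2, which is exactly what
    # the _build_params_dict* helpers above let the tests below exercise.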
    def test_sgd(self):
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict_single(weight, bias, lr=1e-2),
                lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict_single(weight, bias, lr=1e-2))
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.99, step_size=10),
             lambda opt: ExponentialLR(opt, gamma=0.99),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        with self.assertRaisesRegex(ValueError, "Invalid momentum value: -0.5"):
            optim.SGD(None, lr=1e-2, momentum=-0.5)

    def test_sgd_sparse(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.SGD(params, lr=5e-3)
        )
        self._test_rosenbrock_sparse(
            lambda params: optim.SGD(params, lr=0.005),
            [lambda opt: StepLR(opt, gamma=0.99999, step_size=300)]
        )

    def test_adam(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3,
                                            amsgrad=True)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3, amsgrad=True)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3),
            [lambda opt: ExponentialLR(opt, gamma=0.9)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3,
                                            amsgrad=True),
            [lambda opt: ExponentialLR(opt, gamma=0.9),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3, amsgrad=True),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 0: 1.0"):
            optim.Adam(None, lr=1e-2, betas=(1.0, 0.0))

    def test_sparse_adam(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.SparseAdam(params, lr=4e-2),
            [],
            True
        )
        with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 0: 1.0"):
            optim.SparseAdam(None, lr=1e-2, betas=(1.0, 0.0))

    def test_adadelta(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta([weight, bias])
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta(
                self._build_params_dict(weight, bias, rho=0.95))
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta(
                self._build_params_dict(weight, bias, rho=0.95)),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        with self.assertRaisesRegex(ValueError, "Invalid rho value: 1.1"):
            optim.Adadelta(None, lr=1e-2, rho=1.1)

    def test_adagrad(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1,
                                               initial_accumulator_value=0.1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-1),
            [lambda opt: ReduceLROnPlateau(opt)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-1),
            [lambda opt: ReduceLROnPlateau(opt),
             lambda opt: ExponentialLR(opt, gamma=0.99)]
        )
        with self.assertRaisesRegex(ValueError, "Invalid lr_decay value: -0.5"):
            optim.Adagrad(None, lr=1e-2, lr_decay=-0.5)

    def test_adagrad_sparse(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.Adagrad(params, lr=1e-1)
        )
        self._test_rosenbrock_sparse(
            lambda params: optim.Adagrad(params, lr=0.1),
            [lambda opt: StepLR(opt, gamma=1 - 1e-5, step_size=500),
             lambda opt: ReduceLROnPlateau(opt, threshold=1e-4)]
        )

    def test_adamax(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adamax([weight, bias], lr=1e-1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adamax(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-1)
        )
        with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 1: 1.0"):
            optim.Adamax(None, lr=1e-2, betas=(0.0, 1.0))

    def test_rmsprop(self):
        self._test_basic_cases(
            lambda weight, bias: optim.RMSprop([weight, bias], lr=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.RMSprop(
                self._build_params_dict(weight, bias, lr=1e-3),
                lr=1e-2)
        )
        with self.assertRaisesRegex(ValueError, "Invalid momentum value: -1.0"):
            optim.RMSprop(None, lr=1e-2, momentum=-1.0)

    def test_asgd(self):
        self._test_basic_cases(
            lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.ASGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3, t0=100)
        )
        with self.assertRaisesRegex(ValueError, "Invalid weight_decay value: -0.5"):
            optim.ASGD(None, lr=1e-2, weight_decay=-0.5)

    def test_rprop(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Rprop(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
        with self.assertRaisesRegex(ValueError, "Invalid eta values: 1.0, 0.5"):
            optim.Rprop(None, lr=1e-2, etas=(1.0, 0.5))

    def test_lbfgs(self):
        self._test_basic_cases(
            lambda weight, bias: optim.LBFGS([weight, bias]),
            ignore_multidevice=True
        )

    @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN")
    def test_lbfgs_return_type(self):
        params = [torch.randn(10, 5), torch.randn(10)]
        opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf)
        opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf)

        def closure():
            return torch.Tensor([10])

        res1 = opt1.step(closure)
        res2 = opt2.step(closure)
        self.assertEqual(type(res1), type(res2))

    def test_invalid_param_type(self):
        with self.assertRaises(TypeError):
            optim.SGD(Variable(torch.randn(5, 5)), lr=3)


class SchedulerTestNet(torch.nn.Module):
    def __init__(self):
        super(SchedulerTestNet, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 1, 1)
        self.conv2 = torch.nn.Conv2d(1, 1, 1)

    def forward(self, x):
        return self.conv2(F.relu(self.conv1(x)))


class LambdaLRTestObject(object):
    def __init__(self, value):
        self.value = value

    def __call__(self, epoch):
        return self.value * epoch

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        else:
            return False


# The Legacy* schedulers below are closed-form reference implementations that the
# test_legacy_* tests compare the current schedulers against.
class LegacyStepLR(StepLR):
    def get_lr(self):
        return [base_lr * self.gamma ** (self.last_epoch // self.step_size)
                for base_lr in self.base_lrs]


class LegacyMultiStepLR(MultiStepLR):
    def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1):
        self.milestones = sorted(milestones)
        self.gamma = gamma
        # NB: deliberately skips MultiStepLR.__init__ (whose signature would
        # reinterpret the arguments) and goes straight to _LRScheduler.__init__.
        super(MultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        return [base_lr * self.gamma ** bisect_right(self.milestones, self.last_epoch)
                for base_lr in self.base_lrs]


class LegacyExponentialLR(ExponentialLR):
    def get_lr(self):
        return [base_lr * self.gamma ** self.last_epoch
                for base_lr in self.base_lrs]


class LegacyCosineAnnealingLR(CosineAnnealingLR):
    def get_lr(self):
        return [self.eta_min + (base_lr - self.eta_min) *
                (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / 2
                for base_lr in self.base_lrs]

class TestLRScheduler(TestCase):
    def setUp(self):
        self.net = SchedulerTestNet()
        self.opt = SGD(
            [{'params': self.net.conv1.parameters()}, {'params': self.net.conv2.parameters(), 'lr': 0.5}],
            lr=0.05)

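    # Note: setUp gives the second parameter group a base lr of 0.5, ten times the
    # first group's 0.05, so the scheduler tests below reuse `single_targets` scaled
    # by `epochs` (which equals 10 there) as the second group's expected schedule.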
    def test_step_lr(self):
        # lr = 0.05     if epoch < 3
        # lr = 0.005    if 3 <= epoch < 6
        # lr = 0.0005   if 6 <= epoch < 9
        # lr = 0.00005  if epoch >= 9
        epochs = 10
        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test(scheduler, targets, epochs)

    def test_multi_step_lr(self):
        # lr = 0.05     if epoch < 2
        # lr = 0.005    if 2 <= epoch < 5
        # lr = 0.0005   if 5 <= epoch < 9
        # lr = 0.00005  if epoch >= 9
        epochs = 10
        single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test(scheduler, targets, epochs)

    def test_exp_lr(self):
        epochs = 10
        single_targets = [0.05 * (0.9 ** x) for x in range(epochs)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = ExponentialLR(self.opt, gamma=0.9)
        self._test(scheduler, targets, epochs)

    def test_cos_anneal_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        self._test(scheduler, targets, epochs)

    def test_legacy_step_lr(self):
        scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
        legacy_scheduler = LegacyStepLR(self.opt, gamma=0.1, step_size=3)
        self._test_against_legacy(scheduler, legacy_scheduler, 20)

    def test_legacy_multi_step_lr(self):
        scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        legacy_scheduler = LegacyMultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test_against_legacy(scheduler, legacy_scheduler, 20)

    def test_legacy_exp_lr(self):
        scheduler = ExponentialLR(self.opt, gamma=0.9)
        legacy_scheduler = LegacyExponentialLR(self.opt, gamma=0.9)
        self._test_against_legacy(scheduler, legacy_scheduler, 20)

    def test_legacy_cos_anneal_lr(self):
        eta_min = 1e-10
        epochs = 20
        scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        legacy_scheduler = LegacyCosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        self._test_against_legacy(scheduler, legacy_scheduler, epochs)

    def test_reduce_lr_on_plateau1(self):
        epochs = 10
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [10 - i * 0.0167 for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
                                      threshold=0.01, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

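    # For context (summarizing the ReduceLROnPlateau semantics exercised here): with
    # mode='min' and threshold_mode='abs', an epoch counts as an improvement only if
    # metric < best - threshold; after `patience` non-improving epochs the lr is
    # multiplied by `factor` (default 0.1), and `cooldown` epochs are then skipped.
    # Above, the metric drops by 0.0167 > 0.01 every epoch, so the lr never decays
    # and the target stays at 0.5 throughout.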
    def test_reduce_lr_on_plateau2(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2]
        metrics = [10 - i * 0.0165 for i in range(22)]
        scheduler = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
                                      mode='min', threshold=0.1)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau3(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [-0.8] * 2 + [-0.234] * 20
        scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
                                      threshold_mode='abs')
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau4(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [1.5 * (1.025 ** i) for i in range(20)]  # 1.025 > 1.1**0.25
        scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=3,
                                      threshold_mode='rel', threshold=0.1)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau5(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [1.5 * (1.005 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel',
                                      threshold=0.1, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau6(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [1.5 * (0.85 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                      threshold=0.1)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau7(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [1] * 7 + [0.6] + [0.5] * 12
        scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                      threshold=0.1, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_reduce_lr_on_plateau8(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.4] * 14, [0.5] * 6 + [0.3] * 14]
        metrics = [1.5 * (1.005 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', min_lr=[0.4, 0.3],
                                      threshold=0.1, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)

    def test_compound_step_and_multistep_lr(self):
        epochs = 10
        schedulers = [None] * 2
        schedulers[0] = StepLR(self.opt, gamma=0.1, step_size=3)
        schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        targets = [[0.05] * 2 + [0.005] * 1 + [5e-4] * 2 + [5e-5] + [5e-6] * 3 + [5e-8]]
        self._test(schedulers, targets, epochs)

    def test_compound_step_and_exp_lr(self):
        epochs = 10
        schedulers = [None] * 2
        single_targets = [0.05 * (0.9 ** x) for x in range(3)]
        single_targets += [0.005 * (0.9 ** x) for x in range(3, 6)]
        single_targets += [0.0005 * (0.9 ** x) for x in range(6, 9)]
        single_targets += [0.00005 * (0.9 ** x) for x in range(9, 12)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers[0] = StepLR(self.opt, gamma=0.1, step_size=3)
        schedulers[1] = ExponentialLR(self.opt, gamma=0.9)
        self._test(schedulers, targets, epochs)

    def test_compound_exp_and_multistep_lr(self):
        epochs = 10
        schedulers = [None] * 2
        single_targets = [0.05 * (0.9 ** x) for x in range(2)]
        single_targets += [0.005 * (0.9 ** x) for x in range(2, 5)]
        single_targets += [0.0005 * (0.9 ** x) for x in range(5, 9)]
        single_targets += [0.00005 * (0.9 ** x) for x in range(9, 11)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers[0] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        schedulers[1] = ExponentialLR(self.opt, gamma=0.9)
        self._test(schedulers, targets, epochs)

    def test_compound_cosanneal_and_step_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        single_targets = [x * 0.1 ** (i // 3) for i, x in enumerate(single_targets)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers = [None] * 2
        schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        schedulers[1] = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test(schedulers, targets, epochs)

    def test_compound_cosanneal_and_multistep_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        multipliers = [1] * 2 + [0.1] * 3 + [0.01] * 4 + [0.001]
        single_targets = [x * y for x, y in zip(single_targets, multipliers)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers = [None] * 2
        schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test(schedulers, targets, epochs)

    def test_compound_cosanneal_and_exp_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        multipliers = [0.1 ** i for i in range(epochs)]
        single_targets = [x * y for x, y in zip(single_targets, multipliers)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers = [None] * 2
        schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        schedulers[1] = ExponentialLR(self.opt, gamma=0.1)
        self._test(schedulers, targets, epochs)

    def test_compound_reduce_lr_on_plateau1(self):
        epochs = 10
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        single_targets = [0.5] * 20
        multipliers = [0.1 ** (i // 3) for i in range(20)]
        single_targets = [x * y for x, y in zip(multipliers, single_targets)]
        targets = [single_targets]
        metrics = [10 - i * 0.0167 for i in range(20)]
        schedulers = [None, None]
        schedulers[0] = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
                                          threshold=0.01, patience=5, cooldown=5)
        schedulers[1] = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)

    def test_compound_reduce_lr_on_plateau2(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        single_targets = [0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2
        multipliers = [1] * 3 + [0.1] * 5 + [0.01] * 4 + [0.001] * 10
        single_targets = [x * y for x, y in zip(single_targets, multipliers)]
        targets = [single_targets]
        metrics = [10 - i * 0.0165 for i in range(22)]
        schedulers = [None] * 2
        schedulers[0] = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
                                          mode='min', threshold=0.1)
        schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[3, 8, 12])
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)

    def test_compound_reduce_lr_on_plateau3(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        single_targets = [0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4
        multipliers = [0.1 ** i for i in range(epochs)]
        single_targets = [x * y for x, y in zip(multipliers, single_targets)]
        targets = [single_targets]
        metrics = [-0.8] * 2 + [-0.234] * 20
        schedulers = [None, None]
        schedulers[0] = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
                                          threshold_mode='abs')
        schedulers[1] = ExponentialLR(self.opt, gamma=0.1)
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)

    def test_compound_reduce_lr_on_plateau4(self):
        epochs = 20
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.05
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        targets = [single_targets]
        metrics = [1.5 * (1.025 ** i) for i in range(20)]  # 1.025 > 1.1**0.25
        schedulers = [None, None]
        schedulers[0] = ReduceLROnPlateau(self.opt, mode='max', patience=3,
                                          threshold_mode='rel', threshold=0.1)
        schedulers[1] = CosineAnnealingLR(self.opt, epochs, eta_min)
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)

    def test_lambda_lr(self):
        epochs = 10
        self.opt.param_groups[0]['lr'] = 0.05
        self.opt.param_groups[1]['lr'] = 0.4
        targets = [[0.05 * (0.9 ** x) for x in range(epochs)], [0.4 * (0.8 ** x) for x in range(epochs)]]
        scheduler = LambdaLR(self.opt,
                             lr_lambda=[lambda x1: 0.9 ** x1, lambda x2: 0.8 ** x2])
        self._test(scheduler, targets, epochs)

    def test_step_lr_state_dict(self):
        self._check_scheduler_state_dict(
            lambda: StepLR(self.opt, gamma=0.1, step_size=3),
            lambda: StepLR(self.opt, gamma=0.01 / 2, step_size=1))

    def test_multi_step_lr_state_dict(self):
        self._check_scheduler_state_dict(
            lambda: MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9]),
            lambda: MultiStepLR(self.opt, gamma=0.01, milestones=[1, 4, 6]))

    def test_exp_step_lr_state_dict(self):
        self._check_scheduler_state_dict(
            lambda: ExponentialLR(self.opt, gamma=0.1),
            lambda: ExponentialLR(self.opt, gamma=0.01))

    def test_cosine_lr_state_dict(self):
        epochs = 10
        eta_min = 1e-10
        self._check_scheduler_state_dict(
            lambda: CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min),
            lambda: CosineAnnealingLR(self.opt, T_max=epochs // 2, eta_min=eta_min / 2),
            epochs=epochs)

    def test_reduce_lr_on_plateau_state_dict(self):
        scheduler = ReduceLROnPlateau(self.opt, mode='min', factor=0.1, patience=2)
        for score in [1.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 3.0, 2.0, 1.0]:
            scheduler.step(score)
        scheduler_copy = ReduceLROnPlateau(self.opt, mode='max', factor=0.5, patience=10)
        scheduler_copy.load_state_dict(scheduler.state_dict())
        for key in scheduler.__dict__.keys():
            if key not in {'optimizer', 'is_better'}:
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key], allow_inf=True)

    def test_lambda_lr_state_dict_fn(self):
        scheduler = LambdaLR(self.opt, lr_lambda=lambda x: x)
        state = scheduler.state_dict()
        self.assertIsNone(state['lr_lambdas'][0])

        scheduler_copy = LambdaLR(self.opt, lr_lambda=lambda x: x)
        scheduler_copy.load_state_dict(state)
        for key in scheduler.__dict__.keys():
            if key not in {'optimizer', 'lr_lambdas'}:
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key], allow_inf=True)

    def test_lambda_lr_state_dict_obj(self):
        scheduler = LambdaLR(self.opt, lr_lambda=LambdaLRTestObject(10))
        state = scheduler.state_dict()
        self.assertIsNotNone(state['lr_lambdas'][0])

        scheduler_copy = LambdaLR(self.opt, lr_lambda=LambdaLRTestObject(-1))
        scheduler_copy.load_state_dict(state)
        for key in scheduler.__dict__.keys():
            if key not in {'optimizer'}:
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key], allow_inf=True)

    def _check_scheduler_state_dict(self, constr, constr2, epochs=10):
        scheduler = constr()
        for _ in range(epochs):
            scheduler.step()
        scheduler_copy = constr2()
        scheduler_copy.load_state_dict(scheduler.state_dict())
        for key in scheduler.__dict__.keys():
            if key != 'optimizer':
                self.assertAlmostEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key])
        self.assertAlmostEqual(scheduler.get_lr(), scheduler_copy.get_lr())

    def _test(self, schedulers, targets, epochs=10):
        if isinstance(schedulers, _LRScheduler):
            schedulers = [schedulers]
        for epoch in range(epochs):
            [scheduler.step(epoch) for scheduler in schedulers]
            for param_group, target in zip(self.opt.param_groups, targets):
                self.assertAlmostEqual(target[epoch], param_group['lr'],
                                       msg='LR is wrong in epoch {}: expected {}, got {}'.format(
                                           epoch, target[epoch], param_group['lr']), delta=1e-5)

    def _test_against_legacy(self, scheduler, legacy_scheduler, epochs=10):
        self.setUp()
        targets = []
        for epoch in range(epochs):
            legacy_scheduler.step(epoch)
            targets.append([group['lr'] for group in self.opt.param_groups])
        self.setUp()
        for epoch in range(epochs):
            scheduler.step(epoch)
            for i, param_group in enumerate(self.opt.param_groups):
                self.assertAlmostEqual(targets[epoch][i], param_group['lr'],
                                       msg='LR is wrong in epoch {}: expected {}, got {}'.format(
                                           epoch, targets[epoch][i], param_group['lr']), delta=1e-5)

    def _test_reduce_lr_on_plateau(self, schedulers, targets, metrics, epochs=10, verbose=False):
        if isinstance(schedulers, _LRScheduler) or isinstance(schedulers, ReduceLROnPlateau):
            schedulers = [schedulers]
        for epoch in range(epochs):
            for scheduler in schedulers:
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(metrics[epoch])
                else:
                    scheduler.step(epoch)
            if verbose:
                print('epoch{}:\tlr={}'.format(epoch, self.opt.param_groups[0]['lr']))
            for param_group, target in zip(self.opt.param_groups, targets):
                self.assertAlmostEqual(target[epoch], param_group['lr'],
                                       msg='LR is wrong in epoch {}: expected {}, got {}'.format(
                                           epoch, target[epoch], param_group['lr']), delta=1e-5)


if __name__ == '__main__':
    run_tests()
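
# A minimal usage sketch (assuming common_utils from the PyTorch test directory is
# importable, as the imports above require):
#     python test_optim.py                       # run every test in this file
#     python test_optim.py TestOptim.test_sgd    # run a single test by name
# run_tests() defers to unittest, so the usual unittest test selection applies.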