import math
import unittest
import functools
from copy import deepcopy
from bisect import bisect_right
from math import inf

import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch import sparse
from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, \
    ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau, _LRScheduler
from common_utils import TestCase, run_tests, TEST_WITH_UBSAN, load_tests

# load_tests from common_utils is used to automatically filter tests for
# sharding; re-assigning it here silences "unused import" linter warnings.
load_tests = load_tests
def rosenbrock(tensor):
    x, y = tensor
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2


def drosenbrock(tensor):
    x, y = tensor
    return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))
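# The Rosenbrock function has its global minimum at (1, 1); drosenbrock is its
# analytic gradient. The sparse optimizer tests below start from (1.5, 1.5) and
# only require that the parameters end up closer to that minimum.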
class TestOptim(TestCase):
    def _test_rosenbrock_sparse(self, constructor, scheduler_constructors=None,
                                sparse_only=False):
        if scheduler_constructors is None:
            scheduler_constructors = []
        params_t = torch.Tensor([1.5, 1.5])

        params = Variable(params_t, requires_grad=True)
        optimizer = constructor([params])
        schedulers = []
        for scheduler_constructor in scheduler_constructors:
            schedulers.append(scheduler_constructor(optimizer))

        if not sparse_only:
            params_c = Variable(params_t.clone(), requires_grad=True)
            optimizer_c = constructor([params_c])

        solution = torch.Tensor([1, 1])
        initial_dist = params.data.dist(solution)

        def eval(params, sparse_grad, w):
            # Depending on w, feed back either the x or the y component of the
            # Rosenbrock gradient, optionally as an uncoalesced sparse tensor.
            optimizer.zero_grad()
            loss = rosenbrock(params)
            loss.backward()
            grad = drosenbrock(params.data)
            if w:
                i = torch.LongTensor([[0, 0]])
                x = grad[0]
                v = torch.DoubleTensor([x / 4., x - x / 4.])
            else:
                i = torch.LongTensor([[1, 1]])
                y = grad[1]
                v = torch.DoubleTensor([y - y / 4., y / 4.])
            x = sparse.DoubleTensor(i, v, torch.Size([2]))
            if sparse_grad:
                params.grad.data = x
            else:
                params.grad.data = x.to_dense()
            return loss

        for i in range(2000):
            # Alternate between the two coordinates (cyclic coordinate descent).
            w = i % 2
            optimizer.step(functools.partial(eval, params, True, w))
            for scheduler in schedulers:
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(rosenbrock(params))
                else:
                    scheduler.step()
            if not sparse_only:
                optimizer_c.step(functools.partial(eval, params_c, False, w))

        self.assertLessEqual(params.data.dist(solution), initial_dist)
    def _test_basic_cases_template(self, weight, bias, input, constructor, scheduler_constructors):
        weight = Variable(weight, requires_grad=True)
        bias = Variable(bias, requires_grad=True)
        input = Variable(input)
        optimizer = constructor(weight, bias)
        schedulers = []
        for scheduler_constructor in scheduler_constructors:
            schedulers.append(scheduler_constructor(optimizer))

        def fn():
            optimizer.zero_grad()
            y = weight.mv(input)
            if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
                y = y.cuda(bias.get_device())
            loss = (y + bias).pow(2).sum()
            loss.backward()
            return loss

        initial_value = fn().item()
        for _i in range(200):
            for scheduler in schedulers:
                if isinstance(scheduler, ReduceLROnPlateau):
                    val_loss = fn()
                    scheduler.step(val_loss)
                else:
                    scheduler.step()
            optimizer.step(fn)
        self.assertLess(fn().item(), initial_value)
    def _test_state_dict(self, weight, bias, input, constructor):
        weight = Variable(weight, requires_grad=True)
        bias = Variable(bias, requires_grad=True)
        input = Variable(input)

        def fn_base(optimizer, weight, bias):
            optimizer.zero_grad()
            i = input_cuda if weight.is_cuda else input
            loss = (weight.mv(i) + bias).pow(2).sum()
            loss.backward()
            return loss

        optimizer = constructor(weight, bias)
        fn = functools.partial(fn_base, optimizer, weight, bias)

        # Clone the parameters, build a second optimizer, and load the first
        # optimizer's state into it; both copies should then evolve identically.
        weight_c = Variable(weight.data.clone(), requires_grad=True)
        bias_c = Variable(bias.data.clone(), requires_grad=True)
        optimizer_c = constructor(weight_c, bias_c)
        fn_c = functools.partial(fn_base, optimizer_c, weight_c, bias_c)

        state_dict = deepcopy(optimizer.state_dict())
        state_dict_c = deepcopy(optimizer.state_dict())
        optimizer_c.load_state_dict(state_dict_c)
        optimizer_c.step(fn_c)

        # Check that the state dict can also be loaded after casting the
        # parameters to a different dtype and moving them to a CUDA device.
        input_cuda = Variable(input.data.float().cuda())
        weight_cuda = Variable(weight.data.float().cuda(), requires_grad=True)
        bias_cuda = Variable(bias.data.float().cuda(), requires_grad=True)
        optimizer_cuda = constructor(weight_cuda, bias_cuda)
        fn_cuda = functools.partial(fn_base, optimizer_cuda, weight_cuda, bias_cuda)

        state_dict = deepcopy(optimizer.state_dict())
        state_dict_c = deepcopy(optimizer.state_dict())
        optimizer_cuda.load_state_dict(state_dict_c)
        optimizer_cuda.step(fn_cuda)
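    # A minimal sketch of the round trip exercised above (illustrative only,
    # not part of the test): an optimizer can be rebuilt from a saved state
    # dict, e.g.
    #
    #     saved = deepcopy(optimizer.state_dict())
    #     new_optimizer = optim.SGD(model.parameters(), lr=1e-3)   # hypothetical model
    #     new_optimizer.load_state_dict(saved)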
    def _test_basic_cases(self, constructor, scheduler_constructors=None,
                          ignore_multidevice=False):
        if scheduler_constructors is None:
            scheduler_constructors = []
        self._test_basic_cases_template(
            torch.randn(10, 5), torch.randn(10), torch.randn(5),
            constructor, scheduler_constructors)
        # non-contiguous parameters
        self._test_basic_cases_template(
            torch.randn(10, 5, 2)[..., 0],
            torch.randn(10, 2)[..., 0],
            torch.randn(5),
            constructor, scheduler_constructors)
        # CUDA
        if not torch.cuda.is_available():
            return
        self._test_basic_cases_template(
            torch.randn(10, 5).cuda(),
            torch.randn(10).cuda(),
            torch.randn(5).cuda(),
            constructor, scheduler_constructors)
        # Multi-GPU
        if torch.cuda.device_count() > 1 and not ignore_multidevice:
            self._test_basic_cases_template(
                torch.randn(10, 5).cuda(0),
                torch.randn(10).cuda(1),
                torch.randn(5).cuda(0),
                constructor, scheduler_constructors)
    def _build_params_dict(self, weight, bias, **kwargs):
        return [{'params': [weight]}, dict(params=[bias], **kwargs)]

    def _build_params_dict_single(self, weight, bias, **kwargs):
        return [dict(params=bias, **kwargs)]
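    # These helpers build the per-parameter-group form of the constructor
    # argument: a list of dicts, each with a 'params' entry and optional
    # per-group overrides, e.g. (illustrative only)
    #
    #     optim.SGD([{'params': [weight]}, {'params': [bias], 'lr': 1e-2}], lr=1e-3)
    #
    # gives `bias` its own learning rate while `weight` uses the default 1e-3.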
    def test_sgd(self):
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict_single(weight, bias, lr=1e-2), lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict_single(weight, bias, lr=1e-2))
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3),
            [lambda opt: StepLR(opt, gamma=0.99, step_size=10),
             lambda opt: ExponentialLR(opt, gamma=0.99),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        with self.assertRaises(ValueError):
            optim.SGD(None, lr=1e-2, momentum=-0.5)
    def test_sgd_sparse(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.SGD(params, lr=5e-3)
        )
        self._test_rosenbrock_sparse(
            lambda params: optim.SGD(params, lr=0.005),
            [lambda opt: StepLR(opt, gamma=0.99999, step_size=300)]
        )
    def test_adam(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3,
                                            amsgrad=True)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3, amsgrad=True)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3),
            [lambda opt: ExponentialLR(opt, gamma=0.9)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam([weight, bias], lr=1e-3,
                                            amsgrad=True),
            [lambda opt: ExponentialLR(opt, gamma=0.9),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adam(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3, amsgrad=True),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 0: 1.0"):
            optim.Adam(None, lr=1e-2, betas=(1.0, 0.0))
    def test_sparse_adam(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.SparseAdam(params, lr=4e-2),
            sparse_only=True
        )
        with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 0: 1.0"):
            optim.SparseAdam(None, lr=1e-2, betas=(1.0, 0.0))
    def test_adadelta(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta([weight, bias])
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta(
                self._build_params_dict(weight, bias, rho=0.95))
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adadelta(
                self._build_params_dict(weight, bias, rho=0.95)),
            [lambda opt: StepLR(opt, gamma=0.9, step_size=10),
             lambda opt: ReduceLROnPlateau(opt)]
        )
        with self.assertRaises(ValueError):
            optim.Adadelta(None, lr=1e-2, rho=1.1)
    def test_adagrad(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1,
                                               initial_accumulator_value=0.1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1),
            [lambda opt: ReduceLROnPlateau(opt)]
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1),
            [lambda opt: ReduceLROnPlateau(opt),
             lambda opt: ExponentialLR(opt, gamma=0.99)]
        )
        with self.assertRaises(ValueError):
            optim.Adagrad(None, lr=1e-2, lr_decay=-0.5)
    def test_adagrad_sparse(self):
        self._test_rosenbrock_sparse(
            lambda params: optim.Adagrad(params, lr=1e-1)
        )
        self._test_rosenbrock_sparse(
            lambda params: optim.Adagrad(params, lr=0.1),
            [lambda opt: StepLR(opt, gamma=1 - 1e-5, step_size=500),
             lambda opt: ReduceLROnPlateau(opt, threshold=1e-4)]
        )
    def test_adamax(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Adamax([weight, bias], lr=1e-1)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adamax(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-1)
        )
        with self.assertRaisesRegex(ValueError, "Invalid beta parameter at index 1: 1.0"):
            optim.Adamax(None, lr=1e-2, betas=(0.0, 1.0))
    def test_rmsprop(self):
        self._test_basic_cases(
            lambda weight, bias: optim.RMSprop([weight, bias], lr=1e-2))
        self._test_basic_cases(
            lambda weight, bias: optim.RMSprop(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-2))
        with self.assertRaises(ValueError):
            optim.RMSprop(None, lr=1e-2, momentum=-1.0)
    def test_asgd(self):
        self._test_basic_cases(
            lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100))
        self._test_basic_cases(
            lambda weight, bias: optim.ASGD(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3, t0=100))
        with self.assertRaises(ValueError):
            optim.ASGD(None, lr=1e-2, weight_decay=-0.5)
    def test_rprop(self):
        self._test_basic_cases(
            lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3))
        self._test_basic_cases(
            lambda weight, bias: optim.Rprop(
                self._build_params_dict(weight, bias, lr=1e-2), lr=1e-3))
        with self.assertRaises(ValueError):
            optim.Rprop(None, lr=1e-2, etas=(1.0, 0.5))
    def test_lbfgs(self):
        self._test_basic_cases(
            lambda weight, bias: optim.LBFGS([weight, bias]),
            ignore_multidevice=True
        )

    @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN")
    def test_lbfgs_return_type(self):
        params = [torch.randn(10, 5), torch.randn(10)]
        opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf)
        opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf)

        def closure():
            return torch.Tensor([10])

        res1 = opt1.step(closure)
        res2 = opt2.step(closure)
        self.assertEqual(type(res1), type(res2))
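    # Note: LBFGS is a full-batch method; step() must be given a closure that
    # re-evaluates the model and returns the loss, and step() returns the value
    # produced by that closure (compared above for +/-inf gradient tolerances).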
    def test_invalid_param_type(self):
        with self.assertRaises(TypeError):
            optim.SGD(Variable(torch.randn(5, 5)), lr=3)
class SchedulerTestNet(torch.nn.Module):
    def __init__(self):
        super(SchedulerTestNet, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 1, 1)
        self.conv2 = torch.nn.Conv2d(1, 1, 1)

    def forward(self, x):
        return self.conv2(F.relu(self.conv1(x)))
class LambdaLRTestObject:
    def __init__(self, value):
        self.value = value

    def __call__(self, epoch):
        return self.value * epoch

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        else:
            return False
# Reference scheduler implementations compared against by the test_legacy_* tests below.
class LegacyMultiStepLR(MultiStepLR):
    def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1):
        self.milestones = sorted(milestones)
        self.gamma = gamma
        super(MultiStepLR, self).__init__(optimizer, last_epoch)

class LegacyExponentialLR(ExponentialLR):
    def get_lr(self):
        return [base_lr * self.gamma ** self.last_epoch
                for base_lr in self.base_lrs]

class LegacyCosineAnnealingLR(CosineAnnealingLR):
    def get_lr(self):
        return [self.eta_min + (base_lr - self.eta_min) *
                (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / 2
                for base_lr in self.base_lrs]

class TestLRScheduler(TestCase):
    def setUp(self):
        self.net = SchedulerTestNet()
        self.opt = optim.SGD(
            [{'params': self.net.conv1.parameters()},
             {'params': self.net.conv2.parameters(), 'lr': 0.5}],
            lr=0.05)
    def test_step_lr(self):
        # lr starts at 0.05 and is decayed by gamma=0.1 every 3 epochs
        epochs = 10
        single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test(scheduler, targets, epochs)
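    # Minimal usage sketch (illustrative, not part of the test): a scheduler is
    # stepped once per epoch and the current value is read back from the
    # optimizer, e.g.
    #
    #     scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
    #     for epoch in range(10):
    #         train_one_epoch()          # hypothetical training step
    #         scheduler.step()
    #         lr = optimizer.param_groups[0]['lr']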
    def test_multi_step_lr(self):
        # lr starts at 0.05 and is decayed by gamma=0.1 at each milestone
        epochs = 10
        single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test(scheduler, targets, epochs)
    def test_exp_lr(self):
        epochs = 10
        single_targets = [0.05 * (0.9 ** x) for x in range(epochs)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = ExponentialLR(self.opt, gamma=0.9)
        self._test(scheduler, targets, epochs)
    def test_cos_anneal_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        self._test(scheduler, targets, epochs)
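    # CosineAnnealingLR follows
    #   eta_t = eta_min + (eta_max - eta_min) * (1 + cos(pi * t / T_max)) / 2,
    # which is exactly what `single_targets` above computes with eta_max = 0.05.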
    def test_legacy_step_lr(self):
        scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
        legacy_scheduler = LegacyStepLR(self.opt, gamma=0.1, step_size=3)
        self._test_against_legacy(scheduler, legacy_scheduler)

    def test_legacy_multi_step_lr(self):
        scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        legacy_scheduler = LegacyMultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test_against_legacy(scheduler, legacy_scheduler)

    def test_legacy_exp_lr(self):
        scheduler = ExponentialLR(self.opt, gamma=0.9)
        legacy_scheduler = LegacyExponentialLR(self.opt, gamma=0.9)
        self._test_against_legacy(scheduler, legacy_scheduler)

    def test_legacy_cos_anneal_lr(self):
        epochs = 10
        eta_min = 1e-10
        scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        legacy_scheduler = LegacyCosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        self._test_against_legacy(scheduler, legacy_scheduler, epochs)
    def test_reduce_lr_on_plateau1(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [10 - i * 0.0167 for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
                                      threshold=0.01, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
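    # ReduceLROnPlateau has no fixed schedule: it multiplies the lr by `factor`
    # (default 0.1) once the monitored metric has stopped improving for
    # `patience` epochs, then waits `cooldown` epochs before monitoring again.
    # `threshold`/`threshold_mode` ('rel' or 'abs') define what counts as an
    # improvement, and `mode` says whether the metric should be minimized
    # ('min') or maximized ('max').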
    def test_reduce_lr_on_plateau2(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2]
        metrics = [10 - i * 0.0165 for i in range(22)]
        scheduler = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
                                      mode='min', threshold=0.1)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_reduce_lr_on_plateau3(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [-0.8] * 2 + [-0.234] * 20
        scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
                                      threshold_mode='abs')
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_reduce_lr_on_plateau4(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [1.5 * (1.025 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=3,
                                      threshold_mode='rel', threshold=0.1)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_reduce_lr_on_plateau5(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [1.5 * (1.005 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel',
                                      threshold=0.1, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_reduce_lr_on_plateau6(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 20]
        metrics = [1.5 * (0.85 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                      threshold=0.1)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_reduce_lr_on_plateau7(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
        metrics = [1] * 7 + [0.6] + [0.5] * 12
        scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                      threshold=0.1, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_reduce_lr_on_plateau8(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        targets = [[0.5] * 6 + [0.4] * 14, [0.5] * 6 + [0.3] * 14]
        metrics = [1.5 * (1.005 ** i) for i in range(20)]
        scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', min_lr=[0.4, 0.3],
                                      threshold=0.1, patience=5, cooldown=5)
        self._test_reduce_lr_on_plateau(scheduler, targets, metrics)
    def test_compound_step_and_multistep_lr(self):
        epochs = 10
        schedulers = [None] * 2
        schedulers[0] = StepLR(self.opt, gamma=0.1, step_size=3)
        schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        targets = [[0.05] * 2 + [0.005] * 1 + [5e-4] * 2 + [5e-5] + [5e-6] * 3 + [5e-8]]
        self._test(schedulers, targets, epochs)
    def test_compound_step_and_exp_lr(self):
        epochs = 10
        schedulers = [None] * 2
        single_targets = [0.05 * (0.9 ** x) for x in range(3)]
        single_targets += [0.005 * (0.9 ** x) for x in range(3, 6)]
        single_targets += [0.0005 * (0.9 ** x) for x in range(6, 9)]
        single_targets += [0.00005 * (0.9 ** x) for x in range(9, 12)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers[0] = StepLR(self.opt, gamma=0.1, step_size=3)
        schedulers[1] = ExponentialLR(self.opt, gamma=0.9)
        self._test(schedulers, targets, epochs)
    def test_compound_exp_and_multistep_lr(self):
        epochs = 10
        schedulers = [None] * 2
        single_targets = [0.05 * (0.9 ** x) for x in range(2)]
        single_targets += [0.005 * (0.9 ** x) for x in range(2, 5)]
        single_targets += [0.0005 * (0.9 ** x) for x in range(5, 9)]
        single_targets += [0.00005 * (0.9 ** x) for x in range(9, 11)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers[0] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        schedulers[1] = ExponentialLR(self.opt, gamma=0.9)
        self._test(schedulers, targets, epochs)
    def test_compound_cosanneal_and_step_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        single_targets = [x * 0.1 ** (i // 3) for i, x in enumerate(single_targets)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers = [None] * 2
        schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        schedulers[1] = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test(schedulers, targets, epochs)
    def test_compound_cosanneal_and_multistep_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        multipliers = [1] * 2 + [0.1] * 3 + [0.01] * 4 + [0.001]
        single_targets = [x * y for x, y in zip(single_targets, multipliers)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers = [None] * 2
        schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
        self._test(schedulers, targets, epochs)
    def test_compound_cosanneal_and_exp_lr(self):
        epochs = 10
        eta_min = 1e-10
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        multipliers = [0.1 ** i for i in range(epochs)]
        single_targets = [x * y for x, y in zip(single_targets, multipliers)]
        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
        schedulers = [None] * 2
        schedulers[0] = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
        schedulers[1] = ExponentialLR(self.opt, gamma=0.1)
        self._test(schedulers, targets, epochs)
    def test_compound_reduce_lr_on_plateau1(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        single_targets = [0.5] * 20
        multipliers = [0.1 ** (i // 3) for i in range(20)]
        single_targets = [x * y for x, y in zip(multipliers, single_targets)]
        targets = [single_targets]
        metrics = [10 - i * 0.0167 for i in range(20)]
        schedulers = [None, None]
        schedulers[0] = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
                                          threshold=0.01, patience=5, cooldown=5)
        schedulers[1] = StepLR(self.opt, gamma=0.1, step_size=3)
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics)
    def test_compound_reduce_lr_on_plateau2(self):
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        single_targets = [0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2
        multipliers = [1] * 3 + [0.1] * 5 + [0.01] * 4 + [0.001] * 10
        single_targets = [x * y for x, y in zip(single_targets, multipliers)]
        targets = [single_targets]
        metrics = [10 - i * 0.0165 for i in range(22)]
        schedulers = [None] * 2
        schedulers[0] = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
                                          mode='min', threshold=0.1)
        schedulers[1] = MultiStepLR(self.opt, gamma=0.1, milestones=[3, 8, 12])
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics)
    def test_compound_reduce_lr_on_plateau3(self):
        epochs = 22
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.5
        single_targets = [0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4
        multipliers = [0.1 ** i for i in range(epochs)]
        single_targets = [x * y for x, y in zip(multipliers, single_targets)]
        targets = [single_targets]
        metrics = [-0.8] * 2 + [-0.234] * 20
        schedulers = [None, None]
        schedulers[0] = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
                                          threshold_mode='abs')
        schedulers[1] = ExponentialLR(self.opt, gamma=0.1)
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)
    def test_compound_reduce_lr_on_plateau4(self):
        epochs = 20
        eta_min = 1e-10
        for param_group in self.opt.param_groups:
            param_group['lr'] = 0.05
        single_targets = [eta_min + (0.05 - eta_min) *
                          (1 + math.cos(math.pi * x / epochs)) / 2
                          for x in range(epochs)]
        targets = [single_targets]
        metrics = [1.5 * (1.025 ** i) for i in range(20)]
        schedulers = [None, None]
        schedulers[0] = ReduceLROnPlateau(self.opt, mode='max', patience=3,
                                          threshold_mode='rel', threshold=0.1)
        schedulers[1] = CosineAnnealingLR(self.opt, epochs, eta_min)
        self._test_reduce_lr_on_plateau(schedulers, targets, metrics, epochs)
    def test_lambda_lr(self):
        epochs = 10
        self.opt.param_groups[0]['lr'] = 0.05
        self.opt.param_groups[1]['lr'] = 0.4
        targets = [[0.05 * (0.9 ** x) for x in range(epochs)],
                   [0.4 * (0.8 ** x) for x in range(epochs)]]
        scheduler = LambdaLR(self.opt,
                             lr_lambda=[lambda x1: 0.9 ** x1, lambda x2: 0.8 ** x2])
        self._test(scheduler, targets, epochs)
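    # LambdaLR sets each group's lr to base_lr * lr_lambda(epoch); with one
    # lambda per parameter group, the two groups above decay by factors of
    # 0.9 and 0.8 per epoch from their base lrs of 0.05 and 0.4.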
    def test_step_lr_state_dict(self):
        self._check_scheduler_state_dict(
            lambda: StepLR(self.opt, gamma=0.1, step_size=3),
            lambda: StepLR(self.opt, gamma=0.01 / 2, step_size=1))
    def test_multi_step_lr_state_dict(self):
        self._check_scheduler_state_dict(
            lambda: MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9]),
            lambda: MultiStepLR(self.opt, gamma=0.01, milestones=[1, 4, 6]))

    def test_exp_step_lr_state_dict(self):
        self._check_scheduler_state_dict(
            lambda: ExponentialLR(self.opt, gamma=0.1),
            lambda: ExponentialLR(self.opt, gamma=0.01))
    def test_cosine_lr_state_dict(self):
        epochs = 10
        eta_min = 1e-10
        self._check_scheduler_state_dict(
            lambda: CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min),
            lambda: CosineAnnealingLR(self.opt, T_max=epochs // 2, eta_min=eta_min / 2),
            epochs=epochs)
    def test_reduce_lr_on_plateau_state_dict(self):
        scheduler = ReduceLROnPlateau(self.opt, mode='min', factor=0.1, patience=2)
        for score in [1.0, 2.0, 3.0, 4.0, 3.0, 4.0, 5.0, 3.0, 2.0, 1.0]:
            scheduler.step(score)
        scheduler_copy = ReduceLROnPlateau(self.opt, mode='max', factor=0.5, patience=10)
        scheduler_copy.load_state_dict(scheduler.state_dict())
        for key in scheduler.__dict__.keys():
            if key not in {'optimizer', 'is_better'}:
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key], allow_inf=True)
    def test_lambda_lr_state_dict_fn(self):
        scheduler = LambdaLR(self.opt, lr_lambda=lambda x: x)
        state = scheduler.state_dict()
        self.assertIsNone(state['lr_lambdas'][0])

        scheduler_copy = LambdaLR(self.opt, lr_lambda=lambda x: x)
        scheduler_copy.load_state_dict(state)
        for key in scheduler.__dict__.keys():
            if key not in {'optimizer', 'lr_lambdas'}:
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key], allow_inf=True)
    def test_lambda_lr_state_dict_obj(self):
        scheduler = LambdaLR(self.opt, lr_lambda=LambdaLRTestObject(10))
        state = scheduler.state_dict()
        self.assertIsNotNone(state['lr_lambdas'][0])

        scheduler_copy = LambdaLR(self.opt, lr_lambda=LambdaLRTestObject(-1))
        scheduler_copy.load_state_dict(state)
        for key in scheduler.__dict__.keys():
            if key not in {'optimizer'}:
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key], allow_inf=True)
    def _check_scheduler_state_dict(self, constr, constr2, epochs=10):
        scheduler = constr()
        for _ in range(epochs):
            scheduler.step()
        scheduler_copy = constr2()
        scheduler_copy.load_state_dict(scheduler.state_dict())
        for key in scheduler.__dict__.keys():
            if key != 'optimizer':
                self.assertEqual(scheduler.__dict__[key], scheduler_copy.__dict__[key])
    def _test(self, schedulers, targets, epochs=10):
        if isinstance(schedulers, _LRScheduler):
            schedulers = [schedulers]
        for epoch in range(epochs):
            [scheduler.step(epoch) for scheduler in schedulers]
            for param_group, target in zip(self.opt.param_groups, targets):
                self.assertAlmostEqual(target[epoch], param_group['lr'],
                                       msg='LR is wrong in epoch {}: expected {}, got {}'.format(
                                           epoch, target[epoch], param_group['lr']), delta=1e-5)
    def _test_against_legacy(self, scheduler, legacy_scheduler, epochs=10):
        targets = []
        for epoch in range(epochs):
            legacy_scheduler.step(epoch)
            targets.append([group['lr'] for group in self.opt.param_groups])
        for epoch in range(epochs):
            scheduler.step(epoch)
            for i, param_group in enumerate(self.opt.param_groups):
                self.assertAlmostEqual(targets[epoch][i], param_group['lr'],
                                       msg='LR is wrong in epoch {}: expected {}, got {}'.format(
                                           epoch, targets[epoch][i], param_group['lr']), delta=1e-5)
    def _test_reduce_lr_on_plateau(self, schedulers, targets, metrics, epochs=10, verbose=False):
        if isinstance(schedulers, _LRScheduler) or isinstance(schedulers, ReduceLROnPlateau):
            schedulers = [schedulers]
        for epoch in range(epochs):
            for scheduler in schedulers:
                if isinstance(scheduler, ReduceLROnPlateau):
                    scheduler.step(metrics[epoch])
                else:
                    scheduler.step(epoch)
            if verbose:
                print('epoch{}:\tlr={}'.format(epoch, self.opt.param_groups[0]['lr']))
            for param_group, target in zip(self.opt.param_groups, targets):
                self.assertAlmostEqual(target[epoch], param_group['lr'],
                                       msg='LR is wrong in epoch {}: expected {}, got {}'.format(
                                           epoch, target[epoch], param_group['lr']), delta=1e-5)
if __name__ == '__main__':
    run_tests()