"""Script to generate baseline values from PyTorch optimization algorithms"""

import argparse
import math
import sys

import torch
import torch.optim

# C++ boilerplate emitted before/after the generated baselines.
# NOTE(review): HEADER reconstructed from the visible fragments
# ("#include <torch/types.h>" / "namespace expected_parameters {") — confirm
# against the consuming C++ test before regenerating baselines.
HEADER = """
#include <torch/types.h>

#include <vector>

namespace expected_parameters {
"""

FOOTER = "} // namespace expected_parameters"

# One generated C++ function per optimizer configuration; {} is the function
# name, and the doubled {{ emits a literal opening brace.
PARAMETERS = "inline std::vector<std::vector<torch::Tensor>> {}() {{"

# Optimizer factories keyed by the name of the C++ function they generate.
# Each factory takes the model's parameters and returns a configured optimizer.
OPTIMIZERS = {
    "Adam": lambda p: torch.optim.Adam(p, 1.0),
    "Adam_with_weight_decay": lambda p: torch.optim.Adam(p, 1.0, weight_decay=1e-2),
    "Adam_with_weight_decay_and_amsgrad": lambda p: torch.optim.Adam(
        p, 1.0, weight_decay=1e-6, amsgrad=True
    ),
    "Adagrad": lambda p: torch.optim.Adagrad(p, 1.0),
    "Adagrad_with_weight_decay": lambda p: torch.optim.Adagrad(
        p, 1.0, weight_decay=1e-2
    ),
    "Adagrad_with_weight_decay_and_lr_decay": lambda p: torch.optim.Adagrad(
        p, 1.0, weight_decay=1e-6, lr_decay=1e-3
    ),
    "RMSprop": lambda p: torch.optim.RMSprop(p, 0.1),
    "RMSprop_with_weight_decay": lambda p: torch.optim.RMSprop(
        p, 0.1, weight_decay=1e-2
    ),
    "RMSprop_with_weight_decay_and_centered": lambda p: torch.optim.RMSprop(
        p, 0.1, weight_decay=1e-6, centered=True
    ),
    "RMSprop_with_weight_decay_and_centered_and_momentum": lambda p: torch.optim.RMSprop(
        p, 0.1, weight_decay=1e-6, centered=True, momentum=0.9
    ),
    "SGD": lambda p: torch.optim.SGD(p, 0.1),
    "SGD_with_weight_decay": lambda p: torch.optim.SGD(p, 0.1, weight_decay=1e-2),
    "SGD_with_weight_decay_and_momentum": lambda p: torch.optim.SGD(
        p, 0.1, momentum=0.9, weight_decay=1e-2
    ),
    "SGD_with_weight_decay_and_nesterov_momentum": lambda p: torch.optim.SGD(
        p, 0.1, momentum=0.9, weight_decay=1e-6, nesterov=True
    ),
}
def weight_init(module):
    """Uniformly initialize a Linear module's parameters in-place.

    Every parameter of a ``torch.nn.Linear`` (weight and bias) is drawn from
    U(-1/sqrt(fan_in), 1/sqrt(fan_in)), where fan_in is the number of input
    features. Any other module type is left untouched, so this is safe to pass
    to ``nn.Module.apply``.
    """
    if not isinstance(module, torch.nn.Linear):
        return
    bound = 1.0 / math.sqrt(module.weight.size(1))
    for param in module.parameters():
        param.data.uniform_(-bound, bound)
def run(optimizer_name, iterations, sample_every):
    """Train a small fixed network with the named optimizer, sampling parameters.

    Args:
        optimizer_name: key into OPTIMIZERS selecting the optimizer factory.
        iterations: total number of optimization steps to perform.
        sample_every: record the parameters every this many steps (step 0 included).

    Returns:
        A list of samples; each sample is a list of 1-D numpy arrays, one per
        model parameter, flattened and in float64.
    """
    # NOTE(review): the seed, the Sigmoid layers, the loss computation, the
    # zero_grad/backward/step calls, and the accumulator/return were lost in the
    # garbled source and have been reconstructed — the visible code builds the
    # model, the optimizer, and the input, loops, and collects flattened
    # parameter snapshots, which requires exactly these missing pieces. Confirm
    # against the committed baselines before regenerating.
    torch.manual_seed(0)
    model = torch.nn.Sequential(
        torch.nn.Linear(2, 3),
        torch.nn.Sigmoid(),
        torch.nn.Linear(3, 1),
        torch.nn.Sigmoid(),
    )
    # Double precision keeps the printed baseline values stable.
    model = model.to(torch.float64).apply(weight_init)

    optimizer = OPTIMIZERS[optimizer_name](model.parameters())

    input = torch.tensor(
        [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], dtype=torch.float64
    )

    values = []
    for i in range(iterations):
        optimizer.zero_grad()
        output = model.forward(input)
        loss = output.sum()
        loss.backward()
        optimizer.step()

        if i % sample_every == 0:
            values.append(
                [p.clone().flatten().data.numpy() for p in model.parameters()]
            )

    return values
def emit(optimizer_parameter_map):
    """Print the sampled parameters to stdout as a C++ header.

    One C++ function is emitted per optimizer name, returning a vector of
    samples, each sample a vector of torch::tensor literals.
    """
    # The '@' is interpolated at runtime so this generator script itself is not
    # mistaken for a generated file.
    print("// @{} from {}".format('generated', __file__))
    # NOTE(review): the HEADER/FOOTER prints and the brace lines below were lost
    # in the garbled source and have been reconstructed — PARAMETERS opens a C++
    # function and HEADER opens a namespace, so matching closers must be
    # printed for the emitted header to compile. Verify against a previously
    # generated baseline file.
    print(HEADER)
    for optimizer_name, parameters in optimizer_parameter_map.items():
        print(PARAMETERS.format(optimizer_name))
        print("  return {")
        for sample in parameters:
            print("    {")
            for parameter in sample:
                parameter_values = "{{{}}}".format(", ".join(map(str, parameter)))
                print("      torch::tensor({}),".format(parameter_values))
            print("    },")
        print("  };")
        print("}\n")
    print(FOOTER)
def main():
    """Parse CLI options, evaluate every optimizer, and emit the C++ baselines."""
    parser = argparse.ArgumentParser(
        "Produce optimization output baseline from PyTorch"
    )
    parser.add_argument("-i", "--iterations", default=1001, type=int)
    parser.add_argument("-s", "--sample-every", default=100, type=int)
    options = parser.parse_args()

    optimizer_parameter_map = {}
    for optimizer in OPTIMIZERS.keys():
        # Progress goes to stderr so stdout stays a clean C++ header that can
        # be redirected straight into a file.
        sys.stderr.write('Evaluating {} ...\n'.format(optimizer))
        optimizer_parameter_map[optimizer] = run(
            optimizer, options.iterations, options.sample_every
        )

    emit(optimizer_parameter_map)


# NOTE(review): the call under the guard was lost in the garbled source; the
# argparse/run/emit flow has been wrapped in main() and invoked here.
if __name__ == "__main__":
    main()