Caffe2 - Python API
A deep learning, cross-platform ML framework
optim_baseline.py
1 """Script to generate baseline values from PyTorch optimization algorithms"""
2 
3 import argparse
4 import math
5 import sys
6 
7 import torch
8 import torch.optim
9 
10 
# C++ source emitted around the generated baselines: HEADER opens the
# namespace the per-optimizer functions live in; FOOTER closes it.
HEADER = """
#include <torch/types.h>

#include <vector>

namespace expected_parameters {
"""

FOOTER = "} // namespace expected_parameters"

# Signature template for one optimizer's baseline function; the single
# placeholder is filled with a key from OPTIMIZERS (doubled braces emit
# the literal `{` that opens the C++ function body).
PARAMETERS = "inline std::vector<std::vector<torch::Tensor>> {}() {{"
22 
# Maps a C++-identifier-friendly test name to a factory building the
# corresponding optimizer over an iterable of parameters.  Each key also
# becomes the name of the generated C++ baseline function, so the emit
# order of this dict is the emit order of the header.
OPTIMIZERS = {
    "Adam": lambda params: torch.optim.Adam(params, 1.0),
    "Adam_with_weight_decay": lambda params: torch.optim.Adam(params, 1.0, weight_decay=1e-2),
    "Adam_with_weight_decay_and_amsgrad": lambda params: torch.optim.Adam(params, 1.0, weight_decay=1e-6, amsgrad=True),
    "Adagrad": lambda params: torch.optim.Adagrad(params, 1.0),
    "Adagrad_with_weight_decay": lambda params: torch.optim.Adagrad(params, 1.0, weight_decay=1e-2),
    "Adagrad_with_weight_decay_and_lr_decay": lambda params: torch.optim.Adagrad(params, 1.0, weight_decay=1e-6, lr_decay=1e-3),
    "RMSprop": lambda params: torch.optim.RMSprop(params, 0.1),
    "RMSprop_with_weight_decay": lambda params: torch.optim.RMSprop(params, 0.1, weight_decay=1e-2),
    "RMSprop_with_weight_decay_and_centered": lambda params: torch.optim.RMSprop(params, 0.1, weight_decay=1e-6, centered=True),
    "RMSprop_with_weight_decay_and_centered_and_momentum":
    lambda params: torch.optim.RMSprop(params, 0.1, weight_decay=1e-6, centered=True, momentum=0.9),
    "SGD": lambda params: torch.optim.SGD(params, 0.1),
    "SGD_with_weight_decay": lambda params: torch.optim.SGD(params, 0.1, weight_decay=1e-2),
    "SGD_with_weight_decay_and_momentum": lambda params: torch.optim.SGD(params, 0.1, momentum=0.9, weight_decay=1e-2),
    "SGD_with_weight_decay_and_nesterov_momentum":
    lambda params: torch.optim.SGD(params, 0.1, momentum=0.9, weight_decay=1e-6, nesterov=True),
}
41 
42 
def weight_init(module):
    """Initialize ``module`` in place if it is a ``torch.nn.Linear`` layer.

    Fills every parameter (weight and bias) with values drawn uniformly
    from ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``, where ``fan_in`` is the
    layer's input width.  Any other module type is left untouched.
    Intended to be applied via ``model.apply(weight_init)``.
    """
    if isinstance(module, torch.nn.Linear):
        stdev = 1.0 / math.sqrt(module.weight.size(1))
        # Mutate under no_grad instead of the deprecated `.data` escape
        # hatch; the same RNG calls are made, so the drawn values are
        # identical to the old `p.data.uniform_` form.
        with torch.no_grad():
            for p in module.parameters():
                p.uniform_(-stdev, stdev)
48 
49 
def run(optimizer_name, iterations, sample_every):
    """Train a small fixed MLP with one optimizer and sample its parameters.

    Args:
        optimizer_name: key into ``OPTIMIZERS`` selecting the optimizer.
        iterations: number of optimization steps to take.
        sample_every: record the parameters after every ``sample_every``-th
            step (the step at index 0 is always recorded).

    Returns:
        A list of samples; each sample is a list of 1-D numpy arrays, one
        per model parameter, in ``model.parameters()`` order.
    """
    torch.manual_seed(0)  # fixed seed so the emitted baselines are reproducible
    model = torch.nn.Sequential(
        torch.nn.Linear(2, 3),
        torch.nn.Sigmoid(),
        torch.nn.Linear(3, 1),
        torch.nn.Sigmoid(),
    )
    # float64 keeps the printed baseline values close to C++ double math.
    model = model.to(torch.float64).apply(weight_init)

    optimizer = OPTIMIZERS[optimizer_name](model.parameters())

    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = torch.tensor([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], dtype=torch.float64)

    values = []
    for i in range(iterations):
        optimizer.zero_grad()

        # Call the module itself rather than `.forward()` directly so that
        # registered hooks (if any) are honored.
        output = model(inputs)
        loss = output.sum()
        loss.backward()

        optimizer.step()

        if i % sample_every == 0:
            values.append(
                [p.detach().clone().flatten().numpy() for p in model.parameters()]
            )

    return values
81 
82 
def emit(optimizer_parameter_map):
    """Print the complete generated C++ header to stdout.

    For every entry of ``optimizer_parameter_map`` (optimizer name ->
    list of parameter samples as produced by ``run``), emits one inline
    C++ function returning the sampled parameters as ``torch::tensor``
    literals, wrapped in HEADER/FOOTER.
    """
    # Don't write generated with an @ in front, else this file is recognized as generated.
    print("// @{} from {}".format('generated', __file__))
    print(HEADER)
    for optimizer_name, parameters in optimizer_parameter_map.items():
        # Function signature; the name is the OPTIMIZERS key.
        print(PARAMETERS.format(optimizer_name))
        # NOTE(review): the leading spaces inside the emitted strings below
        # look like they may have been collapsed in this copy of the file —
        # verify the indentation against a previously generated header.
        print(" return {")
        for sample in parameters:
            print(" {")
            for parameter in sample:
                # Render one flattened parameter as a braced C++ initializer
                # list, e.g. {0.1, 0.2, 0.3}.
                parameter_values = "{{{}}}".format(", ".join(map(str, parameter)))
                print(" torch::tensor({}),".format(parameter_values))
            print(" },")
        print(" };")
        print("}\n")
    print(FOOTER)
99 
100 
def main():
    """Drive baseline generation: parse options, run every optimizer, emit."""
    parser = argparse.ArgumentParser(
        "Produce optimization output baseline from PyTorch"
    )
    parser.add_argument("-i", "--iterations", default=1001, type=int)
    parser.add_argument("-s", "--sample-every", default=100, type=int)
    options = parser.parse_args()

    baselines = {}
    for name in OPTIMIZERS:
        # Progress goes to stderr so stdout remains a clean C++ header.
        sys.stderr.write('Evaluating {} ...\n'.format(name))
        baselines[name] = run(name, options.iterations, options.sample_every)

    emit(baselines)


if __name__ == "__main__":
    main()