Caffe2 - Python API
A deep learning, cross-platform ML framework
optimizer_test_util.py
## @package optimizer_test_util
# Module caffe2.python.optimizer_test_util
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import unittest
import numpy as np
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.proto import caffe2_pb2
from caffe2.python.modeling.initializers import (
    Initializer, PseudoFP16Initializer)

from caffe2.python.model_helper import ModelHelper


class OptimizerTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.
    """

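    # Subclasses mix this class into a unittest.TestCase and implement
    # build_optimizer(model, **kwargs), returning the optimizer it attaches,
    # and check_optimizer(optimizer); both hooks are exercised by the tests
    # below. A hypothetical concrete subclass is sketched at the end of this
    # file.
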
    def _createDense(self, dtype=core.DataType.FLOAT):
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT else \
            PseudoFP16Initializer
        data = np.random.randint(
            2,
            size=(20, perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(
            model,
            'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
            ('ConstantFill', {}), axis=0,
            WeightInitializer=initializer, BiasInitializer=initializer
        )
        if dtype == core.DataType.FLOAT16:
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)

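    # Since label = data . perfect_model by construction, training the linear
    # model should drive 'fc_w' toward perfect_model; testDense below asserts
    # this to an absolute tolerance of 1e-2.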
    def testDense(self):
        model, perfect_model, data, label = self._createDense()
        optimizer = self.build_optimizer(model)
        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        for _ in range(2000):
            idx = np.random.randint(data.shape[0])
            workspace.FeedBlob('data', data[idx])
            workspace.FeedBlob('label', label[idx])
            workspace.RunNet(model.net.Proto().name)

        np.testing.assert_allclose(
            perfect_model[np.newaxis, :],
            workspace.FetchBlob('fc_w'),
            atol=1e-2
        )
        self.check_optimizer(optimizer)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        device_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
        with core.DeviceScope(device_opt):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                fc_fp32_for_host = model.HalfToFloat('fc', 'fc_fp32_for_host')
                model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
            else:
                model.CopyGPUToCPU('fc', 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # Add some CPU ops
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # Create optimizer in default device scope
        self.build_optimizer(model)

        # _skip_gpu is expected to be set by the concrete subclass, e.g. to
        # opt out for optimizers without a GPU implementation
        if self._skip_gpu:
            return

        # Run net to see it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)

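    # For sparse updates Caffe2 produces a core.GradientSlice (an
    # indices/values pair) instead of a dense core.BlobReference; testSparse
    # builds such a model via Gather and verifies this.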
    def testSparse(self):
        # to test duplicated indices we assign two indices to each weight and
        # thus each weight might count once or twice
        DUPLICATION = 2
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        data = np.random.randint(
            2,
            size=(20, perfect_model.size * DUPLICATION)).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # imitate what model wrapper does
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[perfect_model.size], value=0.0)
        model.params.append(w)
        picked = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(picked, 'sum')

        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['w'], core.GradientSlice)
        optimizer = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        for indices_type in [np.int32, np.int64]:
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            for _ in range(2000):
                idx = np.random.randint(data.shape[0])
                # transform into indices of binary features
                indices = np.repeat(np.arange(perfect_model.size),
                                    DUPLICATION)[data[idx] == 1]
                if indices.size == 0:
                    continue
                workspace.FeedBlob(
                    'indices',
                    indices.reshape((indices.size,)).astype(indices_type)
                )
                workspace.FeedBlob('label',
                                   np.array(label[idx]).astype(np.float32))
                workspace.RunNet(model.net.Proto().name)

            np.testing.assert_allclose(
                perfect_model,
                workspace.FetchBlob('w'),
                atol=1e-2
            )
        self.check_optimizer(optimizer)


153  """
154  This is an abstract base class.
155  Don't inherit from unittest.TestCase, and don't name it 'Test*'.
156  Do, however, do these things in classes which inherit from this.
157  """
158 
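    # Reference computation for global-norm-based gradient clipping: with
    # global_norm = sqrt(sum over params of ||grad||^2), the scale applied to
    # the gradients is norm_ratio = clip_norm / max(clip_norm, global_norm),
    # so gradients are only scaled down when global_norm exceeds clip_norm.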
    def _gradient_ratio_reference(self, model, params, max_gradient_norm):
        from caffe2.python import core
        sum_squared_norms = 0.0
        for param in params:
            grad = (
                model.param_to_grad[param]
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ) else model.param_to_grad[param].values
            )
            val = workspace.FetchBlob(grad)
            sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
        global_norm = np.sqrt(sum_squared_norms)
        clip_norm = max_gradient_norm
        norm_ratio = clip_norm / np.maximum(clip_norm, global_norm)
        return norm_ratio

    def test_global_norm_based_gradient_clipping(self):
        max_gradient_norm = 1.0
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

        params = []
        for param in model.GetParams(top_scope=True):
            if param in model.param_to_grad:
                if not isinstance(
                    model.param_to_grad[param],
                    core.GradientSlice,
                ):
                    params.append(param)

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        self.assertIsNotNone(opt._lr_multiplier)

        # Run net once
        idx = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[idx])
        workspace.FeedBlob('label', label[idx])
        workspace.RunNet(model.net.Proto().name)

        reference = self._gradient_ratio_reference(
            model,
            params,
            max_gradient_norm,
        )
        norm_ratio = workspace.FetchBlob(
            'norm_clipped_grad_update/norm_ratio')
        np.testing.assert_almost_equal(norm_ratio, reference)
        self.assertTrue(
            reference < 1.0, "Bad test, gradient not being scaled."
        )

    def test_lr_injection(self):
        model, perfect_model, data, label = self._createDense()
        opt = self.build_optimizer(
            model, max_gradient_norm=1, allow_lr_injection=True
        )

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)

        # Test LR injection initialized properly
        self.assertIsNotNone(opt._lr_multiplier)
        self.assertEqual(optimizer.get_lr_injection(), 1)

        # Test that we're able to modify the value of the lr_injection
        optimizer.set_lr_injection(0)
        self.assertEqual(optimizer.get_lr_injection(), 0)

        # Test that setting the lr_injector properly propagates to the
        # lr_multiplier. Here, we have both lr_injector and norm_ratio that
        # affect the lr_multiplier
        workspace.RunNet(model.net.Proto().name)
        self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)
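
The bases above are mixins: a concrete test combines one of them with
unittest.TestCase and provides build_optimizer and check_optimizer. The
sketch below shows the shape of such a subclass; the class name and the
check performed in check_optimizer are illustrative assumptions, not part of
this module (caffe2.python.optimizer_test contains the real counterparts).

import unittest

from caffe2.python import optimizer
from caffe2.python.optimizer_test_util import OptimizerTestBase


class TestSgdExample(OptimizerTestBase, unittest.TestCase):
    # testGPUDense reads this flag from the subclass
    _skip_gpu = False

    def build_optimizer(self, model, **kwargs):
        # build_sgd attaches an SGD optimizer to the model's parameters and
        # returns it; kwargs such as max_gradient_norm and allow_lr_injection
        # pass through to the builder
        return optimizer.build_sgd(model, base_learning_rate=0.1, **kwargs)

    def check_optimizer(self, opt):
        # plain SGD keeps no per-parameter auxiliary state
        self.assertFalse(opt.get_auxiliary_parameters().local)


if __name__ == '__main__':
    unittest.main()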