# Caffe2 - Python API
# A deep learning, cross platform ML framework
# optimizer_test_util.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package optimizer_test_util
17 # Module caffe2.python.optimizer_test_util
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import unittest
import numpy as np
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.proto import caffe2_pb2
from caffe2.python.modeling.initializers import (
    Initializer, pFP16Initializer)

from caffe2.python.model_helper import ModelHelper
32 
class OptimizerTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.

    Subclasses are expected to provide:
      - build_optimizer(model, **kwargs): attach an optimizer to `model`
        and return it.
      - check_optimizer(optimizer): post-training assertions on the
        optimizer's internal state.
      - _skip_gpu: bool, True when the GPU execution part of the GPU test
        should be skipped.
    """

    def _createDense(self, dtype=core.DataType.FLOAT):
        """Build a tiny dense regression model.

        A one-layer FC net is trained to recover `perfect_model` from
        random binary feature vectors and their exact dot-product labels.

        Args:
            dtype: core.DataType.FLOAT or FLOAT16; selects both the numpy
                dtype of the data and the (p)FP16 parameter initializer.

        Returns:
            (model, perfect_model, data, label) tuple.
        """
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT else pFP16Initializer
        data = np.random.randint(
            2,
            size=(20, perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        # axis=0: each fed blob is a single example (no batch dimension).
        out = brew.fc(
            model,
            'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
            ('ConstantFill', {}), axis=0,
            WeightInitializer=initializer, BiasInitializer=initializer
        )
        if dtype == core.DataType.FLOAT16:
            # Loss is computed in fp32 even for fp16 models.
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        # Dense gradients come back as plain blob references.
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)

    def testDense(self):
        """Train the dense model by SGD sampling and check convergence."""
        model, perfect_model, data, label = self._createDense()
        # Local name `optim` avoids shadowing the imported `optimizer` module.
        optim = self.build_optimizer(model)

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        for _ in range(2000):
            idx = np.random.randint(data.shape[0])
            workspace.FeedBlob('data', data[idx])
            workspace.FeedBlob('label', label[idx])
            workspace.RunNet(model.net.Proto().name)

        # The learned weights should recover the generating model.
        np.testing.assert_allclose(
            perfect_model[np.newaxis, :],
            workspace.FetchBlob('fc_w'),
            atol=1e-2
        )
        self.check_optimizer(optim)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def testGPUDense(self, dtype=core.DataType.FLOAT):
        """Build the dense model on GPU with mixed GPU/CPU ops; smoke-run it."""
        device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
        with core.DeviceScope(device_opt):
            model, _perfect_model, data, label = self._createDense(dtype)
            if dtype == core.DataType.FLOAT16:
                # Convert to fp32 before copying to host.
                fc_fp32_for_host = model.HalfToFloat('fc', 'fc_fp32_for_host')
                model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
            else:
                model.CopyGPUToCPU('fc', 'fc_cpu')
            workspace.FeedBlob('data', data[0])
            workspace.FeedBlob('label', label[0])

        # Add some CPU ops
        brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

        # Create optimizer in default device scope
        self.build_optimizer(model)

        if self._skip_gpu:
            return

        # Run net to see it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)

    def testSparse(self):
        """Train a model with sparse (GradientSlice) gradients.

        To test duplicated indices we assign two indices to each weight and
        thus each weight might count once or twice per example.
        """
        DUPLICATION = 2
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        data = np.random.randint(
            2,
            size=(20, perfect_model.size * DUPLICATION)).astype(np.float32)
        label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

        model = cnn.CNNModelHelper("NCHW", name="test")
        # imitate what model wrapper does
        w = model.param_init_net.ConstantFill(
            [], 'w', shape=[perfect_model.size], value=0.0)
        model.params.append(w)
        picked = model.net.Gather([w, 'indices'], 'gather')
        out = model.ReduceFrontSum(picked, 'sum')

        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        # Gathered weights produce sparse gradients.
        self.assertIsInstance(grad_map['w'], core.GradientSlice)
        optim = self.build_optimizer(model)

        workspace.CreateBlob('indices')
        workspace.CreateBlob('label')

        # Sparse optimizers must handle both 32- and 64-bit index types.
        for indices_type in [np.int32, np.int64]:
            workspace.RunNetOnce(model.param_init_net)
            workspace.CreateNet(model.net, True)
            for _ in range(2000):
                idx = np.random.randint(data.shape[0])
                # transform into indices of binary features
                indices = np.repeat(np.arange(perfect_model.size),
                                    DUPLICATION)[data[idx] == 1]
                if indices.size == 0:
                    continue
                workspace.FeedBlob(
                    'indices',
                    indices.reshape((indices.size,)).astype(indices_type)
                )
                workspace.FeedBlob('label',
                                   np.array(label[idx]).astype(np.float32))
                workspace.RunNet(model.net.Proto().name)

            np.testing.assert_allclose(
                perfect_model,
                workspace.FetchBlob('w'),
                atol=1e-2
            )
        self.check_optimizer(optim)
166 
167 
class LRModificationTestBase(object):
    """
    This is an abstract base class.
    Don't inherit from unittest.TestCase, and don't name it 'Test*'.
    Do, however, do these things in classes which inherit from this.
    """
174 
175  def _gradient_ratio_reference(self, model, params, max_gradient_norm):
176  from caffe2.python import core
177  sum_squared_norms = 0.0
178  for param in params:
179  grad = (
180  model.param_to_grad[param]
181  if not isinstance(
182  model.param_to_grad[param],
183  core.GradientSlice,
184  ) else model.param_to_grad[param].values
185  )
186  val = workspace.FetchBlob(grad)
187  sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
188  global_norm = np.sqrt(sum_squared_norms)
189  clip_norm = max_gradient_norm
190  norm_ratio = clip_norm / np.maximum(clip_norm, global_norm)
191  return norm_ratio
192 
193  def test_global_norm_based_gradient_clipping(self):
194  max_gradient_norm = 1.0
195  model, perfect_model, data, label = self._createDense()
196  opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)
197 
198  params = []
199  for param in model.GetParams(top_scope=True):
200  if param in model.param_to_grad:
201  if not isinstance(
202  model.param_to_grad[param],
203  core.GradientSlice,
204  ):
205  params.append(param)
206 
207  workspace.FeedBlob('data', data[0])
208  workspace.FeedBlob('label', label[0])
209  workspace.RunNetOnce(model.param_init_net)
210  workspace.CreateNet(model.net, True)
211  self.assertIsNotNone(opt._lr_multiplier)
212 
213  # Run net once
214  idx = np.random.randint(data.shape[0])
215  workspace.FeedBlob('data', data[idx])
216  workspace.FeedBlob('label', label[idx])
217  workspace.RunNet(model.net.Proto().name)
218 
219  reference = self._gradient_ratio_reference(
220  model,
221  params,
222  max_gradient_norm,
223  )
224  norm_ratio = workspace.FetchBlob(
225  'norm_clipped_grad_update/norm_ratio')
226  np.testing.assert_almost_equal(norm_ratio, reference)
227  self.assertTrue(
228  reference < 1.0, "Bad test, gradient not being scaled."
229  )
230 
231  def test_lr_injection(self):
232  model, perfect_model, data, label = self._createDense()
233  opt = self.build_optimizer(
234  model, max_gradient_norm=1, allow_lr_injection=True
235  )
236 
237  workspace.FeedBlob('data', data[0])
238  workspace.FeedBlob('label', label[0])
239  workspace.RunNetOnce(model.param_init_net)
240  workspace.CreateNet(model.net, True)
241 
242  # Test LR injection initialized properly
243  self.assertIsNotNone(opt._lr_multiplier)
244  self.assertEqual(optimizer.get_lr_injection(), 1)
245 
246  # Test that we're able to modify the value of the lr_injection
247  optimizer.set_lr_injection(0)
248  self.assertEqual(optimizer.get_lr_injection(), 0)
249 
250  # Test that setting the lr_injector properly propogates to the
251  # lr_multiplier. Here, we have both lr_injector and norm_ratio that
252  # affect the lr_multiplier
253  workspace.RunNet(model.net.Proto().name)
254  self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)