from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import unittest

import numpy as np

from caffe2.proto import caffe2_pb2
from caffe2.python import brew, core, workspace, cnn, optimizer
from caffe2.python.model_helper import ModelHelper
from caffe2.python.modeling.initializers import (
    Initializer, PseudoFP16Initializer)
def _createDense(self, dtype=core.DataType.FLOAT):
    """Build a small dense regression model used by the optimizer tests.

    Creates a single FC layer whose weights should converge to
    ``perfect_model`` when trained on the synthetic (data, label) pairs.

    Args:
        dtype: ``core.DataType.FLOAT`` or ``core.DataType.FLOAT16``;
            selects both the numpy dtype of the input data and the
            weight/bias initializer for the FC layer.

    Returns:
        Tuple ``(model, perfect_model, data, label)``.
    """
    perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
    numpy_dtype = np.float32 if dtype == core.DataType.FLOAT \
        else np.float16
    initializer = Initializer if dtype == core.DataType.FLOAT \
        else PseudoFP16Initializer
    # Binary features; labels are the exact linear response, so the
    # regression problem has a unique recoverable solution.
    data = np.random.randint(
        2,
        size=(20, perfect_model.size)).astype(numpy_dtype)
    label = np.dot(data, perfect_model)[:, np.newaxis]

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    out = brew.fc(
        model,
        'data', 'fc', perfect_model.size, 1, ('ConstantFill', {}),
        ('ConstantFill', {}), axis=0,
        WeightInitializer=initializer, BiasInitializer=initializer
    )
    if dtype == core.DataType.FLOAT16:
        # Compute the loss in fp32 for numerical stability.
        out = model.HalfToFloat(out, out + "_fp32")
    sq = model.SquaredL2Distance([out, 'label'])
    loss = model.AveragedLoss(sq, "avg_loss")
    grad_map = model.AddGradientOperators([loss])
    return (model, perfect_model, data, label)
def testDense(self):
    """Train the dense model and check the FC weights converge.

    Runs SGD-style updates for 2000 random samples and asserts the
    learned 'fc_w' blob approaches ``perfect_model``, then delegates
    optimizer-specific checks to ``self.check_optimizer``.
    """
    model, perfect_model, data, label = self._createDense()
    optimizer = self.build_optimizer(model)
    # Feed one sample first so blob shapes exist before net creation.
    workspace.FeedBlob('data', data[0])
    workspace.FeedBlob('label', label[0])
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    for _ in range(2000):
        idx = np.random.randint(data.shape[0])
        workspace.FeedBlob('data', data[idx])
        workspace.FeedBlob('label', label[idx])
        workspace.RunNet(model.net.Proto().name)

    np.testing.assert_allclose(
        perfect_model[np.newaxis, :],
        workspace.FetchBlob('fc_w'),
        atol=1e-2
    )
    self.check_optimizer(optimizer)
@unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
def testGPUDense(self, dtype=core.DataType.FLOAT):
    """Build the dense model on GPU, copy its output to CPU, stack a
    CPU-side FC on top, and run one iteration to check nothing crashes.
    """
    device_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
    with core.DeviceScope(device_opt):
        model, _perfect_model, data, label = self._createDense(dtype)
        if dtype == core.DataType.FLOAT16:
            # fp16 blobs cannot feed the CPU ops below; convert to
            # fp32 before copying off the device.
            fc_fp32_for_host = model.HalfToFloat(
                'fc', 'fc_fp32_for_host')
            model.CopyGPUToCPU(fc_fp32_for_host, 'fc_cpu')
        else:
            model.CopyGPUToCPU('fc', 'fc_cpu')
        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])

    # Add a CPU-side layer so the optimizer also sees CPU parameters.
    brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)
    self.build_optimizer(model)

    # Run once to verify the mixed-device net executes.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    workspace.RunNet(model.net.Proto().name)
def testSparse(self):
    """Train a sparse (Gather-based) model and check convergence.

    To exercise duplicated indices, each weight is referenced by
    DUPLICATION feature columns, so a weight may be gathered more than
    once per example. Both int32 and int64 index dtypes are tested.
    """
    DUPLICATION = 2
    perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
    data = np.random.randint(
        2,
        size=(20, perfect_model.size * DUPLICATION)).astype(np.float32)
    label = np.dot(data, np.repeat(perfect_model, DUPLICATION))

    model = cnn.CNNModelHelper("NCHW", name="test")
    # Imitate what the model wrapper does: register 'w' as a parameter.
    w = model.param_init_net.ConstantFill(
        [], 'w', shape=[perfect_model.size], value=0.0)
    model.params.append(w)
    picked = model.net.Gather([w, 'indices'], 'gather')
    out = model.ReduceFrontSum(picked, 'sum')

    sq = model.SquaredL2Distance([out, 'label'])
    loss = model.AveragedLoss(sq, "avg_loss")
    # Gathered parameters must receive sparse GradientSlice gradients.
    grad_map = model.AddGradientOperators([loss])
    self.assertIsInstance(grad_map['w'], core.GradientSlice)
    optimizer = self.build_optimizer(model)

    workspace.CreateBlob('indices')
    workspace.CreateBlob('label')

    for indices_type in [np.int32, np.int64]:
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        for _ in range(2000):
            idx = np.random.randint(data.shape[0])
            # Transform the binary feature row into the indices of
            # active features (duplicates included).
            indices = np.repeat(np.arange(perfect_model.size),
                                DUPLICATION)[data[idx] == 1]
            if indices.size == 0:
                continue
            workspace.FeedBlob(
                'indices',
                indices.reshape((indices.size,)).astype(indices_type)
            )
            workspace.FeedBlob(
                'label',
                np.array(label[idx]).astype(np.float32))
            workspace.RunNet(model.net.Proto().name)

        np.testing.assert_allclose(
            perfect_model,
            workspace.FetchBlob('w'),
            atol=1e-2
        )
    self.check_optimizer(optimizer)
def _gradient_ratio_reference(self, model, params, max_gradient_norm):
    """Numpy reference for the global-norm clipping ratio.

    Computes ``clip_norm / max(clip_norm, global_norm)`` where
    ``global_norm`` is the L2 norm over all dense parameter gradients.
    For sparse GradientSlice gradients only the ``.values`` blob is
    fetched.

    Args:
        model: model whose ``param_to_grad`` maps params to gradients.
        params: iterable of parameter blob references to include.
        max_gradient_norm: the clipping threshold.

    Returns:
        The scalar norm ratio.
    """
    sum_squared_norms = 0.0
    for param in params:
        grad = (
            model.param_to_grad[param]
            if not isinstance(
                model.param_to_grad[param],
                core.GradientSlice,
            ) else model.param_to_grad[param].values
        )
        val = workspace.FetchBlob(grad)
        sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
    global_norm = np.sqrt(sum_squared_norms)
    clip_norm = max_gradient_norm
    norm_ratio = clip_norm / np.maximum(clip_norm, global_norm)
    return norm_ratio
def test_global_norm_based_gradient_clipping(self):
    """Check the optimizer's global-norm gradient clipping against the
    numpy reference from ``_gradient_ratio_reference``."""
    max_gradient_norm = 1.0
    model, perfect_model, data, label = self._createDense()
    opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

    # Collect only dense parameters; sparse GradientSlice gradients are
    # excluded from the global-norm computation.
    params = []
    for param in model.GetParams(top_scope=True):
        if param in model.param_to_grad:
            if not isinstance(
                model.param_to_grad[param],
                core.GradientSlice,
            ):
                params.append(param)

    workspace.FeedBlob('data', data[0])
    workspace.FeedBlob('label', label[0])
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    self.assertIsNotNone(opt._lr_multiplier)

    # Run the net once so gradient blobs exist in the workspace.
    idx = np.random.randint(data.shape[0])
    workspace.FeedBlob('data', data[idx])
    workspace.FeedBlob('label', label[idx])
    workspace.RunNet(model.net.Proto().name)

    reference = self._gradient_ratio_reference(
        model,
        params,
        max_gradient_norm,
    )
    norm_ratio = workspace.FetchBlob(
        'norm_clipped_grad_update/norm_ratio')
    np.testing.assert_almost_equal(norm_ratio, reference)
    self.assertTrue(
        reference < 1.0, "Bad test, gradient not being scaled."
    )
def test_lr_injection(self):
    """Check that the LR injection multiplier can be read, modified,
    and — when set to zero — zeroes the effective learning rate."""
    model, perfect_model, data, label = self._createDense()
    opt = self.build_optimizer(
        model, max_gradient_norm=1, allow_lr_injection=True
    )

    workspace.FeedBlob('data', data[0])
    workspace.FeedBlob('label', label[0])
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)

    # LR injection should initialize to the neutral value 1.
    self.assertIsNotNone(opt._lr_multiplier)
    self.assertEqual(optimizer.get_lr_injection(), 1)

    # The injection value can be modified at runtime.
    optimizer.set_lr_injection(0)
    self.assertEqual(optimizer.get_lr_injection(), 0)

    # With lr_injection at zero, the effective lr multiplier is zero.
    workspace.RunNet(model.net.Proto().name)
    self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)