"""
The Hypothesis library uses *property-based testing* to check
invariants about the code under test across a variety of random inputs.

The key idea is to express properties of the code under test
(e.g. that it passes a gradient check, that it implements a reference
function, etc.), and then generate random instances and verify that they
satisfy these properties.

The main functions of interest are exposed on `HypothesisTestCase`.
You can usually just add a short function to your test case class to
generate an arbitrary number of test cases for your operator.

The key functions are:

- `assertDeviceChecks(devices, op, inputs, outputs)`. This asserts that the
  operator computes the same outputs, regardless of which device it is
  executed on.
- `assertGradientChecks(device, op, inputs, output_,
  outputs_with_grads)`. This implements a standard numerical gradient checker
  for the operator in question.
- `assertReferenceChecks(device, op, inputs, reference)`. This runs the
  reference function (effectively calling `reference(*inputs)`) and compares
  the result to the operator's output.

`hypothesis_test_util.py` also exposes some useful pre-built samplers:

- `hu.gcs` - a gradient checker device (`gc`) and device checker devices
  (`dc`).
- `hu.gcs_cpu_only` - a CPU-only gradient checker device (`gc`) and
  device checker devices (`dc`). Use this when your operator is only
  implemented on the CPU.

A short end-to-end example is sketched (as comments) right after the
imports below.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.proto import caffe2_pb2
from caffe2.python import (
    workspace, device_checker, gradient_checker, test_util, core)

import contextlib
import copy
import functools
import logging
import os

import hypothesis
import hypothesis.extra.numpy
import hypothesis.strategies as st
import numpy as np
import six
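
# Example (an illustrative sketch, not part of the original module): a typical
# operator test built on HypothesisTestCase. The operator "Tanh" and the
# class/test names are assumptions chosen for illustration; any registered
# Caffe2 operator with a NumPy reference works the same way. It is kept as a
# comment so that importing this utility module does not define or run tests.
#
#     import caffe2.python.hypothesis_test_util as hu
#     from caffe2.python import core
#     from hypothesis import given
#     import numpy as np
#
#     class TestTanh(hu.HypothesisTestCase):
#         @given(X=hu.tensor(), **hu.gcs)
#         def test_tanh(self, X, gc, dc):
#             op = core.CreateOperator("Tanh", ["X"], ["Y"])
#             # Compare against a NumPy reference on the sampled device ...
#             self.assertReferenceChecks(gc, op, [X], lambda X: (np.tanh(X),))
#             # ... check that all available devices agree ...
#             self.assertDeviceChecks(dc, op, [X], [0])
#             # ... and run the numerical gradient checker on output 0.
#             self.assertGradientChecks(gc, op, [X], 0, [0])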


def is_sandcastle():
    if os.getenv('SANDCASTLE') == '1':
        return True
    elif os.getenv('TW_JOB_USER') == 'sandcastle':
        return True
    return False


def is_travis():
    return 'TRAVIS' in os.environ


# "min_satisfying_examples" is only passed to hypothesis versions that still
# support it; it was deprecated in hypothesis 3.56.0.
if hypothesis.version.__version_info__ >= (3, 56, 0):
    hypothesis.settings.register_profile(
        "sandcastle",
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            verbosity=hypothesis.Verbosity.verbose))
    hypothesis.settings.register_profile(
        "dev",
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            verbosity=hypothesis.Verbosity.verbose))
    hypothesis.settings.register_profile(
        "debug",
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            verbosity=hypothesis.Verbosity.verbose))
else:
    hypothesis.settings.register_profile(
        "sandcastle",
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            min_satisfying_examples=1,
            verbosity=hypothesis.Verbosity.verbose))
    hypothesis.settings.register_profile(
        "dev",
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            min_satisfying_examples=1,
            verbosity=hypothesis.Verbosity.verbose))
    hypothesis.settings.register_profile(
        "debug",
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            min_satisfying_examples=1,
            verbosity=hypothesis.Verbosity.verbose))

hypothesis.settings.load_profile(
    'sandcastle' if is_sandcastle() else os.getenv('CAFFE2_HYPOTHESIS_PROFILE',
                                                   'dev'))


def dims(min_value=1, max_value=5):
    return st.integers(min_value=min_value, max_value=max_value)


def elements_of_type(dtype=np.float32, filter_=None):
    if dtype in (np.float16, np.float32, np.float64):
        elems = st.floats(min_value=-1.0, max_value=1.0)
    elif dtype is np.int32:
        elems = st.integers(min_value=0, max_value=2 ** 31 - 1)
    elif dtype is np.int64:
        elems = st.integers(min_value=0, max_value=2 ** 63 - 1)
    elif dtype is np.bool:
        elems = st.booleans()
    else:
        raise ValueError("Unexpected dtype without elements provided")
    return elems if filter_ is None else elems.filter(filter_)


def arrays(dims, dtype=np.float32, elements=None):
    if elements is None:
        elements = elements_of_type(dtype)
    return hypothesis.extra.numpy.arrays(dtype, dims, elements=elements)


def tensor(min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return dims_.flatmap(
        lambda dims: arrays(dims, dtype, elements))


def tensor1d(min_len=1, max_len=64, dtype=np.float32, elements=None):
    return tensor(1, 1, dtype, elements, min_value=min_len, max_value=max_len)


def segment_ids(size, is_sorted):
    if size == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    if is_sorted:
        return arrays(
            [size], dtype=np.int32,
            elements=st.booleans()).map(
            lambda x: np.cumsum(x, dtype=np.int32) - x[0])
    else:
        return arrays(
            [size], dtype=np.int32,
            elements=st.integers(min_value=0, max_value=2 * size))


def lengths(size, min_segments=None, max_segments=None, **kwargs):
    # First generate a number of borders between segments, then convert the
    # sorted border positions into (possibly zero) segment lengths via diff.
    if min_segments is None:
        min_segments = 0
    if max_segments is None:
        max_segments = size
    assert min_segments >= 0
    assert min_segments <= max_segments
    if size == 0 and max_segments == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    assert max_segments > 0, "size is not 0, need at least one segment"
    return st.integers(
        min_value=max(min_segments - 1, 0), max_value=max_segments - 1
    ).flatmap(
        lambda num_borders:
        hypothesis.extra.numpy.arrays(
            np.int32, num_borders, elements=st.integers(
                min_value=0, max_value=size
            )
        )
    ).map(
        lambda x: np.append(x, np.array([0, size], dtype=np.int32))
    ).map(sorted).map(np.diff)


def segmented_tensor(
    min_dim=1,
    max_dim=4,
    dtype=np.float32,
    is_sorted=True,
    elements=None,
    segment_generator=segment_ids,
    allow_empty=False,
    **kwargs
):
    gen_empty = st.booleans() if allow_empty else st.just(False)
    data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    data_dims_ = st.tuples(
        gen_empty, data_dims_
    ).map(lambda pair: ([0] if pair[0] else []) + pair[1])
    return data_dims_.flatmap(lambda data_dims: st.tuples(
        arrays(data_dims, dtype, elements),
        segment_generator(data_dims[0], is_sorted=is_sorted),
    ))


def lengths_tensor(min_segments=None, max_segments=None, *args, **kwargs):
    gen = functools.partial(
        lengths, min_segments=min_segments, max_segments=max_segments)
    return segmented_tensor(*args, segment_generator=gen, **kwargs)


def sparse_segmented_tensor(min_dim=1, max_dim=4, dtype=np.float32,
                            is_sorted=True, elements=None, allow_empty=False,
                            segment_generator=segment_ids, itype=np.int64,
                            **kwargs):
    gen_empty = st.booleans() if allow_empty else st.just(False)
    data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    all_dims_ = st.tuples(gen_empty, data_dims_).flatmap(
        lambda pair: st.tuples(
            st.just(pair[1]),
            (st.integers(min_value=1, max_value=pair[1][0])
             if not pair[0] else st.just(0)),
        ))
    return all_dims_.flatmap(lambda dims: st.tuples(
        arrays(dims[0], dtype, elements),
        arrays(dims[1], dtype=itype, elements=st.integers(
            min_value=0, max_value=dims[0][0] - 1)),
        segment_generator(dims[1], is_sorted=is_sorted),
    ))


def sparse_lengths_tensor(**kwargs):
    return sparse_segmented_tensor(segment_generator=lengths, **kwargs)


def tensors(n, min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return dims_.flatmap(
        lambda dims: st.lists(
            arrays(dims, dtype, elements),
            min_size=n, max_size=n))


def tensors1d(n, min_len=1, max_len=64, dtype=np.float32, elements=None):
    return tensors(
        n, 1, 1, dtype, elements, min_value=min_len, max_value=max_len
    )
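

# Example (an illustrative sketch, not part of the original module): the
# segmented strategies above draw tuples rather than single arrays. The
# operator "LengthsSum" and the test name are assumptions for illustration;
# the point is that `lengths_tensor` yields a (data, lengths) pair whose
# lengths sum to data.shape[0].
#
#     @given(inputs=hu.lengths_tensor(), **hu.gcs_cpu_only)
#     def test_lengths_sum(self, inputs, gc, dc):
#         data, lengths = inputs
#         op = core.CreateOperator("LengthsSum", ["DATA", "LENGTHS"], ["OUT"])
#         self.assertDeviceChecks(dc, op, [data, lengths], [0])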


cpu_do = caffe2_pb2.DeviceOption()
cuda_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
hip_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.HIP)
gpu_do = caffe2_pb2.DeviceOption(device_type=workspace.GpuDeviceType)
_device_options_no_hip = [cpu_do] + ([cuda_do] if workspace.has_cuda_support else [])
device_options = _device_options_no_hip + ([hip_do] if workspace.has_hip_support else [])

expanded_device_options = [cpu_do] + [
    caffe2_pb2.DeviceOption(device_type=workspace.GpuDeviceType, device_id=i)
    for i in range(workspace.NumGpuDevices())]


def device_checker_device_options():
    return st.just(device_options)


def gradient_checker_device_option():
    return st.sampled_from(device_options)


gcs = dict(
    gc=gradient_checker_device_option(),
    dc=device_checker_device_options()
)

gcs_cpu_only = dict(gc=st.sampled_from([cpu_do]), dc=st.just([cpu_do]))
gcs_cuda_only = dict(gc=st.sampled_from([cuda_do]), dc=st.just([cuda_do]))
gcs_gpu_only = dict(gc=st.sampled_from([gpu_do]), dc=st.just([gpu_do]))
gcs_no_hip = dict(gc=st.sampled_from(_device_options_no_hip),
                  dc=st.just(_device_options_no_hip))
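
# Example (an illustrative sketch, not part of the original module): pick the
# `gcs` variant that matches where the operator is implemented. "MyCpuOnlyOp"
# is a hypothetical operator name used only for illustration.
#
#     @given(X=hu.tensor(), **hu.gcs_cpu_only)
#     def test_my_cpu_only_op(self, X, gc, dc):
#         # gc is always the CPU DeviceOption here, and dc == [cpu_do], so the
#         # checks below never try to run the op on a GPU.
#         op = core.CreateOperator("MyCpuOnlyOp", ["X"], ["Y"])
#         self.assertDeviceChecks(dc, op, [X], [0])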


@contextlib.contextmanager
def temp_workspace(name=b"temp_ws"):
    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace(name, True)
    yield
    workspace.ResetWorkspace()
    workspace.SwitchWorkspace(old_ws_name)


def runOpBenchmark(
    device_option,
    op,
    inputs,
    input_device_options=None,
    iterations=10,
):
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)
    net = caffe2_pb2.NetDef()
    net.op.extend([op])
    net.name = op.name if op.name else "test"

    with temp_workspace():
        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=_input_device_options.get(n, device_option)
            )
        workspace.CreateNet(net)
        ret = workspace.BenchmarkNet(net.name, 1, iterations, True)
    return ret


def runOpOnInput(
    device_option,
    op,
    inputs,
    input_device_options=None,
):
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    with temp_workspace():
        if (len(op.input) > len(inputs)):
            raise ValueError(
                'must supply an input for each input on the op: %s vs %s' %
                (op.input, inputs))
        _input_device_options = input_device_options or \
            core.InferOpBlobDevicesAsDict(op)[0]
        for (n, b) in zip(op.input, inputs):
            workspace.FeedBlob(
                n,
                b,
                device_option=_input_device_options.get(n, device_option)
            )
        workspace.RunOperatorOnce(op)
        outputs_to_check = list(range(len(op.output)))
        outs = []
        for output_index in outputs_to_check:
            output_blob_name = op.output[output_index]
            output = workspace.FetchBlob(output_blob_name)
            outs.append(output)
        return outs


class HypothesisTestCase(test_util.TestCase):
    """
    A unittest.TestCase subclass with some helper functions for
    utilizing the `hypothesis` (hypothesis.readthedocs.io) library.
    """

    def assertDeviceChecks(
        self,
        device_options,
        op,
        inputs,
        outputs_to_check,
        input_device_options=None,
        threshold=0.01
    ):
        """
        Asserts that the operator computes the same outputs, regardless of
        which device it is executed on.

        Useful for checking the consistency of GPU and CPU
        implementations of operators.

        Usage example:

            @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
            def test_sum(self, inputs, in_place, gc, dc):
                op = core.CreateOperator("Sum", ["X1", "X2"],
                                         ["Y" if not in_place else "X1"])
                X1, X2 = inputs
                self.assertDeviceChecks(dc, op, [X1, X2], [0])
        """
        dc = device_checker.DeviceChecker(
            threshold,
            device_options=device_options
        )
        self.assertTrue(
            dc.CheckSimple(op, inputs, outputs_to_check, input_device_options)
        )

    def assertGradientChecks(
        self,
        device_option,
        op,
        inputs,
        outputs_to_check,
        outputs_with_grads,
        grad_ops=None,
        threshold=0.005,
        stepsize=0.05,
        input_device_options=None,
    ):
        """
        Implements a standard numerical gradient checker for the operator
        in question.

        Useful for checking the consistency of the forward and
        backward implementations of operators.

        Usage example:

            @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
            def test_sum(self, inputs, in_place, gc, dc):
                op = core.CreateOperator("Sum", ["X1", "X2"],
                                         ["Y" if not in_place else "X1"])
                X1, X2 = inputs
                self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
        """
        gc = gradient_checker.GradientChecker(
            stepsize=stepsize,
            threshold=threshold,
            device_option=device_option,
            workspace_name=str(device_option),
            input_device_options=input_device_options,
        )
        res, grad, grad_estimated = gc.CheckSimple(
            op, inputs, outputs_to_check, outputs_with_grads,
            grad_ops=grad_ops,
            input_device_options=input_device_options
        )
        self.assertEqual(grad.shape, grad_estimated.shape)
        self.assertTrue(
            res,
            "Gradient check failed for input " + str(op.input[outputs_to_check])
        )

    def _assertGradReferenceChecks(
        self, op, inputs, ref_outputs, output_to_grad, grad_reference,
        threshold=1e-4
    ):
        grad_blob_name = output_to_grad + '_grad'
        grad_ops, grad_map = core.GradientRegistry.GetBackwardPass(
            [op], {output_to_grad: grad_blob_name})
        output_grad = workspace.FetchBlob(output_to_grad)
        grad_ref_outputs = grad_reference(output_grad, ref_outputs, inputs)
        workspace.FeedBlob(grad_blob_name, workspace.FetchBlob(output_to_grad))
        workspace.RunOperatorsOnce(grad_ops)

        self.assertEqual(len(grad_ref_outputs), len(inputs))
        for (n, ref) in zip(op.input, grad_ref_outputs):
            grad_names = grad_map.get(n)
            if grad_names is None:
                # No gradient flows into this input.
                self.assertIsNone(ref)
            else:
                if isinstance(grad_names, core.BlobReference):
                    # Dense gradient: a single blob of values.
                    ref_vals = ref
                    ref_indices = None
                    val_name = grad_names
                else:
                    # Sparse gradient: a (values, indices) pair.
                    ref_vals, ref_indices = ref
                    val_name = grad_names.values
                vals = workspace.FetchBlob(str(val_name))
                np.testing.assert_allclose(
                    vals,
                    ref_vals,
                    atol=threshold,
                    rtol=threshold,
                    err_msg='Gradient {0} (x) is not matching the reference (y)'
                    .format(val_name),
                )
                if ref_indices is not None:
                    indices = workspace.FetchBlob(str(grad_names.indices))
                    np.testing.assert_allclose(indices, ref_indices,
                                               atol=1e-4, rtol=1e-4)

    def _assertInferTensorChecks(self, name, shapes, types, output):
        if name not in shapes:
            # No inferred shape or type available for this blob.
            return
        output = workspace.FetchBlob(name)
        if type(output) is np.ndarray:
            if output.dtype == np.dtype('float64'):
                correct_type = caffe2_pb2.TensorProto.DOUBLE
            elif output.dtype == np.dtype('float32'):
                correct_type = caffe2_pb2.TensorProto.FLOAT
            elif output.dtype == np.dtype('int32'):
                correct_type = caffe2_pb2.TensorProto.INT32
            elif output.dtype == np.dtype('int64'):
                correct_type = caffe2_pb2.TensorProto.INT64
            else:
                correct_type = "unknown {}".format(output.dtype)
        else:
            correct_type = str(type(output))
        try:
            np.testing.assert_array_equal(
                np.array(shapes[name]).astype(np.int32),
                np.array(output.shape).astype(np.int32),
                err_msg='Shape {} mismatch: {} vs. {}'.format(
                    name, shapes[name], output.shape))
            if correct_type != caffe2_pb2.TensorProto.INT32:
                np.testing.assert_equal(
                    types[name],
                    correct_type,
                    err_msg='Type {} mismatch: {} vs. {}'.format(
                        name, types[name], correct_type,
                    )
                )
        except AssertionError as e:
            # Only fail on shape/type inference mismatches when explicitly
            # requested; otherwise just log them.
            logging.warning(str(e))
            if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
                raise e

    def assertReferenceChecks(
        self,
        device_option,
        op,
        inputs,
        reference,
        input_device_options=None,
        threshold=1e-4,
        output_to_grad=None,
        grad_reference=None,
        atol=None,
        outputs_to_check=None,
    ):
        """
        This runs the reference Python function implementation
        (effectively calling `reference(*inputs)`) and compares that
        to the output of the operator, with an absolute/relative tolerance
        given by the `threshold` parameter.

        Useful for checking that the operator implementation matches the
        Python (typically NumPy) implementation of the same functionality.

        Usage example:

            @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
            def test_softsign(self, X, inplace, gc, dc):
                op = core.CreateOperator(
                    "Softsign", ["X"], ["X" if inplace else "Y"])

                def softsign(X):
                    return (X / (1 + np.abs(X)),)

                self.assertReferenceChecks(gc, op, [X], softsign)
        """
        op = copy.deepcopy(op)
        op.device_option.CopyFrom(device_option)

        with temp_workspace():
            if (len(op.input) > len(inputs)):
                raise ValueError(
                    'must supply an input for each input on the op: %s vs %s' %
                    (op.input, inputs))
            _input_device_options = input_device_options or \
                core.InferOpBlobDevicesAsDict(op)[0]
            for (n, b) in zip(op.input, inputs):
                workspace.FeedBlob(
                    n,
                    b,
                    device_option=_input_device_options.get(n, device_option)
                )
            net = core.Net("opnet")
            net.Proto().op.extend([op])
            test_shape_inference = False
            try:
                (shapes, types) = workspace.InferShapesAndTypes([net])
                test_shape_inference = True
            except RuntimeError as e:
                # Temporarily catch runtime errors when inferring shape
                # and type info.
                logging.warning(str(e))
                if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
                    raise e
            workspace.RunNetOnce(net)
            reference_outputs = reference(*inputs)
            if not (isinstance(reference_outputs, tuple) or
                    isinstance(reference_outputs, list)):
                raise RuntimeError(
                    "You are providing a wrong reference implementation. A "
                    "proper one should return a tuple/list of numpy arrays.")
            if not outputs_to_check:
                self.assertEqual(len(reference_outputs), len(op.output))
                outputs_to_check = list(range(len(op.output)))
            for (output_index, ref) in zip(outputs_to_check, reference_outputs):
                output_blob_name = op.output[output_index]
                output = workspace.FetchBlob(output_blob_name)
                if output.dtype.kind in ('S', 'O'):
                    np.testing.assert_array_equal(output, ref)
                else:
                    if atol is None:
                        atol = threshold
                    np.testing.assert_allclose(
                        output, ref, atol=atol, rtol=threshold,
                        err_msg=(
                            'Output {0} is not matching the reference'.format(
                                output_blob_name)),
                    )
                if test_shape_inference:
                    self._assertInferTensorChecks(
                        output_blob_name, shapes, types, output)
            if grad_reference is not None:
                assert output_to_grad is not None, \
                    "If grad_reference is set, " \
                    "output_to_grad has to be set as well"
                with core.DeviceScope(device_option):
                    self._assertGradReferenceChecks(
                        op, inputs, reference_outputs,
                        output_to_grad, grad_reference,
                        threshold=threshold)

    def assertValidationChecks(
        self,
        device_option,
        op,
        inputs,
        validator,
        input_device_options=None,
        as_kwargs=True,
        init_net=None,
    ):
        if as_kwargs:
            assert len(set(list(op.input) + list(op.output))) == \
                len(op.input) + len(op.output), \
                "in-place ops are not supported in as_kwargs mode"
        op = copy.deepcopy(op)
        op.device_option.CopyFrom(device_option)

        with temp_workspace():
            _input_device_options = input_device_options or \
                core.InferOpBlobDevicesAsDict(op)[0]
            for (n, b) in zip(op.input, inputs):
                workspace.FeedBlob(
                    n,
                    b,
                    device_option=_input_device_options.get(n, device_option)
                )
            if init_net:
                workspace.RunNetOnce(init_net)
            workspace.RunOperatorOnce(op)
            outputs = [workspace.FetchBlob(n) for n in op.output]
            if as_kwargs:
                validator(**dict(zip(
                    list(op.input) + list(op.output), inputs + outputs)))
            else:
                validator(inputs=inputs, outputs=outputs)

    def assertRunOpRaises(
        self,
        device_option,
        op,
        inputs,
        input_device_options=None,
        exception=(Exception,),
        regexp=None,
    ):
        op = copy.deepcopy(op)
        op.device_option.CopyFrom(device_option)

        with temp_workspace():
            _input_device_options = input_device_options or \
                core.InferOpBlobDevicesAsDict(op)[0]
            for (n, b) in zip(op.input, inputs):
                workspace.FeedBlob(
                    n,
                    b,
                    device_option=_input_device_options.get(n, device_option)
                )
            if regexp is None:
                self.assertRaises(exception, workspace.RunOperatorOnce, op)
            else:
                six.assertRaisesRegex(
                    self, exception, regexp, workspace.RunOperatorOnce, op)
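
    # Example (an illustrative sketch, not part of the original module) for the
    # two helpers above, which carry no usage example in a docstring. "Relu",
    # the deliberately malformed second operator, and the test name are
    # assumptions chosen for illustration.
    #
    #     @given(X=hu.tensor(), **hu.gcs_cpu_only)
    #     def test_validation_and_raises(self, X, gc, dc):
    #         op = core.CreateOperator("Relu", ["X"], ["Y"])
    #         # With as_kwargs=True (the default) the validator receives the
    #         # op's blobs as keyword arguments named after them.
    #         self.assertValidationChecks(
    #             gc, op, [X],
    #             lambda X, Y: np.testing.assert_array_equal(Y >= 0, True))
    #         # An op wired with the wrong number of inputs should fail to run.
    #         bad_op = core.CreateOperator("Relu", ["X", "extra"], ["Y"])
    #         self.assertRunOpRaises(gc, bad_op, [X, X])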