Caffe2 - Python API
A deep learning, cross platform ML framework
hypothesis_test_util.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package hypothesis_test_util
17 # Module caffe2.python.hypothesis_test_util
18 """
19 The Hypothesis library uses *property-based testing* to check
20 invariants about the code under test under a variety of random inputs.
21 
22  The key idea here is to express properties of the code under test
23 (e.g. that it passes a gradient check, that it implements a reference
24 function, etc), and then generate random instances and verify they
25 satisfy these properties.
26 
27 The main functions of interest are exposed on `HypothesisTestCase`.
28 You can usually just add a short function in this to generate an
29 arbitrary number of test cases for your operator.
30 
31 The key functions are:
32 
33 - `assertDeviceChecks(devices, op, inputs, outputs)`. This asserts that the
34  operator computes the same outputs, regardless of which device it is executed
35  on.
36 - `assertGradientChecks(device, op, inputs, output_,
37  outputs_with_grads)`. This implements a standard numerical gradient checker
38  for the operator in question.
39 - `assertReferenceChecks(device, op, inputs, reference)`. This runs the
40  reference function (effectively calling `reference(*inputs)`), and compares
41  that to the output of the operator.
42 
43 `hypothesis_test_util.py` exposes some useful pre-built samplers.
44 
45 - `hu.gcs` - a gradient checker device (`gc`) and device checker devices (`dc`)
46 
47 - `hu.gcs_cpu_only` - a CPU-only gradient checker device (`gc`) and
48  device checker devices (`dc`). Used for when your operator is only
49  implemented on the CPU.
50 """
51 
52 from __future__ import absolute_import
53 from __future__ import division
54 from __future__ import print_function
55 from __future__ import unicode_literals
56 from caffe2.proto import caffe2_pb2
57 from caffe2.python import (
58  workspace, device_checker, gradient_checker, test_util, core)
59 import contextlib
60 import copy
61 import functools
62 import hypothesis
63 import hypothesis.extra.numpy
64 import hypothesis.strategies as st
65 import logging
66 import numpy as np
67 import os
68 
69 
def is_sandcastle():
    """Return True when running under Facebook's Sandcastle CI environment."""
    return (os.getenv('SANDCASTLE') == '1' or
            os.getenv('TW_JOB_USER') == 'sandcastle')
76 
77 
def is_travis():
    """Return True when running on Travis CI."""
    return os.environ.get('TRAVIS') is not None
80 
81 
# Register the three hypothesis profiles used by caffe2 tests.  All share
# the same health-check suppression, example database, minimum satisfying
# examples and verbosity; they differ in example count and (for CI)
# derandomization.
_PROFILE_OVERRIDES = {
    # Deterministic, moderate-sized runs for Sandcastle CI.
    "sandcastle": dict(derandomize=True, max_examples=100),
    # Quick local iteration.
    "dev": dict(max_examples=10),
    # Thorough debugging runs.
    "debug": dict(max_examples=1000),
}
for _profile_name, _overrides in _PROFILE_OVERRIDES.items():
    hypothesis.settings.register_profile(
        _profile_name,
        hypothesis.settings(
            suppress_health_check=[hypothesis.HealthCheck.too_slow],
            database=None,
            min_satisfying_examples=1,
            verbosity=hypothesis.Verbosity.verbose,
            **_overrides))
# Sandcastle always gets its deterministic profile; otherwise the profile is
# selectable via CAFFE2_HYPOTHESIS_PROFILE, defaulting to "dev".
hypothesis.settings.load_profile(
    'sandcastle' if is_sandcastle()
    else os.getenv('CAFFE2_HYPOTHESIS_PROFILE', 'dev')
)
112 
113 
def dims(min_value=1, max_value=5):
    """Strategy drawing a single tensor dimension in [min_value, max_value]."""
    return st.integers(min_value=min_value, max_value=max_value)
116 
117 
def elements_of_type(dtype=np.float32, filter_=None):
    """
    Return a hypothesis strategy producing scalar elements of ``dtype``.

    Floats are drawn from [-1.0, 1.0]; int32/int64 from the non-negative
    range of the type; bool from {False, True}.  An optional ``filter_``
    predicate restricts the generated values.

    Raises:
        ValueError: if ``dtype`` is not one of the supported types.
    """
    elems = None
    if dtype in (np.float16, np.float32, np.float64):
        elems = st.floats(min_value=-1.0, max_value=1.0)
    elif dtype is np.int32:
        elems = st.integers(min_value=0, max_value=2 ** 31 - 1)
    elif dtype is np.int64:
        elems = st.integers(min_value=0, max_value=2 ** 63 - 1)
    # `np.bool` was merely an alias for the builtin `bool` and has been
    # removed in modern numpy; accept both spellings plus `np.bool_`.
    elif dtype in (bool, np.bool_):
        elems = st.booleans()
    else:
        raise ValueError("Unexpected dtype without elements provided")
    return elems if filter_ is None else elems.filter(filter_)
131 
132 
def arrays(dims, dtype=np.float32, elements=None):
    """Strategy for an ndarray of shape ``dims`` and element type ``dtype``."""
    if elements is None:
        elements = elements_of_type(dtype)
    return hypothesis.extra.numpy.arrays(dtype, dims, elements=elements)
141 
142 
def tensor(min_dim=1,
           max_dim=4,
           dtype=np.float32,
           elements=None,
           **kwargs):
    """
    Strategy for a tensor whose rank lies in [min_dim, max_dim]; extra
    keyword arguments bound each dimension via ``dims``.
    """
    shape_strategy = st.lists(
        dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return shape_strategy.flatmap(
        lambda shape: arrays(shape, dtype, elements))
151 
152 
def tensor1d(min_len=1, max_len=64, dtype=np.float32, elements=None):
    """Strategy for a rank-1 tensor of length in [min_len, max_len]."""
    return tensor(1, 1, dtype, elements, min_value=min_len, max_value=max_len)
155 
156 
def segment_ids(size, is_sorted):
    """
    Strategy for an int32 vector of ``size`` segment ids.

    When ``is_sorted``, the ids form a non-decreasing sequence starting at
    0 (a cumulative sum of 0/1 increments); otherwise ids are arbitrary
    integers in [0, 2 * size].
    """
    if size == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    if is_sorted:
        increments = arrays([size], dtype=np.int32, elements=st.booleans())
        # Subtracting the first increment pins the sequence to start at 0.
        return increments.map(
            lambda inc: np.cumsum(inc, dtype=np.int32) - inc[0])
    return arrays(
        [size],
        dtype=np.int32,
        elements=st.integers(min_value=0, max_value=2 * size))
171 
172 
def lengths(size, min_segments=None, max_segments=None, **kwargs):
    """
    Strategy for a vector of (possibly zero) segment lengths summing to
    ``size``, with between ``min_segments`` and ``max_segments`` entries.

    Implementation: draw the number of borders between segments, draw
    border positions in [0, size], append 0 and size, sort, and take
    successive differences to obtain the lengths.
    """
    if min_segments is None:
        min_segments = 0
    if max_segments is None:
        max_segments = size
    assert min_segments >= 0
    assert min_segments <= max_segments
    if size == 0 and max_segments == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    assert max_segments > 0, "size is not 0, need at least one segment"
    border_count = st.integers(
        min_value=max(min_segments - 1, 0), max_value=max_segments - 1)
    borders = border_count.flatmap(
        lambda num_borders: hypothesis.extra.numpy.arrays(
            np.int32, num_borders,
            elements=st.integers(min_value=0, max_value=size)))
    return borders.map(
        lambda b: np.append(b, np.array([0, size], dtype=np.int32))
    ).map(sorted).map(np.diff)
199 
200 
def segmented_tensor(
    min_dim=1,
    max_dim=4,
    dtype=np.float32,
    is_sorted=True,
    elements=None,
    segment_generator=segment_ids,
    allow_empty=False,
    **kwargs
):
    """
    Strategy for a (data, segments) pair where ``segments`` (produced by
    ``segment_generator``) describes how the leading dimension of ``data``
    is grouped.  With ``allow_empty``, the data may have a 0-sized leading
    dimension.
    """
    empty_flag = st.booleans() if allow_empty else st.just(False)
    shape_strategy = st.lists(
        dims(**kwargs), min_size=min_dim, max_size=max_dim)
    # Optionally prepend a 0-sized leading dimension to produce empty data.
    shape_strategy = st.tuples(empty_flag, shape_strategy).map(
        lambda pair: ([0] + pair[1]) if pair[0] else pair[1])
    return shape_strategy.flatmap(lambda shape: st.tuples(
        arrays(shape, dtype, elements),
        segment_generator(shape[0], is_sorted=is_sorted),
    ))
220 
221 
def lengths_tensor(min_segments=None, max_segments=None, *args, **kwargs):
    """Like ``segmented_tensor`` but segments come from a lengths vector."""
    bound_lengths = functools.partial(
        lengths, min_segments=min_segments, max_segments=max_segments)
    return segmented_tensor(*args, segment_generator=bound_lengths, **kwargs)
226 
227 
def sparse_segmented_tensor(min_dim=1, max_dim=4, dtype=np.float32,
                            is_sorted=True, elements=None, allow_empty=False,
                            segment_generator=segment_ids, itype=np.int64,
                            **kwargs):
    """
    Strategy for a (data, indices, segments) triple describing a sparse
    segmented tensor: ``indices`` selects rows of ``data`` along its
    leading dimension and ``segments`` groups the selected rows.  With
    ``allow_empty``, zero rows may be selected.
    """
    empty_flag = st.booleans() if allow_empty else st.just(False)
    data_shapes = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    # Pair each data shape with a number of selected indices (0 when empty).
    shape_and_count = st.tuples(empty_flag, data_shapes).flatmap(
        lambda pair: st.tuples(
            st.just(pair[1]),
            st.just(0) if pair[0]
            else st.integers(min_value=1, max_value=pair[1][0]),
        ))
    return shape_and_count.flatmap(lambda sc: st.tuples(
        arrays(sc[0], dtype, elements),
        arrays(sc[1], dtype=itype, elements=st.integers(
            min_value=0, max_value=sc[0][0] - 1)),
        segment_generator(sc[1], is_sorted=is_sorted),
    ))
246 
247 
def sparse_lengths_tensor(**kwargs):
    """Sparse segmented tensor whose segments come from a lengths vector."""
    return sparse_segmented_tensor(segment_generator=lengths, **kwargs)
250 
251 
def tensors(n, min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    """Strategy for a list of ``n`` equally-shaped tensors."""
    shape_strategy = st.lists(
        dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return shape_strategy.flatmap(
        lambda shape: st.lists(
            arrays(shape, dtype, elements), min_size=n, max_size=n))
259 
260 
def tensors1d(n, min_len=1, max_len=64, dtype=np.float32, elements=None):
    """Strategy for ``n`` equally-sized rank-1 tensors."""
    return tensors(
        n, 1, 1, dtype, elements, min_value=min_len, max_value=max_len)
265 
266 
# Canonical device options: a CPU option plus, when CUDA support was
# compiled in, a generic GPU option.
cpu_do = caffe2_pb2.DeviceOption()
gpu_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
device_options = [cpu_do]
if workspace.has_gpu_support:
    device_options.append(gpu_do)
# Include a device option for each individual GPU.
expanded_device_options = [cpu_do]
if workspace.has_gpu_support:
    expanded_device_options.extend(
        caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, cuda_gpu_id=i)
        for i in range(workspace.NumCudaDevices()))
275 
276 
def device_checker_device_options():
    """Strategy yielding the full list of devices used for device checks."""
    return st.just(device_options)
279 
280 
def gradient_checker_device_option():
    """Strategy picking one device on which to run the gradient check."""
    return st.sampled_from(device_options)
283 
284 
# Pre-built keyword samplers for @given: `gc` is the device the gradient
# check runs on, `dc` the device list used for device checks.
gcs = dict(
    gc=gradient_checker_device_option(),
    dc=device_checker_device_options(),
)
# Restricted variants for operators implemented only on one device type.
gcs_cpu_only = dict(gc=st.sampled_from([cpu_do]), dc=st.just([cpu_do]))
gcs_gpu_only = dict(gc=st.sampled_from([gpu_do]), dc=st.just([gpu_do]))
292 
293 
@contextlib.contextmanager
def temp_workspace(name=b"temp_ws"):
    """
    Context manager that runs its body in a fresh workspace ``name`` and
    restores the previous workspace afterwards.

    The cleanup runs in a ``finally`` block, so the temporary workspace is
    reset and the old workspace restored even when the body raises —
    without this, an exception would leave the temp workspace active and
    its blobs alive.
    """
    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace(name, True)
    try:
        yield
    finally:
        workspace.ResetWorkspace()
        workspace.SwitchWorkspace(old_ws_name)
301 
302 
def runOpBenchmark(
    device_option,
    op,
    inputs,
    input_device_options=None,
    iterations=10,
):
    """
    Benchmark ``op`` on ``device_option``: feed ``inputs`` into a fresh
    temporary workspace, run the single-op net for ``iterations``
    iterations, and return the timings from workspace.BenchmarkNet.
    """
    if input_device_options is None:
        input_device_options = {}
    # Work on a copy so the caller's operator is not mutated.
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)
    net = caffe2_pb2.NetDef()
    net.op.extend([op])
    net.name = op.name if op.name else "test"

    with temp_workspace():
        for blob_name, blob in zip(op.input, inputs):
            workspace.FeedBlob(
                blob_name,
                blob,
                device_option=input_device_options.get(
                    blob_name, device_option),
            )
        workspace.CreateNet(net)
        return workspace.BenchmarkNet(net.name, 1, iterations, True)
328 
329 
331  """
332  A unittest.TestCase subclass with some helper functions for
333  utilizing the `hypothesis` (hypothesis.readthedocs.io) library.
334  """
335  def assertDeviceChecks(
336  self,
337  device_options,
338  op,
339  inputs,
340  outputs_to_check,
341  input_device_options=None,
342  threshold=0.01
343  ):
344  """
345  Asserts that the operator computes the same outputs, regardless of
346  which device it is executed on.
347 
348  Useful for checking the consistency of GPU and CPU
349  implementations of operators.
350 
351  Usage example:
352 
353  @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
354  def test_sum(self, inputs, in_place, gc, dc):
355  op = core.CreateOperator("Sum", ["X1", "X2"],
356  ["Y" if not in_place else "X1"])
357  X1, X2 = inputs
358  self.assertDeviceChecks(dc, op, [X1, X2], [0])
359  """
361  threshold,
362  device_options=device_options
363  )
364  self.assertTrue(
365  dc.CheckSimple(op, inputs, outputs_to_check, input_device_options)
366  )
367 
369  self,
370  device_option,
371  op,
372  inputs,
373  outputs_to_check,
374  outputs_with_grads,
375  grad_ops=None,
376  threshold=0.005,
377  stepsize=0.05,
378  input_device_options=None,
379  ):
380  """
381  Implements a standard numerical gradient checker for the operator
382  in question.
383 
384  Useful for checking the consistency of the forward and
385  backward implementations of operators.
386 
387  Usage example:
388 
389  @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
390  def test_sum(self, inputs, in_place, gc, dc):
391  op = core.CreateOperator("Sum", ["X1", "X2"],
392  ["Y" if not in_place else "X1"])
393  X1, X2 = inputs
394  self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
395  """
397  stepsize=stepsize,
398  threshold=threshold,
399  device_option=device_option,
400  workspace_name=str(device_option),
401  )
402  res, grad, grad_estimated = gc.CheckSimple(
403  op, inputs, outputs_to_check, outputs_with_grads,
404  grad_ops=grad_ops,
405  input_device_options=input_device_options
406  )
407  self.assertEqual(grad.shape, grad_estimated.shape)
408  self.assertTrue(
409  res,
410  "Gradient check failed for input " + str(op.input[outputs_to_check])
411  )
412 
413  def _assertGradReferenceChecks(
414  self,
415  op,
416  inputs,
417  ref_outputs,
418  output_to_grad,
419  grad_reference,
420  threshold=1e-4,
421  ):
422  grad_blob_name = output_to_grad + '_grad'
423  grad_ops, grad_map = core.GradientRegistry.GetBackwardPass(
424  [op], {output_to_grad: grad_blob_name})
425  output_grad = workspace.FetchBlob(output_to_grad)
426  grad_ref_outputs = grad_reference(output_grad, ref_outputs, inputs)
427  workspace.FeedBlob(grad_blob_name, workspace.FetchBlob(output_to_grad))
428  workspace.RunOperatorsOnce(grad_ops)
429 
430  self.assertEqual(len(grad_ref_outputs), len(inputs))
431  for (n, ref) in zip(op.input, grad_ref_outputs):
432  grad_names = grad_map.get(n)
433  if not grad_names:
434  # no grad for this input
435  self.assertIsNone(ref)
436  else:
437  if isinstance(grad_names, core.BlobReference):
438  # dense gradient
439  ref_vals = ref
440  ref_indices = None
441  val_name = grad_names
442  else:
443  # sparse gradient
444  ref_vals, ref_indices = ref
445  val_name = grad_names.values
446  vals = workspace.FetchBlob(str(val_name))
447  np.testing.assert_allclose(
448  vals,
449  ref_vals,
450  atol=threshold,
451  rtol=threshold,
452  err_msg='Gradient {0} (x) is not matching the reference (y)'
453  .format(val_name),
454  )
455  if ref_indices is not None:
456  indices = workspace.FetchBlob(str(grad_names.indices))
457  np.testing.assert_allclose(indices, ref_indices,
458  atol=1e-4, rtol=1e-4)
459 
460  def _assertInferTensorChecks(self, name, shapes, types, output):
461  if name not in shapes:
462  # No inferred shape or type available
463  return
464  output = workspace.FetchBlob(name)
465  if type(output) is np.ndarray:
466  if output.dtype == np.dtype('float64'):
467  correct_type = caffe2_pb2.TensorProto.DOUBLE
468  elif output.dtype == np.dtype('float32'):
469  correct_type = caffe2_pb2.TensorProto.FLOAT
470  elif output.dtype == np.dtype('int32'):
471  correct_type = caffe2_pb2.TensorProto.INT32
472  elif output.dtype == np.dtype('int64'):
473  correct_type = caffe2_pb2.TensorProto.INT64
474  else:
475  correct_type = "unknown {}".format(np.dtype)
476  else:
477  correct_type = str(type(output))
478  try:
479  np.testing.assert_array_equal(
480  np.array(shapes[name]).astype(np.int32),
481  np.array(output.shape).astype(np.int32),
482  err_msg='Shape {} mismatch: {} vs. {}'.format(
483  name,
484  shapes[name],
485  output.shape))
486  # BUG: Workspace blob type not being set correctly T16121392
487  if correct_type != caffe2_pb2.TensorProto.INT32:
488  return
489  np.testing.assert_equal(
490  types[name],
491  correct_type,
492  err_msg='Type {} mismatch: {} vs. {}'.format(
493  name, types[name], correct_type,
494  )
495  )
496  except AssertionError as e:
497  # Temporarily catch these assertion errors when validating
498  # inferred shape and type info
499  logging.warning(str(e))
500  if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
501  raise e
502 
504  self,
505  device_option,
506  op,
507  inputs,
508  reference,
509  input_device_options=None,
510  threshold=1e-4,
511  output_to_grad=None,
512  grad_reference=None,
513  atol=None,
514  outputs_to_check=None,
515  ):
516  """
517  This runs the reference Python function implementation
518  (effectively calling `reference(*inputs)`, and compares that
519  to the output of output, with an absolute/relative tolerance
520  given by the `threshold` parameter.
521 
522  Useful for checking the implementation matches the Python
523  (typically NumPy) implementation of the same functionality.
524 
525  Usage example:
526 
527  @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
528  def test_softsign(self, X, inplace, gc, dc):
529  op = core.CreateOperator(
530  "Softsign", ["X"], ["X" if inplace else "Y"])
531 
532  def softsign(X):
533  return (X / (1 + np.abs(X)),)
534 
535  self.assertReferenceChecks(gc, op, [X], softsign)
536  """
537  if input_device_options is None:
538  input_device_options = {}
539 
540  op = copy.deepcopy(op)
541  op.device_option.CopyFrom(device_option)
542 
543  with temp_workspace():
544  if (len(op.input) > len(inputs)):
545  raise ValueError(
546  'must supply an input for each input on the op: %s vs %s' %
547  (op.input, inputs))
548  for (n, b) in zip(op.input, inputs):
549  workspace.FeedBlob(
550  n,
551  b,
552  device_option=input_device_options.get(n, device_option)
553  )
554  net = core.Net("opnet")
555  net.Proto().op.extend([op])
556  test_shape_inference = False
557  try:
558  (shapes, types) = workspace.InferShapesAndTypes([net])
559  test_shape_inference = True
560  except RuntimeError as e:
561  # Temporarily catch runtime errors when inferring shape
562  # and type info
563  logging.warning(str(e))
564  if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
565  raise e
566  workspace.RunNetOnce(net)
567  reference_outputs = reference(*inputs)
568  if not (isinstance(reference_outputs, tuple) or
569  isinstance(reference_outputs, list)):
570  raise RuntimeError(
571  "You are providing a wrong reference implementation. A "
572  "proper one should return a tuple/list of numpy arrays.")
573  if not outputs_to_check:
574  self.assertEqual(len(reference_outputs), len(op.output))
575  outputs_to_check = list(range(len(op.output)))
576  outs = []
577  for (output_index, ref) in zip(outputs_to_check, reference_outputs):
578  output_blob_name = op.output[output_index]
579  output = workspace.FetchBlob(output_blob_name)
580  if output.dtype.kind in ('S', 'O'):
581  np.testing.assert_array_equal(output, ref)
582  else:
583  if atol is None:
584  atol = threshold
585  np.testing.assert_allclose(
586  output, ref, atol=atol, rtol=threshold,
587  err_msg=(
588  'Output {0} is not matching the reference'.format(
589  output_blob_name,
590  )),
591  )
592  if test_shape_inference:
594  output_blob_name, shapes, types, output)
595  outs.append(output)
596  if grad_reference is not None:
597  assert output_to_grad is not None, \
598  "If grad_reference is set," \
599  "output_to_grad has to be set as well"
600 
601  with core.DeviceScope(device_option):
603  op, inputs, reference_outputs,
604  output_to_grad, grad_reference,
605  threshold=threshold)
606  return outs
607 
608  def assertValidationChecks(
609  self,
610  device_option,
611  op,
612  inputs,
613  validator,
614  input_device_options=None,
615  as_kwargs=True,
616  init_net=None,
617  ):
618  if input_device_options is None:
619  input_device_options = {}
620  if as_kwargs:
621  assert len(set(list(op.input) + list(op.output))) == \
622  len(op.input) + len(op.output), \
623  "in-place ops are not supported in as_kwargs mode"
624  op = copy.deepcopy(op)
625  op.device_option.CopyFrom(device_option)
626 
627  with temp_workspace():
628  for (n, b) in zip(op.input, inputs):
629  workspace.FeedBlob(
630  n,
631  b,
632  device_option=input_device_options.get(n, device_option)
633  )
634  if init_net:
635  workspace.RunNetOnce(init_net)
636  workspace.RunOperatorOnce(op)
637  outputs = [workspace.FetchBlob(n) for n in op.output]
638  if as_kwargs:
639  validator(**dict(zip(
640  list(op.input) + list(op.output), inputs + outputs)))
641  else:
642  validator(inputs=inputs, outputs=outputs)
643 
644  def assertRunOpRaises(
645  self,
646  device_option,
647  op,
648  inputs,
649  input_device_options=None,
650  exception=(Exception,),
651  regexp=None,
652  ):
653  if input_device_options is None:
654  input_device_options = {}
655 
656  op = copy.deepcopy(op)
657  op.device_option.CopyFrom(device_option)
658 
659  with temp_workspace():
660  for (n, b) in zip(op.input, inputs):
661  workspace.FeedBlob(
662  n,
663  b,
664  device_option=input_device_options.get(n, device_option)
665  )
666  if regexp is None:
667  self.assertRaises(exception, workspace.RunOperatorOnce, op)
668  else:
669  self.assertRaisesRegexp(
670  exception, regexp, workspace.RunOperatorOnce, op)
def _assertInferTensorChecks(self, name, shapes, types, output)
def _assertGradReferenceChecks(self, op, inputs, ref_outputs, output_to_grad, grad_reference, threshold=1e-4)
def assertReferenceChecks(self, device_option, op, inputs, reference, input_device_options=None, threshold=1e-4, output_to_grad=None, grad_reference=None, atol=None, outputs_to_check=None)
def assertDeviceChecks(self, device_options, op, inputs, outputs_to_check, input_device_options=None, threshold=0.01)
def assertGradientChecks(self, device_option, op, inputs, outputs_to_check, outputs_with_grads, grad_ops=None, threshold=0.005, stepsize=0.05, input_device_options=None)