Caffe2 - Python API A deep learning, cross platform ML framework
dnnlowp_test_utils.py
1 from __future__ import absolute_import, division, print_function, unicode_literals
2
3 import numpy as np
4 from caffe2.python import utils
5 from hypothesis import assume
6
7
def check_quantized_results_close(outputs, ref=None, symmetric=False, atol_scale=0.53):
    """Assert that quantized results are close enough to the floating-point result.

    ``outputs[0][0]`` is treated as the floating-point result and ``o[0]`` for
    every ``o`` in ``outputs[1:]`` as a quantized result. The error bound is
    derived under the assumption that there is no input quantization error.

    Args:
        outputs: sequence of indexable entries; only element ``[0]`` of each
            entry is compared.
        ref: array whose value range (extended to include 0) defines the
            quantization step. Defaults to ``outputs[0][0]``.
        symmetric: when true, the step is derived from a range that is
            symmetric around 0 (``2 * max(|min|, |max|) / 255``); otherwise
            from ``(max - min) / 255``.
        atol_scale: fraction of one quantization step that is tolerated as
            absolute error.

    Raises:
        AssertionError: if any quantized result differs from
            ``outputs[0][0]`` by more than the derived tolerance.
    """
    if ref is None:
        ref = outputs[0][0]

    if np.size(ref) == 0:
        # np.min / np.max raise ValueError on empty arrays. An empty reference
        # has a zero-width range, so the tolerance below becomes 0 and the
        # comparisons only check that the (empty) shapes agree.
        ref_min = 0
        ref_max = 0
    else:
        ref_min = min(np.min(ref), 0)
        ref_max = max(np.max(ref), 0)

    if symmetric:
        ref_scale = 2 * max(abs(ref_max), abs(ref_min)) / 255
    else:
        ref_scale = (ref_max - ref_min) / 255

    # In exact math the bound would be half a quantization step
    # (atol_scale == 0.5). The default of 0.53 (roughly 1 / 1.9) leaves some
    # slack for finite precision in floating-point numbers.
    atol = ref_scale * atol_scale
    for o in outputs[1:]:
        np.testing.assert_allclose(o[0], outputs[0][0], atol=atol, rtol=0)
26
27
def pairwise(iterable):
    """Return an iterator over overlapping adjacent pairs of ``iterable``.

    s -> (s0, s1), (s1, s2), (s2, s3), ...

    An input with fewer than two elements produces no pairs.
    """
    from itertools import tee

    leading, trailing = tee(iterable)
    # Advance the second copy by one element so that zip() lines each element
    # up with its successor; the default of None keeps empty input from raising.
    next(trailing, None)
    return zip(leading, trailing)
35
36
# Make sure we won't have overflows from the vpmaddubsw instruction used in fbgemm.
def avoid_vpmaddubsw_overflow_fc(
    batch_size, input_channels, output_channels, X, X_min, X_max, W, W_min, W_max
):
    """Adjust ``W`` in place so paired products stay within the int16 range.

    For every batch row ``i``, output channel ``j`` and even input channel
    ``k`` (an unpaired last channel is skipped when ``input_channels`` is odd)
    the value

        (X[i, k] - X_min) * (W[j, k] - 128 - W_min)
        + (X[i, k + 1] - X_min) * (W[j, k + 1] - 128 - W_min)

    is checked against ``[-2**15, 2**15 - 1]``. When it falls outside,
    ``W[j, k + 1]`` is replaced by the value that brings the sum back to the
    violated bound, truncated toward zero. A second pass asserts that no
    pair is out of range afterwards.

    Args:
        batch_size: number of rows of ``X`` to inspect.
        input_channels: number of columns of ``X`` and ``W`` to inspect.
        output_channels: number of rows of ``W`` to inspect.
        X: 2-D array indexed as ``X[i, k]``; only read.
        X_min: offset subtracted from every ``X`` value.
        X_max: unused; kept for interface compatibility.
        W: 2-D array indexed as ``W[j, k]``; modified in place.
        W_min: offset subtracted (together with 128) from every ``W`` value.
        W_max: unused; kept for interface compatibility.

    Raises:
        AssertionError: if a pair is still out of range after adjustment.
    """
    int16_min = -(1 << 15)
    int16_max = (1 << 15) - 1
    paired_channels = range(0, input_channels // 2 * 2, 2)

    def operands(i, j, k):
        # Offset-corrected activations and weights for the pair (k, k + 1).
        x0 = X[i, k] - X_min
        x1 = X[i, k + 1] - X_min
        w0 = W[j, k] - 128 - W_min
        w1 = W[j, k + 1] - 128 - W_min
        return x0, x1, w0, w1

    for i, j in np.ndindex((batch_size, output_channels)):
        for k in paired_channels:
            x0, x1, w0, w1 = operands(i, j, k)
            pair_sum = x0 * w0 + x1 * w1
            # Only the second weight of the pair is changed; int() truncates
            # toward zero, which keeps the corrected sum inside the bound.
            if pair_sum < int16_min:
                w1_adjusted = (int16_min - float(x0) * w0) / x1
                W[j, k + 1] = int(w1_adjusted) + 128 + W_min
            elif pair_sum > int16_max:
                w1_adjusted = (int16_max - float(x0) * w0) / x1
                W[j, k + 1] = int(w1_adjusted) + 128 + W_min

    # Go through the same loop again to double check we don't have any overflow
    for i, j in np.ndindex((batch_size, output_channels)):
        for k in paired_channels:
            x0, x1, w0, w1 = operands(i, j, k)
            assert int16_min <= x0 * w0 + x1 * w1 < (1 << 15)
62
63
def _vpmaddubsw_operand_pairs(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    input_channels,
    output_channels,
    batch_size,
    X,
    X_min,
):
    """Yield ``(w0_index, w1_index, x0, x1)`` for every adjacent operand pair.

    Output positions are visited in ``np.ndindex`` order over
    ``(batch, *output_spatial, output_channel)``. For each of them, every pair
    of consecutive filter positions (``np.ndindex`` order over
    ``(*kernels, input_channel)``) is produced. ``w*_index`` are index tuples
    into the weight array; ``x*`` are the activations with ``X_min``
    subtracted. ``X`` is only read, so callers may modify the weight array
    while iterating.
    """
    ndim = len(sizes)
    dkernels = tuple((dilations[i] * (kernels[i] - 1) + 1) for i in range(ndim))
    size_cols = tuple(
        (sizes[i] + 2 * pads[i] - dkernels[i]) // strides[i] + 1 for i in range(ndim)
    )

    # The filter traversal is the same for every output position, so build the
    # list of consecutive (overlapping) filter-index pairs once.
    filter_indices = list(np.ndindex(tuple(kernels) + (input_channels,)))
    filter_pairs = list(zip(filter_indices, filter_indices[1:]))

    def activation(b, o_spatial, filter_idx):
        taps = filter_idx[:-1]
        pos = tuple(
            strides[i] * o_spatial[i] - pads[i] + dilations[i] * taps[i]
            for i in range(ndim)
        )
        if all(0 <= pos[i] < sizes[i] for i in range(ndim)):
            return X[(b,) + pos + (filter_idx[-1],)] - X_min
        # Position falls into the padding: contributes 0 - X_min.
        return -X_min

    for out_idx in np.ndindex((batch_size,) + size_cols + (output_channels,)):
        b = out_idx[0]
        oc = out_idx[-1]
        o_spatial = out_idx[1:-1]
        for idx0, idx1 in filter_pairs:
            yield (
                (oc,) + idx0,
                (oc,) + idx1,
                activation(b, o_spatial, idx0),
                activation(b, o_spatial, idx1),
            )


# Make sure we won't have overflows from vpmaddubsw instruction used in
# fbgemm (FIXME: this assumes fbgemm is used only for NHWC and im2col
# is done in a way that input_channels is the fastest moving
# dimension).
#
# strides, pads, kernels, dilations, and sizes should be sequences with the
# same dimension (2 for 2D conv, 3 for 3D conv, and so on)
def avoid_vpmaddubsw_overflow(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    input_channels,
    output_channels,
    batch_size,
    X,
    X_min,
    X_max,
    W,
    W_min,
    W_max,
):
    """Adjust ``W`` in place so adjacent products stay within the int16 range.

    For every output position and every pair of consecutive filter positions
    the value ``x0 * w0 + x1 * w1`` is checked against
    ``[-2**15, 2**15 - 1]``, where ``x = X[...] - X_min`` (or ``-X_min`` for
    padded positions) and ``w = W[...] - 128 - W_min``. When it falls outside,
    the second weight of the pair is replaced by the value that brings the sum
    back to the violated bound, truncated toward zero. A second pass asserts
    that no pair is out of range afterwards.

    Args:
        strides, pads, kernels, dilations, sizes: per-spatial-dimension
            convolution geometry; all of length ``len(sizes)``.
        input_channels: size of the last axis of ``X`` and ``W`` to inspect.
        output_channels: size of the first axis of ``W`` to inspect.
        batch_size: size of the first axis of ``X`` to inspect.
        X: array indexed as ``X[(batch,) + spatial + (channel,)]``; only read.
        X_min: offset subtracted from every ``X`` value.
        X_max: unused; kept for interface compatibility.
        W: array indexed as ``W[(out_channel,) + kernel_pos + (channel,)]``;
            modified in place.
        W_min: offset subtracted (together with 128) from every ``W`` value.
        W_max: unused; kept for interface compatibility.

    Raises:
        AssertionError: if a pair is still out of range after adjustment.
    """
    int16_min = -(1 << 15)
    int16_limit = 1 << 15
    geometry = (
        strides,
        pads,
        kernels,
        dilations,
        sizes,
        input_channels,
        output_channels,
        batch_size,
        X,
        X_min,
    )

    for w0_idx, w1_idx, x0, x1 in _vpmaddubsw_operand_pairs(*geometry):
        # Weights are re-read on every step because an earlier step may have
        # changed them.
        w0 = W[w0_idx] - 128 - W_min
        w1 = W[w1_idx] - 128 - W_min
        pair_sum = x0 * w0 + x1 * w1
        if pair_sum < int16_min:
            w1_adjusted = (int16_min - float(x0) * w0) / x1
            W[w1_idx] = int(w1_adjusted) + 128 + W_min
        elif pair_sum >= int16_limit:
            w1_adjusted = (int16_limit - 1 - float(x0) * w0) / x1
            W[w1_idx] = int(w1_adjusted) + 128 + W_min

    # Go through the same loop again to double check we don't have any overflow
    for w0_idx, w1_idx, x0, x1 in _vpmaddubsw_operand_pairs(*geometry):
        w0 = W[w0_idx] - 128 - W_min
        w1 = W[w1_idx] - 128 - W_min
        assert int16_min <= x0 * w0 + x1 * w1 < int16_limit
175
176
# strides, pads, kernels, dilations, and sizes should be tuples with the same dimension
# (2 for 2D conv, 3 for 3D conv, and so on)
def generate_convnd_inputs(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    groupwise_quantization=False,
    preserve_activation_sparsity=False,
    preserve_weight_sparsity=False,
):
    """Generate random N-D convolution inputs that quantize exactly.

    ``X`` and ``W`` hold integer values (stored as float32) spanning a range
    of 255, so a quantization scale of 1 represents them exactly. Selected
    entries are pinned to the range minimum/maximum, and the weights are then
    adjusted per group so that adjacent products do not overflow int16.

    Args:
        strides, pads, kernels, dilations, sizes: per-spatial-dimension
            convolution geometry. The hypothesis example is rejected (via
            ``assume``) unless they all have length ``len(sizes)`` and every
            dilated kernel fits into the input.
        group: number of convolution groups.
        input_channels_per_group: input channels in each group.
        output_channels_per_group: output channels in each group.
        batch_size: size of the leading axis of ``X``.
        order: when equal to ``"NCHW"``, ``X`` and ``W`` are passed through
            ``utils.NHWC2NCHW`` before being returned.
        groupwise_quantization: pin the minimum/maximum weight in every group
            instead of only once for the whole tensor.
        preserve_activation_sparsity: use ``X_min = 0`` instead of ``-77``.
        preserve_weight_sparsity: use the weight range ``[-128, 100]`` instead
            of ``[-100, 155]`` and do not shift the range per group.

    Returns:
        Tuple ``(X, W, b)`` of float32 arrays. ``X`` is built with shape
        ``(batch_size,) + sizes + (input_channels,)``, ``W`` with shape
        ``(output_channels,) + kernels + (input_channels_per_group,)`` and
        ``b`` with shape ``(output_channels,)``.

    Raises:
        AssertionError: if the total input or output channel count is not
            greater than 1, or if the input is too small for the depthwise
            min/max pattern.
    """
    dim = len(sizes)
    assume(all(len(a) == dim for a in [strides, pads, kernels, dilations]))
    assume(all(sizes[d] >= dilations[d] * (kernels[d] - 1) + 1 for d in range(dim)))
    input_channels = input_channels_per_group * group
    output_channels = output_channels_per_group * group
    depthwise_convolution = (
        input_channels_per_group == 1 and output_channels_per_group == 1
    )

    assert input_channels > 1
    assert output_channels > 1

    # X and W have scale 1, so exactly represented after quantization
    X_min = 0 if preserve_activation_sparsity else -77
    X_max = X_min + 255
    X_range = X_max - X_min
    if depthwise_convolution and groupwise_quantization:
        # For depthwise convolution, it's not enough to set input channel 0
        # to all X_min to avoid overflow from vpmaddubsw
        X_range /= 2
    X = np.round(
        np.random.rand(*((batch_size,) + tuple(sizes) + (input_channels,))) * X_range
        + X_min
    )
    X = X.astype(np.float32)
    if (
        depthwise_convolution
        and groupwise_quantization
        and not preserve_activation_sparsity
    ):
        # Put X_max in a position not to be paired with any padded value.
        # Put X_min to all positions that can be paired with the X_max value.
        #
        # This is an example of a pattern for 3x3x3
        #  .  .   .   .   .
        #  .  .   .   .   .
        #  .  .   .   .   .
        #  .  .   .   .   .
        #  .  .   .   .  min
        #
        #  .  .   .   .   .
        #  .  .   .   .  min
        #  . min max min  .
        # min .   .   .   .
        #  .  .   .   .   .
        #
        # min .   .   .   .
        #  .  .   .   .   .
        #  .  .   .   .   .
        #  .  .   .   .   .
        #  .  .   .   .   .

        # Make sure we have enough dimension
        assert X.shape[1] >= 3
        assert all(X.shape[d + 1] >= kernels[d] + 2 for d in range(1, dim))

        # Take subtensor we want to manipulate
        X_sub = X[(0,) * (X.ndim - dim - 1) + (slice(None),) * dim + (0,)]

        # Put X_max in the middle of the subtensor
        X_sub[(1,) + tuple(kernels[d] // 2 + 1 for d in range(1, dim))] = X_max

        # Put X_min to the positions that can be paired with X_max across
        # the slowest moving dimension.
        # The per-axis index lists are wrapped in a tuple: NumPy no longer
        # interprets a plain list of lists as a multi-dimensional index.
        X_sub[
            tuple([[0, 2]] + [[kernels[d] + 1, 0] for d in range(1, dim)])
        ] = X_min

        # Put X_min to other positions that can be paired with X_max
        for d1 in range(1, dim):
            X_sub[
                tuple(
                    [[1]]
                    + [[kernels[d2] // 2 + 1] for d2 in range(1, d1)]
                    + [[kernels[d1] // 2, kernels[d1] // 2 + 2]]
                    + [[kernels[d2] + 1, 0] for d2 in range(d1 + 1, dim)]
                )
            ] = X_min
    else:
        # input channel 0 is all X_min to avoid overflow from vpmaddubsw when
        # multiplied with W_min and W_max
        X[..., 0] = X_min
        X[(0,) * (X.ndim - 1) + (1,)] = X_max

    if preserve_weight_sparsity:
        W_min = -128
        W_max = 100
    else:
        W_min = -100
        W_max = W_min + 255
    W = np.round(
        np.random.rand(
            *((output_channels,) + tuple(kernels) + (input_channels_per_group,))
        )
        * (W_max - W_min)
        + W_min
    )
    W = W.astype(np.float32)
    if groupwise_quantization:
        for g in range(group):
            W[(g * output_channels_per_group,) + (0,) * (W.ndim - 1)] = W_min
            if depthwise_convolution:
                W[(g * output_channels_per_group, 1) + (0,) * (W.ndim - 2)] = W_max
            else:
                assert output_channels_per_group > 1
                W[(g * output_channels_per_group + 1,) + (0,) * (W.ndim - 1)] = W_max

            # Make sure each group has different ranges to really see the effect
            # of group-wise quantization.
            if not preserve_weight_sparsity:
                W[
                    g * output_channels_per_group : (g + 1) * output_channels_per_group,
                ] += g
    else:
        W[(0,) + (0,) * (W.ndim - 1)] = W_min
        W[(1,) + (0,) * (W.ndim - 1)] = W_max

    different_range_per_group = groupwise_quantization and not preserve_weight_sparsity
    for g in range(group):
        # The slices below are views, so the in-place weight adjustment done
        # by the helper is visible in W.
        avoid_vpmaddubsw_overflow(
            strides,
            pads,
            tuple(kernels),
            dilations,
            sizes,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            X[..., g * input_channels_per_group : (g + 1) * input_channels_per_group],
            X_min,
            X_max,
            W[g * output_channels_per_group : (g + 1) * output_channels_per_group,],
            W_min + (g if different_range_per_group else 0),
            W_max + (g if different_range_per_group else 0),
        )

    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        W = utils.NHWC2NCHW(W)

    b = np.random.randn(output_channels).astype(np.float32)

    return X, W, b
333
334
335 def generate_conv_inputs(
336  stride,
338  kernel,
339  dilation,
340  size,
341  group,
342  input_channels_per_group,
343  output_channels_per_group,
344  batch_size,
345  order,
346  groupwise_quantization=False,
347  preserve_activation_sparsity=False,
348  preserve_weight_sparsity=False,
349 ):
350  return generate_convnd_inputs(
351  (stride,) * 2,