from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
from hypothesis import assume
# This function asserts quantized results (outputs[1:]) are close enough to
# floating point results (outputs[0]).
def check_quantized_results_close(outputs, ref=None, symmetric=False, atol_scale=0.53):
    """Assert every quantized output is close to the fp32 reference output.

    outputs: sequence whose entries are sequences; outputs[i][0] is the i-th
        operator's output array and outputs[0][0] is the fp32 reference.
    ref: array used to derive the quantization scale; defaults to the fp32
        reference output.
    symmetric: derive the scale as for symmetric (signed) quantization.
    atol_scale: allowed absolute error, in multiples of one quantization step.

    Raises AssertionError (via numpy.testing) when any quantized output
    deviates from the reference by more than atol.
    """
    if ref is None:
        ref = outputs[0][0]
    if ref.size == 0:
        # Nothing to compare; np.min/np.max would raise on an empty array.
        return
    # Quantization ranges always include zero.
    ref_min = min(np.min(ref), 0)
    ref_max = max(np.max(ref), 0)
    if symmetric:
        ref_scale = 2 * max(abs(ref_max), abs(ref_min)) / 255
    else:
        ref_scale = (ref_max - ref_min) / 255
    # atol_scale close to 0.5 corresponds to "at most one rounding error".
    atol = ref_scale * atol_scale
    for o in outputs[1:]:
        np.testing.assert_allclose(o[0], outputs[0][0], atol=atol, rtol=0)
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    from itertools import tee

    # Classic itertools recipe: tee gives two independent iterators over the
    # input; advancing the second by one element makes zip() produce
    # consecutive overlapping pairs.
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)
# Make sure we won't have overflows from the vpmaddubsw instruction used in
# fbgemm: it accumulates two adjacent uint8 * int8 products into one signed
# 16-bit lane, so every such pair of products must fit in [-2^15, 2^15).
def avoid_vpmaddubsw_overflow_fc(
    batch_size, input_channels, output_channels, X, X_min, X_max, W, W_min, W_max
):
    """Mutate W in place so no adjacent-pair product overflows int16.

    X is (batch_size, input_channels); subtracting X_min gives the uint8
    quantized activation.  W is (output_channels, input_channels);
    subtracting 128 + W_min gives the int8 quantized weight.  For every
    (row, output) combination and every even/odd input-channel pair, the
    second weight of the pair is adjusted until x0*w0 + x1*w1 fits in a
    signed 16-bit value.
    """
    for i, j in np.ndindex((batch_size, output_channels)):
        # Step through input channels two at a time -- the pairs vpmaddubsw
        # sums -- ignoring a trailing odd channel.
        for k in range(0, input_channels // 2 * 2, 2):
            x0 = X[i, k] - X_min
            x1 = X[i, k + 1] - X_min
            w0 = W[j, k] - 128 - W_min
            w1 = W[j, k + 1] - 128 - W_min
            if x0 * w0 + x1 * w1 < -(1 << 15):
                w1_adjusted = (-(1 << 15) - float(x0) * w0) / x1
                W[j, k + 1] = int(w1_adjusted) + 128 + W_min
            elif x0 * w0 + x1 * w1 > (1 << 15) - 1:
                w1_adjusted = ((1 << 15) - 1 - float(x0) * w0) / x1
                W[j, k + 1] = int(w1_adjusted) + 128 + W_min

    # Go through the same loop again to double check we don't have any overflow
    for i, j in np.ndindex((batch_size, output_channels)):
        for k in range(0, input_channels // 2 * 2, 2):
            x0 = X[i, k] - X_min
            x1 = X[i, k + 1] - X_min
            w0 = W[j, k] - 128 - W_min
            w1 = W[j, k + 1] - 128 - W_min
            assert -(1 << 15) <= x0 * w0 + x1 * w1 < (1 << 15)
# Make sure we won't have overflows from the vpmaddubsw instruction used in
# fbgemm for the im2col layout of a convolution: adjacent
# (kernel-position, input-channel) entries are the pairs accumulated into one
# signed 16-bit lane.
#
# strides, pads, kernels, dilations, and sizes should be tuples with the same
# length ndim (2 for 2D conv, 3 for 3D conv, and so on).  X is NHWC-like
# (batch, *sizes, input_channels) and W is
# (output_channels, *kernels, input_channels), offset by X_min / W_min.
def avoid_vpmaddubsw_overflow(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    input_channels,
    output_channels,
    batch_size,
    X,
    X_min,
    X_max,
    W,
    W_min,
    W_max,
):
    ndim = len(sizes)
    # Effective (dilated) kernel extents and the resulting output spatial
    # sizes of the convolution.
    dkernels = tuple(dilations[i] * (kernels[i] - 1) + 1 for i in range(ndim))
    size_cols = tuple(
        (sizes[i] + 2 * pads[i] - dkernels[i]) // strides[i] + 1 for i in range(ndim)
    )

    def walk_pairs():
        """Yield (x0, w0, x1, w1, w1_index) for every adjacent im2col pair.

        W is read lazily at yield time, so adjustments made by the consumer
        are visible to later pairs (pairs overlap: the second element of one
        pair is the first element of the next).
        """
        for out_idx in np.ndindex((batch_size,) + size_cols + (output_channels,)):
            b = out_idx[0]
            oc = out_idx[-1]
            o_spatial = out_idx[1:-1]
            for filter_idx1, filter_idx2 in pairwise(
                np.ndindex(kernels + (input_channels,))
            ):
                f0, ic0 = filter_idx1[:-1], filter_idx1[-1]
                f1, ic1 = filter_idx2[:-1], filter_idx2[-1]

                # Input coordinate each filter tap reads for this output.
                i0s = tuple(
                    strides[i] * o_spatial[i] - pads[i] + dilations[i] * f0[i]
                    for i in range(ndim)
                )
                i1s = tuple(
                    strides[i] * o_spatial[i] - pads[i] + dilations[i] * f1[i]
                    for i in range(ndim)
                )

                w1_index = (oc,) + f1 + (ic1,)
                w0 = W[(oc,) + f0 + (ic0,)] - 128 - W_min
                w1 = W[w1_index] - 128 - W_min

                # Out-of-bounds taps read zero padding, whose offset
                # (quantized) value is 0 - X_min.
                if all(0 <= i0s[i] < sizes[i] for i in range(ndim)):
                    x0 = X[(b,) + i0s + (ic0,)] - X_min
                else:
                    x0 = -X_min
                if all(0 <= i1s[i] < sizes[i] for i in range(ndim)):
                    x1 = X[(b,) + i1s + (ic1,)] - X_min
                else:
                    x1 = -X_min

                yield x0, w0, x1, w1, w1_index

    # First pass: nudge the second weight of any overflowing pair back into
    # range for a signed 16-bit accumulator.
    for x0, w0, x1, w1, w1_index in walk_pairs():
        if x0 * w0 + x1 * w1 < -(1 << 15):
            w1_adjusted = (-(1 << 15) - float(x0) * w0) / x1
            W[w1_index] = int(w1_adjusted) + 128 + W_min
        elif x0 * w0 + x1 * w1 >= (1 << 15):
            w1_adjusted = ((1 << 15) - 1 - float(x0) * w0) / x1
            W[w1_index] = int(w1_adjusted) + 128 + W_min

    # Go through the same traversal again to double check we don't have any
    # overflow left after the adjustments.
    for x0, w0, x1, w1, _ in walk_pairs():
        assert -(1 << 15) <= x0 * w0 + x1 * w1 < (1 << 15)
def generate_convnd_inputs(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    groupwise_quantization=False,
    preserve_activation_sparsity=False,
    preserve_weight_sparsity=False,
):
    """Generate random X, W, b inputs for an N-D grouped convolution test.

    strides/pads/kernels/dilations/sizes are per-spatial-dimension tuples;
    order selects the returned layout (tensors are built NHWC and converted
    when order == "NCHW").  Returns (X, W, b) float32 arrays whose values
    are exactly representable after quantization with scale 1, with extreme
    values planted so the quantization ranges are fully exercised, and with
    W adjusted so fbgemm's vpmaddubsw accumulation cannot overflow.
    """
    dim = len(sizes)
    assume(all(len(a) == dim for a in [strides, pads, kernels, dilations]))
    # The dilated kernel must fit inside the input.
    assume(all(sizes[d] >= dilations[d] * (kernels[d] - 1) + 1 for d in range(dim)))
    input_channels = input_channels_per_group * group
    output_channels = output_channels_per_group * group
    depthwise_convolution = (
        input_channels_per_group == 1 and output_channels_per_group == 1
    )

    assert input_channels > 1
    assert output_channels > 1

    # X and W have scale 1, so exactly represented after quantization
    X_min = 0 if preserve_activation_sparsity else -77
    X_max = X_min + 255
    X_range = X_max - X_min
    if depthwise_convolution and groupwise_quantization:
        # For depthwise convolution, it's not enough to set input channel 0
        # to all X_min to avoid overflow from vpmaddubsw
        X_range /= 2
    X = np.round(
        np.random.rand(*((batch_size,) + tuple(sizes) + (input_channels,))) * X_range
        + X_min
    )
    X = X.astype(np.float32)
    if (
        depthwise_convolution
        and groupwise_quantization
        and not preserve_activation_sparsity
    ):
        # NOTE(review): plant X_max surrounded by X_min in the spatial slice
        # of batch 0 / input channel 0 so each group's quantization range is
        # exercised without overflowing vpmaddubsw -- confirm the exact
        # pattern against the dnnlowp depthwise conv tests.
        assert X.shape[1] >= 3
        assert all(X.shape[d + 1] >= kernels[d] + 2 for d in range(1, dim))
        # Spatial slice of batch 0, input channel 0.
        X_sub = X[(0,) * (X.ndim - dim - 1) + (slice(None),) * dim + (0,)]
        # X_max near the center of the first kernel window ...
        X_sub[(1,) + tuple(kernels[d] // 2 + 1 for d in range(1, dim))] = X_max
        # ... with X_min planted at surrounding positions.
        X_sub[[[0, 2]] + [[kernels[d] + 1, 0] for d in range(1, dim)]] = X_min
        for d1 in range(1, dim):
            X_sub[
                [[1]]
                + [[kernels[d2] // 2 + 1] for d2 in range(1, d1)]
                + [[kernels[d1] // 2, kernels[d1] // 2 + 2]]
                + [[kernels[d2] + 1, 0] for d2 in range(d1 + 1, dim)]
            ] = X_min
    else:
        # Input channel 0 is all X_min to avoid overflow from vpmaddubsw when
        # multiplied with W_min and W_max
        X[..., 0] = X_min
        X[(0,) * (X.ndim - 1) + (1,)] = X_max

    if preserve_weight_sparsity:
        W_min = -128
        W_max = 100
    else:
        W_min = -100
        W_max = W_min + 255
    W = np.round(
        np.random.rand(
            *((output_channels,) + tuple(kernels) + (input_channels_per_group,))
        )
        * (W_max - W_min)
        + W_min
    )
    W = W.astype(np.float32)
    if groupwise_quantization:
        for g in range(group):
            # Plant W_min and W_max inside each group so every group's
            # quantization range is exercised.
            W[(g * output_channels_per_group,) + (0,) * (W.ndim - 1)] = W_min
            if depthwise_convolution:
                W[(g * output_channels_per_group, 1) + (0,) * (W.ndim - 2)] = W_max
            else:
                assert output_channels_per_group > 1
                W[(g * output_channels_per_group + 1,) + (0,) * (W.ndim - 1)] = W_max
            # Give each group a different range; otherwise group-wise
            # quantization degenerates to tensor-wise quantization.
            if not preserve_weight_sparsity:
                W[
                    g * output_channels_per_group : (g + 1) * output_channels_per_group,
                ] += g
    else:
        W[(0,) + (0,) * (W.ndim - 1)] = W_min
        W[(1,) + (0,) * (W.ndim - 1)] = W_max

    different_range_per_group = groupwise_quantization and not preserve_weight_sparsity
    for g in range(group):
        avoid_vpmaddubsw_overflow(
            strides,
            pads,
            kernels,
            dilations,
            sizes,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            X[..., g * input_channels_per_group : (g + 1) * input_channels_per_group],
            X_min,
            X_max,
            W[g * output_channels_per_group : (g + 1) * output_channels_per_group,],
            W_min + (g if different_range_per_group else 0),
            W_max + (g if different_range_per_group else 0),
        )

    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        W = utils.NHWC2NCHW(W)

    b = np.random.randn(output_channels).astype(np.float32)

    return X, W, b
335 def generate_conv_inputs(
342 input_channels_per_group,
343 output_channels_per_group,
346 groupwise_quantization=
False,
347 preserve_activation_sparsity=
False,
348 preserve_weight_sparsity=
False,
350 return generate_convnd_inputs(
357 input_channels_per_group,
358 output_channels_per_group,
361 groupwise_quantization,
362 preserve_activation_sparsity,
363 preserve_weight_sparsity,