Caffe2 - Python API
A deep learning, cross platform ML framework
utils.py
from __future__ import absolute_import, division, print_function, unicode_literals

import copy
from collections import defaultdict

import numpy as np
from caffe2.python import core, utils
from caffe2.python.fb import hardcode_scale_zp


def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    from itertools import tee

    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)


def blob_uses(net, blob):
    u = []
    for i, op in enumerate(net.op):
        if blob in op.input or blob in op.control_input:
            u.append(i)
    return u


def fuse_first_bn(net, params, removed_tensors):
    net = copy.deepcopy(net)
    params = copy.deepcopy(params)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if current.type not in ("Conv", "ConvTranspose") or next_.type != "SpatialBN":
            continue
        if (
            len(blob_uses(net, current.output[0])) != 1
            and current.output[0] != next_.output[0]
        ):
            # Can't fuse if more than one user unless SpatialBN is inplace
            continue

        # else, can fuse
        conv = current
        bn = next_
        fused_conv = copy.deepcopy(conv)
        fused_conv.output[0] = bn.output[0]
        conv_weight = params[conv.input[1]]
        if len(conv.input) > 2:
            conv_bias = params[conv.input[2]]
        else:
            conv_bias = np.zeros(len(params[bn.input[2]])).astype(np.float32)

        bn_scale = params[bn.input[1]]
        bn_bias = params[bn.input[2]]
        bn_running_mean = params[bn.input[3]]
        bn_running_var = params[bn.input[4]]

        # First, BN computation can be phrased as follows:
        # (X - running_mean) * (1.0 / sqrt(running_var + eps)) *
        #     bn_scale + bias
        # Thus, we can rewrite bn_scale as:
        # X * bn_scale * 1.0 / (sqrt(running_var + eps)) + (bias -
        #     running_mean * (1.0 / sqrt(running_var + eps)) * bn_scale)
        # Thus, can just have the affine transform
        # X * A + B
        # where
        # A = bn_scale * 1.0 / (sqrt(running_var + eps))
        # B = (bias - running_mean * (1.0 / sqrt(running_var + eps))
        #     * bn_scale)
        eps = 1.0e-5
        for arg in bn.arg:
            if arg.name == "epsilon":
                eps = arg.f
        A = bn_scale * 1.0 / (np.sqrt(bn_running_var + eps))
        B = bn_bias - bn_running_mean * A

        # This identity should hold if we have correctly fused
        # np.testing.assert_array_equal(
        #     params[conv.output[0]] * A + B,
        #     params[bn.output[0]])

        # Now, we have that the computation made is the following:
        # ((X `conv` W) + b) * A + B
        # Then, we can simply fuse this as follows:
        # (X `conv` (W * A)) + b * A + B
        # which is simply
        # (X `conv` Q) + C
        # where

        # Q = W * A
        # C = b * A + B

        # For ConvTranspose, from the view of convolutions as a
        # Toeplitz multiplication, we have W_ = W^T, so the weights
        # are laid out as (R, S, K, K) (vs (S, R, K, K) for a Conv),
        # so the weights broadcast slightly differently. Remember, our
        # BN scale 'A' is of size (S,)

        A_ = (
            A.reshape((-1,) + tuple([1] * (conv_weight.ndim - 1)))
            if conv.type == "Conv"
            else A.reshape((1, -1) + tuple([1] * (conv_weight.ndim - 2)))
        )

        C = conv_bias * A + B
        Q = conv_weight * A_

        assert params[conv.input[1]].shape == Q.shape
        if len(conv.input) > 2:
            assert params[conv.input[2]].shape == C.shape
        else:
            assert bn_bias.shape == C.shape

        params[conv.input[1]] = Q
        if len(conv.input) > 2:
            params[conv.input[2]] = C
        else:
            params[bn.input[2]] = C
            fused_conv.input.append(bn.input[2])

        new_ops = net.op[:i] + [fused_conv] + net.op[j + 1 :]
        del net.op[:]
        removed_tensors.append(bn.input[1])
        if len(conv.input) > 2:
            removed_tensors.append(bn.input[2])
        removed_tensors.append(bn.input[3])
        removed_tensors.append(bn.input[4])
        del params[bn.input[1]]
        if len(conv.input) > 2:
            del params[bn.input[2]]
        del params[bn.input[3]]
        del params[bn.input[4]]
        net.op.extend(new_ops)
        break
    return net, params, removed_tensors


def fuse_bn(net, params, ignore_failure):
    # Run until we hit a fixed point
    removed_tensors = []
    while True:
        (next_net, next_params, removed_tensors) = fuse_first_bn(
            net, params, removed_tensors
        )
        if len(next_net.op) == len(net.op):
            if any(op.type == "SpatialBN" for op in next_net.op) and not ignore_failure:
                raise Exception(
                    "Model contains SpatialBN op after fusion: %s" % next_net
                )
            return (next_net, next_params, removed_tensors)
        net, params, removed_tensors = (next_net, next_params, removed_tensors)


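# A minimal sketch that numerically checks the Conv + BN folding math derived
# in fuse_first_bn above. The helper name and the concrete shapes are
# hypothetical; it only exercises the A/B/Q/C formulas, assuming a Conv weight
# layout of (M, C, K, K). In practice the rewrite is applied by calling
# fuse_bn(predict_net, params, ignore_failure), where `params` maps blob names
# to numpy arrays.
def _check_bn_fold_identity():
    rng = np.random.RandomState(0)
    out_channels, in_channels, k = 4, 3, 3
    W = rng.randn(out_channels, in_channels, k, k).astype(np.float32)
    b = rng.randn(out_channels).astype(np.float32)
    bn_scale = rng.rand(out_channels).astype(np.float32) + 0.5
    bn_bias = rng.randn(out_channels).astype(np.float32)
    running_mean = rng.randn(out_channels).astype(np.float32)
    running_var = rng.rand(out_channels).astype(np.float32) + 0.1
    eps = 1.0e-5

    # BN(y) = (y - mean) / sqrt(var + eps) * scale + bias  ==  y * A + B
    A = bn_scale / np.sqrt(running_var + eps)
    B = bn_bias - running_mean * A
    y = rng.randn(out_channels).astype(np.float32)  # one activation per channel
    np.testing.assert_allclose(
        (y - running_mean) / np.sqrt(running_var + eps) * bn_scale + bn_bias,
        y * A + B,
        rtol=1e-4,
        atol=1e-5,
    )

    # Folding into the Conv parameters: Q = W * A (broadcast over the output
    # channel dim) and C = b * A + B, so Conv(X, Q) + C == BN(Conv(X, W) + b).
    Q = W * A.reshape((-1, 1, 1, 1))
    C = b * A + B
    assert Q.shape == W.shape and C.shape == b.shape

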
def fuse_first_scale(net, params, removed_tensors):
    net = copy.deepcopy(net)
    params = copy.deepcopy(params)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if (
            current.type != "SpatialBN"
            or next_.type != "Mul"
            or len(net.op) <= j + 1
            or net.op[j + 1].type != "Add"
        ):
            continue

        # else, can fuse
        bn = current
        mul = next_
        add = net.op[j + 1]

        fused_bn = copy.deepcopy(bn)
        fused_bn.output[0] = add.output[0]
        bn_scale = params[bn.input[1]]
        mul_scale = params[mul.input[1]]
        bn_bias = params[bn.input[2]]
        add_bias = params[add.input[1]]

        params[bn.input[1]] = bn_scale * mul_scale
        params[bn.input[2]] = mul_scale * bn_bias + add_bias

        new_ops = net.op[:i] + [fused_bn] + net.op[j + 2 :]
        del net.op[:]
        removed_tensors.append(mul.input[1])
        removed_tensors.append(add.input[1])
        del params[mul.input[1]]
        del params[add.input[1]]
        net.op.extend(new_ops)
        break
    return net, params, removed_tensors


199 def fuse_scale(net, params, ignore_failure):
200  # Run until we hit a fixed point
201  removed_tensors = []
202  while True:
203  (next_net, next_params, removed_tensors) = fuse_first_scale(
204  net, params, removed_tensors
205  )
206  if len(next_net.op) == len(net.op):
207  return (next_net, next_params, removed_tensors)
208  net, params, removed_tensors = (next_net, next_params, removed_tensors)
209 
210 
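# A minimal sketch of why the SpatialBN + Mul + Add fold in fuse_first_scale is
# valid: it relies on the identity
#     (x * s + b) * m + a == x * (s * m) + (m * b + a),
# which is why the new SpatialBN bias above is mul_scale * bn_bias + add_bias.
# The helper name is hypothetical.
def _check_scale_fold_identity():
    rng = np.random.RandomState(0)
    x = rng.randn(5, 8).astype(np.float32)  # activations, 8 channels
    s, b = rng.randn(8), rng.randn(8)  # SpatialBN scale / bias
    m, a = rng.randn(8), rng.randn(8)  # Mul scale / Add bias
    np.testing.assert_allclose(
        (x * s + b) * m + a,
        x * (s * m) + (m * b + a),
        rtol=1e-6,
        atol=1e-8,
    )

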
def fuse_first_relu(net, ignore_op_with_output=None):
    net = copy.deepcopy(net)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if current.type not in ("Conv", "Sum") or next_.type != "Relu":
            continue

        if ignore_op_with_output and current.output[0] in ignore_op_with_output:
            continue

        # else, can fuse
        conv = current
        relu = next_
        fused_conv = copy.deepcopy(conv)
        fused_conv.type = "ConvRelu" if current.type == "Conv" else "SumRelu"
        fused_conv.output[0] = relu.output[0]

        new_ops = net.op[:i] + [fused_conv] + net.op[j + 1 :]
        del net.op[:]
        net.op.extend(new_ops)
        break
    return net


def fuse_relu(net, ignore_failure, ignore_op_with_output=None):
    # Run until we hit a fixed point
    while True:
        next_net = fuse_first_relu(net, ignore_op_with_output)
        if len(next_net.op) == len(net.op):
            if any(op.type == "Relu" for op in next_net.op) and not ignore_failure:
                raise Exception("Model contains Relu op after fusion: %s" % next_net)
            return next_net
        net = next_net


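# A minimal usage sketch: build a tiny two-op NetDef and fuse its Conv + Relu
# pair into a single ConvRelu op. The blob names and the helper name are
# hypothetical.
def _example_fuse_relu():
    from caffe2.proto import caffe2_pb2

    net = caffe2_pb2.NetDef()
    net.op.extend(
        [
            core.CreateOperator("Conv", ["X", "W", "b"], ["conv_out"], kernel=3),
            core.CreateOperator("Relu", ["conv_out"], ["relu_out"]),
        ]
    )
    fused = fuse_relu(net, ignore_failure=False)
    assert [op.type for op in fused.op] == ["ConvRelu"]
    assert fused.op[0].output[0] == "relu_out"
    return fused

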
def last_producer(ops, blob):
    for (i, op) in reversed(list(enumerate(ops))):
        if op.output[0] == blob:
            return i
    raise ValueError("Failed to find last producer of blob, %s" % blob)


def swap_first_concat_relu(net, ignore_op_with_output=None):
    net = copy.deepcopy(net)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if current.type != "Concat" or next_.type != "Relu":
            continue

        if ignore_op_with_output and current.output[0] in ignore_op_with_output:
            continue

        # else, can swap
        concat = copy.deepcopy(current)
        relu = copy.deepcopy(next_)
        pre_ops = copy.deepcopy(net.op[:i])
        post_ops = copy.deepcopy(net.op[j + 1 :])

        # Delete the Relu after Concat
        concat.output[0] = relu.output[0]

        # Insert Relu after each op that produces inputs to Concat
        for blob in concat.input:
            k = last_producer(pre_ops, blob)
            producer = pre_ops[k]
            assert producer.output[0] == blob
            producer.output[0] = blob + "_pre_relu"

            new_relu = copy.deepcopy(relu)
            new_relu.input[0] = producer.output[0]
            new_relu.output[0] = blob

            pre_ops = pre_ops[: k + 1] + [new_relu] + pre_ops[k + 1 :]

        new_ops = pre_ops + [concat] + post_ops
        del net.op[:]
        net.op.extend(new_ops)
        break
    return net


def swap_concat_relu(net, ignore_op_with_output=None):
    # Run until we hit a fixed point
    while True:
        next_net = swap_first_concat_relu(net, ignore_op_with_output)
        if len(next_net.op) == len(net.op):
            return next_net
        net = next_net


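# A minimal sketch of why the swap above is valid: ReLU is elementwise, so it
# commutes with concatenation, i.e. Relu(Concat(a, b)) == Concat(Relu(a),
# Relu(b)). The helper name is hypothetical.
def _check_concat_relu_commute():
    rng = np.random.RandomState(0)
    a = rng.randn(2, 3).astype(np.float32)
    b = rng.randn(2, 5).astype(np.float32)

    def relu(t):
        return np.maximum(t, 0)

    np.testing.assert_array_equal(
        relu(np.concatenate([a, b], axis=1)),
        np.concatenate([relu(a), relu(b)], axis=1),
    )

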
def add_version_to_conv_bias(net, init_net):
    """
    In architectures such as FPN (https://arxiv.org/abs/1612.03144), a few Conv
    ops share the same weight and bias and are run at different scales of
    the input. Since 'bias_scale = input_scale * weight_scale', sharing the
    same bias blob among multiple Conv ops means that we would need a different
    bias scale for each of the ops. To achieve this, we just duplicate those
    bias blobs that are used by multiple Conv ops before performing the int8
    rewrite.
    """
    bias_count = defaultdict(int)
    for op in net._net.op:
        if "Conv" in op.type and len(op.input) >= 3:
            bias_count[op.input[2]] += 1

    bias_fill_op = {}
    for op in init_net._net.op:
        if bias_count[op.output[0]] > 1:
            bias_fill_op[op.output[0]] = op

    bias_version = defaultdict(int)
    for op in net._net.op:
        if "Conv" in op.type and len(op.input) >= 3:
            bias = op.input[2]
            if bias_count[bias] <= 1:
                continue

            version = bias_version[bias]
            bias_version[bias] += 1
            if version == 0:
                continue

            new_bias = bias + "_v" + str(version)
            fill_op = copy.deepcopy(bias_fill_op[bias])
            fill_op.output[0] = new_bias
            init_net._net.op.extend([fill_op])
            op.input[2] = new_bias
            net._net.external_input.append(new_bias)


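# A minimal usage sketch: two Conv ops sharing a single bias blob "b"; after
# add_version_to_conv_bias the second Conv reads a duplicated "b_v1" blob and
# init_net gains a matching fill op. The blob names and the helper name are
# hypothetical.
def _example_add_version_to_conv_bias():
    predict_net = core.Net("predict")
    predict_net.Proto().op.extend(
        [
            core.CreateOperator("Conv", ["X1", "W", "b"], ["Y1"], kernel=3),
            core.CreateOperator("Conv", ["X2", "W", "b"], ["Y2"], kernel=3),
        ]
    )
    init_net = core.Net("init")
    init_net.Proto().op.extend(
        [
            core.CreateOperator(
                "GivenTensorFill", [], ["b"], shape=[8], values=[0.0] * 8
            )
        ]
    )
    add_version_to_conv_bias(predict_net, init_net)
    assert predict_net.Proto().op[1].input[2] == "b_v1"
    assert init_net.Proto().op[-1].output[0] == "b_v1"
    return predict_net, init_net

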
def add_quantization_param_args_(op, q_param):
    op.arg.extend(
        [
            utils.MakeArgument("Y_scale", q_param.scale),
            utils.MakeArgument("Y_zero_point", q_param.zero_point),
        ]
    )


def choose_quantization_params(tensor_min, tensor_max, preserve_sparsity=False):
    if tensor_min < 0 and tensor_max > 0 and preserve_sparsity:
        symmetric_qmin = -(255 // 2 + 1)
        symmetric_qmax = 255 // 2
        max_scale = max(
            abs(tensor_min / symmetric_qmin), abs(tensor_max / symmetric_qmax)
        )
        tensor_min = max_scale * symmetric_qmin
        tensor_max = max_scale * symmetric_qmax

    q_param = hardcode_scale_zp.choose_quantization_params(tensor_min, tensor_max)

    if tensor_min < 0 and tensor_max > 0 and preserve_sparsity:
        q_param = hardcode_scale_zp.QuantizationParam(q_param.scale, 128)

    return q_param


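# A minimal sketch of the preserve_sparsity branch above: the range is first
# symmetrized around zero before delegating to hardcode_scale_zp, and the
# zero_point is then forced to 128 so that a real value of 0.0 stays exactly
# representable. The numbers below only work through the range adjustment; the
# final scale / zero_point come from hardcode_scale_zp and are not recomputed
# here. The helper name is hypothetical.
def _example_symmetric_range():
    tensor_min, tensor_max = -1.0, 3.0
    symmetric_qmin = -(255 // 2 + 1)  # -128
    symmetric_qmax = 255 // 2  # 127
    max_scale = max(
        abs(tensor_min / symmetric_qmin), abs(tensor_max / symmetric_qmax)
    )  # max(1/128, 3/127) ~= 0.023622
    adjusted_min = max_scale * symmetric_qmin  # ~= -3.0236
    adjusted_max = max_scale * symmetric_qmax  # == 3.0
    assert adjusted_min <= tensor_min and adjusted_max >= tensor_max
    return adjusted_min, adjusted_max

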
def add_quantization_param_args(op, tensor, preserve_sparsity=False):
    tensor_min = 0 if tensor.size == 0 else tensor.min()
    tensor_max = 0 if tensor.size == 0 else tensor.max()

    q_param = choose_quantization_params(tensor_min, tensor_max, preserve_sparsity)

    add_quantization_param_args_(op, q_param)
    return q_param


def create_int8_given_tensor_fill(tensor, out_blob_name, preserve_sparsity=False):
    """
    Create Int8GivenTensorFill op that quantizes the given tensor and outputs
    an Int8Tensor with out_blob_name.
    """
    op = core.CreateOperator("Int8GivenTensorFill", [], out_blob_name)
    q_param = add_quantization_param_args(op, tensor, preserve_sparsity)
    quantized_tensor = (
        np.around(tensor / q_param.scale).astype(np.int32) + q_param.zero_point
    )
    quantized_tensor = np.maximum(0, np.minimum(quantized_tensor, 255))
    op.arg.extend(
        [
            utils.MakeArgument("values", quantized_tensor.astype(np.uint8).tobytes()),
            utils.MakeArgument("shape", quantized_tensor.shape),
        ]
    )
    return op, q_param


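# A minimal usage sketch: quantizing a small weight tensor into an
# Int8GivenTensorFill op. The blob name and the helper name are hypothetical;
# scale and zero_point come from hardcode_scale_zp, and the stored bytes follow
# q = clip(round(x / scale) + zero_point, 0, 255).
def _example_int8_weight_fill():
    W = np.array([-1.0, -0.25, 0.0, 0.5, 1.0], dtype=np.float32)
    op, q_param = create_int8_given_tensor_fill(W, "W_int8", preserve_sparsity=True)
    # The op carries "values" (uint8 bytes), "shape", "Y_scale" and "Y_zero_point".
    arg_names = {arg.name for arg in op.arg}
    assert {"values", "shape", "Y_scale", "Y_zero_point"} <= arg_names
    return op, q_param

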
def create_int8_bias_tensor_fill(tensor, out_blob_name, x_q_param, w_q_param):
    """
    Similar to create_int8_given_tensor_fill, but for bias blobs to be stored
    as int32.
    """
    scale = x_q_param.scale * w_q_param.scale
    quantized_tensor = np.around(tensor / scale).astype(np.int32)
    quantized_tensor.reshape(-1)
    op = core.CreateOperator("Int8GivenIntTensorFill", [], out_blob_name)
    op.arg.extend(
        [
            utils.MakeArgument("values", quantized_tensor),
            utils.MakeArgument("shape", quantized_tensor.shape),
        ]
    )
    q_param = hardcode_scale_zp.QuantizationParam(scale, 0)
    add_quantization_param_args_(op, q_param)
    return op
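

# A minimal usage sketch: a bias is quantized with scale = x_scale * w_scale and
# zero_point 0. For example, with x_scale = 0.02 and w_scale = 0.005 the
# combined scale is 1e-4, so a float bias of 0.37 is stored as
# round(0.37 / 1e-4) = 3700 in int32. The parameter values and the helper name
# below are hypothetical.
def _example_int8_bias_fill():
    x_q = hardcode_scale_zp.QuantizationParam(0.02, 0)
    w_q = hardcode_scale_zp.QuantizationParam(0.005, 0)
    bias = np.array([0.37, -0.1], dtype=np.float32)
    op = create_int8_bias_tensor_fill(bias, "b_int32", x_q, w_q)
    assert op.type == "Int8GivenIntTensorFill"
    return op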