from __future__ import absolute_import, division, print_function, unicode_literals

import copy
from collections import defaultdict

import numpy as np

from caffe2.python import core, utils
from caffe2.python.fb import hardcode_scale_zp
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    from itertools import tee

    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)
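
# Usage note: pairwise is the standard itertools "pairwise" recipe, e.g.
# list(pairwise([1, 2, 3])) == [(1, 2), (2, 3)].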
def blob_uses(net, blob):
    u = []
    for i, op in enumerate(net.op):
        if blob in op.input or blob in op.control_input:
            u.append(i)
    return u
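
# Usage note: blob_uses returns the indices of every op that consumes a blob,
# e.g. blob_uses(net, "conv1_out") == [3] when only op 3 reads "conv1_out".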
def fuse_first_bn(net, params, removed_tensors):
    net = copy.deepcopy(net)
    params = copy.deepcopy(params)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if current.type not in ("Conv", "ConvTranspose") or next_.type != "SpatialBN":
            continue
        if (
            len(blob_uses(net, current.output[0])) != 1
            and current.output[0] != next_.output[0]
        ):
            # Can't fuse if the Conv output has other users, unless the
            # SpatialBN writes in place.
            continue

        # else, can fuse
        conv = current
        bn = next_
        fused_conv = copy.deepcopy(conv)
        fused_conv.output[0] = bn.output[0]
        conv_weight = params[conv.input[1]]
        if len(conv.input) > 2:
            conv_bias = params[conv.input[2]]
        else:
            conv_bias = np.zeros(len(params[bn.input[2]])).astype(np.float32)
        bn_scale = params[bn.input[1]]
        bn_bias = params[bn.input[2]]
        bn_running_mean = params[bn.input[3]]
        bn_running_var = params[bn.input[4]]
        # Read the epsilon argument off the SpatialBN op (Caffe2 default: 1e-5).
        eps = 1.0e-5
        for arg in bn.arg:
            if arg.name == "epsilon":
                eps = arg.f
        A = bn_scale * 1.0 / (np.sqrt(bn_running_var + eps))
        B = bn_bias - bn_running_mean * A
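        # Folding the affine transform into the convolution:
        #     ((X * W) + b) * A + B  ==  X * (W * A) + (b * A + B)
        # A broadcasts across the output-channel axis of the weights, which is
        # axis 0 for Conv and axis 1 for ConvTranspose.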
        A_ = (
            A.reshape((-1,) + tuple([1] * (conv_weight.ndim - 1)))
            if conv.type == "Conv"
            else A.reshape((1, -1) + tuple([1] * (conv_weight.ndim - 2)))
        )
        C = conv_bias * A + B
        Q = conv_weight * A_
        assert params[conv.input[1]].shape == Q.shape
        if len(conv.input) > 2:
            assert params[conv.input[2]].shape == C.shape
        else:
            assert bn_bias.shape == C.shape

        params[conv.input[1]] = Q
        if len(conv.input) > 2:
            params[conv.input[2]] = C
        else:
            # Conv had no bias; reuse the BN bias blob to carry the fused bias.
            params[bn.input[2]] = C
            fused_conv.input.append(bn.input[2])
        new_ops = net.op[:i] + [fused_conv] + net.op[j + 1 :]
        del net.op[:]
        removed_tensors.append(bn.input[1])
        if len(conv.input) > 2:
            removed_tensors.append(bn.input[2])
        removed_tensors.append(bn.input[3])
        removed_tensors.append(bn.input[4])
        del params[bn.input[1]]
        if len(conv.input) > 2:
            del params[bn.input[2]]
        del params[bn.input[3]]
        del params[bn.input[4]]
        net.op.extend(new_ops)
        break
    return net, params, removed_tensors
def fuse_bn(net, params, ignore_failure):
    # Run until we hit a fixed point, fusing one Conv + SpatialBN pair per pass
    removed_tensors = []
    while True:
        (next_net, next_params, removed_tensors) = fuse_first_bn(
            net, params, removed_tensors
        )
        if len(next_net.op) == len(net.op):
            if (
                any(op.type == "SpatialBN" for op in next_net.op)
                and not ignore_failure
            ):
                raise Exception(
                    "Model contains SpatialBN op after fusion: %s", next_net
                )
            return (next_net, next_params, removed_tensors)
        net, params, removed_tensors = (next_net, next_params, removed_tensors)
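
# Example usage (hypothetical names; assumes `predict_net` is a caffe2 NetDef
# and `params` maps blob names to numpy arrays pulled from the workspace):
#   predict_net, params, removed = fuse_bn(predict_net, params, ignore_failure=False)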
def fuse_first_scale(net, params, removed_tensors):
    net = copy.deepcopy(net)
    params = copy.deepcopy(params)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if (
            current.type != "SpatialBN"
            or next_.type != "Mul"
            or len(net.op) <= j + 1
            or net.op[j + 1].type != "Add"
        ):
            continue

        # else, can fuse the SpatialBN -> Mul -> Add chain
        bn = current
        mul = next_
        add = net.op[j + 1]

        fused_bn = copy.deepcopy(bn)
        fused_bn.output[0] = add.output[0]
        bn_scale = params[bn.input[1]]
        mul_scale = params[mul.input[1]]
        bn_bias = params[bn.input[2]]
        add_bias = params[add.input[1]]
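        # Folding Mul/Add into the BN affine parameters:
        #     (X * bn_scale + bn_bias) * mul_scale + add_bias
        #   == X * (bn_scale * mul_scale) + (bn_bias * mul_scale + add_bias)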
        params[bn.input[1]] = bn_scale * mul_scale
        params[bn.input[2]] = mul_scale * bn_bias + add_bias

        new_ops = net.op[:i] + [fused_bn] + net.op[j + 2 :]
        del net.op[:]
        removed_tensors.append(mul.input[1])
        removed_tensors.append(add.input[1])
        del params[mul.input[1]]
        del params[add.input[1]]
        net.op.extend(new_ops)
        break
    return net, params, removed_tensors
def fuse_scale(net, params, ignore_failure):
    # Run until we hit a fixed point, fusing one BN -> Mul -> Add chain per pass
    removed_tensors = []
    while True:
        (next_net, next_params, removed_tensors) = fuse_first_scale(
            net, params, removed_tensors
        )
        if len(next_net.op) == len(net.op):
            return (next_net, next_params, removed_tensors)
        net, params, removed_tensors = (next_net, next_params, removed_tensors)
def fuse_first_relu(net, ignore_op_with_output=None):
    net = copy.deepcopy(net)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if current.type not in ("Conv", "Sum") or next_.type != "Relu":
            continue

        if ignore_op_with_output and current.output[0] in ignore_op_with_output:
            continue

        # else, can fuse
        conv = current
        relu = next_
        fused_conv = copy.deepcopy(conv)
        fused_conv.type = "ConvRelu" if current.type == "Conv" else "SumRelu"
        fused_conv.output[0] = relu.output[0]

        new_ops = net.op[:i] + [fused_conv] + net.op[j + 1 :]
        del net.op[:]
        net.op.extend(new_ops)
        break
    return net
def fuse_relu(net, ignore_failure, ignore_op_with_output=None):
    # Run until we hit a fixed point, fusing one op -> Relu pair per pass
    while True:
        next_net = fuse_first_relu(net, ignore_op_with_output)
        if len(next_net.op) == len(net.op):
            if any(op.type == "Relu" for op in next_net.op) and not ignore_failure:
                raise Exception("Model contains Relu op after fusion: %s", next_net)
            return next_net
        net = next_net
def last_producer(ops, blob):
    for (i, op) in reversed(list(enumerate(ops))):
        if op.output[0] == blob:
            return i
    raise ValueError("Failed to find last producer of blob, %s", blob)
def swap_first_concat_relu(net, ignore_op_with_output=None):
    net = copy.deepcopy(net)

    for ((i, current), (j, next_)) in pairwise(enumerate(net.op)):
        if next_.input[0] != current.output[0]:
            continue

        if current.type != "Concat" or next_.type != "Relu":
            continue

        if ignore_op_with_output and current.output[0] in ignore_op_with_output:
            continue
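        # Relu is elementwise, so Relu(Concat(xs)) == Concat([Relu(x) for x in xs]).
        # Swapping moves the Relu above the Concat so that each producer can
        # later be fused with its own Relu (e.g. by fuse_relu into ConvRelu).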
        concat = copy.deepcopy(current)
        relu = copy.deepcopy(next_)
        pre_ops = copy.deepcopy(net.op[:i])
        post_ops = copy.deepcopy(net.op[j + 1 :])

        # Drop the Relu after the Concat by writing the Concat to its output
        concat.output[0] = relu.output[0]
        # Insert a Relu after each op that produces an input of the Concat
        for blob in concat.input:
            k = last_producer(pre_ops, blob)
            producer = pre_ops[k]
            assert producer.output[0] == blob
            producer.output[0] = blob + "_pre_relu"

            new_relu = copy.deepcopy(relu)
            new_relu.input[0] = producer.output[0]
            new_relu.output[0] = blob

            pre_ops = pre_ops[: k + 1] + [new_relu] + pre_ops[k + 1 :]

        new_ops = pre_ops + [concat] + post_ops
        del net.op[:]
        net.op.extend(new_ops)
        break
    return net
def swap_concat_relu(net, ignore_op_with_output=None):
    # Run until we hit a fixed point, swapping one Concat -> Relu pair per pass
    while True:
        next_net = swap_first_concat_relu(net, ignore_op_with_output)
        if len(next_net.op) == len(net.op):
            return next_net
        net = next_net
def add_version_to_conv_bias(net, init_net):
    """
    In architectures such as FPN (https://arxiv.org/abs/1612.03144), a few Conv
    ops share the same weight and bias and are run at different scales of
    the input. Since 'bias_scale = input_scale * weight_scale', sharing the
    same bias blob among multiple Conv ops means that we need a different bias
    scale for each of the ops. To achieve this, we just duplicate those bias
    blobs that are used by multiple Conv ops before performing the int8 rewrite.
    """
    bias_count = defaultdict(int)
    for op in net._net.op:
        if "Conv" in op.type and len(op.input) >= 3:
            bias_count[op.input[2]] += 1
    bias_fill_op = {}
    for op in init_net._net.op:
        if bias_count[op.output[0]] > 1:
            bias_fill_op[op.output[0]] = op
    bias_version = defaultdict(int)
    for op in net._net.op:
        if "Conv" in op.type and len(op.input) >= 3:
            bias = op.input[2]
            if bias_count[bias] <= 1:
                continue

            version = bias_version[bias]
            bias_version[bias] += 1
            if version == 0:
                # The first user keeps the original bias blob
                continue

            new_bias = bias + "_v" + str(version)
            fill_op = copy.deepcopy(bias_fill_op[bias])
            fill_op.output[0] = new_bias
            init_net._net.op.extend([fill_op])
            op.input[2] = new_bias
            net._net.external_input.append(new_bias)
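
# Example: if blob "fpn_conv_bias" (hypothetical name) is shared by three Conv
# ops, after this pass the ops read "fpn_conv_bias", "fpn_conv_bias_v1", and
# "fpn_conv_bias_v2", each backed by a duplicated fill op in init_net.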
def add_quantization_param_args_(op, q_param):
    op.arg.extend(
        [
            utils.MakeArgument("Y_scale", q_param.scale),
            utils.MakeArgument("Y_zero_point", q_param.zero_point),
        ]
    )
def choose_quantization_params(tensor_min, tensor_max, preserve_sparsity=False):
    if tensor_min < 0 and tensor_max > 0 and preserve_sparsity:
        symmetric_qmin = -(255 // 2 + 1)
        symmetric_qmax = 255 // 2
        max_scale = max(
            abs(tensor_min / symmetric_qmin), abs(tensor_max / symmetric_qmax)
        )
        tensor_min = max_scale * symmetric_qmin
        tensor_max = max_scale * symmetric_qmax
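
    # With preserve_sparsity, the range is widened so qmin = -128 and qmax = 127
    # share one scale; real 0 then falls exactly on an integer quantization
    # level (the zero_point, pinned to 128 below), so exact zeros in the tensor
    # stay exactly zero after quantization.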
    q_param = hardcode_scale_zp.choose_quantization_params(tensor_min, tensor_max)

    if tensor_min < 0 and tensor_max > 0 and preserve_sparsity:
        q_param = hardcode_scale_zp.QuantizationParam(q_param.scale, 128)

    return q_param
def add_quantization_param_args(op, tensor, preserve_sparsity=False):
    tensor_min = 0 if tensor.size == 0 else tensor.min()
    tensor_max = 0 if tensor.size == 0 else tensor.max()

    q_param = choose_quantization_params(tensor_min, tensor_max, preserve_sparsity)

    add_quantization_param_args_(op, q_param)
    return q_param
def create_int8_given_tensor_fill(tensor, out_blob_name, preserve_sparsity=False):
    """
    Create an Int8GivenTensorFill op that quantizes the given tensor and outputs
    an Int8Tensor with out_blob_name.
    """
    op = core.CreateOperator("Int8GivenTensorFill", [], out_blob_name)
    q_param = add_quantization_param_args(op, tensor, preserve_sparsity)
    quantized_tensor = (
        np.around(tensor / q_param.scale).astype(np.int32) + q_param.zero_point
    )
    quantized_tensor = np.maximum(0, np.minimum(quantized_tensor, 255))
    op.arg.extend(
        [
            utils.MakeArgument("values", quantized_tensor.astype(np.uint8).tobytes()),
            utils.MakeArgument("shape", quantized_tensor.shape),
        ]
    )
    return op, q_param
def create_int8_bias_tensor_fill(tensor, out_blob_name, x_q_param, w_q_param):
    """
    Similar to create_int8_given_tensor_fill, but for bias blobs to be stored
    as int32.
    """
    scale = x_q_param.scale * w_q_param.scale
    quantized_tensor = np.around(tensor / scale).astype(np.int32)
    # Flatten to 1-D for the fill op (reshape returns a new array)
    quantized_tensor = quantized_tensor.reshape(-1)
    op = core.CreateOperator("Int8GivenIntTensorFill", [], out_blob_name)
    op.arg.extend(
        [
            utils.MakeArgument("values", quantized_tensor),
            utils.MakeArgument("shape", quantized_tensor.shape),
        ]
    )
    q_param = hardcode_scale_zp.QuantizationParam(scale, 0)
    add_quantization_param_args_(op, q_param)
    return op
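
# Note: the bias is quantized with scale = input_scale * weight_scale and
# zero_point = 0, so the int32 bias can be added directly to the int32
# accumulator of the int8 convolution.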