Caffe2 - Python API
A deep learning, cross-platform ML framework
detectron_keypoints.py
1 from __future__ import division
2 from __future__ import absolute_import
3 from __future__ import print_function
4 from __future__ import unicode_literals
5 
6 try:
7  import cv2
8 except ImportError:
9  pass # skip if opencv is not available
10 import numpy as np
11 
12 
# === copied from utils/keypoints.py as reference ===
# Stand-in for cfg.KRCNN.NUM_KEYPOINTS. heatmaps_to_keypoints takes the larger
# of this value and the heatmap channel count, so -1 means "use the channel
# count of the heatmaps as-is".
_NUM_KEYPOINTS = -1
# Stand-in for cfg.KRCNN.INFERENCE_MIN_SIZE. A value of 0 disables the
# minimum-size branch in heatmaps_to_keypoints, so each heatmap is resized to
# exactly the (ceiled) RoI size.
_INFERENCE_MIN_SIZE = 0
16 
17 
def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Every RoI's heatmaps are resized with bicubic interpolation to the RoI's
    own (ceiled) pixel size; the arg-max of each keypoint channel is then
    mapped back to image coordinates.

    Args:
        maps: array indexed as (roi, keypoint, y, x) holding the heatmap
            scores (logits) of every RoI.
        rois: 2-D array with one row per RoI whose first four columns are
            (x0, y0, x1, y1).

    Returns:
        A float32 array of shape (#rois, 4, #keypoints) with the 4 rows
        corresponding to (x, y, logit, prob) for each keypoint.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion
    # from Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c
    # is a continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    # Degenerate boxes are treated as at least one pixel wide/high.
    widths = np.maximum(rois[:, 2] - rois[:, 0], 1)
    heights = np.maximum(rois[:, 3] - rois[:, 1], 1)
    # Builtin ``int`` instead of the ``np.int`` alias: the alias was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    widths_ceil = np.ceil(widths).astype(int)
    heights_ceil = np.ceil(heights).astype(int)

    num_keypoints = np.maximum(maps.shape[1], _NUM_KEYPOINTS)

    # NCHW to NHWC for use with OpenCV
    maps = np.transpose(maps, [0, 2, 3, 1])
    min_size = _INFERENCE_MIN_SIZE

    xy_preds = np.zeros((len(rois), 4, num_keypoints), dtype=np.float32)
    for i in range(len(rois)):
        if min_size > 0:
            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
        else:
            roi_map_width = widths_ceil[i]
            roi_map_height = heights_ceil[i]
        # Ratio between the true RoI extent and the resized map, used to
        # scale map coordinates back into RoI coordinates.
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = cv2.resize(
            maps[i], (roi_map_width, roi_map_height),
            interpolation=cv2.INTER_CUBIC)
        if roi_map.ndim == 2:
            # cv2.resize drops a trailing channel axis of length 1; restore
            # it so the single-keypoint case survives the transpose below.
            roi_map = roi_map[:, :, np.newaxis]

        # Bring back to CHW
        roi_map = np.transpose(roi_map, [2, 0, 1])
        roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        for k in range(num_keypoints):
            # Flat arg-max over the (H, W) map, split into row and column.
            pos = roi_map[k, :, :].argmax()
            y_int, x_int = divmod(pos, w)
            # The softmax is monotonic, so the score peak must also be the
            # probability peak.
            assert (roi_map_probs[k, y_int, x_int] ==
                    roi_map_probs[k, :, :].max())
            x = (x_int + 0.5) * width_correction
            y = (y_int + 0.5) * height_correction
            xy_preds[i, 0, k] = x + offset_x[i]
            xy_preds[i, 1, k] = y + offset_y[i]
            xy_preds[i, 2, k] = roi_map[k, y_int, x_int]
            xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int]

    return xy_preds
77 
78 
def scores_to_probs(scores):
    """Transform CxHxW scores into per-channel spatial probabilities.

    A softmax is taken over the H x W positions of every channel
    independently. The channel maximum is subtracted before exponentiating
    so large scores do not overflow.

    Args:
        scores: array of shape (C, H, W). It is overwritten in place; pass a
            copy if the raw scores are still needed.

    Returns:
        The same ``scores`` array, now holding the probabilities.
    """
    for c in range(scores.shape[0]):
        channel = scores[c, :, :]
        # Exponentiate once and reuse the result for both the numerator and
        # the normalising sum.
        exp_scores = np.exp(channel - channel.max())
        scores[c, :, :] = exp_scores / np.sum(exp_scores)
    return scores
88 
89 
def _reflect_index(idx, size):
    """Mirror an index that is one step outside [0, size) back inside."""
    if idx < 0:
        return idx + 2
    if idx >= size:
        return idx - 2
    return idx


def approx_heatmap_keypoint(heatmaps_in, bboxes_in):
    """Estimate sub-pixel keypoint locations directly on the raw heatmaps.

    Mask R-CNN uses bicubic upscaling before taking the maximum of the heat
    map for keypoints. We are using bilinear upscaling, which means we can
    approximate the maximum coordinate with the low dimension maximum
    coordinates. We would like to avoid bicubic upscaling, because it is
    computationally expensive. Brown and Lowe (Invariant Features from
    Interest Point Groups, 2002) fit a 3D quadratic function to the local
    sample points to determine the interpolated location of the maximum of
    scale space, and their experiments showed that this provides a
    substantial improvement to matching and stability for keypoint
    extraction. This approach uses the Taylor expansion (up to the quadratic
    terms) of the scale-space function, which is equivalent to one step of
    Newton's method. This efficient method is used in many keypoint
    estimation algorithms such as SIFT and SURF.

    Implementing the Newton step numerically is straightforward; it only
    needs a 2x2 linear solve.

    Args:
        heatmaps_in: array of shape (N, keypoint_count, S, S) with S >= 2.
        bboxes_in: array of shape (N, 4) holding (x0, y0, x1, y1) per box.

    Returns:
        Array of shape (N, 4, keypoint_count); the 4 rows are
        (x, y, interpolated score, probability at the discrete maximum).
    """
    assert len(bboxes_in.shape) == 2
    N = bboxes_in.shape[0]
    assert bboxes_in.shape[1] == 4
    assert len(heatmaps_in.shape) == 4
    assert heatmaps_in.shape[0] == N
    keypoint_count = heatmaps_in.shape[1]
    heatmap_size = heatmaps_in.shape[2]
    assert heatmap_size >= 2
    assert heatmaps_in.shape[3] == heatmap_size

    keypoints_out = np.zeros((N, keypoint_count, 4))

    for k in range(N):
        x0, y0, x1, y1 = bboxes_in[k, :]
        # Degenerate boxes are treated as at least one pixel wide/high.
        xLen = np.maximum(x1 - x0, 1)
        yLen = np.maximum(y1 - y0, 1)
        softmax_map = scores_to_probs(heatmaps_in[k, :, :, :].copy())
        for j in range(keypoint_count):
            f = heatmaps_in[k][j]

            # Discrete peak: np.argmax returns the first maximum in row-major
            # order, the same tie-breaking as a y-then-x scan that only
            # accepts strictly larger scores.
            maxY, maxX = divmod(int(np.argmax(f)), heatmap_size)
            maxScore = f[maxY, maxX]
            maxProb = softmax_map[j, maxY, maxX]

            # 3x3 neighbourhood around the peak, indexed fmax[row][col].
            # When the grid would leave the heatmap, mirror around the
            # centre sample.
            cols = [_reflect_index(maxX + d, heatmap_size) for d in (-1, 0, 1)]
            rows = [_reflect_index(maxY + d, heatmap_size) for d in (-1, 0, 1)]
            fmax = [[f[r][c] for c in cols] for r in rows]

            # b = -f'(0) (central differences), A = f''(0) Hessian matrix
            b = [-(fmax[1][2] - fmax[1][0]) / 2,
                 -(fmax[2][1] - fmax[0][1]) / 2]
            d_xy = (fmax[2][2] - fmax[2][0] - fmax[0][2] + fmax[0][0]) / 4
            A = [[fmax[1][0] - 2 * fmax[1][1] + fmax[1][2], d_xy],
                 [d_xy, fmax[0][1] - 2 * fmax[1][1] + fmax[2][1]]]

            # solve Ax=b with Cramer's rule
            div = A[1][1] * A[0][0] - A[0][1] * A[1][0]
            if abs(div) < 0.0001:
                # Near-singular Hessian: keep the discrete peak.
                deltaX = 0
                deltaY = 0
                deltaScore = maxScore
            else:
                deltaY = (b[1] * A[0][0] - b[0] * A[1][0]) / div
                deltaX = (b[0] * A[1][1] - b[1] * A[0][1]) / div
                # clip delta if going out-of-range of 3x3 grid
                if abs(deltaX) > 1.5 or abs(deltaY) > 1.5:
                    scale = 1.5 / max(abs(deltaX), abs(deltaY))
                    # delta * (1.5 / delta) may round one ulp above 1.5, so
                    # clamp to keep the range checks below from tripping on
                    # rounding noise.
                    deltaX = max(-1.5, min(1.5, deltaX * scale))
                    deltaY = max(-1.5, min(1.5, deltaY * scale))
                # score = f(0) + f'(0)*x + 1/2 * f''(0) * x^2
                #       = f(0) - b*x + 1/2*x*A*x
                deltaScore = (
                    fmax[1][1] - (b[0] * deltaX + b[1] * deltaY) +
                    0.5 * (deltaX * deltaX * A[0][0] +
                           deltaX * deltaY * A[1][0] +
                           deltaY * deltaX * A[0][1] +
                           deltaY * deltaY * A[1][1]))

            assert abs(deltaX) <= 1.5
            assert abs(deltaY) <= 1.5

            # final coordinates: heatmap cell centre (+0.5) plus the
            # sub-pixel offset, scaled into the bounding box
            keypoints_out[k, j, :] = (
                x0 + (maxX + deltaX + .5) * xLen / heatmap_size,
                y0 + (maxY + deltaY + .5) * yLen / heatmap_size,
                deltaScore,
                maxProb,
            )

    # (N, keypoint_count, 4) -> (N, 4, keypoint_count)
    keypoints_out = np.transpose(keypoints_out, [0, 2, 1])

    return keypoints_out
203  return keypoints_out