Caffe2 - Python API
A deep learning, cross-platform ML framework
grad.py
"""Gradient interface"""

import torch
from .modules.utils import _single, _pair, _triple


def _grad_input_padding(grad_output, input_size, stride, padding, kernel_size):
    input_size = list(input_size)
    k = grad_output.dim() - 2

    if len(input_size) == k + 2:
        input_size = input_size[-k:]
    if len(input_size) != k:
        raise ValueError("input_size must have {} elements (got {})"
                         .format(k + 2, len(input_size)))

    def dim_size(d):
        # Smallest input size along spatial dim d that yields
        # grad_output.size(d + 2) outputs: the conv output-size
        # formula inverted.
        return ((grad_output.size(d + 2) - 1) * stride[d] - 2 * padding[d] +
                kernel_size[d])

    min_sizes = [dim_size(d) for d in range(k)]
    max_sizes = [min_sizes[d] + stride[d] - 1 for d in range(k)]
    for size, min_size, max_size in zip(input_size, min_sizes, max_sizes):
        if size < min_size or size > max_size:
            raise ValueError(
                ("requested an input grad size of {}, but valid sizes range "
                 "from {} to {} (for a grad_output of {})").format(
                     input_size, min_sizes, max_sizes,
                     grad_output.size()[2:]))

    # Extra output_padding that conv_transpose needs to hit input_size exactly.
    return tuple(input_size[d] - min_sizes[d] for d in range(k))


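# Worked example (illustrative, not part of the module): a conv produces
# oW = floor((iW + 2*padding - kW) / stride) + 1 outputs, so for stride > 1
# several input widths map to the same oW and the caller must say which one
# it wants. With grad_output of width 2, stride 2, padding 0 and kW = 3,
# dim_size gives (2 - 1) * 2 - 0 + 3 = 5, valid widths are 5..6, and
# requesting 5 yields an output_padding of (0,) while 6 yields (1,).

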
def conv1d_input(input_size, weight, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv1d with respect to the input of the convolution.
    This is the same as the 1D transposed convolution operator under the hood but
    requires the shape of the gradient w.r.t. input to be specified explicitly.

    Args:
        input_size : Shape of the input gradient tensor
        weight: weight tensor (out_channels x in_channels/groups x kW)
        grad_output : output gradient tensor (minibatch x out_channels x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, requires_grad=True)
        >>> output = F.conv1d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_input = torch.autograd.grad(output, input, grad_output)
        >>> F.grad.conv1d_input(input.shape, weight, grad_output)

    """
    stride = _single(stride)
    padding = _single(padding)
    dilation = _single(dilation)
    kernel_size = [weight.shape[2]]

    if input_size is None:
        raise ValueError("grad.conv1d_input requires specifying an input_size")

    grad_input_padding = _grad_input_padding(grad_output, input_size, stride,
                                             padding, kernel_size)

    return torch.conv_transpose1d(
        grad_output, weight, None, stride, padding, grad_input_padding, groups,
        dilation)


def conv1d_weight(input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv1d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iW)
        weight_size : Shape of the weight gradient tensor
        grad_output : output gradient tensor (minibatch x out_channels x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, requires_grad=True)
        >>> output = F.conv1d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv1d_weight(input, weight.shape, grad_output)

    """
    stride = _single(stride)
    padding = _single(padding)
    dilation = _single(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    # Expand grad_output so that each (batch, in_channel) pair gets its own
    # single-channel "filter", then fold batch and channels into one grouped
    # convolution over the reshaped input.
    grad_output = grad_output.contiguous().repeat(1, in_channels // groups, 1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2])

    # The weight gradient is itself a convolution of the input with
    # grad_output; note that stride and dilation swap roles here.
    grad_weight = torch.conv1d(input, grad_output, None, dilation, padding,
                               stride, in_channels * min_batch)

    # Sum over the batch and reshape back to the weight layout, trimming any
    # excess positions introduced by striding.
    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels, grad_weight.shape[2]).transpose(
            0, 1).narrow(2, 0, weight_size[2])


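# Shape trace (illustrative, groups=1, N = minibatch, C = in_channels,
# K = out_channels): grad_output (N, K, oW) is repeated and reshaped to
# (N*K*C, 1, oW) and input (N, C, iW) to (1, N*C, iW); conv1d with
# groups=N*C treats each grad_output slice as a one-channel filter, giving
# (1, N*C*K, kW'). Summing over N and transposing recovers the (K, C, kW')
# weight-gradient layout, narrowed to the true kernel width kW.

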
def conv2d_input(input_size, weight, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv2d with respect to the input of the convolution.
    This is the same as the 2D transposed convolution operator under the hood but
    requires the shape of the gradient w.r.t. input to be specified explicitly.

    Args:
        input_size : Shape of the input gradient tensor
        weight: weight tensor (out_channels x in_channels/groups x kH x kW)
        grad_output : output gradient tensor (minibatch x out_channels x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, 2, requires_grad=True)
        >>> output = F.conv2d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_input = torch.autograd.grad(output, input, grad_output)
        >>> F.grad.conv2d_input(input.shape, weight, grad_output)

    """
    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)
    kernel_size = (weight.shape[2], weight.shape[3])

    if input_size is None:
        raise ValueError("grad.conv2d_input requires specifying an input_size")

    grad_input_padding = _grad_input_padding(grad_output, input_size, stride,
                                             padding, kernel_size)

    return torch.conv_transpose2d(
        grad_output, weight, None, stride, padding, grad_input_padding, groups,
        dilation)


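# Why input_size is required (illustrative): with a 3x3 kernel and stride 2,
# inputs of height 5 and height 6 both produce an output of height 2, so the
# input shape cannot be inferred from grad_output alone.

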
def conv2d_weight(input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv2d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iH x iW)
        weight_size : Shape of the weight gradient tensor
        grad_output : output gradient tensor (minibatch x out_channels x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, 2, requires_grad=True)
        >>> output = F.conv2d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv2d_weight(input, weight.shape, grad_output)

    """
    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    # Same grouped-convolution trick as in conv1d_weight, over two spatial dims.
    grad_output = grad_output.contiguous().repeat(1, in_channels // groups, 1,
                                                  1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2],
        grad_output.shape[3])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2], input.shape[3])

    grad_weight = torch.conv2d(input, grad_output, None, dilation, padding,
                               stride, in_channels * min_batch)

    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2],
        grad_weight.shape[3])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels,
        grad_weight.shape[2], grad_weight.shape[3]).transpose(0, 1).narrow(
            2, 0, weight_size[2]).narrow(3, 0, weight_size[3])


def conv3d_input(input_size, weight, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv3d with respect to the input of the convolution.
    This is the same as the 3D transposed convolution operator under the hood but
    requires the shape of the gradient w.r.t. input to be specified explicitly.

    Args:
        input_size : Shape of the input gradient tensor
        weight: weight tensor (out_channels x in_channels/groups x kT x kH x kW)
        grad_output : output gradient tensor (minibatch x out_channels x oT x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
        >>> weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
        >>> output = F.conv3d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_input = torch.autograd.grad(output, input, grad_output)
        >>> F.grad.conv3d_input(input.shape, weight, grad_output)

    """
    stride = _triple(stride)
    padding = _triple(padding)
    dilation = _triple(dilation)
    kernel_size = (weight.shape[2], weight.shape[3], weight.shape[4])

    if input_size is None:
        raise ValueError("grad.conv3d_input requires specifying an input_size")

    grad_input_padding = _grad_input_padding(grad_output, input_size, stride,
                                             padding, kernel_size)

    return torch.conv_transpose3d(
        grad_output, weight, None, stride, padding, grad_input_padding, groups,
        dilation)


def conv3d_weight(input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv3d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iT x iH x iW)
        weight_size : Shape of the weight gradient tensor
        grad_output : output gradient tensor (minibatch x out_channels x oT x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
        >>> weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
        >>> output = F.conv3d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv3d_weight(input, weight.shape, grad_output)

    """
    stride = _triple(stride)
    padding = _triple(padding)
    dilation = _triple(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    grad_output = grad_output.repeat(1, in_channels // groups, 1, 1, 1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2],
        grad_output.shape[3], grad_output.shape[4])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2], input.shape[3],
                                    input.shape[4])

    grad_weight = torch.conv3d(input, grad_output, None, dilation, padding,
                               stride, in_channels * min_batch)

    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2],
        grad_weight.shape[3], grad_weight.shape[4])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels, grad_weight.shape[2],
        grad_weight.shape[3], grad_weight.shape[4]).transpose(0, 1).narrow(
            2, 0, weight_size[2]).narrow(3, 0, weight_size[3]).narrow(
                4, 0, weight_size[4])
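

# ---------------------------------------------------------------------------
# Sanity check (an illustrative sketch, not part of this module): the explicit
# gradients above should agree with what autograd computes. The shapes, stride
# and tolerance below are arbitrary choices for the demonstration.
#
#     import torch
#     import torch.nn.functional as F
#     from torch.nn import grad
#
#     input = torch.randn(2, 3, 8, 8, requires_grad=True)
#     weight = torch.randn(4, 3, 3, 3, requires_grad=True)
#     output = F.conv2d(input, weight, stride=2, padding=1)
#     grad_output = torch.randn(output.shape)
#
#     gi, gw = torch.autograd.grad(output, (input, weight), grad_output)
#     gi2 = grad.conv2d_input(input.shape, weight, grad_output,
#                             stride=2, padding=1)
#     gw2 = grad.conv2d_weight(input, weight.shape, grad_output,
#                              stride=2, padding=1)
#     assert torch.allclose(gi, gi2, atol=1e-4)
#     assert torch.allclose(gw, gw2, atol=1e-4)
# ---------------------------------------------------------------------------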