1 """Gradient interface""" 4 from .modules.utils
import _single, _pair, _triple


def _grad_input_padding(grad_output, input_size, stride, padding, kernel_size):
    input_size = list(input_size)
    k = grad_output.dim() - 2

    if len(input_size) == k + 2:
        input_size = input_size[-k:]
    if len(input_size) != k:
        raise ValueError("input_size must have {} elements (got {})"
                         .format(k + 2, len(input_size)))

    # Smallest input size along dim d that this grad_output could have
    # come from, given the stride, padding and kernel size.
    def dim_size(d):
        return ((grad_output.size(d + 2) - 1) * stride[d] - 2 * padding[d] +
                kernel_size[d])
    min_sizes = [dim_size(d) for d in range(k)]
    max_sizes = [min_sizes[d] + stride[d] - 1 for d in range(k)]
    for size, min_size, max_size in zip(input_size, min_sizes, max_sizes):
        if size < min_size or size > max_size:
            raise ValueError(
                ("requested an input grad size of {}, but valid sizes range "
                 "from {} to {} (for a grad_output of {})").format(
                     input_size, min_sizes, max_sizes,
                     grad_output.size()[2:]))

    # The output padding conv_transpose needs to hit input_size exactly.
    return tuple(input_size[d] - min_sizes[d] for d in range(k))
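

# Worked example (illustrative numbers, not taken from the code above): for a
# 1D convolution with stride=2, padding=0 and kernel_size=3, a grad_output of
# length 4 can only have come from inputs of length
# (4 - 1) * 2 - 2 * 0 + 3 = 9 up to 9 + (2 - 1) = 10; requesting
# input_size=(10,) therefore yields an output padding of (1,).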


def conv1d_input(input_size, weight, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv1d with respect to the input of the convolution.
    This is the same as the 1D transposed convolution operator under the hood but requires
    the shape of the gradient w.r.t. input to be specified explicitly.

    Args:
        input_size : Shape of the input gradient tensor
        weight: weight tensor (out_channels x in_channels/groups x kW)
        grad_output : output gradient tensor (minibatch x out_channels x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, requires_grad=True)
        >>> output = F.conv1d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_input = torch.autograd.grad(output, input, grad_output)
        >>> F.grad.conv1d_input(input.shape, weight, grad_output)

    """
    stride = _single(stride)
    padding = _single(padding)
    dilation = _single(dilation)
    kernel_size = [weight.shape[2]]

    if input_size is None:
        raise ValueError("grad.conv1d_input requires specifying an input_size")

    grad_input_padding = _grad_input_padding(grad_output, input_size, stride,
                                             padding, kernel_size)

    return torch.conv_transpose1d(
        grad_output, weight, None, stride, padding, grad_input_padding,
        groups, dilation)
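

def _conv1d_input_example():
    # Minimal sanity-check sketch (hypothetical helper, not part of the
    # original module API): conv1d_input should reproduce the input gradient
    # that autograd computes for the same convolution.
    import torch.nn.functional as F
    inp = torch.randn(1, 1, 5, requires_grad=True)
    w = torch.randn(1, 1, 2)
    out = F.conv1d(inp, w)
    go = torch.randn_like(out)
    expected, = torch.autograd.grad(out, inp, go)
    assert torch.allclose(conv1d_input(inp.shape, w, go), expected)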


def conv1d_weight(input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv1d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iW)
        weight_size : Shape of the weight gradient tensor
        grad_output : output gradient tensor (minibatch x out_channels x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, requires_grad=True)
        >>> output = F.conv1d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv1d_weight(input, weight.shape, grad_output)

    """
    stride = _single(stride)
    padding = _single(padding)
    dilation = _single(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    # Fold batch and channel dimensions together so the weight gradient can
    # be computed as a single grouped convolution of input with grad_output.
    grad_output = grad_output.contiguous().repeat(1, in_channels // groups, 1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2])

    # Note the swapped roles: the forward stride acts as dilation here and
    # the forward dilation as stride.
    grad_weight = torch.conv1d(input, grad_output, None, dilation, padding,
                               stride, in_channels * min_batch)

    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels, grad_weight.shape[2]).transpose(
            0, 1).narrow(2, 0, weight_size[2])
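

def _conv1d_weight_example():
    # Minimal sanity-check sketch (hypothetical helper, not part of the
    # original module API): conv1d_weight should reproduce the weight
    # gradient that autograd computes for the same convolution.
    import torch.nn.functional as F
    inp = torch.randn(1, 1, 5)
    w = torch.randn(1, 1, 2, requires_grad=True)
    out = F.conv1d(inp, w)
    go = torch.randn_like(out)
    expected, = torch.autograd.grad(out, w, go)
    assert torch.allclose(conv1d_weight(inp, w.shape, go), expected)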


def conv2d_input(input_size, weight, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv2d with respect to the input of the convolution.
    This is the same as the 2D transposed convolution operator under the hood but requires
    the shape of the gradient w.r.t. input to be specified explicitly.

    Args:
        input_size : Shape of the input gradient tensor
        weight: weight tensor (out_channels x in_channels/groups x kH x kW)
        grad_output : output gradient tensor (minibatch x out_channels x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, 2, requires_grad=True)
        >>> output = F.conv2d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_input = torch.autograd.grad(output, input, grad_output)
        >>> F.grad.conv2d_input(input.shape, weight, grad_output)

    """
    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)
    kernel_size = (weight.shape[2], weight.shape[3])

    if input_size is None:
        raise ValueError("grad.conv2d_input requires specifying an input_size")

    grad_input_padding = _grad_input_padding(grad_output, input_size, stride,
                                             padding, kernel_size)

    return torch.conv_transpose2d(
        grad_output, weight, None, stride, padding, grad_input_padding,
        groups, dilation)
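

def _conv2d_input_example():
    # Minimal sanity-check sketch (hypothetical helper, not part of the
    # original module API): with stride > 1 several input sizes map to the
    # same output size, which is why input_size must be passed explicitly.
    import torch.nn.functional as F
    inp = torch.randn(1, 1, 6, 6, requires_grad=True)
    w = torch.randn(1, 1, 3, 3)
    out = F.conv2d(inp, w, stride=2)
    go = torch.randn_like(out)
    expected, = torch.autograd.grad(out, inp, go)
    assert torch.allclose(conv2d_input(inp.shape, w, go, stride=2), expected)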


def conv2d_weight(input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv2d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iH x iW)
        weight_size : Shape of the weight gradient tensor
        grad_output : output gradient tensor (minibatch x out_channels x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(1, 1, 3, 3, requires_grad=True)
        >>> weight = torch.randn(1, 1, 1, 2, requires_grad=True)
        >>> output = F.conv2d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv2d_weight(input, weight.shape, grad_output)

    """
    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    # Fold batch and channel dimensions together so the weight gradient can
    # be computed as a single grouped convolution of input with grad_output.
    grad_output = grad_output.contiguous().repeat(1, in_channels // groups, 1,
                                                  1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2],
        grad_output.shape[3])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2], input.shape[3])

    # As in conv1d_weight, stride and dilation swap roles here.
    grad_weight = torch.conv2d(input, grad_output, None, dilation, padding,
                               stride, in_channels * min_batch)

    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2],
        grad_weight.shape[3])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels,
        grad_weight.shape[2], grad_weight.shape[3]).transpose(0, 1).narrow(
            2, 0, weight_size[2]).narrow(3, 0, weight_size[3])


def conv3d_input(input_size, weight, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv3d with respect to the input of the convolution.
    This is the same as the 3D transposed convolution operator under the hood but requires
    the shape of the gradient w.r.t. input to be specified explicitly.

    Args:
        input_size : Shape of the input gradient tensor
        weight: weights tensor (out_channels x in_channels/groups x kT x kH x kW)
        grad_output : output gradient tensor (minibatch x out_channels x oT x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
        >>> weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
        >>> output = F.conv3d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_input = torch.autograd.grad(output, input, grad_output)
        >>> F.grad.conv3d_input(input.shape, weight, grad_output)

    """
    stride = _triple(stride)
    padding = _triple(padding)
    dilation = _triple(dilation)
    kernel_size = (weight.shape[2], weight.shape[3], weight.shape[4])

    if input_size is None:
        raise ValueError("grad.conv3d_input requires specifying an input_size")

    grad_input_padding = _grad_input_padding(grad_output, input_size, stride,
                                             padding, kernel_size)

    return torch.conv_transpose3d(
        grad_output, weight, None, stride, padding, grad_input_padding,
        groups, dilation)


def conv3d_weight(input, weight_size, grad_output, stride=1, padding=0, dilation=1, groups=1):
    r"""
    Computes the gradient of conv3d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iT x iH x iW)
        weight_size : Shape of the weight gradient tensor
        grad_output : output gradient tensor (minibatch x out_channels x oT x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
        >>> weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
        >>> output = F.conv3d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv3d_weight(input, weight.shape, grad_output)

    """
    stride = _triple(stride)
    padding = _triple(padding)
    dilation = _triple(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    # Fold batch and channel dimensions together so the weight gradient can
    # be computed as a single grouped convolution of input with grad_output.
    grad_output = grad_output.repeat(1, in_channels // groups, 1, 1, 1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2],
        grad_output.shape[3], grad_output.shape[4])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2], input.shape[3],
                                    input.shape[4])

    # As in conv1d_weight, stride and dilation swap roles here.
    grad_weight = torch.conv3d(input, grad_output, None, dilation, padding,
                               stride, in_channels * min_batch)

    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2],
        grad_weight.shape[3], grad_weight.shape[4])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels, grad_weight.shape[2],
        grad_weight.shape[3], grad_weight.shape[4]).transpose(0, 1).narrow(
            2, 0, weight_size[2]).narrow(3, 0, weight_size[3]).narrow(
                4, 0, weight_size[4])
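

def _conv3d_weight_example():
    # Minimal sanity-check sketch (hypothetical helper, not part of the
    # original module API): conv3d_weight should agree with autograd, here
    # with multiple channels and a non-unit stride.
    import torch.nn.functional as F
    inp = torch.randn(1, 2, 6, 6, 6)
    w = torch.randn(3, 2, 2, 2, 2, requires_grad=True)
    out = F.conv3d(inp, w, stride=2)
    go = torch.randn_like(out)
    expected, = torch.autograd.grad(out, w, go)
    assert torch.allclose(conv3d_weight(inp, w.shape, go, stride=2), expected)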