rnn.py
from collections import namedtuple
import warnings

import torch


PackedSequence_ = namedtuple('PackedSequence',
                             ['data', 'batch_sizes', 'sorted_indices', 'unsorted_indices'])


def bind(optional, fn):
    if optional is None:
        return None
    return fn(optional)


class PackedSequence(PackedSequence_):
    r"""Holds the data and list of :attr:`batch_sizes` of a packed sequence.

    All RNN modules accept packed sequences as inputs.

    Note:
        Instances of this class should never be created manually. They are meant
        to be instantiated by functions like :func:`pack_padded_sequence`.

        Batch sizes represent the number of elements at each sequence step in
        the batch, not the varying sequence lengths passed to
        :func:`pack_padded_sequence`. For instance, given data ``abc`` and ``x``
        the :class:`PackedSequence` would contain data ``axbc`` with
        ``batch_sizes=[2,1,1]`` (see the illustrative comment below).

    Attributes:
        data (Tensor): Tensor containing packed sequence
        batch_sizes (Tensor): Tensor of integers holding
            information about the batch size at each sequence step
        sorted_indices (Tensor, optional): Tensor of integers holding how this
            :class:`PackedSequence` is constructed from sequences.
        unsorted_indices (Tensor, optional): Tensor of integers holding how to
            recover the original sequences with correct order.

    .. note::
        :attr:`data` can be on arbitrary device and of arbitrary dtype.
        :attr:`sorted_indices` and :attr:`unsorted_indices` must be ``torch.int64``
        tensors on the same device as :attr:`data`.

        However, :attr:`batch_sizes` should always be a CPU ``torch.int64`` tensor.

        This invariant is maintained throughout the :class:`PackedSequence` class,
        and by all functions that construct a :class:`PackedSequence` in PyTorch
        (i.e., they only pass in tensors conforming to this constraint).

    """
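
    # Illustrative note (added in this edited listing; not part of the original
    # torch.nn.utils.rnn module): a minimal sketch of the packed layout described
    # above, assuming the two sequences ``abc`` (length 3) and ``x`` (length 1)
    # are encoded as the integer columns of a padded ``T x B`` tensor.
    #
    #     >>> padded = torch.tensor([[1, 4],    # a, x
    #     ...                        [2, 0],    # b, <pad>
    #     ...                        [3, 0]])   # c, <pad>
    #     >>> packed = pack_padded_sequence(padded, lengths=[3, 1])
    #     >>> packed.data
    #     tensor([1, 4, 2, 3])
    #     >>> packed.batch_sizes
    #     tensor([2, 1, 1])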

    # NOTE [ device and dtype of a PackedSequence ]
    #
    # See the note above in doc string (starting with ":attr:`data` can be on
    # arbitrary device...").

    def __new__(cls, data, batch_sizes=None, sorted_indices=None, unsorted_indices=None):
        # PackedSequence used to only have __init__(self, data, batch_sizes)
        # without a __new__ like this. So to preserve BC for calling in keyword
        # arg style (e.g., `PackedSequence(data=..., batch_sizes=...)`), we have
        # to provide two arguments with exact names `data` and `batch_sizes`.

        # NB: if unsorted_indices is provided, it should be the inverse permutation
        # to sorted_indices. Don't assert it here because the PackedSequence ctor
        # should only be used internally.
        if unsorted_indices is None:
            unsorted_indices = invert_permutation(sorted_indices)

        # support being called as `PackedSequence(data, batch_sizes, sorted_indices)`
        if batch_sizes is not None:
            return super(PackedSequence, cls).__new__(
                cls, data, batch_sizes, sorted_indices, unsorted_indices)

        # support being called as `PackedSequence((data, batch_sizes), *, sorted_indices)`
        else:
            assert isinstance(data, (list, tuple)) and len(data) == 2
            # pass unsorted_indices as well; the underlying namedtuple has four
            # fields and would otherwise raise a missing-argument TypeError
            return super(PackedSequence, cls).__new__(
                cls, data[0], data[1], sorted_indices, unsorted_indices)

    def pin_memory(self):
        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.pin_memory(), self.batch_sizes,
                          bind(self.sorted_indices, lambda t: t.pin_memory()),
                          bind(self.unsorted_indices, lambda t: t.pin_memory()))

    def cuda(self, *args, **kwargs):
        """Returns a GPU copy if `self.data` is not already on the GPU; otherwise returns `self`"""
        if self.is_cuda:
            return self
        else:
            # Why not convert `batch_sizes`?
            # See NOTE [ device and dtype of a PackedSequence ]
            return type(self)(self.data.cuda(*args, **kwargs), self.batch_sizes,
                              bind(self.sorted_indices, lambda t: t.cuda(*args, **kwargs)),
                              bind(self.unsorted_indices, lambda t: t.cuda(*args, **kwargs)))

    def cpu(self):
        """Returns a CPU copy if `self.data` is not already on the CPU; otherwise returns `self`"""
        if self.is_cuda:
            # Why not convert `batch_sizes`?
            # See NOTE [ device and dtype of a PackedSequence ]
            return type(self)(self.data.cpu(), self.batch_sizes,
                              bind(self.sorted_indices, lambda t: t.cpu()),
                              bind(self.unsorted_indices, lambda t: t.cpu()))
        else:
            return self

    def double(self):
        r"""Returns a copy with `self.data` cast to double type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.double(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def float(self):
        r"""Returns a copy with `self.data` cast to float type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.float(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def half(self):
        r"""Returns a copy with `self.data` cast to half type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.half(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def long(self):
        r"""Returns a copy with `self.data` cast to long type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.long(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def int(self):
        r"""Returns a copy with `self.data` cast to int type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.int(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def short(self):
        r"""Returns a copy with `self.data` cast to short type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.short(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def char(self):
        r"""Returns a copy with `self.data` cast to char type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.char(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def byte(self):
        r"""Returns a copy with `self.data` cast to byte type"""

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        return type(self)(self.data.byte(), self.batch_sizes,
                          self.sorted_indices, self.unsorted_indices)

    def to(self, *args, **kwargs):
        r"""Performs dtype and/or device conversion on `self.data`.

        It has a signature similar to :meth:`torch.Tensor.to`.

        .. note::

            If the ``self.data`` Tensor already has the correct :class:`torch.dtype`
            and :class:`torch.device`, then ``self`` is returned.
            Otherwise, returns a copy with the desired configuration.
        """

        # Why not convert `batch_sizes`?
        # See NOTE [ device and dtype of a PackedSequence ]
        data = self.data.to(*args, **kwargs)
        sorted_indices = self.sorted_indices
        unsorted_indices = self.unsorted_indices
        device_kw = 'device'
        if device_kw in kwargs:
            sorted_indices = bind(sorted_indices, lambda t: t.to(kwargs[device_kw]))
            unsorted_indices = bind(unsorted_indices, lambda t: t.to(kwargs[device_kw]))
        if data is self.data:
            return self
        else:
            return type(self)(data, self.batch_sizes,
                              sorted_indices, unsorted_indices)

    @property
    def is_cuda(self):
        r"""Returns true if `self.data` is stored on a GPU"""
        return self.data.is_cuda

    def is_pinned(self):
        r"""Returns true if `self.data` is stored in pinned memory"""
        return self.data.is_pinned()
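
# Illustrative note (added in this edited listing; not part of the original
# torch.nn.utils.rnn module): a minimal sketch of the device/dtype invariant
# described in NOTE [ device and dtype of a PackedSequence ]. Conversions only
# touch ``data`` (and the index tensors for device moves); ``batch_sizes``
# stays a CPU ``torch.int64`` tensor.
#
#     >>> packed = pack_padded_sequence(torch.zeros(3, 2, 5), lengths=[3, 1])
#     >>> packed.data.dtype, packed.batch_sizes.dtype
#     (torch.float32, torch.int64)
#     >>> converted = packed.to(torch.float64)
#     >>> converted.data.dtype, converted.batch_sizes.dtype
#     (torch.float64, torch.int64)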


def invert_permutation(permutation):
    # Computes the inverse permutation: output[permutation[i]] = i.
    if permutation is None:
        return None
    output = torch.empty_like(permutation)
    output.scatter_(0, permutation,
                    torch.arange(0, permutation.numel(), device=permutation.device))
    return output
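
# Illustrative example (added in this edited listing; not part of the original
# module): ``invert_permutation`` returns the inverse index mapping, so a
# tensor reordered by ``permutation`` can be restored to its original order.
#
#     >>> invert_permutation(torch.tensor([2, 0, 1]))
#     tensor([1, 2, 0])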


def pack_padded_sequence(input, lengths, batch_first=False, enforce_sorted=True):
    r"""Packs a Tensor containing padded sequences of variable length.

    :attr:`input` can be of size ``T x B x *`` where `T` is the length of the
    longest sequence (equal to ``lengths[0]``), ``B`` is the batch size, and
    ``*`` is any number of dimensions (including 0). If ``batch_first`` is
    ``True``, ``B x T x *`` :attr:`input` is expected.

    For unsorted sequences, use `enforce_sorted = False`. If :attr:`enforce_sorted` is
    ``True``, the sequences should be sorted by length in decreasing order, i.e.
    ``input[:,0]`` should be the longest sequence, and ``input[:,B-1]`` the shortest
    one. `enforce_sorted = True` is only necessary for ONNX export.

    Note:
        This function accepts any input that has at least two dimensions. You
        can apply it to pack the labels, and use the output of the RNN with
        them to compute the loss directly. A Tensor can be retrieved from
        a :class:`PackedSequence` object by accessing its ``.data`` attribute.

    Arguments:
        input (Tensor): padded batch of variable length sequences.
        lengths (Tensor): list of sequence lengths of each batch element.
        batch_first (bool, optional): if ``True``, the input is expected in ``B x T x *``
            format.
        enforce_sorted (bool, optional): if ``True``, the input is expected to
            contain sequences sorted by length in decreasing order. If
            ``False``, this condition is not checked. Default: ``True``.

    Returns:
        a :class:`PackedSequence` object
    """
    if torch._C._get_tracing_state() and not isinstance(lengths, torch.Tensor):
        warnings.warn('pack_padded_sequence has been called with a Python list of '
                      'sequence lengths. The tracer cannot track the data flow of Python '
                      'values, and it will treat them as constants, likely rendering '
                      'the trace incorrect for any other combination of lengths.',
                      category=torch.jit.TracerWarning, stacklevel=2)
    lengths = torch.as_tensor(lengths, dtype=torch.int64)
    if enforce_sorted:
        sorted_indices = None
    else:
        lengths, sorted_indices = torch.sort(lengths, descending=True)
        sorted_indices = sorted_indices.to(input.device)
        batch_dim = 0 if batch_first else 1
        input = input.index_select(batch_dim, sorted_indices)

    data, batch_sizes = \
        torch._C._VariableFunctions._pack_padded_sequence(input, lengths, batch_first)
    return PackedSequence(data, batch_sizes, sorted_indices)
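

# Illustrative example (added in this edited listing; not part of the original
# torch.nn.utils.rnn module): a minimal sketch of packing a batch whose
# sequences are *not* sorted by length, using ``enforce_sorted=False``. The
# helper name and the shapes (T=3, B=2, feature size 4) are illustrative only.
def _example_pack_padded_unsorted():
    padded = torch.zeros(3, 2, 4)    # T x B x *, zero-padded
    lengths = [1, 3]                 # element 0 is shorter than element 1
    packed = pack_padded_sequence(padded, lengths, enforce_sorted=False)
    # The batch is re-sorted internally; sorted_indices records the sort and
    # unsorted_indices is its inverse, used later to restore the original order.
    assert packed.batch_sizes.tolist() == [2, 1, 1]
    assert packed.sorted_indices.tolist() == [1, 0]
    assert packed.unsorted_indices.tolist() == [1, 0]
    return packed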


def pad_packed_sequence(sequence, batch_first=False, padding_value=0.0, total_length=None):
    r"""Pads a packed batch of variable length sequences.

    It is an inverse operation to :func:`pack_padded_sequence`.

    The returned Tensor's data will be of size ``T x B x *``, where `T` is the length
    of the longest sequence and `B` is the batch size. If ``batch_first`` is True,
    the data will be transposed into ``B x T x *`` format.

    Batch elements will be ordered decreasingly by their length.

    .. note::
        :attr:`total_length` is useful to implement the
        ``pack sequence -> recurrent network -> unpack sequence`` pattern in a
        :class:`~torch.nn.Module` wrapped in :class:`~torch.nn.DataParallel`.
        See :ref:`this FAQ section <pack-rnn-unpack-with-data-parallelism>` for
        details.

    Arguments:
        sequence (PackedSequence): batch to pad
        batch_first (bool, optional): if ``True``, the output will be in ``B x T x *``
            format.
        padding_value (float, optional): values for padded elements.
        total_length (int, optional): if not ``None``, the output will be padded to
            have length :attr:`total_length`. This method will throw :class:`ValueError`
            if :attr:`total_length` is less than the max sequence length in
            :attr:`sequence`.

    Returns:
        Tuple of Tensor containing the padded sequence, and a Tensor
        containing the list of lengths of each sequence in the batch.

    """
    max_seq_length = sequence.batch_sizes.size(0)
    if total_length is not None:
        if total_length < max_seq_length:
            raise ValueError("Expected total_length to be at least the length "
                             "of the longest sequence in input, but got "
                             "total_length={} and max sequence length being {}"
                             .format(total_length, max_seq_length))
        max_seq_length = total_length
    padded_output, lengths = torch._C._VariableFunctions._pad_packed_sequence(
        sequence.data, sequence.batch_sizes, batch_first, padding_value, max_seq_length)
    if sequence.unsorted_indices is not None:
        batch_dim = 0 if batch_first else 1
        return padded_output.index_select(batch_dim, sequence.unsorted_indices), \
            lengths[sequence.unsorted_indices]
    return padded_output, lengths
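

# Illustrative example (added in this edited listing; not part of the original
# torch.nn.utils.rnn module): a minimal round-trip sketch of
# ``pack_padded_sequence`` followed by ``pad_packed_sequence``, including the
# ``total_length`` argument mentioned in the note above. Name and shapes are
# illustrative only.
def _example_pack_pad_roundtrip():
    padded = torch.arange(6, dtype=torch.float32).reshape(3, 2, 1)  # T=3, B=2, *=1
    lengths = [3, 1]
    packed = pack_padded_sequence(padded, lengths)
    # Unpack, padding the result out to a fixed total_length of 5 time steps.
    unpacked, out_lengths = pad_packed_sequence(packed, total_length=5)
    assert unpacked.shape == (5, 2, 1)
    assert out_lengths.tolist() == [3, 1]
    # Entries within each sequence's length are preserved; the rest are padding.
    assert torch.equal(unpacked[:3, 0], padded[:, 0])
    return unpacked, out_lengths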


def pad_sequence(sequences, batch_first=False, padding_value=0):
    r"""Pad a list of variable length Tensors with ``padding_value``

    ``pad_sequence`` stacks a list of Tensors along a new dimension,
    and pads them to equal length. For example, if the input is a list of
    sequences with size ``L x *``, the output is of size ``T x B x *`` if
    ``batch_first`` is ``False``, and ``B x T x *`` otherwise.

    `B` is batch size. It is equal to the number of elements in ``sequences``.
    `T` is length of the longest sequence.
    `L` is length of the sequence.
    `*` is any number of trailing dimensions, including none.

    Example:
        >>> from torch.nn.utils.rnn import pad_sequence
        >>> a = torch.ones(25, 300)
        >>> b = torch.ones(22, 300)
        >>> c = torch.ones(15, 300)
        >>> pad_sequence([a, b, c]).size()
        torch.Size([25, 3, 300])

    Note:
        This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
        where `T` is the length of the longest sequence. This function assumes
        trailing dimensions and type of all the Tensors in sequences are the same.

    Arguments:
        sequences (list[Tensor]): list of variable length sequences.
        batch_first (bool, optional): output will be in ``B x T x *`` if True, or in
            ``T x B x *`` otherwise
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        Tensor of size ``T x B x *`` if :attr:`batch_first` is ``False``.
        Tensor of size ``B x T x *`` otherwise
    """

    # assuming trailing dimensions and type of all the Tensors
    # in sequences are the same and fetching those from sequences[0]
    max_size = sequences[0].size()
    trailing_dims = max_size[1:]
    max_len = max([s.size(0) for s in sequences])
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims

    out_tensor = sequences[0].data.new(*out_dims).fill_(padding_value)
    for i, tensor in enumerate(sequences):
        length = tensor.size(0)
        # use index notation to prevent duplicate references to the tensor
        if batch_first:
            out_tensor[i, :length, ...] = tensor
        else:
            out_tensor[:length, i, ...] = tensor

    return out_tensor
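
# Illustrative example (added in this edited listing; not part of the original
# module): the same call with ``batch_first=True`` and a non-zero
# ``padding_value``, complementing the docstring example above.
#
#     >>> a, b = torch.ones(3, 2), torch.ones(1, 2)
#     >>> pad_sequence([a, b], batch_first=True, padding_value=-1.0).size()
#     torch.Size([2, 3, 2])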


def pack_sequence(sequences, enforce_sorted=True):
    r"""Packs a list of variable length Tensors

    ``sequences`` should be a list of Tensors of size ``L x *``, where `L` is
    the length of a sequence and `*` is any number of trailing dimensions,
    including zero.

    For unsorted sequences, use `enforce_sorted = False`. If ``enforce_sorted``
    is ``True``, the sequences should be sorted in the order of decreasing length.
    ``enforce_sorted = True`` is only necessary for ONNX export.


    Example:
        >>> from torch.nn.utils.rnn import pack_sequence
        >>> a = torch.tensor([1,2,3])
        >>> b = torch.tensor([4,5])
        >>> c = torch.tensor([6])
        >>> pack_sequence([a, b, c])
        PackedSequence(data=tensor([ 1, 4, 6, 2, 5, 3]), batch_sizes=tensor([ 3, 2, 1]))


    Arguments:
        sequences (list[Tensor]): A list of sequences of decreasing length.
        enforce_sorted (bool, optional): if ``True``, checks that the input
            contains sequences sorted by length in a decreasing order. If
            ``False``, this condition is not checked. Default: ``True``.

    Returns:
        a :class:`PackedSequence` object
    """
    lengths = [v.size(0) for v in sequences]
    return pack_padded_sequence(pad_sequence(sequences), lengths, enforce_sorted=enforce_sorted)
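
# Illustrative example (added in this edited listing; not part of the original
# module): packing tensors in arbitrary order by passing
# ``enforce_sorted=False``; the resulting PackedSequence also carries
# ``sorted_indices``/``unsorted_indices`` so the original order can be restored.
#
#     >>> packed = pack_sequence([torch.tensor([6]), torch.tensor([1, 2, 3])],
#     ...                        enforce_sorted=False)
#     >>> packed.data
#     tensor([1, 6, 2, 3])
#     >>> packed.unsorted_indices
#     tensor([1, 0])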


def get_packed_sequence(data, batch_sizes, sorted_indices, unsorted_indices):
    return PackedSequence(data, batch_sizes, sorted_indices, unsorted_indices)