import torch
from torch.nn.parameter import Parameter

from .module import Module
from .. import functional as F
from .. import init
12 r"""A simple lookup table that stores embeddings of a fixed dictionary and size. 14 This module is often used to store word embeddings and retrieve them using indices. 15 The input to the module is a list of indices, and the output is the corresponding 19 num_embeddings (int): size of the dictionary of embeddings 20 embedding_dim (int): the size of each embedding vector 21 padding_idx (int, optional): If given, pads the output with the embedding vector at :attr:`padding_idx` 22 (initialized to zeros) whenever it encounters the index. 23 max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm` 24 is renormalized to have norm :attr:`max_norm`. 25 norm_type (float, optional): The p of the p-norm to compute for the :attr:`max_norm` option. Default ``2``. 26 scale_grad_by_freq (boolean, optional): If given, this will scale gradients by the inverse of frequency of 27 the words in the mini-batch. Default ``False``. 28 sparse (bool, optional): If ``True``, gradient w.r.t. :attr:`weight` matrix will be a sparse tensor. 29 See Notes for more details regarding sparse gradients. 32 weight (Tensor): the learnable weights of the module of shape (num_embeddings, embedding_dim) 33 initialized from :math:`\mathcal{N}(0, 1)` 36 - Input: :math:`(*)`, LongTensor of arbitrary shape containing the indices to extract 37 - Output: :math:`(*, H)`, where `*` is the input shape and :math:`H=\text{embedding\_dim}` 40 Keep in mind that only a limited number of optimizers support 41 sparse gradients: currently it's :class:`optim.SGD` (`CUDA` and `CPU`), 42 :class:`optim.SparseAdam` (`CUDA` and `CPU`) and :class:`optim.Adagrad` (`CPU`) 45 With :attr:`padding_idx` set, the embedding vector at 46 :attr:`padding_idx` is initialized to all zeros. However, note that this 47 vector can be modified afterwards, e.g., using a customized 48 initialization method, and thus changing the vector used to pad the 49 output. The gradient for this vector from :class:`~torch.nn.Embedding` 54 >>> # an Embedding module containing 10 tensors of size 3 55 >>> embedding = nn.Embedding(10, 3) 56 >>> # a batch of 2 samples of 4 indices each 57 >>> input = torch.LongTensor([[1,2,4,5],[4,3,2,9]]) 59 tensor([[[-0.0251, -1.6902, 0.7172], 60 [-0.6431, 0.0748, 0.6969], 61 [ 1.4970, 1.3448, -0.9685], 62 [-0.3677, -2.7265, -0.1685]], 64 [[ 1.4970, 1.3448, -0.9685], 65 [ 0.4362, -0.4004, 0.9400], 66 [-0.6431, 0.0748, 0.6969], 67 [ 0.9124, -2.3616, 1.1151]]]) 70 >>> # example with padding_idx 71 >>> embedding = nn.Embedding(10, 3, padding_idx=0) 72 >>> input = torch.LongTensor([[0,2,0,5]]) 74 tensor([[[ 0.0000, 0.0000, 0.0000], 75 [ 0.1535, -2.0309, 0.9315], 76 [ 0.0000, 0.0000, 0.0000], 77 [-0.1655, 0.9897, 0.0635]]]) 79 __constants__ = [
'num_embeddings',
'embedding_dim',
'padding_idx',
'max_norm',
80 'norm_type',
'scale_grad_by_freq',
'sparse',
'_weight']
    def __init__(self, num_embeddings, embedding_dim, padding_idx=None,
                 max_norm=None, norm_type=2., scale_grad_by_freq=False,
                 sparse=False, _weight=None):
        super(Embedding, self).__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        if padding_idx is not None:
            assert padding_idx < self.num_embeddings, \
                'Padding_idx must be within num_embeddings'
            assert padding_idx >= -self.num_embeddings, \
                'Padding_idx must be within num_embeddings'
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        if _weight is None:
            self.weight = Parameter(torch.Tensor(num_embeddings, embedding_dim))
            self.reset_parameters()
        else:
            assert list(_weight.shape) == [num_embeddings, embedding_dim], \
                'Shape of weight does not match num_embeddings and embedding_dim'
            self.weight = Parameter(_weight)
        self.sparse = sparse

    def reset_parameters(self):
        # initialize the weights from N(0, 1), then zero out the padding vector if one is set
        init.normal_(self.weight)
        if self.padding_idx is not None:
            with torch.no_grad():
                self.weight[self.padding_idx].fill_(0)
    def forward(self, input):
        return F.embedding(
            input, self.weight, self.padding_idx, self.max_norm,
            self.norm_type, self.scale_grad_by_freq, self.sparse)
    def extra_repr(self):
        # only report non-default options
        s = '{num_embeddings}, {embedding_dim}'
        if self.padding_idx is not None:
            s += ', padding_idx={padding_idx}'
        if self.max_norm is not None:
            s += ', max_norm={max_norm}'
        if self.norm_type != 2:
            s += ', norm_type={norm_type}'
        if self.scale_grad_by_freq is not False:
            s += ', scale_grad_by_freq={scale_grad_by_freq}'
        if self.sparse is not False:
            s += ', sparse=True'
        return s.format(**self.__dict__)
    @classmethod
    def from_pretrained(cls, embeddings, freeze=True, padding_idx=None,
                        max_norm=None, norm_type=2., scale_grad_by_freq=False,
                        sparse=False):
        r"""Creates Embedding instance from given 2-dimensional FloatTensor.

        Args:
            embeddings (Tensor): FloatTensor containing weights for the Embedding.
                First dimension is being passed to Embedding as ``num_embeddings``, second as ``embedding_dim``.
            freeze (boolean, optional): If ``True``, the tensor does not get updated in the learning process.
                Equivalent to ``embedding.weight.requires_grad = False``. Default: ``True``
            padding_idx (int, optional): See module initialization documentation.
            max_norm (float, optional): See module initialization documentation.
            norm_type (float, optional): See module initialization documentation. Default ``2``.
            scale_grad_by_freq (boolean, optional): See module initialization documentation. Default ``False``.
            sparse (bool, optional): See module initialization documentation.

        Examples::

            >>> # FloatTensor containing pretrained weights
            >>> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]])
            >>> embedding = nn.Embedding.from_pretrained(weight)
            >>> # Get embeddings for index 1
            >>> input = torch.LongTensor([1])
            >>> embedding(input)
            tensor([[ 4.0000,  5.1000,  6.3000]])
        """
        assert embeddings.dim() == 2, \
            'Embeddings parameter is expected to be 2-dimensional'
        rows, cols = embeddings.shape
        embedding = cls(
            num_embeddings=rows,
            embedding_dim=cols,
            _weight=embeddings,
            padding_idx=padding_idx,
            max_norm=max_norm,
            norm_type=norm_type,
            scale_grad_by_freq=scale_grad_by_freq,
            sparse=sparse)
        embedding.weight.requires_grad = not freeze
        return embedding
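
# Illustrative sketch, not part of the original module: how the sparse-gradient note
# above is typically exercised. It relies only on public APIs (nn.Embedding,
# torch.optim.SparseAdam); the helper name `_sparse_embedding_example` is hypothetical
# and exists purely for illustration.
def _sparse_embedding_example():
    import torch
    import torch.nn as nn

    # sparse=True makes the gradient w.r.t. `weight` a sparse tensor, so the optimizer
    # must be one of the few that accept sparse gradients (e.g. SparseAdam).
    embedding = nn.Embedding(1000, 16, sparse=True)
    optimizer = torch.optim.SparseAdam(embedding.parameters())

    indices = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])
    loss = embedding(indices).sum()
    loss.backward()          # embedding.weight.grad is a sparse tensor here
    optimizer.step()
    optimizer.zero_grad()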
178 r"""Computes sums or means of 'bags' of embeddings, without instantiating the 179 intermediate embeddings. 181 For bags of constant length, this class 183 * with ``mode="sum"`` is equivalent to :class:`~torch.nn.Embedding` followed by ``torch.sum(dim=0)``, 184 * with ``mode="mean"`` is equivalent to :class:`~torch.nn.Embedding` followed by ``torch.mean(dim=0)``, 185 * with ``mode="max"`` is equivalent to :class:`~torch.nn.Embedding` followed by ``torch.max(dim=0)``. 187 However, :class:`~torch.nn.EmbeddingBag` is much more time and memory efficient than using a chain of these 191 num_embeddings (int): size of the dictionary of embeddings 192 embedding_dim (int): the size of each embedding vector 193 max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm` 194 is renormalized to have norm :attr:`max_norm`. 195 norm_type (float, optional): The p of the p-norm to compute for the :attr:`max_norm` option. Default ``2``. 196 scale_grad_by_freq (boolean, optional): if given, this will scale gradients by the inverse of frequency of 197 the words in the mini-batch. Default ``False``. 198 Note: this option is not supported when ``mode="max"``. 199 mode (string, optional): ``"sum"``, ``"mean"`` or ``"max"``. Specifies the way to reduce the bag. 201 sparse (bool, optional): if ``True``, gradient w.r.t. :attr:`weight` matrix will be a sparse tensor. See 202 Notes for more details regarding sparse gradients. Note: this option is not 203 supported when ``mode="max"``. 206 weight (Tensor): the learnable weights of the module of shape `(num_embeddings, embedding_dim)` 207 initialized from :math:`\mathcal{N}(0, 1)`. 209 Inputs: :attr:`input` (LongTensor) and :attr:`offsets` (LongTensor, optional) 211 - If :attr:`input` is 2D of shape `(B, N)`, 213 it will be treated as ``B`` bags (sequences) each of fixed length ``N``, and 214 this will return ``B`` values aggregated in a way depending on the :attr:`mode`. 215 :attr:`offsets` is ignored and required to be ``None`` in this case. 217 - If :attr:`input` is 1D of shape `(N)`, 219 it will be treated as a concatenation of multiple bags (sequences). 220 :attr:`offsets` is required to be a 1D tensor containing the 221 starting index positions of each bag in :attr:`input`. Therefore, 222 for :attr:`offsets` of shape `(B)`, :attr:`input` will be viewed as 223 having ``B`` bags. Empty bags (i.e., having 0-length) will have 224 returned vectors filled by zeros. 226 Output shape: `(B, embedding_dim)` 230 >>> # an Embedding module containing 10 tensors of size 3 231 >>> embedding_sum = nn.EmbeddingBag(10, 3, mode='sum') 232 >>> # a batch of 2 samples of 4 indices each 233 >>> input = torch.LongTensor([1,2,4,5,4,3,2,9]) 234 >>> offsets = torch.LongTensor([0,4]) 235 >>> embedding_sum(input, offsets) 236 tensor([[-0.8861, -5.4350, -0.0523], 237 [ 1.1306, -2.5798, -1.0044]]) 239 __constants__ = [
'num_embeddings, embedding_dim',
'max_norm',
'norm_type',
240 'scale_grad_by_freq',
'mode',
'sparse',
'_weight']
    def __init__(self, num_embeddings, embedding_dim,
                 max_norm=None, norm_type=2., scale_grad_by_freq=False,
                 mode='mean', sparse=False, _weight=None):
        super(EmbeddingBag, self).__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        if _weight is None:
            self.weight = Parameter(torch.Tensor(num_embeddings, embedding_dim))
            self.reset_parameters()
        else:
            assert list(_weight.shape) == [num_embeddings, embedding_dim], \
                'Shape of weight does not match num_embeddings and embedding_dim'
            self.weight = Parameter(_weight)
        self.mode = mode
        self.sparse = sparse

    def reset_parameters(self):
        # initialize the weights from N(0, 1)
        init.normal_(self.weight)
    def forward(self, input, offsets=None):
        return F.embedding_bag(input, self.weight, offsets,
                               self.max_norm, self.norm_type,
                               self.scale_grad_by_freq, self.mode, self.sparse)
    def extra_repr(self):
        # only report non-default options, plus the reduction mode
        s = '{num_embeddings}, {embedding_dim}'
        if self.max_norm is not None:
            s += ', max_norm={max_norm}'
        if self.norm_type != 2:
            s += ', norm_type={norm_type}'
        if self.scale_grad_by_freq is not False:
            s += ', scale_grad_by_freq={scale_grad_by_freq}'
        s += ', mode={mode}'
        return s.format(**self.__dict__)
    @classmethod
    def from_pretrained(cls, embeddings, freeze=True, max_norm=None,
                        norm_type=2., scale_grad_by_freq=False,
                        mode='mean', sparse=False):
        r"""Creates EmbeddingBag instance from given 2-dimensional FloatTensor.

        Args:
            embeddings (Tensor): FloatTensor containing weights for the EmbeddingBag.
                First dimension is being passed to EmbeddingBag as 'num_embeddings', second as 'embedding_dim'.
            freeze (boolean, optional): If ``True``, the tensor does not get updated in the learning process.
                Equivalent to ``embeddingbag.weight.requires_grad = False``. Default: ``True``
            max_norm (float, optional): See module initialization documentation. Default: ``None``
            norm_type (float, optional): See module initialization documentation. Default ``2``.
            scale_grad_by_freq (boolean, optional): See module initialization documentation. Default ``False``.
            mode (string, optional): See module initialization documentation. Default: ``"mean"``
            sparse (bool, optional): See module initialization documentation. Default: ``False``.

        Examples::

            >>> # FloatTensor containing pretrained weights
            >>> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]])
            >>> embeddingbag = nn.EmbeddingBag.from_pretrained(weight)
            >>> # Get embeddings for index 1
            >>> input = torch.LongTensor([[1, 0]])
            >>> embeddingbag(input)
            tensor([[ 2.5000,  3.7000,  4.6500]])
        """
        assert embeddings.dim() == 2, \
            'Embeddings parameter is expected to be 2-dimensional'
        rows, cols = embeddings.shape
        embeddingbag = cls(
            num_embeddings=rows,
            embedding_dim=cols,
            _weight=embeddings,
            max_norm=max_norm,
            norm_type=norm_type,
            scale_grad_by_freq=scale_grad_by_freq,
            mode=mode,
            sparse=sparse)
        embeddingbag.weight.requires_grad = not freeze
        return embeddingbag
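
# Illustrative sketch, not part of the original module: checks the equivalence stated in the
# EmbeddingBag docstring, i.e. that ``mode="sum"`` over fixed-length bags matches nn.Embedding
# followed by a sum over the bag dimension. The helper name
# `_embedding_bag_equivalence_example` is hypothetical and only for illustration.
def _embedding_bag_equivalence_example():
    import torch
    import torch.nn as nn

    bag = nn.EmbeddingBag(10, 3, mode='sum')
    # reuse the same weights in a plain Embedding via the public from_pretrained constructor
    emb = nn.Embedding.from_pretrained(bag.weight.detach().clone())

    indices = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])  # 2 bags of fixed length 4
    out_bag = bag(indices)                 # 2D input: offsets must be None
    out_emb = emb(indices).sum(dim=1)      # Embedding lookup, then sum over each bag
    assert torch.allclose(out_bag, out_emb)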