Caffe2 - Python API
A deep learning, cross-platform ML framework
word_language_model.py
# The model is from here:
# https://github.com/pytorch/examples/blob/master/word_language_model/model.py

import torch
import torch.nn as nn


class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers,
                 dropout=0.5, tie_weights=False, batchsize=2):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError("""An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
        self.hidden = self.init_hidden(batchsize)

    @staticmethod
    def repackage_hidden(h):
        """Detach hidden states from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(RNNModel.repackage_hidden(v) for v in h)

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        # Flatten (seq_len, batch, nhid) to 2-D so the linear decoder sees one row per token.
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        # Stash a detached copy of the hidden state so gradients do not flow across batches.
        self.hidden = RNNModel.repackage_hidden(hidden)
        return decoded.view(output.size(0), output.size(1), decoded.size(1))

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            # An LSTM carries both a hidden state and a cell state.
            return (weight.new(self.nlayers, bsz, self.nhid).zero_(),
                    weight.new(self.nlayers, bsz, self.nhid).zero_())
        else:
            return weight.new(self.nlayers, bsz, self.nhid).zero_()
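
For reference, a minimal usage sketch, not part of the original file; the vocabulary size, dimensions, and sequence length below are illustrative assumptions. The model returns per-token vocabulary scores and, as a side effect of forward, keeps a detached hidden state in self.hidden for the next batch.

# Illustrative hyperparameters: ntoken=10000, ninp=nhid=200 (equal, as required
# when tie_weights=True), a 2-layer LSTM, batch size 20, sequence length 35.
model = RNNModel('LSTM', ntoken=10000, ninp=200, nhid=200, nlayers=2,
                 dropout=0.5, tie_weights=True, batchsize=20)
hidden = model.init_hidden(20)

# Input is a (seq_len, batch) tensor of token indices.
data = torch.randint(0, 10000, (35, 20), dtype=torch.long)
scores = model(data, hidden)        # (seq_len, batch, ntoken)

# On the next batch, feed the detached state the model stashed in self.hidden,
# which truncates backpropagation at the batch boundary.
scores = model(data, model.hidden)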