Caffe2 - Python API
A deep learning, cross-platform ML framework
seq2seq_util.py
## @package seq2seq_util
# Module caffe2.python.examples.seq2seq_util
""" A bunch of util functions to build Seq2Seq models with Caffe2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import collections
from future.utils import viewitems

import caffe2.proto.caffe2_pb2 as caffe2_pb2
from caffe2.python import attention, core, rnn_cell, brew


PAD_ID = 0
PAD = '<PAD>'
GO_ID = 1
GO = '<GO>'
EOS_ID = 2
EOS = '<EOS>'
UNK_ID = 3
UNK = '<UNK>'


def gen_vocab(corpus, unk_threshold):
    vocab = collections.defaultdict(lambda: len(vocab))
    freqs = collections.defaultdict(lambda: 0)
    # Add the special tokens first so that their IDs stay consistent
    # with PAD_ID, GO_ID, EOS_ID and UNK_ID defined above.
    vocab[PAD]
    vocab[GO]
    vocab[EOS]
    vocab[UNK]

    with open(corpus) as f:
        for sentence in f:
            tokens = sentence.strip().split()
            for token in tokens:
                freqs[token] += 1
    for token, freq in viewitems(freqs):
        if freq > unk_threshold:
            vocab[token]

    return vocab


def get_numberized_sentence(sentence, vocab):
    numerized_sentence = []
    for token in sentence.strip().split():
        if token in vocab:
            numerized_sentence.append(vocab[token])
        else:
            numerized_sentence.append(vocab[UNK])
    return numerized_sentence


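# Usage sketch (editorial, not part of the original module; 'train.txt'
# is a hypothetical corpus path). Tokens seen more than `unk_threshold`
# times get their own IDs; everything else maps to the UNK ID:
#
#   vocab = gen_vocab('train.txt', unk_threshold=1)
#   ids = get_numberized_sentence('hello world', vocab)
#   # e.g. ids == [4, 5] if both tokens cleared the threshold; a token
#   # below the threshold would map to UNK_ID == 3.

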
def rnn_unidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    """ Unidirectional LSTM encoder."""
    with core.NameScope(scope):
        initial_cell_state = model.param_init_net.ConstantFill(
            [],
            'initial_cell_state',
            shape=[num_units],
            value=0.0,
        )
        initial_hidden_state = model.param_init_net.ConstantFill(
            [],
            'initial_hidden_state',
            shape=[num_units],
            value=0.0,
        )

    cell = rnn_cell.LSTMCell(
        input_size=input_size,
        hidden_size=num_units,
        forget_bias=0.0,
        memory_optimization=False,
        name=(scope + '/' if scope else '') + 'lstm',
        forward_only=forward_only,
    )

    dropout_ratio = (
        None if dropout_keep_prob is None else (1.0 - dropout_keep_prob)
    )
    if dropout_ratio is not None:
        cell = rnn_cell.DropoutCell(
            internal_cell=cell,
            dropout_ratio=dropout_ratio,
            name=(scope + '/' if scope else '') + 'dropout',
            forward_only=forward_only,
            is_test=False,
        )

    # LSTM outputs are laid out as (hidden_all, hidden_final, cell_all,
    # cell_final), so index 0 is the per-step sequence output and
    # indices 1 and 3 are the final hidden and cell states.
    outputs_with_grads = []
    if return_sequence_output:
        outputs_with_grads.append(0)
    if return_final_state:
        outputs_with_grads.extend([1, 3])

    outputs, (_, final_hidden_state, _, final_cell_state) = (
        cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=input_lengths,
            initial_states=(initial_hidden_state, initial_cell_state),
            outputs_with_grads=outputs_with_grads,
        )
    )
    return outputs, final_hidden_state, final_cell_state


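# Wiring sketch (editorial; assumes a ModelHelper and input blobs named
# 'inputs' with shape (T, N, D) and 'lengths' with shape (N,) already
# exist; none of these names come from this module):
#
#   from caffe2.python import model_helper
#   model = model_helper.ModelHelper(name='encoder')
#   outputs, h_final, c_final = rnn_unidirectional_layer(
#       model, 'inputs', 'lengths', input_size=128, num_units=256,
#       dropout_keep_prob=None, forward_only=False,
#       return_sequence_output=True, return_final_state=True,
#       scope='enc')

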
def rnn_bidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    outputs_fw, final_hidden_fw, final_cell_fw = rnn_unidirectional_layer(
        model,
        inputs,
        input_lengths,
        input_size,
        num_units,
        dropout_keep_prob,
        forward_only,
        return_sequence_output,
        return_final_state,
        scope=(scope + '/' if scope else '') + 'fw',
    )
    # The backward direction reuses the unidirectional layer: reverse the
    # padded sequences, run the LSTM forward, then reverse the outputs.
    with core.NameScope(scope):
        reversed_inputs = model.net.ReversePackedSegs(
            [inputs, input_lengths],
            ['reversed_inputs'],
        )
    outputs_bw, final_hidden_bw, final_cell_bw = rnn_unidirectional_layer(
        model,
        reversed_inputs,
        input_lengths,
        input_size,
        num_units,
        dropout_keep_prob,
        forward_only,
        return_sequence_output,
        return_final_state,
        scope=(scope + '/' if scope else '') + 'bw',
    )
    with core.NameScope(scope):
        outputs_bw = model.net.ReversePackedSegs(
            [outputs_bw, input_lengths],
            ['outputs_bw'],
        )

    # Concatenate forward and backward results
    if return_sequence_output:
        with core.NameScope(scope):
            outputs, _ = model.net.Concat(
                [outputs_fw, outputs_bw],
                ['outputs', 'outputs_dim'],
                axis=2,
            )
    else:
        outputs = None

    if return_final_state:
        with core.NameScope(scope):
            final_hidden_state, _ = model.net.Concat(
                [final_hidden_fw, final_hidden_bw],
                ['final_hidden_state', 'final_hidden_state_dim'],
                axis=2,
            )
            final_cell_state, _ = model.net.Concat(
                [final_cell_fw, final_cell_bw],
                ['final_cell_state', 'final_cell_state_dim'],
                axis=2,
            )
    else:
        final_hidden_state = None
        final_cell_state = None

    return outputs, final_hidden_state, final_cell_state


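# Note (editorial, derived from the Concat calls above): since forward
# and backward results are concatenated on axis=2, sequence outputs have
# shape (T, N, 2 * num_units) and final states (1, N, 2 * num_units).
# This is why a bidirectional first encoder layer reports 2 * num_units
# as its output size in build_embedding_encoder below.

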
def build_embeddings(
    model,
    vocab_size,
    embedding_size,
    name,
    freeze_embeddings,
):
    embeddings = model.param_init_net.GaussianFill(
        [],
        name,
        shape=[vocab_size, embedding_size],
        std=0.1,
    )
    # Frozen embeddings are excluded from model.params, so the optimizer
    # will not update them.
    if not freeze_embeddings:
        model.params.append(embeddings)
    return embeddings


def get_layer_scope(scope, layer_type, i):
    prefix = (scope + '/' if scope else '') + layer_type
    return '{}/layer{}'.format(prefix, i)


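# For example, get_layer_scope('enc', 'encoder', 0) returns
# 'enc/encoder/layer0', and get_layer_scope(None, 'decoder_dropout', 1)
# returns 'decoder_dropout/layer1'.

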
def build_embedding_encoder(
    model,
    encoder_params,
    num_decoder_layers,
    inputs,
    input_lengths,
    vocab_size,
    embeddings,
    embedding_size,
    use_attention,
    num_gpus=0,
    forward_only=False,
    scope=None,
):
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_encoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_encoder_inputs'],
            )
        else:
            # The embedding lookup runs on CPU; only the gathered result
            # is copied over to GPU.
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_encoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_encoder_inputs_cpu'],
                )
            embedded_encoder_inputs = model.CopyCPUToGPU(
                embedded_encoder_inputs_cpu,
                'embedded_encoder_inputs',
            )

    layer_inputs = embedded_encoder_inputs
    layer_input_size = embedding_size
    encoder_units_per_layer = []
    final_encoder_hidden_states = []
    final_encoder_cell_states = []

    num_encoder_layers = len(encoder_params['encoder_layer_configs'])
    use_bidirectional_encoder = encoder_params.get(
        'use_bidirectional_encoder',
        False,
    )

    for i, layer_config in enumerate(encoder_params['encoder_layer_configs']):

        if use_bidirectional_encoder and i == 0:
            layer_func = rnn_bidirectional_layer
            output_dims = 2 * layer_config['num_units']
        else:
            layer_func = rnn_unidirectional_layer
            output_dims = layer_config['num_units']
        encoder_units_per_layer.append(output_dims)

        is_final_layer = (i == num_encoder_layers - 1)

        dropout_keep_prob = layer_config.get(
            'dropout_keep_prob',
            None,
        )

        # Only the top num_decoder_layers encoder layers need to expose
        # their final states; those states seed the decoder layers in
        # build_initial_rnn_decoder_states.
        return_final_state = i >= (num_encoder_layers - num_decoder_layers)
        (
            layer_outputs,
            final_layer_hidden_state,
            final_layer_cell_state,
        ) = layer_func(
            model=model,
            inputs=layer_inputs,
            input_lengths=input_lengths,
            input_size=layer_input_size,
            num_units=layer_config['num_units'],
            dropout_keep_prob=dropout_keep_prob,
            forward_only=forward_only,
            return_sequence_output=(not is_final_layer) or use_attention,
            return_final_state=return_final_state,
            scope=get_layer_scope(scope, 'encoder', i),
        )

        if not is_final_layer:
            layer_inputs = layer_outputs
            layer_input_size = output_dims
        final_encoder_hidden_states.append(final_layer_hidden_state)
        final_encoder_cell_states.append(final_layer_cell_state)

    encoder_outputs = layer_outputs
    weighted_encoder_outputs = None

    return (
        encoder_outputs,
        weighted_encoder_outputs,
        final_encoder_hidden_states,
        final_encoder_cell_states,
        encoder_units_per_layer,
    )

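# Usage sketch (editorial; assumes 'model' is a ModelHelper, the blobs
# 'encoder_inputs' / 'encoder_lengths' exist, and 'embeddings' was created
# with build_embeddings; the config dict below is illustrative only):
#
#   encoder_params = {
#       'use_bidirectional_encoder': True,
#       'encoder_layer_configs': [
#           {'num_units': 256},
#           {'num_units': 512, 'dropout_keep_prob': 0.8},
#       ],
#   }
#   (encoder_outputs, _, final_hidden, final_cell,
#    units_per_layer) = build_embedding_encoder(
#       model, encoder_params, num_decoder_layers=2,
#       inputs='encoder_inputs', input_lengths='encoder_lengths',
#       vocab_size=30000, embeddings=embeddings, embedding_size=128,
#       use_attention=True)

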
class LSTMWithAttentionDecoder(object):

    def scope(self, name):
        return self.name + '/' + name if self.name is not None else name

    def _get_attention_type(self, attention_type_as_string):
        if attention_type_as_string == 'regular':
            return attention.AttentionType.Regular
        elif attention_type_as_string == 'recurrent':
            return attention.AttentionType.Recurrent
        else:
            assert False, 'Unknown type ' + attention_type_as_string

    def __init__(
        self,
        encoder_outputs,
        encoder_output_dim,
        encoder_lengths,
        vocab_size,
        attention_type,
        embedding_size,
        decoder_num_units,
        decoder_cells,
        residual_output_layers=None,
        name=None,
        weighted_encoder_outputs=None,
    ):
        self.name = name
        self.num_layers = len(decoder_cells)
        if attention_type == 'none':
            self.cell = rnn_cell.MultiRNNCell(
                decoder_cells,
                name=self.scope('decoder'),
                residual_output_layers=residual_output_layers,
            )
            self.use_attention = False
            self.decoder_output_dim = decoder_num_units
            self.output_indices = self.cell.output_indices
        else:
            decoder_cell = rnn_cell.MultiRNNCell(
                decoder_cells,
                name=self.scope('decoder'),
                residual_output_layers=residual_output_layers,
            )
            self.cell = rnn_cell.AttentionCell(
                encoder_output_dim=encoder_output_dim,
                encoder_outputs=encoder_outputs,
                encoder_lengths=encoder_lengths,
                decoder_cell=decoder_cell,
                decoder_state_dim=decoder_num_units,
                name=self.scope('attention_decoder'),
                attention_type=self._get_attention_type(attention_type),
                weighted_encoder_outputs=weighted_encoder_outputs,
                attention_memory_optimization=True,
            )
            self.use_attention = True
            # With attention, the per-step output is the decoder state
            # concatenated with the attention context over the encoder.
            self.decoder_output_dim = decoder_num_units + encoder_output_dim

            self.output_indices = decoder_cell.output_indices
            # Expose the attention-weighted encoder context as an
            # additional output.
            self.output_indices.append(2 * self.num_layers)

    def get_state_names(self):
        return self.cell.get_state_names()

    def get_outputs_with_grads(self):
        # sequence (all) output locations are at twice their state index
        return [2 * i for i in self.output_indices]

    def get_output_dim(self):
        return self.decoder_output_dim

    def get_attention_weights(self):
        assert self.use_attention
        # [batch_size, encoder_length, 1]
        return self.cell.get_attention_weights()

    def apply(
        self,
        model,
        input_t,
        seq_lengths,
        states,
        timestep,
    ):
        return self.cell.apply(
            model=model,
            input_t=input_t,
            seq_lengths=seq_lengths,
            states=states,
            timestep=timestep,
        )

    def apply_over_sequence(
        self,
        model,
        inputs,
        seq_lengths,
        initial_states,
    ):
        return self.cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=seq_lengths,
            initial_states=initial_states,
            outputs_with_grads=self.get_outputs_with_grads(),
        )


def build_initial_rnn_decoder_states(
    model,
    encoder_units_per_layer,
    decoder_units_per_layer,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    use_attention,
):
    num_encoder_layers = len(encoder_units_per_layer)
    num_decoder_layers = len(decoder_units_per_layer)
    # When the encoder is deeper than the decoder, decoder layer i is
    # seeded from encoder layer i + offset, i.e. from the top
    # num_decoder_layers encoder layers.
    if num_encoder_layers > num_decoder_layers:
        offset = num_encoder_layers - num_decoder_layers
    else:
        offset = 0

    initial_states = []
    for i, decoder_num_units in enumerate(decoder_units_per_layer):

        if (
            final_encoder_hidden_states and
            len(final_encoder_hidden_states) > (i + offset)
        ):
            final_encoder_hidden_state = final_encoder_hidden_states[i + offset]
        else:
            final_encoder_hidden_state = None

        if final_encoder_hidden_state is None:
            decoder_initial_hidden_state = model.param_init_net.ConstantFill(
                [],
                'decoder_initial_hidden_state_{}'.format(i),
                shape=[decoder_num_units],
                value=0.0,
            )
            model.params.append(decoder_initial_hidden_state)
        elif decoder_num_units != encoder_units_per_layer[i + offset]:
            # Encoder and decoder layer widths differ, so project the
            # encoder state to the decoder size.
            decoder_initial_hidden_state = brew.fc(
                model,
                final_encoder_hidden_state,
                'decoder_initial_hidden_state_{}'.format(i),
                encoder_units_per_layer[i + offset],
                decoder_num_units,
                axis=2,
            )
        else:
            decoder_initial_hidden_state = final_encoder_hidden_state
        initial_states.append(decoder_initial_hidden_state)

        if (
            final_encoder_cell_states and
            len(final_encoder_cell_states) > (i + offset)
        ):
            final_encoder_cell_state = final_encoder_cell_states[i + offset]
        else:
            final_encoder_cell_state = None

        if final_encoder_cell_state is None:
            decoder_initial_cell_state = model.param_init_net.ConstantFill(
                [],
                'decoder_initial_cell_state_{}'.format(i),
                shape=[decoder_num_units],
                value=0.0,
            )
            model.params.append(decoder_initial_cell_state)
        elif decoder_num_units != encoder_units_per_layer[i + offset]:
            decoder_initial_cell_state = brew.fc(
                model,
                final_encoder_cell_state,
                'decoder_initial_cell_state_{}'.format(i),
                encoder_units_per_layer[i + offset],
                decoder_num_units,
                axis=2,
            )
        else:
            decoder_initial_cell_state = final_encoder_cell_state
        initial_states.append(decoder_initial_cell_state)

    if use_attention:
        initial_attention_weighted_encoder_context = (
            model.param_init_net.ConstantFill(
                [],
                'initial_attention_weighted_encoder_context',
                shape=[encoder_units_per_layer[-1]],
                value=0.0,
            )
        )
        model.params.append(initial_attention_weighted_encoder_context)
        initial_states.append(initial_attention_weighted_encoder_context)

    return initial_states

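# Worked example (editorial): with a 3-layer encoder and a 2-layer
# decoder, offset == 1, so decoder layer 0 is seeded from encoder layer 1
# and decoder layer 1 from encoder layer 2 (the top layer). The returned
# initial_states list holds [h0, c0, h1, c1], plus the initial attention
# context when use_attention is True.

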
def build_embedding_decoder(
    model,
    decoder_layer_configs,
    inputs,
    input_lengths,
    encoder_lengths,
    encoder_outputs,
    weighted_encoder_outputs,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    encoder_units_per_layer,
    vocab_size,
    embeddings,
    embedding_size,
    attention_type,
    forward_only,
    num_gpus=0,
    scope=None,
):
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_decoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_decoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_decoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_decoder_inputs_cpu'],
                )
            embedded_decoder_inputs = model.CopyCPUToGPU(
                embedded_decoder_inputs_cpu,
                'embedded_decoder_inputs',
            )

    decoder_cells = []
    decoder_units_per_layer = []
    for i, layer_config in enumerate(decoder_layer_configs):
        num_units = layer_config['num_units']
        decoder_units_per_layer.append(num_units)

        if i == 0:
            input_size = embedding_size
        else:
            input_size = decoder_cells[-1].get_output_dim()

        cell = rnn_cell.LSTMCell(
            forward_only=forward_only,
            input_size=input_size,
            hidden_size=num_units,
            forget_bias=0.0,
            memory_optimization=False,
        )

        dropout_keep_prob = layer_config.get('dropout_keep_prob', None)
        if dropout_keep_prob is not None:
            dropout_ratio = 1.0 - dropout_keep_prob
            cell = rnn_cell.DropoutCell(
                internal_cell=cell,
                dropout_ratio=dropout_ratio,
                forward_only=forward_only,
                is_test=False,
                name=get_layer_scope(scope, 'decoder_dropout', i),
            )

        decoder_cells.append(cell)

    states = build_initial_rnn_decoder_states(
        model=model,
        encoder_units_per_layer=encoder_units_per_layer,
        decoder_units_per_layer=decoder_units_per_layer,
        final_encoder_hidden_states=final_encoder_hidden_states,
        final_encoder_cell_states=final_encoder_cell_states,
        use_attention=(attention_type != 'none'),
    )
    attention_decoder = LSTMWithAttentionDecoder(
        encoder_outputs=encoder_outputs,
        encoder_output_dim=encoder_units_per_layer[-1],
        encoder_lengths=encoder_lengths,
        vocab_size=vocab_size,
        attention_type=attention_type,
        embedding_size=embedding_size,
        decoder_num_units=decoder_units_per_layer[-1],
        decoder_cells=decoder_cells,
        weighted_encoder_outputs=weighted_encoder_outputs,
        name=scope,
    )
    decoder_outputs, _ = attention_decoder.apply_over_sequence(
        model=model,
        inputs=embedded_decoder_inputs,
        seq_lengths=input_lengths,
        initial_states=states,
    )

    # We do softmax over the whole sequence, giving a
    # (max_length in the batch * batch_size) x decoder output size tensor.
    # The -1 is used because max_length is not known at net-building time.
    decoder_outputs_flattened, _ = model.net.Reshape(
        [decoder_outputs],
        [
            'decoder_outputs_flattened',
            'decoder_outputs_and_contexts_combination_old_shape',
        ],
        shape=[-1, attention_decoder.get_output_dim()],
    )

    decoder_outputs = decoder_outputs_flattened
    decoder_output_dim = attention_decoder.get_output_dim()

    return (decoder_outputs, decoder_output_dim)


def output_projection(
    model,
    decoder_outputs,
    decoder_output_size,
    target_vocab_size,
    decoder_softmax_size,
):
    # Optionally bottleneck the decoder output before the (potentially
    # large) vocabulary projection.
    if decoder_softmax_size is not None:
        decoder_outputs = brew.fc(
            model,
            decoder_outputs,
            'decoder_outputs_scaled',
            dim_in=decoder_output_size,
            dim_out=decoder_softmax_size,
        )
        decoder_output_size = decoder_softmax_size

    output_projection_w = model.param_init_net.XavierFill(
        [],
        'output_projection_w',
        shape=[target_vocab_size, decoder_output_size],
    )

    output_projection_b = model.param_init_net.XavierFill(
        [],
        'output_projection_b',
        shape=[target_vocab_size],
    )
    model.params.extend([
        output_projection_w,
        output_projection_b,
    ])
    output_logits = model.net.FC(
        [
            decoder_outputs,
            output_projection_w,
            output_projection_b,
        ],
        ['output_logits'],
    )
    return output_logits
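

# End-to-end sketch (editorial; blob names and hyperparameters are
# illustrative, and encoder_params / decoder_layer_configs follow the
# config-dict shape used above):
#
#   enc_emb = build_embeddings(model, 30000, 128, 'enc_emb', False)
#   dec_emb = build_embeddings(model, 20000, 128, 'dec_emb', False)
#   (enc_out, w_enc_out, enc_h, enc_c,
#    enc_units) = build_embedding_encoder(
#       model, encoder_params, 2, 'src_ids', 'src_lengths',
#       30000, enc_emb, 128, use_attention=True)
#   dec_out, dec_dim = build_embedding_decoder(
#       model, decoder_layer_configs, 'tgt_ids', 'tgt_lengths',
#       'src_lengths', enc_out, w_enc_out, enc_h, enc_c, enc_units,
#       20000, dec_emb, 128, attention_type='regular', forward_only=False)
#   logits = output_projection(model, dec_out, dec_dim, 20000,
#                              decoder_softmax_size=None)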