Caffe2 - Python API
A deep learning, cross-platform ML framework
seq2seq_util.py
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package seq2seq_util
# Module caffe2.python.examples.seq2seq_util
"""Utility functions for building Seq2Seq models with Caffe2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import collections
from future.utils import viewitems

import caffe2.proto.caffe2_pb2 as caffe2_pb2
from caffe2.python import attention, core, rnn_cell, brew


PAD_ID = 0
PAD = '<PAD>'
GO_ID = 1
GO = '<GO>'
EOS_ID = 2
EOS = '<EOS>'
UNK_ID = 3
UNK = '<UNK>'


def gen_vocab(corpus, unk_threshold):
    vocab = collections.defaultdict(lambda: len(vocab))
    freqs = collections.defaultdict(lambda: 0)
    # Add the special tokens first so they receive the reserved IDs
    # (PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3).
    vocab[PAD]
    vocab[GO]
    vocab[EOS]
    vocab[UNK]

    with open(corpus) as f:
        for sentence in f:
            tokens = sentence.strip().split()
            for token in tokens:
                freqs[token] += 1
    for token, freq in viewitems(freqs):
        if freq > unk_threshold:
            vocab[token]

    return vocab


def get_numberized_sentence(sentence, vocab):
    numerized_sentence = []
    for token in sentence.strip().split():
        if token in vocab:
            numerized_sentence.append(vocab[token])
        else:
            numerized_sentence.append(vocab[UNK])
    return numerized_sentence

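A quick usage sketch for the two helpers above. The corpus path and threshold are illustrative only: gen_vocab reserves IDs 0-3 for the special tokens and then admits every token whose corpus frequency exceeds unk_threshold, while get_numberized_sentence maps out-of-vocabulary tokens to UNK.

# Hypothetical usage; 'train.txt' must be a whitespace-tokenized text file.
vocab = gen_vocab('train.txt', unk_threshold=1)
ids = get_numberized_sentence('hello world', vocab)
# Tokens at or below the frequency threshold come back as UNK_ID (3).
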

def rnn_unidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    """Unidirectional LSTM encoder."""
    with core.NameScope(scope):
        initial_cell_state = model.param_init_net.ConstantFill(
            [],
            'initial_cell_state',
            shape=[num_units],
            value=0.0,
        )
        initial_hidden_state = model.param_init_net.ConstantFill(
            [],
            'initial_hidden_state',
            shape=[num_units],
            value=0.0,
        )

    cell = rnn_cell.LSTMCell(
        input_size=input_size,
        hidden_size=num_units,
        forget_bias=0.0,
        memory_optimization=False,
        name=(scope + '/' if scope else '') + 'lstm',
        forward_only=forward_only,
    )

    dropout_ratio = (
        None if dropout_keep_prob is None else (1.0 - dropout_keep_prob)
    )
    if dropout_ratio is not None:
        cell = rnn_cell.DropoutCell(
            internal_cell=cell,
            dropout_ratio=dropout_ratio,
            name=(scope + '/' if scope else '') + 'dropout',
            forward_only=forward_only,
            is_test=False,
        )

    outputs_with_grads = []
    if return_sequence_output:
        outputs_with_grads.append(0)
    if return_final_state:
        outputs_with_grads.extend([1, 3])

    outputs, (_, final_hidden_state, _, final_cell_state) = (
        cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=input_lengths,
            initial_states=(initial_hidden_state, initial_cell_state),
            outputs_with_grads=outputs_with_grads,
        )
    )
    return outputs, final_hidden_state, final_cell_state

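A minimal construction sketch for rnn_unidirectional_layer, assuming a fresh ModelHelper. Blob names and sizes are invented; inputs is expected as a [seq_len, batch, input_size] float blob and input_lengths as a [batch] int32 blob:

from caffe2.python import model_helper

# Hypothetical setup: a 16-dim input sequence into a 32-unit LSTM.
model = model_helper.ModelHelper(name='encoder_example')
inputs, input_lengths = model.net.AddExternalInput(
    'inputs',         # float32, [seq_len, batch, 16]
    'input_lengths',  # int32, [batch]
)
outputs, final_h, final_c = rnn_unidirectional_layer(
    model=model,
    inputs=inputs,
    input_lengths=input_lengths,
    input_size=16,
    num_units=32,
    dropout_keep_prob=None,   # skip the DropoutCell wrapper
    forward_only=True,        # inference-only net
    return_sequence_output=True,
    return_final_state=True,
    scope='enc',
)
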
def rnn_bidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    outputs_fw, final_hidden_fw, final_cell_fw = rnn_unidirectional_layer(
        model,
        inputs,
        input_lengths,
        input_size,
        num_units,
        dropout_keep_prob,
        forward_only,
        return_sequence_output,
        return_final_state,
        scope=(scope + '/' if scope else '') + 'fw',
    )
    with core.NameScope(scope):
        reversed_inputs = model.net.ReversePackedSegs(
            [inputs, input_lengths],
            ['reversed_inputs'],
        )
    outputs_bw, final_hidden_bw, final_cell_bw = rnn_unidirectional_layer(
        model,
        reversed_inputs,
        input_lengths,
        input_size,
        num_units,
        dropout_keep_prob,
        forward_only,
        return_sequence_output,
        return_final_state,
        scope=(scope + '/' if scope else '') + 'bw',
    )
    with core.NameScope(scope):
        outputs_bw = model.net.ReversePackedSegs(
            [outputs_bw, input_lengths],
            ['outputs_bw'],
        )

    # Concatenate forward and backward results
    if return_sequence_output:
        with core.NameScope(scope):
            outputs, _ = model.net.Concat(
                [outputs_fw, outputs_bw],
                ['outputs', 'outputs_dim'],
                axis=2,
            )
    else:
        outputs = None

    if return_final_state:
        with core.NameScope(scope):
            final_hidden_state, _ = model.net.Concat(
                [final_hidden_fw, final_hidden_bw],
                ['final_hidden_state', 'final_hidden_state_dim'],
                axis=2,
            )
            final_cell_state, _ = model.net.Concat(
                [final_cell_fw, final_cell_bw],
                ['final_cell_state', 'final_cell_state_dim'],
                axis=2,
            )
    else:
        final_hidden_state = None
        final_cell_state = None

    return outputs, final_hidden_state, final_cell_state

def build_embeddings(
    model,
    vocab_size,
    embedding_size,
    name,
    freeze_embeddings,
):
    embeddings = model.param_init_net.GaussianFill(
        [],
        name,
        shape=[vocab_size, embedding_size],
        std=0.1,
    )
    if not freeze_embeddings:
        model.params.append(embeddings)
    return embeddings

def get_layer_scope(scope, layer_type, i):
    prefix = (scope + '/' if scope else '') + layer_type
    return '{}/layer{}'.format(prefix, i)

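For reference, the scoping helper simply produces names like these (argument values are illustrative):

get_layer_scope('model0', 'encoder', 1)      # -> 'model0/encoder/layer1'
get_layer_scope(None, 'decoder_dropout', 0)  # -> 'decoder_dropout/layer0'
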
def build_embedding_encoder(
    model,
    encoder_params,
    num_decoder_layers,
    inputs,
    input_lengths,
    vocab_size,
    embeddings,
    embedding_size,
    use_attention,
    num_gpus=0,
    forward_only=False,
    scope=None,
):
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_encoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_encoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_encoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_encoder_inputs_cpu'],
                )
            embedded_encoder_inputs = model.CopyCPUToGPU(
                embedded_encoder_inputs_cpu,
                'embedded_encoder_inputs',
            )

    layer_inputs = embedded_encoder_inputs
    layer_input_size = embedding_size
    encoder_units_per_layer = []
    final_encoder_hidden_states = []
    final_encoder_cell_states = []

    num_encoder_layers = len(encoder_params['encoder_layer_configs'])
    use_bidirectional_encoder = encoder_params.get(
        'use_bidirectional_encoder',
        False,
    )

    for i, layer_config in enumerate(encoder_params['encoder_layer_configs']):
        if use_bidirectional_encoder and i == 0:
            layer_func = rnn_bidirectional_layer
            output_dims = 2 * layer_config['num_units']
        else:
            layer_func = rnn_unidirectional_layer
            output_dims = layer_config['num_units']
        encoder_units_per_layer.append(output_dims)

        is_final_layer = (i == num_encoder_layers - 1)

        dropout_keep_prob = layer_config.get(
            'dropout_keep_prob',
            None,
        )

        return_final_state = i >= (num_encoder_layers - num_decoder_layers)
        (
            layer_outputs,
            final_layer_hidden_state,
            final_layer_cell_state,
        ) = layer_func(
            model=model,
            inputs=layer_inputs,
            input_lengths=input_lengths,
            input_size=layer_input_size,
            num_units=layer_config['num_units'],
            dropout_keep_prob=dropout_keep_prob,
            forward_only=forward_only,
            return_sequence_output=(not is_final_layer) or use_attention,
            return_final_state=return_final_state,
            scope=get_layer_scope(scope, 'encoder', i),
        )

        if not is_final_layer:
            layer_inputs = layer_outputs
            layer_input_size = output_dims
        final_encoder_hidden_states.append(final_layer_hidden_state)
        final_encoder_cell_states.append(final_layer_cell_state)

    encoder_outputs = layer_outputs
    weighted_encoder_outputs = None

    return (
        encoder_outputs,
        weighted_encoder_outputs,
        final_encoder_hidden_states,
        final_encoder_cell_states,
        encoder_units_per_layer,
    )

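For reference, the encoder_params dict consumed above has this shape. A hedged sketch: only the two keys shown are read by build_embedding_encoder, and the concrete sizes are invented.

# Illustrative encoder_params; keys match what build_embedding_encoder reads.
encoder_params = {
    'use_bidirectional_encoder': True,  # makes the first layer bidirectional
    'encoder_layer_configs': [
        {'num_units': 256, 'dropout_keep_prob': 0.8},
        {'num_units': 512},  # dropout_keep_prob defaults to None (no dropout)
    ],
}
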

class LSTMWithAttentionDecoder(object):

    def scope(self, name):
        return self.name + '/' + name if self.name is not None else name

    def _get_attention_type(self, attention_type_as_string):
        if attention_type_as_string == 'regular':
            return attention.AttentionType.Regular
        elif attention_type_as_string == 'recurrent':
            return attention.AttentionType.Recurrent
        else:
            assert False, 'Unknown type ' + attention_type_as_string

    def __init__(
        self,
        encoder_outputs,
        encoder_output_dim,
        encoder_lengths,
        vocab_size,
        attention_type,
        embedding_size,
        decoder_num_units,
        decoder_cells,
        residual_output_layers=None,
        name=None,
        weighted_encoder_outputs=None,
    ):
        self.name = name
        self.num_layers = len(decoder_cells)
        if attention_type == 'none':
            self.cell = rnn_cell.MultiRNNCell(
                decoder_cells,
                name=self.scope('decoder'),
                residual_output_layers=residual_output_layers,
            )
            self.use_attention = False
            self.decoder_output_dim = decoder_num_units
            self.output_indices = self.cell.output_indices
        else:
            decoder_cell = rnn_cell.MultiRNNCell(
                decoder_cells,
                name=self.scope('decoder'),
                residual_output_layers=residual_output_layers,
            )
            self.cell = rnn_cell.AttentionCell(
                encoder_output_dim=encoder_output_dim,
                encoder_outputs=encoder_outputs,
                encoder_lengths=encoder_lengths,
                decoder_cell=decoder_cell,
                decoder_state_dim=decoder_num_units,
                name=self.scope('attention_decoder'),
                attention_type=self._get_attention_type(attention_type),
                weighted_encoder_outputs=weighted_encoder_outputs,
                attention_memory_optimization=True,
            )
            self.use_attention = True
            self.decoder_output_dim = decoder_num_units + encoder_output_dim

            self.output_indices = decoder_cell.output_indices
            self.output_indices.append(2 * self.num_layers)

    def get_state_names(self):
        return self.cell.get_state_names()

    def get_outputs_with_grads(self):
        # sequence (all) output locations are at twice their state index
        return [2 * i for i in self.output_indices]

    def get_output_dim(self):
        return self.decoder_output_dim

    def get_attention_weights(self):
        assert self.use_attention
        # [batch_size, encoder_length, 1]
        return self.cell.get_attention_weights()

    def apply(
        self,
        model,
        input_t,
        seq_lengths,
        states,
        timestep,
    ):
        return self.cell.apply(
            model=model,
            input_t=input_t,
            seq_lengths=seq_lengths,
            states=states,
            timestep=timestep,
        )

    def apply_over_sequence(
        self,
        model,
        inputs,
        seq_lengths,
        initial_states,
    ):
        return self.cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=seq_lengths,
            initial_states=initial_states,
            outputs_with_grads=self.get_outputs_with_grads(),
        )

def build_initial_rnn_decoder_states(
    model,
    encoder_units_per_layer,
    decoder_units_per_layer,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    use_attention,
):
    num_encoder_layers = len(encoder_units_per_layer)
    num_decoder_layers = len(decoder_units_per_layer)
    if num_encoder_layers > num_decoder_layers:
        offset = num_encoder_layers - num_decoder_layers
    else:
        offset = 0

    initial_states = []
    for i, decoder_num_units in enumerate(decoder_units_per_layer):
        if (
            final_encoder_hidden_states and
            len(final_encoder_hidden_states) > (i + offset)
        ):
            final_encoder_hidden_state = final_encoder_hidden_states[i + offset]
        else:
            final_encoder_hidden_state = None

        if final_encoder_hidden_state is None:
            decoder_initial_hidden_state = model.param_init_net.ConstantFill(
                [],
                'decoder_initial_hidden_state_{}'.format(i),
                shape=[decoder_num_units],
                value=0.0,
            )
            model.params.append(decoder_initial_hidden_state)
        elif decoder_num_units != encoder_units_per_layer[i + offset]:
            decoder_initial_hidden_state = brew.fc(
                model,
                final_encoder_hidden_state,
                'decoder_initial_hidden_state_{}'.format(i),
                encoder_units_per_layer[i + offset],
                decoder_num_units,
                axis=2,
            )
        else:
            decoder_initial_hidden_state = final_encoder_hidden_state
        initial_states.append(decoder_initial_hidden_state)

        if (
            final_encoder_cell_states and
            len(final_encoder_cell_states) > (i + offset)
        ):
            final_encoder_cell_state = final_encoder_cell_states[i + offset]
        else:
            final_encoder_cell_state = None

        if final_encoder_cell_state is None:
            decoder_initial_cell_state = model.param_init_net.ConstantFill(
                [],
                'decoder_initial_cell_state_{}'.format(i),
                shape=[decoder_num_units],
                value=0.0,
            )
            model.params.append(decoder_initial_cell_state)
        elif decoder_num_units != encoder_units_per_layer[i + offset]:
            decoder_initial_cell_state = brew.fc(
                model,
                final_encoder_cell_state,
                'decoder_initial_cell_state_{}'.format(i),
                encoder_units_per_layer[i + offset],
                decoder_num_units,
                axis=2,
            )
        else:
            decoder_initial_cell_state = final_encoder_cell_state
        initial_states.append(decoder_initial_cell_state)

    if use_attention:
        initial_attention_weighted_encoder_context = (
            model.param_init_net.ConstantFill(
                [],
                'initial_attention_weighted_encoder_context',
                shape=[encoder_units_per_layer[-1]],
                value=0.0,
            )
        )
        model.params.append(initial_attention_weighted_encoder_context)
        initial_states.append(initial_attention_weighted_encoder_context)

    return initial_states

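To make the encoder-to-decoder layer alignment concrete, a small worked example (unit counts invented): deeper encoder layers are mapped onto the decoder from the top down, and a brew.fc projection is inserted only where the unit counts disagree.

encoder_units_per_layer = [512, 256, 256, 256]  # e.g. bidirectional 1st layer
decoder_units_per_layer = [256, 128]
offset = len(encoder_units_per_layer) - len(decoder_units_per_layer)  # 2
# decoder layer 0: 256 == encoder_units_per_layer[2] -> states passed through
# decoder layer 1: 128 != encoder_units_per_layer[3] -> projected with brew.fc
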
def build_embedding_decoder(
    model,
    decoder_layer_configs,
    inputs,
    input_lengths,
    encoder_lengths,
    encoder_outputs,
    weighted_encoder_outputs,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    encoder_units_per_layer,
    vocab_size,
    embeddings,
    embedding_size,
    attention_type,
    forward_only,
    num_gpus=0,
    scope=None,
):
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_decoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_decoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_decoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_decoder_inputs_cpu'],
                )
            embedded_decoder_inputs = model.CopyCPUToGPU(
                embedded_decoder_inputs_cpu,
                'embedded_decoder_inputs',
            )

    decoder_cells = []
    decoder_units_per_layer = []
    for i, layer_config in enumerate(decoder_layer_configs):
        num_units = layer_config['num_units']
        decoder_units_per_layer.append(num_units)

        if i == 0:
            input_size = embedding_size
        else:
            input_size = decoder_cells[-1].get_output_dim()

        cell = rnn_cell.LSTMCell(
            forward_only=forward_only,
            input_size=input_size,
            hidden_size=num_units,
            forget_bias=0.0,
            memory_optimization=False,
        )

        dropout_keep_prob = layer_config.get('dropout_keep_prob', None)
        if dropout_keep_prob is not None:
            dropout_ratio = 1.0 - dropout_keep_prob
            cell = rnn_cell.DropoutCell(
                internal_cell=cell,
                dropout_ratio=dropout_ratio,
                forward_only=forward_only,
                is_test=False,
                name=get_layer_scope(scope, 'decoder_dropout', i),
            )

        decoder_cells.append(cell)

    states = build_initial_rnn_decoder_states(
        model=model,
        encoder_units_per_layer=encoder_units_per_layer,
        decoder_units_per_layer=decoder_units_per_layer,
        final_encoder_hidden_states=final_encoder_hidden_states,
        final_encoder_cell_states=final_encoder_cell_states,
        use_attention=(attention_type != 'none'),
    )
    attention_decoder = LSTMWithAttentionDecoder(
        encoder_outputs=encoder_outputs,
        encoder_output_dim=encoder_units_per_layer[-1],
        encoder_lengths=encoder_lengths,
        vocab_size=vocab_size,
        attention_type=attention_type,
        embedding_size=embedding_size,
        decoder_num_units=decoder_units_per_layer[-1],
        decoder_cells=decoder_cells,
        weighted_encoder_outputs=weighted_encoder_outputs,
        name=scope,
    )
    decoder_outputs, _ = attention_decoder.apply_over_sequence(
        model=model,
        inputs=embedded_decoder_inputs,
        seq_lengths=input_lengths,
        initial_states=states,
    )

    # Softmax is applied over the whole sequence at once, so flatten to
    # (max_length_in_batch * batch_size) x decoder_output_dim. The -1 is
    # needed because max_length is not known when the net is built.
    decoder_outputs_flattened, _ = model.net.Reshape(
        [decoder_outputs],
        [
            'decoder_outputs_flattened',
            'decoder_outputs_and_contexts_combination_old_shape',
        ],
        shape=[-1, attention_decoder.get_output_dim()],
    )

    decoder_outputs = decoder_outputs_flattened
    decoder_output_dim = attention_decoder.get_output_dim()

    return (decoder_outputs, decoder_output_dim)

def output_projection(
    model,
    decoder_outputs,
    decoder_output_size,
    target_vocab_size,
    decoder_softmax_size,
):
    if decoder_softmax_size is not None:
        decoder_outputs = brew.fc(
            model,
            decoder_outputs,
            'decoder_outputs_scaled',
            dim_in=decoder_output_size,
            dim_out=decoder_softmax_size,
        )
        decoder_output_size = decoder_softmax_size

    output_projection_w = model.param_init_net.XavierFill(
        [],
        'output_projection_w',
        shape=[target_vocab_size, decoder_output_size],
    )

    output_projection_b = model.param_init_net.XavierFill(
        [],
        'output_projection_b',
        shape=[target_vocab_size],
    )
    model.params.extend([
        output_projection_w,
        output_projection_b,
    ])
    output_logits = model.net.FC(
        [
            decoder_outputs,
            output_projection_w,
            output_projection_b,
        ],
        ['output_logits'],
    )
    return output_logits
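
Finally, a hedged sketch of how the projection is typically consumed at inference time. Softmax is a standard Caffe2 op; model, decoder_outputs, decoder_output_dim, and vocab are assumed to come from the helpers above:

# decoder_outputs / decoder_output_dim as returned by build_embedding_decoder.
output_logits = output_projection(
    model,
    decoder_outputs,
    decoder_output_size=decoder_output_dim,
    target_vocab_size=len(vocab),
    decoder_softmax_size=None,  # skip the extra bottleneck FC
)
# Per-token distribution over the target vocabulary,
# shaped (max_length * batch_size) x target_vocab_size.
output_probs = model.net.Softmax(output_logits, 'output_probs')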