Caffe2 - Python API
A deep learning, cross-platform ML framework
translate.py
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package translate
# Module caffe2.python.models.seq2seq.translate
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
from future.utils import viewitems
import logging
import numpy as np
import sys

from caffe2.python import core, rnn_cell, workspace
from caffe2.python.models.seq2seq.beam_search import BeamSearchForwardOnly
from caffe2.python.models.seq2seq.seq2seq_model_helper import Seq2SeqModelHelper
import caffe2.python.models.seq2seq.seq2seq_util as seq2seq_util


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stderr))

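# Helper: a WeightedSum over per-model outputs, with every value paired with
# the same scalar weight blob. Used below to average log-probabilities and
# attention weights across the ensemble.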
def _weighted_sum(model, values, weight, output_name):
    values_weights = zip(values, [weight] * len(values))
    values_weights_flattened = [x for v_w in values_weights for x in v_w]
    return model.net.WeightedSum(
        values_weights_flattened,
        output_name,
    )


class Seq2SeqModelCaffe2EnsembleDecoder(object):

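    # Seq2SeqModelCaffe2EnsembleDecoder builds a single forward-only
    # beam-search decoding net over an ensemble of seq2seq checkpoints that
    # share the same source and target vocabularies.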
    def scope(self, scope_name, blob_name):
        return (
            scope_name + '/' + blob_name
            if scope_name is not None
            else blob_name
        )

    def _build_decoder(
        self,
        model,
        step_model,
        model_params,
        scope,
        previous_tokens,
        timestep,
        fake_seq_lengths,
    ):
        attention_type = model_params['attention']
        assert attention_type in ['none', 'regular']
        use_attention = (attention_type != 'none')

        with core.NameScope(scope):
            encoder_embeddings = seq2seq_util.build_embeddings(
                model=model,
                vocab_size=self.source_vocab_size,
                embedding_size=model_params['encoder_embedding_size'],
                name='encoder_embeddings',
                freeze_embeddings=False,
            )

        (
            encoder_outputs,
            weighted_encoder_outputs,
            final_encoder_hidden_states,
            final_encoder_cell_states,
            encoder_units_per_layer,
        ) = seq2seq_util.build_embedding_encoder(
            model=model,
            encoder_params=model_params['encoder_type'],
            num_decoder_layers=len(model_params['decoder_layer_configs']),
            inputs=self.encoder_inputs,
            input_lengths=self.encoder_lengths,
            vocab_size=self.source_vocab_size,
            embeddings=encoder_embeddings,
            embedding_size=model_params['encoder_embedding_size'],
            use_attention=use_attention,
            num_gpus=0,
            forward_only=True,
            scope=scope,
        )
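        # Replicate encoder-side blobs beam_size times along the batch axis so
        # the single-step decoder below scores all beam hypotheses at once.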
        with core.NameScope(scope):
            if use_attention:
                # [max_source_length, beam_size, encoder_output_dim]
                encoder_outputs = model.net.Tile(
                    encoder_outputs,
                    'encoder_outputs_tiled',
                    tiles=self.beam_size,
                    axis=1,
                )

            if weighted_encoder_outputs is not None:
                weighted_encoder_outputs = model.net.Tile(
                    weighted_encoder_outputs,
                    'weighted_encoder_outputs_tiled',
                    tiles=self.beam_size,
                    axis=1,
                )

            decoder_embeddings = seq2seq_util.build_embeddings(
                model=model,
                vocab_size=self.target_vocab_size,
                embedding_size=model_params['decoder_embedding_size'],
                name='decoder_embeddings',
                freeze_embeddings=False,
            )
            embedded_tokens_t_prev = step_model.net.Gather(
                [decoder_embeddings, previous_tokens],
                'embedded_tokens_t_prev',
            )

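        # Stack forward-only LSTM cells for the decoder; layer 0 consumes the
        # token embedding, deeper layers consume the previous layer's output.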
        decoder_cells = []
        decoder_units_per_layer = []
        for i, layer_config in enumerate(model_params['decoder_layer_configs']):
            num_units = layer_config['num_units']
            decoder_units_per_layer.append(num_units)
            if i == 0:
                input_size = model_params['decoder_embedding_size']
            else:
                input_size = (
                    model_params['decoder_layer_configs'][i - 1]['num_units']
                )

            cell = rnn_cell.LSTMCell(
                forward_only=True,
                input_size=input_size,
                hidden_size=num_units,
                forget_bias=0.0,
                memory_optimization=False,
            )
            decoder_cells.append(cell)

        with core.NameScope(scope):
            if final_encoder_hidden_states is not None:
                for i in range(len(final_encoder_hidden_states)):
                    if final_encoder_hidden_states[i] is not None:
                        final_encoder_hidden_states[i] = model.net.Tile(
                            final_encoder_hidden_states[i],
                            'final_encoder_hidden_tiled_{}'.format(i),
                            tiles=self.beam_size,
                            axis=1,
                        )
            if final_encoder_cell_states is not None:
                for i in range(len(final_encoder_cell_states)):
                    if final_encoder_cell_states[i] is not None:
                        final_encoder_cell_states[i] = model.net.Tile(
                            final_encoder_cell_states[i],
                            'final_encoder_cell_tiled_{}'.format(i),
                            tiles=self.beam_size,
                            axis=1,
                        )
            initial_states = \
                seq2seq_util.build_initial_rnn_decoder_states(
                    model=model,
                    encoder_units_per_layer=encoder_units_per_layer,
                    decoder_units_per_layer=decoder_units_per_layer,
                    final_encoder_hidden_states=final_encoder_hidden_states,
                    final_encoder_cell_states=final_encoder_cell_states,
                    use_attention=use_attention,
                )

        attention_decoder = seq2seq_util.LSTMWithAttentionDecoder(
            encoder_outputs=encoder_outputs,
            encoder_output_dim=encoder_units_per_layer[-1],
            encoder_lengths=None,
            vocab_size=self.target_vocab_size,
            attention_type=attention_type,
            embedding_size=model_params['decoder_embedding_size'],
            decoder_num_units=decoder_units_per_layer[-1],
            decoder_cells=decoder_cells,
            weighted_encoder_outputs=weighted_encoder_outputs,
            name=scope,
        )
        states_prev = step_model.net.AddExternalInputs(*[
            '{}/{}_prev'.format(scope, s)
            for s in attention_decoder.get_state_names()
        ])
        decoder_outputs, states = attention_decoder.apply(
            model=step_model,
            input_t=embedded_tokens_t_prev,
            seq_lengths=fake_seq_lengths,
            states=states_prev,
            timestep=timestep,
        )

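        # Wire each recurrent state into the beam search: state_prev_link reads
        # the state from the previous step, state_link exposes the new state.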
        state_configs = [
            BeamSearchForwardOnly.StateConfig(
                initial_value=initial_state,
                state_prev_link=BeamSearchForwardOnly.LinkConfig(
                    blob=state_prev,
                    offset=0,
                    window=1,
                ),
                state_link=BeamSearchForwardOnly.LinkConfig(
                    blob=state,
                    offset=1,
                    window=1,
                ),
            )
            for initial_state, state_prev, state in zip(
                initial_states,
                states_prev,
                states,
            )
        ]

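        # Project decoder outputs onto the target vocabulary and take the log
        # of the softmax to obtain per-token log-probabilities for this step.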
        with core.NameScope(scope):
            decoder_outputs_flattened, _ = step_model.net.Reshape(
                [decoder_outputs],
                [
                    'decoder_outputs_flattened',
                    'decoder_outputs_and_contexts_combination_old_shape',
                ],
                shape=[-1, attention_decoder.get_output_dim()],
            )
            output_logits = seq2seq_util.output_projection(
                model=step_model,
                decoder_outputs=decoder_outputs_flattened,
                decoder_output_size=attention_decoder.get_output_dim(),
                target_vocab_size=self.target_vocab_size,
                decoder_softmax_size=model_params['decoder_softmax_size'],
            )
            # [1, beam_size, target_vocab_size]
            output_probs = step_model.net.Softmax(
                output_logits,
                'output_probs',
            )
            output_log_probs = step_model.net.Log(
                output_probs,
                'output_log_probs',
            )
            if use_attention:
                attention_weights = attention_decoder.get_attention_weights()
            else:
                attention_weights = step_model.net.ConstantFill(
                    [self.encoder_inputs],
                    'zero_attention_weights_tmp_1',
                    value=0.0,
                )
                attention_weights = step_model.net.Transpose(
                    attention_weights,
                    'zero_attention_weights_tmp_2',
                )
                attention_weights = step_model.net.Tile(
                    attention_weights,
                    'zero_attention_weights_tmp',
                    tiles=self.beam_size,
                    axis=0,
                )

        return (
            state_configs,
            output_log_probs,
            attention_weights,
        )

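    # Per-token additive rewards for beam scoring: every generated word gets
    # word_reward, UNK additionally gets unk_reward, and the special
    # PAD/GO/EOS tokens get no reward.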
    def build_word_rewards(self, vocab_size, word_reward, unk_reward):
        word_rewards = np.full([vocab_size], word_reward, dtype=np.float32)
        word_rewards[seq2seq_util.PAD_ID] = 0
        word_rewards[seq2seq_util.GO_ID] = 0
        word_rewards[seq2seq_util.EOS_ID] = 0
        word_rewards[seq2seq_util.UNK_ID] = word_reward + unk_reward
        return word_rewards

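    # The constructor builds the whole forward-only decoding net once: it
    # instantiates a shared beam search and plugs every ensemble member's
    # decoder step into it.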
    def __init__(
        self,
        translate_params,
    ):
        self.models = translate_params['ensemble_models']
        decoding_params = translate_params['decoding_params']
        self.beam_size = decoding_params['beam_size']

        assert len(self.models) > 0
        source_vocab = self.models[0]['source_vocab']
        target_vocab = self.models[0]['target_vocab']
        for model in self.models:
            assert model['source_vocab'] == source_vocab
            assert model['target_vocab'] == target_vocab

        self.source_vocab_size = len(source_vocab)
        self.target_vocab_size = len(target_vocab)

        self.decoder_scope_names = [
            'model{}'.format(i) for i in range(len(self.models))
        ]

        self.model = Seq2SeqModelHelper(init_params=True)

        self.encoder_inputs = self.model.net.AddExternalInput('encoder_inputs')
        self.encoder_lengths = self.model.net.AddExternalInput(
            'encoder_lengths'
        )
        self.max_output_seq_len = self.model.net.AddExternalInput(
            'max_output_seq_len'
        )

        fake_seq_lengths = self.model.param_init_net.ConstantFill(
            [],
            'fake_seq_lengths',
            shape=[self.beam_size],
            value=100000,
            dtype=core.DataType.INT32,
        )

        beam_decoder = BeamSearchForwardOnly(
            beam_size=self.beam_size,
            model=self.model,
            go_token_id=seq2seq_util.GO_ID,
            eos_token_id=seq2seq_util.EOS_ID,
        )
        step_model = beam_decoder.get_step_model()

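        # Build one decoder step per ensemble member under its own name scope,
        # collecting the beam-search state configs, per-step log-probabilities
        # and attention weights from each of them.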
        state_configs = []
        output_log_probs = []
        attention_weights = []
        for model, scope_name in zip(
            self.models,
            self.decoder_scope_names,
        ):
            (
                state_configs_per_decoder,
                output_log_probs_per_decoder,
                attention_weights_per_decoder,
            ) = self._build_decoder(
                model=self.model,
                step_model=step_model,
                model_params=model['model_params'],
                scope=scope_name,
                previous_tokens=beam_decoder.get_previous_tokens(),
                timestep=beam_decoder.get_timestep(),
                fake_seq_lengths=fake_seq_lengths,
            )
            state_configs.extend(state_configs_per_decoder)
            output_log_probs.append(output_log_probs_per_decoder)
            if attention_weights_per_decoder is not None:
                attention_weights.append(attention_weights_per_decoder)

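        # Average the ensemble: each decoder's attention weights and
        # log-probabilities are summed with weight 1 / num_decoders.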
        assert len(attention_weights) > 0
        num_decoders_with_attention_blob = (
            self.model.param_init_net.ConstantFill(
                [],
                'num_decoders_with_attention_blob',
                value=1 / float(len(attention_weights)),
                shape=[1],
            )
        )
        # [beam_size, encoder_length, 1]
        attention_weights_average = _weighted_sum(
            model=step_model,
            values=attention_weights,
            weight=num_decoders_with_attention_blob,
            output_name='attention_weights_average',
        )

        num_decoders_blob = self.model.param_init_net.ConstantFill(
            [],
            'num_decoders_blob',
            value=1 / float(len(output_log_probs)),
            shape=[1],
        )
        # [beam_size, target_vocab_size]
        output_log_probs_average = _weighted_sum(
            model=step_model,
            values=output_log_probs,
            weight=num_decoders_blob,
            output_name='output_log_probs_average',
        )
        word_rewards = self.model.param_init_net.ConstantFill(
            [],
            'word_rewards',
            shape=[self.target_vocab_size],
            value=0.0,
            dtype=core.DataType.FLOAT,
        )
        (
            self.output_token_beam_list,
            self.output_prev_index_beam_list,
            self.output_score_beam_list,
            self.output_attention_weights_beam_list,
        ) = beam_decoder.apply(
            inputs=self.encoder_inputs,
            length=self.max_output_seq_len,
            log_probs=output_log_probs_average,
            attentions=attention_weights_average,
            state_configs=state_configs,
            data_dependencies=[],
            word_rewards=word_rewards,
        )

        workspace.RunNetOnce(self.model.param_init_net)
        workspace.FeedBlob(
            'word_rewards',
            self.build_word_rewards(
                vocab_size=self.target_vocab_size,
                word_reward=translate_params['decoding_params']['word_reward'],
                unk_reward=translate_params['decoding_params']['unk_reward'],
            )
        )

        workspace.CreateNet(
            self.model.net,
            input_blobs=[
                str(self.encoder_inputs),
                str(self.encoder_lengths),
                str(self.max_output_seq_len),
            ],
        )

        logger.info('Params created: ')
        for param in self.model.params:
            logger.info(param)

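    # Load each ensemble member's parameters from its minidb checkpoint and
    # remap blob names from the training prefix ('gpu_0/') into this model's
    # per-decoder scope ('model<i>/').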
    def load_models(self):
        db_reader = 'reader'
        for model, scope_name in zip(
            self.models,
            self.decoder_scope_names,
        ):
            params_for_current_model = [
                param
                for param in self.model.GetAllParams()
                if str(param).startswith(scope_name)
            ]
            assert workspace.RunOperatorOnce(core.CreateOperator(
                'CreateDB',
                [], [db_reader],
                db=model['model_file'],
                db_type='minidb')
            ), 'Failed to create db {}'.format(model['model_file'])
            assert workspace.RunOperatorOnce(core.CreateOperator(
                'Load',
                [db_reader],
                params_for_current_model,
                load_all=1,
                add_prefix=scope_name + '/',
                strip_prefix='gpu_0/',
            ))
            logger.info('Model {} is loaded from a checkpoint {}'.format(
                scope_name,
                model['model_file'],
            ))

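    # Run one beam-search decode for a numberized (token-id) source sentence.
    # The input is fed reversed, the decoding net runs for up to
    # max_output_seq_len steps, and the best-scoring finished hypothesis is
    # traced back through the beam.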
    def decode(self, numberized_input, max_output_seq_len):
        workspace.FeedBlob(
            self.encoder_inputs,
            np.array([
                [token_id] for token_id in reversed(numberized_input)
            ]).astype(dtype=np.int32),
        )
        workspace.FeedBlob(
            self.encoder_lengths,
            np.array([len(numberized_input)]).astype(dtype=np.int32),
        )
        workspace.FeedBlob(
            self.max_output_seq_len,
            np.array([max_output_seq_len]).astype(dtype=np.int64),
        )

        workspace.RunNet(self.model.net)

        num_steps = max_output_seq_len
        score_beam_list = workspace.FetchBlob(self.output_score_beam_list)
        token_beam_list = (
            workspace.FetchBlob(self.output_token_beam_list)
        )
        prev_index_beam_list = (
            workspace.FetchBlob(self.output_prev_index_beam_list)
        )

        attention_weights_beam_list = (
            workspace.FetchBlob(self.output_attention_weights_beam_list)
        )
        best_indices = (num_steps, 0)
        for i in range(num_steps + 1):
            for hyp_index in range(self.beam_size):
                if (
                    (
                        token_beam_list[i][hyp_index][0] ==
                        seq2seq_util.EOS_ID or
                        i == num_steps
                    ) and
                    (
                        score_beam_list[i][hyp_index][0] >
                        score_beam_list[best_indices[0]][best_indices[1]][0]
                    )
                ):
                    best_indices = (i, hyp_index)

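        # Walk backwards from the best (step, hypothesis) pair, following
        # prev_index_beam_list to recover the generated tokens and their
        # attention weights, then reverse them into source order.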
        i, hyp_index = best_indices
        output = []
        attention_weights_per_token = []
        best_score = -score_beam_list[i][hyp_index][0]
        while i > 0:
            output.append(token_beam_list[i][hyp_index][0])
            attention_weights_per_token.append(
                attention_weights_beam_list[i][hyp_index]
            )
            hyp_index = prev_index_beam_list[i][hyp_index][0]
            i -= 1

        attention_weights_per_token = reversed(attention_weights_per_token)
        # encoder_inputs are reversed, see get_batch func
        attention_weights_per_token = [
            list(reversed(attention_weights))[:len(numberized_input)]
            for attention_weights in attention_weights_per_token
        ]
        output = list(reversed(output))
        return output, attention_weights_per_token, best_score


def run_seq2seq_beam_decoder(args, model_params, decoding_params):
    source_vocab = seq2seq_util.gen_vocab(
        args.source_corpus,
        args.unk_threshold,
    )
    logger.info('Source vocab size {}'.format(len(source_vocab)))
    target_vocab = seq2seq_util.gen_vocab(
        args.target_corpus,
        args.unk_threshold,
    )
    inversed_target_vocab = {v: k for (k, v) in viewitems(target_vocab)}
    logger.info('Target vocab size {}'.format(len(target_vocab)))

    decoder = Seq2SeqModelCaffe2EnsembleDecoder(
        translate_params=dict(
            ensemble_models=[dict(
                source_vocab=source_vocab,
                target_vocab=target_vocab,
                model_params=model_params,
                model_file=args.checkpoint,
            )],
            decoding_params=decoding_params,
        ),
    )
    decoder.load_models()

    for line in sys.stdin:
        numerized_source_sentence = seq2seq_util.get_numberized_sentence(
            line,
            source_vocab,
        )
        translation, alignment, _ = decoder.decode(
            numerized_source_sentence,
            2 * len(numerized_source_sentence) + 5,
        )
        print(' '.join([inversed_target_vocab[tid] for tid in translation]))


def main():
    parser = argparse.ArgumentParser(
        description='Caffe2: Seq2Seq Translation',
    )
    parser.add_argument('--source-corpus', type=str, default=None,
                        help='Path to source corpus in a text file format. Each '
                        'line in the file should contain a single sentence',
                        required=True)
    parser.add_argument('--target-corpus', type=str, default=None,
                        help='Path to target corpus in a text file format',
                        required=True)
    parser.add_argument('--unk-threshold', type=int, default=50,
                        help='Frequency threshold below which a token is '
                        'labeled as the unknown token')

    parser.add_argument('--use-bidirectional-encoder', action='store_true',
                        help='Set flag to use bidirectional recurrent network '
                        'in encoder')
    parser.add_argument('--use-attention', action='store_true',
                        help='Set flag to use seq2seq with attention model')
    parser.add_argument('--encoder-cell-num-units', type=int, default=512,
                        help='Number of cell units per encoder layer')
    parser.add_argument('--encoder-num-layers', type=int, default=2,
                        help='Number of encoder layers')
    parser.add_argument('--decoder-cell-num-units', type=int, default=512,
                        help='Number of cell units in the decoder layer')
    parser.add_argument('--decoder-num-layers', type=int, default=2,
                        help='Number of decoder layers')
    parser.add_argument('--encoder-embedding-size', type=int, default=256,
                        help='Size of embedding in the encoder layer')
    parser.add_argument('--decoder-embedding-size', type=int, default=512,
                        help='Size of embedding in the decoder layer')
    parser.add_argument('--decoder-softmax-size', type=int, default=None,
                        help='Size of softmax layer in the decoder')

    parser.add_argument('--beam-size', type=int, default=6,
                        help='Size of beam for the decoder')
    parser.add_argument('--word-reward', type=float, default=0.0,
                        help='Reward for each word generated.')
    parser.add_argument('--unk-reward', type=float, default=0.0,
                        help='Reward for each UNK token generated. '
                        'Typically should be negative.')

    parser.add_argument('--checkpoint', type=str, default=None,
                        help='Path to checkpoint', required=True)

    args = parser.parse_args()

    encoder_layer_configs = [
        dict(
            num_units=args.encoder_cell_num_units,
        ),
    ] * args.encoder_num_layers

    if args.use_bidirectional_encoder:
        assert args.encoder_cell_num_units % 2 == 0
        encoder_layer_configs[0]['num_units'] //= 2

    decoder_layer_configs = [
        dict(
            num_units=args.decoder_cell_num_units,
        ),
    ] * args.decoder_num_layers

    run_seq2seq_beam_decoder(
        args,
        model_params=dict(
            attention=('regular' if args.use_attention else 'none'),
            decoder_layer_configs=decoder_layer_configs,
            encoder_type=dict(
                encoder_layer_configs=encoder_layer_configs,
                use_bidirectional_encoder=args.use_bidirectional_encoder,
            ),
            encoder_embedding_size=args.encoder_embedding_size,
            decoder_embedding_size=args.decoder_embedding_size,
            decoder_softmax_size=args.decoder_softmax_size,
        ),
        decoding_params=dict(
            beam_size=args.beam_size,
            word_reward=args.word_reward,
            unk_reward=args.unk_reward,
        ),
    )


if __name__ == '__main__':
    main()
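A minimal usage sketch (not part of translate.py): it drives the ensemble decoder programmatically instead of going through main() and argparse, mirroring what run_seq2seq_beam_decoder does above. The corpus paths, the checkpoint path, and the model_params values below are placeholders and must match the configuration the checkpoint was trained with.

from caffe2.python.models.seq2seq import seq2seq_util
from caffe2.python.models.seq2seq.translate import Seq2SeqModelCaffe2EnsembleDecoder

# Vocabularies must be generated from the same corpora used for training.
source_vocab = seq2seq_util.gen_vocab('source.txt', 50)
target_vocab = seq2seq_util.gen_vocab('target.txt', 50)

decoder = Seq2SeqModelCaffe2EnsembleDecoder(
    translate_params=dict(
        ensemble_models=[dict(
            source_vocab=source_vocab,
            target_vocab=target_vocab,
            model_params=dict(
                attention='regular',
                decoder_layer_configs=[dict(num_units=512)],
                encoder_type=dict(
                    encoder_layer_configs=[dict(num_units=512)],
                    use_bidirectional_encoder=False,
                ),
                encoder_embedding_size=256,
                decoder_embedding_size=512,
                decoder_softmax_size=None,
            ),
            model_file='checkpoint.minidb',
        )],
        decoding_params=dict(beam_size=6, word_reward=0.0, unk_reward=0.0),
    ),
)
decoder.load_models()

tokens = seq2seq_util.get_numberized_sentence('a sample sentence', source_vocab)
translation, alignment, score = decoder.decode(tokens, 2 * len(tokens) + 5)
print(translation)  # list of target-vocabulary token ids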