## @package translate
# Module caffe2.python.models.seq2seq.translate
from abc import ABCMeta, abstractmethod
import argparse
from future.utils import viewitems
import logging
import numpy as np
from six import with_metaclass
import sys
from caffe2.python import core, rnn_cell, workspace
from caffe2.python.models.seq2seq.beam_search import BeamSearchForwardOnly
from caffe2.python.models.seq2seq.seq2seq_model_helper import Seq2SeqModelHelper
import caffe2.python.models.seq2seq.seq2seq_util as seq2seq_util
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stderr))
def _weighted_sum(model, values, weight, output_name):
values_weights = zip(values, [weight] * len(values))
values_weights_flattened = [x for v_w in values_weights for x in v_w]
return model.net.WeightedSum(
values_weights_flattened,
output_name,
)
class Seq2SeqModelCaffe2EnsembleDecoderBase(with_metaclass(ABCMeta, object)):
@abstractmethod
def get_model_file(self, model):
pass
@abstractmethod
def get_db_type(self):
pass
def build_word_rewards(self, vocab_size, word_reward, unk_reward):
word_rewards = np.full([vocab_size], word_reward, dtype=np.float32)
word_rewards[seq2seq_util.PAD_ID] = 0
word_rewards[seq2seq_util.GO_ID] = 0
word_rewards[seq2seq_util.EOS_ID] = 0
word_rewards[seq2seq_util.UNK_ID] = word_reward + unk_reward
return word_rewards
def load_models(self):
db_reader = 'reader'
for model, scope_name in zip(
self.models,
self.decoder_scope_names,
):
params_for_current_model = [
param
for param in self.model.GetAllParams()
if str(param).startswith(scope_name)
]
assert workspace.RunOperatorOnce(core.CreateOperator(
'CreateDB',
[], [db_reader],
db=self.get_model_file(model),
db_type=self.get_db_type())
), 'Failed to create db {}'.format(self.get_model_file(model))
assert workspace.RunOperatorOnce(core.CreateOperator(
'Load',
[db_reader],
params_for_current_model,
load_all=1,
add_prefix=scope_name + '/',
strip_prefix='gpu_0/',
))
logger.info('Model {} is loaded from a checkpoint {}'.format(
scope_name, self.get_model_file(model)))
class Seq2SeqModelCaffe2EnsembleDecoder(Seq2SeqModelCaffe2EnsembleDecoderBase):
def get_model_file(self, model):
return model['model_file']
def get_db_type(self):
return 'minidb'
def scope(self, scope_name, blob_name):
return (
scope_name + '/' + blob_name
if scope_name is not None
else blob_name
)
def _build_decoder(
self,
model,
step_model,
model_params,
scope,
previous_tokens,
timestep,
fake_seq_lengths,
):
attention_type = model_params['attention']
assert attention_type in ['none', 'regular']
use_attention = (attention_type != 'none')
with core.NameScope(scope):
encoder_embeddings = seq2seq_util.build_embeddings(
model=model,
vocab_size=self.source_vocab_size,
embedding_size=model_params['encoder_embedding_size'],
name='encoder_embeddings',
freeze_embeddings=False,
)
(
encoder_outputs,
weighted_encoder_outputs,
final_encoder_hidden_states,
final_encoder_cell_states,
encoder_units_per_layer,
) = seq2seq_util.build_embedding_encoder(
model=model,
encoder_params=model_params['encoder_type'],
num_decoder_layers=len(model_params['decoder_layer_configs']),
inputs=self.encoder_inputs,
input_lengths=self.encoder_lengths,
vocab_size=self.source_vocab_size,
embeddings=encoder_embeddings,
embedding_size=model_params['encoder_embedding_size'],
use_attention=use_attention,
num_gpus=0,
forward_only=True,
scope=scope,
)
with core.NameScope(scope):
if use_attention:
# [max_source_length, beam_size, encoder_output_dim]
encoder_outputs = model.net.Tile(
encoder_outputs,
'encoder_outputs_tiled',
tiles=self.beam_size,
axis=1,
)
if weighted_encoder_outputs is not None:
weighted_encoder_outputs = model.net.Tile(
weighted_encoder_outputs,
'weighted_encoder_outputs_tiled',
tiles=self.beam_size,
axis=1,
)
decoder_embeddings = seq2seq_util.build_embeddings(
model=model,
vocab_size=self.target_vocab_size,
embedding_size=model_params['decoder_embedding_size'],
name='decoder_embeddings',
freeze_embeddings=False,
)
embedded_tokens_t_prev = step_model.net.Gather(
[decoder_embeddings, previous_tokens],
'embedded_tokens_t_prev',
)
decoder_cells = []
decoder_units_per_layer = []
for i, layer_config in enumerate(model_params['decoder_layer_configs']):
num_units = layer_config['num_units']
decoder_units_per_layer.append(num_units)
if i == 0:
input_size = model_params['decoder_embedding_size']
else:
input_size = (
model_params['decoder_layer_configs'][i - 1]['num_units']
)
cell = rnn_cell.LSTMCell(
forward_only=True,
input_size=input_size,
hidden_size=num_units,
forget_bias=0.0,
memory_optimization=False,
)
decoder_cells.append(cell)
with core.NameScope(scope):
if final_encoder_hidden_states is not None:
for i in range(len(final_encoder_hidden_states)):
if final_encoder_hidden_states[i] is not None:
final_encoder_hidden_states[i] = model.net.Tile(
final_encoder_hidden_states[i],
'final_encoder_hidden_tiled_{}'.format(i),
tiles=self.beam_size,
axis=1,
)
if final_encoder_cell_states is not None:
for i in range(len(final_encoder_cell_states)):
if final_encoder_cell_states[i] is not None:
final_encoder_cell_states[i] = model.net.Tile(
final_encoder_cell_states[i],
'final_encoder_cell_tiled_{}'.format(i),
tiles=self.beam_size,
axis=1,
)
initial_states = \
seq2seq_util.build_initial_rnn_decoder_states(
model=model,
encoder_units_per_layer=encoder_units_per_layer,
decoder_units_per_layer=decoder_units_per_layer,
final_encoder_hidden_states=final_encoder_hidden_states,
final_encoder_cell_states=final_encoder_cell_states,
use_attention=use_attention,
)
attention_decoder = seq2seq_util.LSTMWithAttentionDecoder(
encoder_outputs=encoder_outputs,
encoder_output_dim=encoder_units_per_layer[-1],
encoder_lengths=None,
vocab_size=self.target_vocab_size,
attention_type=attention_type,
embedding_size=model_params['decoder_embedding_size'],
decoder_num_units=decoder_units_per_layer[-1],
decoder_cells=decoder_cells,
weighted_encoder_outputs=weighted_encoder_outputs,
name=scope,
)
states_prev = step_model.net.AddExternalInputs(*[
'{}/{}_prev'.format(scope, s)
for s in attention_decoder.get_state_names()
])
decoder_outputs, states = attention_decoder.apply(
model=step_model,
input_t=embedded_tokens_t_prev,
seq_lengths=fake_seq_lengths,
states=states_prev,
timestep=timestep,
)
state_configs = [
BeamSearchForwardOnly.StateConfig(
initial_value=initial_state,
state_prev_link=BeamSearchForwardOnly.LinkConfig(
blob=state_prev,
offset=0,
window=1,
),
state_link=BeamSearchForwardOnly.LinkConfig(
blob=state,
offset=1,
window=1,
),
)
for initial_state, state_prev, state in zip(
initial_states,
states_prev,
states,
)
]
with core.NameScope(scope):
decoder_outputs_flattened, _ = step_model.net.Reshape(
[decoder_outputs],
[
'decoder_outputs_flattened',
'decoder_outputs_and_contexts_combination_old_shape',
],
shape=[-1, attention_decoder.get_output_dim()],
)
output_logits = seq2seq_util.output_projection(
model=step_model,
decoder_outputs=decoder_outputs_flattened,
decoder_output_size=attention_decoder.get_output_dim(),
target_vocab_size=self.target_vocab_size,
decoder_softmax_size=model_params['decoder_softmax_size'],
)
# [1, beam_size, target_vocab_size]
output_probs = step_model.net.Softmax(
output_logits,
'output_probs',
)
output_log_probs = step_model.net.Log(
output_probs,
'output_log_probs',
)
if use_attention:
attention_weights = attention_decoder.get_attention_weights()
else:
attention_weights = step_model.net.ConstantFill(
[self.encoder_inputs],
'zero_attention_weights_tmp_1',
value=0.0,
)
attention_weights = step_model.net.Transpose(
attention_weights,
'zero_attention_weights_tmp_2',
)
attention_weights = step_model.net.Tile(
attention_weights,
'zero_attention_weights_tmp',
tiles=self.beam_size,
axis=0,
)
return (
state_configs,
output_log_probs,
attention_weights,
)
def __init__(
self,
translate_params,
):
self.models = translate_params['ensemble_models']
decoding_params = translate_params['decoding_params']
self.beam_size = decoding_params['beam_size']
assert len(self.models) > 0
source_vocab = self.models[0]['source_vocab']
target_vocab = self.models[0]['target_vocab']
for model in self.models:
assert model['source_vocab'] == source_vocab
assert model['target_vocab'] == target_vocab
self.source_vocab_size = len(source_vocab)
self.target_vocab_size = len(target_vocab)
self.decoder_scope_names = [
'model{}'.format(i) for i in range(len(self.models))
]
self.model = Seq2SeqModelHelper(init_params=True)
self.encoder_inputs = self.model.net.AddExternalInput('encoder_inputs')
self.encoder_lengths = self.model.net.AddExternalInput(
'encoder_lengths'
)
self.max_output_seq_len = self.model.net.AddExternalInput(
'max_output_seq_len'
Loading ...