
## @package translate
# Module caffe2.python.models.seq2seq.translate

from abc import ABCMeta, abstractmethod
import argparse
from future.utils import viewitems
import logging
import numpy as np
from six import with_metaclass
import sys

from caffe2.python import core, rnn_cell, workspace
from caffe2.python.models.seq2seq.beam_search import BeamSearchForwardOnly
from caffe2.python.models.seq2seq.seq2seq_model_helper import Seq2SeqModelHelper
import caffe2.python.models.seq2seq.seq2seq_util as seq2seq_util


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stderr))


def _weighted_sum(model, values, weight, output_name):
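    """Return a WeightedSum of `values`, each scaled by the same `weight`.

    The WeightedSum op expects its inputs interleaved as
    [value_0, weight_0, value_1, weight_1, ...], so every value blob is
    paired with the shared weight blob.
    """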
    values_weights_flattened = [x for value in values for x in (value, weight)]
    return model.net.WeightedSum(
        values_weights_flattened,
        output_name,
    )


class Seq2SeqModelCaffe2EnsembleDecoderBase(with_metaclass(ABCMeta, object)):
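    """Shared scaffolding for forward-only ensemble decoders.

    Subclasses specify where each model's checkpoint lives and which db
    type it uses; this base class handles checkpoint loading and the
    per-token word-reward vector used when scoring decoded tokens.
    """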

    @abstractmethod
    def get_model_file(self, model):
        pass

    @abstractmethod
    def get_db_type(self):
        pass

    def build_word_rewards(self, vocab_size, word_reward, unk_reward):
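        """Return a [vocab_size] float32 vector of per-token rewards.

        Every token gets `word_reward`, except PAD/GO/EOS (zero reward)
        and UNK, which additionally receives `unk_reward`.
        """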
        word_rewards = np.full([vocab_size], word_reward, dtype=np.float32)
        word_rewards[seq2seq_util.PAD_ID] = 0
        word_rewards[seq2seq_util.GO_ID] = 0
        word_rewards[seq2seq_util.EOS_ID] = 0
        word_rewards[seq2seq_util.UNK_ID] = word_reward + unk_reward
        return word_rewards

    def load_models(self):
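        """Load every ensemble member's checkpoint into the workspace.

        Each model's blobs are loaded with the 'gpu_0/' prefix stripped
        and the model's scope name (e.g. 'model0/') prepended, so ensemble
        members do not collide.
        """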
        db_reader = 'reader'
        for model, scope_name in zip(
            self.models,
            self.decoder_scope_names,
        ):
            params_for_current_model = [
                param
                for param in self.model.GetAllParams()
                if str(param).startswith(scope_name)
            ]
            assert workspace.RunOperatorOnce(core.CreateOperator(
                'CreateDB',
                [], [db_reader],
                db=self.get_model_file(model),
                db_type=self.get_db_type())
            ), 'Failed to create db {}'.format(self.get_model_file(model))
            assert workspace.RunOperatorOnce(core.CreateOperator(
                'Load',
                [db_reader],
                params_for_current_model,
                load_all=1,
                add_prefix=scope_name + '/',
                strip_prefix='gpu_0/',
            ))
            logger.info('Model {} loaded from checkpoint {}'.format(
                scope_name, self.get_model_file(model)))


class Seq2SeqModelCaffe2EnsembleDecoder(Seq2SeqModelCaffe2EnsembleDecoderBase):
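    """Forward-only beam-search decoder over an ensemble of seq2seq models.

    Each ensemble member is rebuilt under its own name scope, and its
    checkpoint is loaded from a minidb file.
    """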

    def get_model_file(self, model):
        return model['model_file']

    def get_db_type(self):
        return 'minidb'

    def scope(self, scope_name, blob_name):
        return (
            scope_name + '/' + blob_name
            if scope_name is not None
            else blob_name
        )

    def _build_decoder(
        self,
        model,
        step_model,
        model_params,
        scope,
        previous_tokens,
        timestep,
        fake_seq_lengths,
    ):
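        """Build one ensemble member's encoder and single-step decoder.

        Returns the beam-search state configs, the output token
        log-probabilities, and the attention weights (an all-zero
        placeholder when the model has no attention).
        """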
        attention_type = model_params['attention']
        assert attention_type in ['none', 'regular']
        use_attention = (attention_type != 'none')

        with core.NameScope(scope):
            encoder_embeddings = seq2seq_util.build_embeddings(
                model=model,
                vocab_size=self.source_vocab_size,
                embedding_size=model_params['encoder_embedding_size'],
                name='encoder_embeddings',
                freeze_embeddings=False,
            )

        (
            encoder_outputs,
            weighted_encoder_outputs,
            final_encoder_hidden_states,
            final_encoder_cell_states,
            encoder_units_per_layer,
        ) = seq2seq_util.build_embedding_encoder(
            model=model,
            encoder_params=model_params['encoder_type'],
            num_decoder_layers=len(model_params['decoder_layer_configs']),
            inputs=self.encoder_inputs,
            input_lengths=self.encoder_lengths,
            vocab_size=self.source_vocab_size,
            embeddings=encoder_embeddings,
            embedding_size=model_params['encoder_embedding_size'],
            use_attention=use_attention,
            num_gpus=0,
            forward_only=True,
            scope=scope,
        )
        with core.NameScope(scope):
            if use_attention:
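                # Replicate the encoder outputs across the beam so every
                # hypothesis attends over the same source encoding.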
                # [max_source_length, beam_size, encoder_output_dim]
                encoder_outputs = model.net.Tile(
                    encoder_outputs,
                    'encoder_outputs_tiled',
                    tiles=self.beam_size,
                    axis=1,
                )

            if weighted_encoder_outputs is not None:
                weighted_encoder_outputs = model.net.Tile(
                    weighted_encoder_outputs,
                    'weighted_encoder_outputs_tiled',
                    tiles=self.beam_size,
                    axis=1,
                )

            decoder_embeddings = seq2seq_util.build_embeddings(
                model=model,
                vocab_size=self.target_vocab_size,
                embedding_size=model_params['decoder_embedding_size'],
                name='decoder_embeddings',
                freeze_embeddings=False,
            )
            embedded_tokens_t_prev = step_model.net.Gather(
                [decoder_embeddings, previous_tokens],
                'embedded_tokens_t_prev',
            )

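        # Stack forward-only LSTM cells; layer 0 consumes the token
        # embeddings, each deeper layer consumes the hidden size of the
        # layer below it.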
        decoder_cells = []
        decoder_units_per_layer = []
        for i, layer_config in enumerate(model_params['decoder_layer_configs']):
            num_units = layer_config['num_units']
            decoder_units_per_layer.append(num_units)
            if i == 0:
                input_size = model_params['decoder_embedding_size']
            else:
                input_size = (
                    model_params['decoder_layer_configs'][i - 1]['num_units']
                )

            cell = rnn_cell.LSTMCell(
                forward_only=True,
                input_size=input_size,
                hidden_size=num_units,
                forget_bias=0.0,
                memory_optimization=False,
            )
            decoder_cells.append(cell)

        with core.NameScope(scope):
            if final_encoder_hidden_states is not None:
                for i in range(len(final_encoder_hidden_states)):
                    if final_encoder_hidden_states[i] is not None:
                        final_encoder_hidden_states[i] = model.net.Tile(
                            final_encoder_hidden_states[i],
                            'final_encoder_hidden_tiled_{}'.format(i),
                            tiles=self.beam_size,
                            axis=1,
                        )
            if final_encoder_cell_states is not None:
                for i in range(len(final_encoder_cell_states)):
                    if final_encoder_cell_states[i] is not None:
                        final_encoder_cell_states[i] = model.net.Tile(
                            final_encoder_cell_states[i],
                            'final_encoder_cell_tiled_{}'.format(i),
                            tiles=self.beam_size,
                            axis=1,
                        )
            initial_states = seq2seq_util.build_initial_rnn_decoder_states(
                model=model,
                encoder_units_per_layer=encoder_units_per_layer,
                decoder_units_per_layer=decoder_units_per_layer,
                final_encoder_hidden_states=final_encoder_hidden_states,
                final_encoder_cell_states=final_encoder_cell_states,
                use_attention=use_attention,
            )

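        # Single-step decoder (with optional attention) that the beam search
        # applies once per output timestep.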
        attention_decoder = seq2seq_util.LSTMWithAttentionDecoder(
            encoder_outputs=encoder_outputs,
            encoder_output_dim=encoder_units_per_layer[-1],
            encoder_lengths=None,
            vocab_size=self.target_vocab_size,
            attention_type=attention_type,
            embedding_size=model_params['decoder_embedding_size'],
            decoder_num_units=decoder_units_per_layer[-1],
            decoder_cells=decoder_cells,
            weighted_encoder_outputs=weighted_encoder_outputs,
            name=scope,
        )
        states_prev = step_model.net.AddExternalInputs(*[
            '{}/{}_prev'.format(scope, s)
            for s in attention_decoder.get_state_names()
        ])
        decoder_outputs, states = attention_decoder.apply(
            model=step_model,
            input_t=embedded_tokens_t_prev,
            seq_lengths=fake_seq_lengths,
            states=states_prev,
            timestep=timestep,
        )

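        # Wire each recurrent state into the beam-search step net: the
        # previous value is read at offset 0 and the new value written at
        # offset 1, each with a window of one timestep.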
        state_configs = [
            BeamSearchForwardOnly.StateConfig(
                initial_value=initial_state,
                state_prev_link=BeamSearchForwardOnly.LinkConfig(
                    blob=state_prev,
                    offset=0,
                    window=1,
                ),
                state_link=BeamSearchForwardOnly.LinkConfig(
                    blob=state,
                    offset=1,
                    window=1,
                ),
            )
            for initial_state, state_prev, state in zip(
                initial_states,
                states_prev,
                states,
            )
        ]

        with core.NameScope(scope):
            decoder_outputs_flattened, _ = step_model.net.Reshape(
                [decoder_outputs],
                [
                    'decoder_outputs_flattened',
                    'decoder_outputs_and_contexts_combination_old_shape',
                ],
                shape=[-1, attention_decoder.get_output_dim()],
            )
            output_logits = seq2seq_util.output_projection(
                model=step_model,
                decoder_outputs=decoder_outputs_flattened,
                decoder_output_size=attention_decoder.get_output_dim(),
                target_vocab_size=self.target_vocab_size,
                decoder_softmax_size=model_params['decoder_softmax_size'],
            )
            # [1, beam_size, target_vocab_size]
            output_probs = step_model.net.Softmax(
                output_logits,
                'output_probs',
            )
            output_log_probs = step_model.net.Log(
                output_probs,
                'output_log_probs',
            )
            if use_attention:
                attention_weights = attention_decoder.get_attention_weights()
            else:
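                # Without attention there are no real attention weights, so
                # build an all-zero stand-in shaped like the encoder input,
                # transposed and tiled across the beam.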
                attention_weights = step_model.net.ConstantFill(
                    [self.encoder_inputs],
                    'zero_attention_weights_tmp_1',
                    value=0.0,
                )
                attention_weights = step_model.net.Transpose(
                    attention_weights,
                    'zero_attention_weights_tmp_2',
                )
                attention_weights = step_model.net.Tile(
                    attention_weights,
                    'zero_attention_weights_tmp',
                    tiles=self.beam_size,
                    axis=0,
                )

        return (
            state_configs,
            output_log_probs,
            attention_weights,
        )

    def __init__(
        self,
        translate_params,
    ):
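        """Set up the shared external inputs for the ensemble decoder.

        A rough sketch of the expected `translate_params` layout, inferred
        from the accesses in this class (other fields may be present):

            {
                'ensemble_models': [
                    {
                        'model_file': <checkpoint path>,
                        'source_vocab': {...},
                        'target_vocab': {...},
                        ...
                    },
                    ...
                ],
                'decoding_params': {'beam_size': <int>, ...},
            }
        """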
        self.models = translate_params['ensemble_models']
        decoding_params = translate_params['decoding_params']
        self.beam_size = decoding_params['beam_size']

        assert len(self.models) > 0
        source_vocab = self.models[0]['source_vocab']
        target_vocab = self.models[0]['target_vocab']
        for model in self.models:
            assert model['source_vocab'] == source_vocab
            assert model['target_vocab'] == target_vocab

        self.source_vocab_size = len(source_vocab)
        self.target_vocab_size = len(target_vocab)

        self.decoder_scope_names = [
            'model{}'.format(i) for i in range(len(self.models))
        ]

        self.model = Seq2SeqModelHelper(init_params=True)

        self.encoder_inputs = self.model.net.AddExternalInput('encoder_inputs')
        self.encoder_lengths = self.model.net.AddExternalInput(
            'encoder_lengths'
        )
        self.max_output_seq_len = self.model.net.AddExternalInput(
            'max_output_seq_len'
        )