''' This module handles text generation with beam search. '''

from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn

from transformer.MyModel import EncoderDecoder, Encoder, Decoder, Generator, BahdanauAttention


class Translator(object):
    ''' Load a trained model and handle the beam search. '''

    def __init__(self, opt):
        self.opt = opt
        self.tt = torch.cuda if opt.cuda else torch

        # Load the checkpoint onto the CPU first; it is moved to the GPU
        # below if CUDA was requested.
        checkpoint = torch.load(opt.model, map_location=lambda storage, loc: storage)
        model_opt = checkpoint['settings']
        self.model_opt = model_opt

        attention = BahdanauAttention(model_opt.d_inner_hid)
        model = EncoderDecoder(
            Encoder(model_opt.d_word_vec, model_opt.d_inner_hid,
                    num_layers=1, dropout=model_opt.dropout),
            Decoder(model_opt.d_word_vec, model_opt.d_inner_hid, attention,
                    num_layers=1, dropout=model_opt.dropout),
            nn.Embedding(model_opt.user_size, model_opt.d_word_vec),
            nn.Embedding(model_opt.user_size, model_opt.d_word_vec),
            Generator(model_opt.d_inner_hid, model_opt.user_size))

        prob_projection = nn.Softmax(dim=-1)

        model_dict = checkpoint['model']
        # If the checkpoint was saved from a DataParallel wrapper, its keys
        # carry a 'module.' prefix that must be stripped before loading:
        # new_state_dict = OrderedDict()
        # for k, v in model_dict.items():
        #     new_state_dict[k[7:]] = v
        # model_dict = new_state_dict
        model.load_state_dict(model_dict)
        print('[Info] Trained model state loaded.')

        if opt.cuda:
            model.cuda(0)
            prob_projection.cuda(0)
        else:
            print('[Info] CUDA not available, running on CPU.')
            model.cpu()
            prob_projection.cpu()

        model.prob_projection = prob_projection

        self.model = model
        self.model.eval()

    def translate_batch(self, src, src_mask, src_lengths, trg, trg_mask, trg_lengths):
        ''' Translation work in one batch. '''
        # Note: argmax decoding is deterministic, so every pass of this loop
        # reproduces the same sequence; it replicates the greedy result
        # beam_size times rather than exploring distinct hypotheses.
        output_f = []
        for j in range(self.opt.beam_size):
            with torch.no_grad():
                encoder_hidden, encoder_final = self.model.encode(src, src_mask, src_lengths)
                # Start decoding from index 0, used as the start-of-sequence token.
                prev_y = torch.ones(1, 1).fill_(0).type_as(trg)
                trg_mask = torch.ones_like(prev_y)

            output = []
            attention_scores = []
            hidden = None

            # Decode greedily for at most 35 steps.
            for i in range(35):
                with torch.no_grad():
                    out, hidden, pre_output = self.model.decode(
                        encoder_hidden, encoder_final, src_mask, prev_y, trg_mask, hidden)
                    prob = self.model.generator(pre_output[:, -1])
                _, next_word = torch.max(prob, dim=1)
                next_word = next_word.data.item()
                output.append(next_word)
                # Feed the predicted token back in as the next decoder input.
                prev_y = torch.ones(1, 1).type_as(trg).fill_(next_word)
                attention_scores.append(self.model.decoder.attention.alphas.cpu().numpy())

            output_f.append(np.array(output))

        # Stack the beam_size sequences and add a leading batch dimension.
        output_f2 = np.array([np.array(output_f)])
        return output_f2

    def predict_next_user(self, num, output, hidden, attention_scores,
                          src_mask, prev_y, encoder_hidden, encoder_final, trg_mask):
        ''' Run one greedy decoding step and return the predicted next user. '''
        with torch.no_grad():
            out, hidden, pre_output = self.model.decode(
                encoder_hidden, encoder_final, src_mask, prev_y, trg_mask, hidden)
            prob = self.model.generator(pre_output[:, -1])
        _, next_word = torch.max(prob, dim=1)
        next_word = next_word.data.item()
        output.append(next_word)
        prev_y = torch.ones(1, 1).type_as(prev_y).fill_(next_word)
        attention_scores.append(self.model.decoder.attention.alphas.cpu().numpy())
        # Also return the updated hidden state so the caller can keep stepping.
        return next_word, prev_y, attention_scores, hidden
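

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a minimal, hedged example
# of how Translator is expected to be driven. The checkpoint path, the dummy
# tensor shapes, and the mask layouts are assumptions; the real shapes depend
# on how transformer.MyModel builds its batches. Only opt.model, opt.cuda,
# and opt.beam_size are actually read by Translator above.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from argparse import Namespace

    # Hypothetical options; 'trained.chkpt' must be a checkpoint produced by
    # the matching training script for this to actually run.
    opt = Namespace(model='trained.chkpt',
                    cuda=torch.cuda.is_available(),
                    beam_size=5)
    translator = Translator(opt)

    # Dummy single-example batch (assumed shapes: batch=1, source length=4).
    src = torch.zeros(1, 4, dtype=torch.long)         # source user indices
    src_mask = torch.ones(1, 1, 4, dtype=torch.bool)  # assumed mask layout
    src_lengths = torch.tensor([4])
    trg = torch.zeros(1, 1, dtype=torch.long)         # only used for dtype
    trg_mask = torch.ones(1, 1, dtype=torch.long)
    trg_lengths = torch.tensor([1])                   # unused by translate_batch

    candidates = translator.translate_batch(src, src_mask, src_lengths,
                                            trg, trg_mask, trg_lengths)
    # Shape: (1, beam_size, 35) -- beam_size copies of the greedy sequence.
    print(candidates.shape)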