import json
import logging

import fasttext
import numpy as np
from gensim.corpora.dictionary import Dictionary
from nltk.corpus import stopwords
from pyemd import emd

logger = logging.getLogger(__name__)


class FasttextEmbedding:
    """Word-vector lookup backed by a fastText model or a NumPy-loaded file.

    Attributes are set in ``__init__``:

    * ``model`` -- the object returned by ``fasttext.load_model`` (for
      ``.bin`` paths) or by ``np.load`` (for every other path).
    * ``full`` -- ``True`` when ``model`` is a fastText model, ``False``
      when it came from ``np.load``.
    * ``stopwords`` -- the result of ``stopwords.words('english')`` from NLTK.
    """

    def __init__(self, model_path):
        """Load the embedding found at *model_path*.

        Args:
            model_path: Path string. A path ending in ``'.bin'`` is loaded
                with ``fasttext.load_model``; anything else is passed to
                ``np.load``.
        """
        is_binary = model_path.endswith('.bin')
        loader = fasttext.load_model if is_binary else np.load
        self.model = loader(model_path)
        self.full = is_binary
        self.stopwords = stopwords.words('english')

    def __getitem__(self, idx):
        """Return the vector stored for *idx*.

        With a fastText model the lookup is delegated to
        ``get_word_vector``. Otherwise *idx* must pass an ``in`` test
        against the loaded object before it is indexed.
        NOTE(review): presumably the non-``.bin`` file is a word-keyed
        archive (e.g. ``.npz``) -- confirm against how it is produced.

        Raises:
            ValueError: If the model was loaded with ``np.load`` and *idx*
                is not contained in it.
        """
        if self.full:
            return self.model.get_word_vector(idx)
        if idx in self.model:
            return self.model[idx]
        raise ValueError('Word not available.')