123456789101112131415161718192021222324252627 |
- from gensim.corpora.dictionary import Dictionary
- import logging
- from pyemd import emd
- from nltk.corpus import stopwords
- import fasttext
- import json
- import numpy as np
-
- logger = logging.getLogger(__name__)
-
class FasttextEmbedding:
    """Look up word vectors from one of two kinds of model file.

    A path ending in ``.bin`` is opened with ``fasttext.load_model`` and
    every lookup is delegated to the model's ``get_word_vector`` method.
    Any other path is opened with ``np.load``; lookups then index the
    loaded object directly and are rejected for keys it does not contain.
    """

    def __init__(self, model_path):
        """Load the model stored at *model_path*.

        Args:
            model_path: Path string. Its suffix selects the loader:
                ``.bin`` goes to ``fasttext.load_model``, anything else
                goes to ``np.load``.
        """
        is_binary = model_path.endswith('.bin')
        # Pick the loader first so the model is assigned in one place.
        loader = fasttext.load_model if is_binary else np.load
        # NOTE(review): for the np.load case the result is presumably a
        # mapping-like object keyed by word (e.g. an .npz archive), since
        # __getitem__ uses ``in`` and indexing on it -- confirm with the
        # code that writes these files.
        self.model = loader(model_path)
        # True when the model came from a ``.bin`` file.
        self.full = is_binary
        # English stopword list as returned by NLTK's stopwords corpus.
        self.stopwords = stopwords.words('english')

    def __getitem__(self, idx):
        """Return the vector stored for *idx*.

        Args:
            idx: Lookup key (a word).

        Returns:
            ``self.model.get_word_vector(idx)`` when the model was loaded
            from a ``.bin`` file, otherwise ``self.model[idx]``.

        Raises:
            ValueError: The model was not loaded from a ``.bin`` file and
                *idx* is not contained in it.
        """
        if not self.full:
            if idx in self.model:
                return self.model[idx]
            raise ValueError('Word not available.')
        return self.model.get_word_vector(idx)
|