BSc project of Parham Saremi. The goal of the project was to detect the geographical region of a food using textual and visual features extracted from its recipes and ingredients.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fasttext_embedding.py 709B

123456789101112131415161718192021222324252627
  1. from gensim.corpora.dictionary import Dictionary
  2. import logging
  3. from pyemd import emd
  4. from nltk.corpus import stopwords
  5. import fasttext
  6. import json
  7. import numpy as np
  8. logger = logging.getLogger(__name__)
  9. class FasttextEmbedding:
  10. def __init__(self, model_path):
  11. if model_path.endswith('.bin'):
  12. self.model = fasttext.load_model(model_path)
  13. self.full = True
  14. else:
  15. self.model = np.load(model_path)
  16. self.full = False
  17. self.stopwords = stopwords.words('english')
  18. def __getitem__(self, idx):
  19. if self.full:
  20. return self.model.get_word_vector(idx)
  21. else:
  22. if idx not in self.model:
  23. raise ValueError('Word not available.')
  24. return self.model[idx]