|
123456789101112131415161718192021222324252627282930 |
- import datetime
- import pandas as pd
-
-
class movielens_1m(object):
    """Load the MovieLens-1M user, movie and rating tables as DataFrames.

    Attributes:
        path: Directory that holds ``users.dat``, ``ratings.dat`` and
            ``movies_extrainfos.dat``. The class-level value is the default
            used when no directory is given.
        user_data: Frame read from ``users.dat``.
        item_data: Frame read from ``movies_extrainfos.dat``.
        score_data: Frame read from ``ratings.dat`` with the raw
            ``timestamp`` column replaced by a ``time`` column.
    """

    # Default dataset directory, relative to the current working directory.
    path = "movielens/ml-1m"

    def __init__(self, path=None):
        """Read the three tables eagerly.

        Args:
            path: Dataset directory. ``None`` keeps the class default.
        """
        if path is not None:
            self.path = path
        # Called without arguments so subclasses overriding ``load(self)``
        # keep working; ``load`` picks the directory up from ``self.path``.
        self.user_data, self.item_data, self.score_data = self.load()

    def load(self, path=None):
        """Read the dataset files and return them as DataFrames.

        Args:
            path: Dataset directory. ``None`` falls back to ``self.path``.

        Returns:
            A ``(profile_data, item_data, score_data)`` tuple of DataFrames.
        """
        if path is None:
            path = self.path

        # All files use the two-character "::" delimiter, which the C parser
        # does not support; hence engine="python" on every read.
        profile_data = pd.read_csv(
            "{}/users.dat".format(path),
            names=['user_id', 'gender', 'age', 'occupation_code', 'zip'],
            sep="::",
            engine='python',
        )
        item_data = pd.read_csv(
            "{}/movies_extrainfos.dat".format(path),
            names=[
                'movie_id', 'title', 'year', 'rate', 'released', 'genre',
                'director', 'writer', 'actors', 'plot', 'poster',
            ],
            sep="::",
            engine='python',
            encoding="utf-8",
        )
        score_data = pd.read_csv(
            "{}/ratings.dat".format(path),
            names=['user_id', 'movie_id', 'rating', 'timestamp'],
            sep="::",
            engine='python',
        )

        # NOTE: fromtimestamp() yields naive datetimes in the machine's local
        # timezone, so the "time" values depend on where this code runs.
        score_data['time'] = score_data["timestamp"].map(
            datetime.datetime.fromtimestamp
        )
        score_data = score_data.drop(["timestamp"], axis=1)
        return profile_data, item_data, score_data
|