# -*- coding: utf-8 -*-

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import argparse
import os
import heapq
import math
import random
from DataSet import DataSet
from BNN import BayesianLinear
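
# Assumed interface of BNN.BayesianLinear (defined in BNN.py, not shown here):
# a Bayes-by-Backprop linear layer in the style of Blundell et al. (2015);
# each forward() draws a fresh weight sample and caches `log_prior` and
# `log_variational_posterior`, which Model.sample_elbo sums into the KL term.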

# Set device to GPU if available
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(DEVICE)

# Seed everything for reproducibility
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        self.dataName = args.dataName
        self.dataSet = DataSet(self.dataName)
        self.shape = self.dataSet.shape  # (num_users, num_items)
        self.maxRate = self.dataSet.maxRate

        # Each ensemble member trains on its own bootstrap resample of the data
        self.sample_size = 2
        self.train_data = self.bootstrap_sample(self.sample_size)
        self.test_data = self.dataSet.test

        self.negNum = args.negNum
        self.testNeg = self.dataSet.getTestNeg(self.test_data, 99)
        self.userLayer = args.userLayer
        self.itemLayer = args.itemLayer

        # Trainable user-item interaction matrix; the item view is its
        # transpose, taken freshly in forward() so gradients flow through a
        # single shared parameter (storing the transposed view here would
        # bake a stale autograd graph into the module).
        self.user_item_embedding = nn.Parameter(
            torch.tensor(self.dataSet.getEmbedding(), dtype=torch.float32).to(DEVICE))

        # User tower: Bayesian MLP over the user's row of the interaction matrix
        self.user_layers = nn.ModuleList()
        input_size = self.shape[1]
        for size in self.userLayer:
            self.user_layers.append(BayesianLinear(input_size, size, prior_type=args.priorType, device=DEVICE))
            input_size = size

        # Item tower: Bayesian MLP over the item's column of the interaction matrix
        self.item_layers = nn.ModuleList()
        input_size = self.shape[0]
        for size in self.itemLayer:
            self.item_layers.append(BayesianLinear(input_size, size, prior_type=args.priorType, device=DEVICE))
            input_size = size

        # MLP head scoring the fused representation; assumes the user and
        # item towers end with the same width (64 for all preset layouts).
        self.interaction_layer = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

        # Self-attention over the fused representation; embed_dim must be
        # divisible by num_heads (64 / 4 here).
        self.attention = nn.MultiheadAttention(embed_dim=input_size, num_heads=4, dropout=0.3, batch_first=True)

    def forward(self, user, item):
        user_input = self.user_item_embedding[user]
        item_input = self.user_item_embedding.t()[item]  # item view of the shared embedding

        for layer in self.user_layers:
            user_input = torch.relu(layer(user_input))

        for layer in self.item_layers:
            item_input = torch.relu(layer(item_input))

        user_att = user_input.unsqueeze(1)  # (batch_size, 1, embed_dim)
        item_att = item_input.unsqueeze(1)  # (batch_size, 1, embed_dim)

        combined = user_att * item_att  # element-wise fusion: (batch_size, 1, embed_dim)

        att_output, att_weights = self.attention(
            query=combined,
            key=combined,
            value=combined
        )

        att_output = att_output.mean(dim=1)  # collapse the length-1 sequence axis
        interaction_input = att_output

        y_hat = self.interaction_layer(interaction_input)
        y_hat = torch.sigmoid(y_hat)
        # Clamp away from zero so BCELoss never sees log(0)
        return torch.clamp(y_hat.squeeze(), min=1e-6, max=1.0)

    def log_prior(self, tower):
        if tower == "user":
            return sum(layer.log_prior for layer in self.user_layers)
        return sum(layer.log_prior for layer in self.item_layers)

    def log_variational_posterior(self, tower):
        if tower == "user":
            return sum(layer.log_variational_posterior for layer in self.user_layers)
        return sum(layer.log_variational_posterior for layer in self.item_layers)

    def sample_elbo(self, user_tensor, item_tensor, target, num_samples, num_batches):
        # Monte-Carlo estimate of the KL term: each forward pass resamples the
        # Bayesian weights, refreshing the cached log-prior/log-posterior
        # values. The outputs themselves are unused here; the likelihood (BCE)
        # term is added separately in run_epoch.
        outputs = torch.zeros(num_samples, user_tensor.size(0), device=DEVICE)

        user_log_priors = torch.zeros(num_samples, device=DEVICE)
        user_log_variational_posteriors = torch.zeros(num_samples, device=DEVICE)

        item_log_priors = torch.zeros(num_samples, device=DEVICE)
        item_log_variational_posteriors = torch.zeros(num_samples, device=DEVICE)

        for i in range(num_samples):
            outputs[i] = self(user_tensor, item_tensor)

            user_log_priors[i] = self.log_prior(tower="user")
            user_log_variational_posteriors[i] = self.log_variational_posterior(tower="user")

            item_log_priors[i] = self.log_prior(tower="item")
            item_log_variational_posteriors[i] = self.log_variational_posterior(tower="item")

        user_loss = user_log_variational_posteriors.mean() - user_log_priors.mean()
        item_loss = item_log_variational_posteriors.mean() - item_log_priors.mean()
        return user_loss + item_loss
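
    # The value returned above is the Monte-Carlo complexity cost of Bayes by
    # Backprop, KL ~= (1/S) * sum_s [log q(w_s | theta) - log p(w_s)], summed
    # over the user and item towers. run_epoch adds the data-fit (BCE) term,
    # so each training batch minimises a weighted negative ELBO.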

    def bootstrap_sample(self, sample_size):
        """
        Draw a bootstrap resample of the training set: len(train) // sample_size
        interactions sampled with replacement.
        """
        indices = np.random.choice(len(self.dataSet.train), size=len(self.dataSet.train) // sample_size,
                                   replace=True)
        sampled_train = [self.dataSet.train[i] for i in indices]
        return sampled_train
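
    # Bagging note: with sample_size=2 each member sees a half-sized resample
    # drawn with replacement, the standard bagging recipe for decorrelating
    # the ensemble members' errors.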


class SuperModel(nn.Module):
    """
    A super model that combines the predictions of the ensemble members
    with a small neural network (stacked generalization).
    """
    def __init__(self, ensemble_models, input_size):
        super(SuperModel, self).__init__()
        # Kept as a plain Python list on purpose: the members are frozen, so
        # their parameters must not be registered with this module's optimizer.
        self.ensemble_models = ensemble_models
        self.combiner = nn.Sequential(
            nn.Linear(input_size, input_size // 2),
            nn.ReLU(),
            nn.Linear(input_size // 2, 1),
            nn.Sigmoid()  # ensure the output is a probability
        )

    def forward(self, user, item):
        """
        Combine the ensemble members' predictions with the combiner network.
        """
        ensemble_predictions = []

        with torch.no_grad():  # no gradients for the frozen ensemble members
            for model in self.ensemble_models:
                model.eval()  # keep the members in evaluation mode
                predictions = model(user, item)
                ensemble_predictions.append(predictions)

        # Stack the per-model scores as features for the combiner network
        stacked_predictions = torch.stack(ensemble_predictions, dim=1)  # (batch_size, num_ensemble_models)
        combined_predictions = self.combiner(stacked_predictions).squeeze(-1)  # (batch_size,)

        return combined_predictions
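
# Usage sketch (sizes are illustrative, for the default ensembleSize=10 and
# batchSize=256): each member emits a (256,) score vector, the stacked
# features are (256, 10), and the combiner maps them through
# Linear(10, 5) -> ReLU -> Linear(5, 1) -> Sigmoid to one score per pair.
# Since member predictions are computed under no_grad, only the combiner's
# weights receive gradients when the SuperModel is trained.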


def run_epoch(model, optimizer, criterion, args):
    model.train()
    train_u, train_i, train_r = model.dataSet.getInstances(model.train_data, args.negNum)
    train_len = len(train_u)
    shuffled_idx = np.random.permutation(np.arange(train_len))
    train_u, train_i, train_r = train_u[shuffled_idx], train_i[shuffled_idx], train_r[shuffled_idx]

    num_batches = (train_len + args.batchSize - 1) // args.batchSize
    BCE_losses, kls = [], []

    for i in range(num_batches):
        min_idx = i * args.batchSize
        max_idx = min(train_len, (i + 1) * args.batchSize)

        user_tensor = torch.tensor(train_u[min_idx:max_idx], dtype=torch.long).to(DEVICE)
        item_tensor = torch.tensor(train_i[min_idx:max_idx], dtype=torch.long).to(DEVICE)
        rate_tensor = torch.tensor(train_r[min_idx:max_idx], dtype=torch.float32).to(DEVICE)

        # Min-max normalise the ratings into [0, 1] for BCELoss; the small
        # epsilon guards against a zero range within a batch.
        rate_range = rate_tensor.max() - rate_tensor.min()
        rate_tensor = (rate_tensor - rate_tensor.min()) / (rate_range + 1e-8)

        optimizer.zero_grad()
        y_hat = model(user_tensor, item_tensor)

        bce_loss = criterion(y_hat, rate_tensor)
        BCE_losses.append(bce_loss.item())

        # Fixed, empirically chosen weight that keeps the KL term on the same
        # scale as the per-batch BCE loss.
        kl_coef = 4.42322e-08
        kl_term = kl_coef * model.sample_elbo(user_tensor, item_tensor, rate_tensor, 5, num_batches)
        loss = bce_loss + kl_term
        loss.backward()
        optimizer.step()

        kls.append(kl_term.item())
        if i % 10 == 0:
            print(f'\rBatch {i}/{num_batches}: KL = {np.mean(kls[-10:]):.4f}, BCE = {np.mean(BCE_losses[-10:]):.4f}', end='')

    print(f"\nMean BCE Loss: {np.mean(BCE_losses):.4f}")
    print(f"Mean KL Divergence: {np.mean(kls):.4f}")
    return np.mean(kls)
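
# Note on the objective: each batch minimises BCE + kl_coef * KL, i.e. a
# KL-reweighted negative ELBO. kl_coef is a fixed constant here rather than a
# per-batch schedule such as pi_i = 2^(M - i) / (2^M - 1) from Blundell et
# al.; num_batches is already threaded through sample_elbo, so switching to
# such a schedule would be a small change.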


def evaluate(model, topK):
    model.eval()
    hr, NDCG = [], []

    with torch.no_grad():
        for i in range(len(model.testNeg[0])):
            users = model.testNeg[0][i]
            items = model.testNeg[1][i]
            user_tensor = torch.tensor(users, dtype=torch.long).to(DEVICE)
            item_tensor = torch.tensor(items, dtype=torch.long).to(DEVICE)
            predict = model(user_tensor, item_tensor)

            target = items[0]  # by construction, the held-out item comes first
            item_score_dict = {item: predict[j].item() for j, item in enumerate(items)}
            ranklist = heapq.nlargest(topK, item_score_dict, key=item_score_dict.get)

            hr.append(1 if target in ranklist else 0)
            NDCG.append(math.log(2) / math.log(ranklist.index(target) + 2) if target in ranklist else 0)

    return np.mean(hr), np.mean(NDCG)
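
# Metric recap: HR@K is the fraction of test users whose held-out item lands
# in the top-K list; a hit at (0-based) rank r contributes
# log(2) / log(r + 2) to NDCG@K, i.e. 1 at rank 0, decaying for lower ranks.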


def main():
    parser = argparse.ArgumentParser(description="Options")
    parser.add_argument('-dataName', action='store', dest='dataName', default='ml-100k')
    parser.add_argument('-negNum', action='store', dest='negNum', default=5, type=int)
    parser.add_argument('-userLayer', action='store', dest='userLayer', default=[512, 64, 64], type=int, nargs='+')
    parser.add_argument('-itemLayer', action='store', dest='itemLayer', default=[1024, 64, 64], type=int, nargs='+')
    parser.add_argument('-lr', action='store', dest='lr', default=0.0001, type=float)
    parser.add_argument('-maxEpochs', action='store', dest='maxEpochs', default=50, type=int)
    parser.add_argument('-batchSize', action='store', dest='batchSize', default=256, type=int)
    parser.add_argument('-earlyStop', action='store', dest='earlyStop', default=5, type=int)
    parser.add_argument('-checkPoint', action='store', dest='checkPoint', default='./checkPoint/')
    parser.add_argument('-topK', action='store', dest='topK', default=10, type=int)
    parser.add_argument('-loadModel', action='store_true', dest='loadModel', help="Load a saved model")
    parser.add_argument('-ensembleSize', action='store', dest='ensembleSize', default=10, type=int)
    parser.add_argument('-maxEpochN', action='store', dest='maxEpochN', default=30, type=int)
    parser.add_argument('-priorType', action='store', dest='priorType', default='ScaleMixtureGaussian',
                        choices=['ScaleMixtureGaussian', 'Laplace', 'IsotropicGaussian'])
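
    # The three prior choices are assumed to be implemented in BNN.py:
    # 'ScaleMixtureGaussian' (the two-component Gaussian mixture prior from
    # Blundell et al.), plus 'Laplace' and 'IsotropicGaussian' alternatives.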

    args = parser.parse_args()

    # makedirs with exist_ok handles nested paths and repeated runs
    os.makedirs(args.checkPoint, exist_ok=True)

    # Build a diverse ensemble: each member gets randomly chosen tower
    # architectures and a randomly chosen weight prior.
    ensemble_models = []
    optimizers = []
    ensemble_args = []
    network_layers = [[512, 64, 64], [512, 64], [1024, 64, 64], [512, 256, 64], [1024, 256, 256, 64]]
    prior_types = ['ScaleMixtureGaussian', 'Laplace', 'IsotropicGaussian']
    for ensemble_idx in range(args.ensembleSize):
        args_copy = argparse.Namespace(**vars(args))
        args_copy.userLayer = random.choice(network_layers)
        args_copy.itemLayer = random.choice(network_layers)
        args_copy.priorType = random.choice(prior_types)
        ensemble_model = Model(args_copy).to(DEVICE)
        ensemble_args.append(args_copy)
        optimizer = optim.Adam(ensemble_model.parameters(), lr=args.lr)
        ensemble_models.append(ensemble_model)
        optimizers.append(optimizer)

    criterion = nn.BCELoss()

    for ensemble_idx in range(args.ensembleSize):
        best_hr = -1
        best_NDCG = -1
        best_epoch = -1

        print(f'Ensemble Model Number {ensemble_idx}')
        print("Start Training!")
        print(ensemble_args[ensemble_idx])

        classifier = ensemble_models[ensemble_idx]
        optimizer = optimizers[ensemble_idx]
        for epoch in range(args.maxEpochs):
            print("=" * 20 + "Epoch " + str(epoch) + "=" * 20)
            run_epoch(classifier, optimizer, criterion, args)
            print('=' * 50)
            print("Start Evaluation!")
            hr, NDCG = evaluate(classifier, args.topK)
            print("Epoch ", epoch, "HR: {}, NDCG: {}".format(hr, NDCG))

            # Checkpoint whenever either metric improves
            if hr > best_hr or NDCG > best_NDCG:
                best_hr = hr
                best_NDCG = NDCG
                best_epoch = epoch
                torch.save(classifier.state_dict(), os.path.join(args.checkPoint, f'model{ensemble_idx}.pth'))

            if epoch - best_epoch > args.earlyStop:
                print("Early stop!")
                break
            print("=" * 20 + "Epoch " + str(epoch) + " End" + "=" * 20)

        print("Best HR: {}, NDCG: {}, at epoch {}".format(best_hr, best_NDCG, best_epoch))
        print("Training complete!\n")

    # Reload each member's best checkpoint before building the super model
    for ensemble_idx in range(args.ensembleSize):
        model_path = os.path.join(args.checkPoint, f'model{ensemble_idx}.pth')
        if os.path.exists(model_path):
            print("Loading saved model from", model_path)
            ensemble_models[ensemble_idx].load_state_dict(torch.load(model_path))
        else:
            print("No saved model found at", model_path)

    # Train the stacking combiner; the optimizer is created once so its
    # state persists across epochs.
    super_model = SuperModel(ensemble_models, input_size=len(ensemble_models)).to(DEVICE)
    super_optimizer = optim.Adam(super_model.parameters(), lr=args.lr)
    for epoch in range(args.maxEpochN):
        train_super_model(super_model, super_optimizer, ensemble_models[0].dataSet.train, args)

    print("\nStart Testing")
    total_hr, total_NDCG = ensemble_eval(ensemble_models, super_model, args.topK)
    print("Total HR: {}, total NDCG: {}".format(total_hr, total_NDCG))


def train_super_model(super_model, optimizer, train_data, args):
    super_model.train()
    criterion = nn.BCELoss()

    train_u, train_i, train_r = super_model.ensemble_models[0].dataSet.getInstances(train_data, args.negNum)
    train_len = len(train_u)
    shuffled_idx = np.random.permutation(np.arange(train_len))
    train_u = train_u[shuffled_idx]
    train_i = train_i[shuffled_idx]
    train_r = train_r[shuffled_idx]

    num_batches = (train_len + args.batchSize - 1) // args.batchSize

    losses = []
    for i in range(num_batches):
        min_idx = i * args.batchSize
        max_idx = min(train_len, (i + 1) * args.batchSize)

        user_tensor = torch.tensor(train_u[min_idx:max_idx], dtype=torch.long).to(DEVICE)
        item_tensor = torch.tensor(train_i[min_idx:max_idx], dtype=torch.long).to(DEVICE)
        rate_tensor = torch.tensor(train_r[min_idx:max_idx], dtype=torch.float32).to(DEVICE)
        rate_range = rate_tensor.max() - rate_tensor.min()
        rate_tensor = (rate_tensor - rate_tensor.min()) / (rate_range + 1e-8)

        optimizer.zero_grad()
        y_hat = super_model(user_tensor, item_tensor)
        loss = criterion(y_hat, rate_tensor)
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

        if i % 10 == 0:
            print(f'\rBatch {i}/{num_batches}: loss = {np.mean(losses[-10:]):.4f}', end='')

    print("\nMean loss for the super model this epoch: {}".format(np.mean(losses)))


def ensemble_eval(ensemble_models, superModel, topK):
    def getHitRatio(ranklist, targetItem):
        return 1 if targetItem in ranklist else 0

    def getNDCG(ranklist, targetItem):
        for i, item in enumerate(ranklist):
            if item == targetItem:
                return math.log(2) / math.log(i + 2)
        return 0

    hr = []
    NDCG = []
    testUser = ensemble_models[0].testNeg[0]
    testItem = ensemble_models[0].testNeg[1]

    with torch.no_grad():
        for i in range(len(testUser)):
            target = testItem[i][0]
            user_tensor = torch.tensor(testUser[i], dtype=torch.long).to(DEVICE)
            item_tensor = torch.tensor(testItem[i], dtype=torch.long).to(DEVICE)

            total_predict = superModel(user_tensor, item_tensor)
            item_score_dict = {item: total_predict[j].item() for j, item in enumerate(testItem[i])}
            ranklist = heapq.nlargest(topK, item_score_dict, key=item_score_dict.get)

            hr.append(getHitRatio(ranklist, target))
            NDCG.append(getNDCG(ranklist, target))

    return np.mean(hr), np.mean(NDCG)


if __name__ == '__main__':
    main()
|