You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

metrics.py 2.9KB

4 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. from __future__ import absolute_import
  2. from __future__ import division
  3. from __future__ import unicode_literals
  4. from __future__ import print_function
  5. import numpy as np
  6. import torch
  7. def compute_metrics(x):
  8. sx = np.sort(-x, axis=1)
  9. d = np.diag(-x)
  10. d = d[:, np.newaxis]
  11. ind = sx - d
  12. ind = np.where(ind == 0)
  13. ind = ind[1]
  14. metrics = {}
  15. metrics['R@1'] = float(np.sum(ind == 0)) * 100 / len(ind)
  16. metrics['R@5'] = float(np.sum(ind < 5)) * 100 / len(ind)
  17. metrics['R@10'] = float(np.sum(ind < 10)) * 100 / len(ind)
  18. metrics["MedianR"] = np.median(ind) + 1
  19. metrics["MeanR"] = np.mean(ind) + 1
  20. # metrics["cols"] = [int(i) for i in list(ind)]
  21. return metrics
  22. def print_computed_metrics(metrics):
  23. r1 = metrics['R@1']
  24. r5 = metrics['R@5']
  25. r10 = metrics['R@10']
  26. mr = metrics['MR']
  27. print('R@1: {:.4f} - R@5: {:.4f} - R@10: {:.4f} - Median R: {}'.format(r1, r5, r10, mr))
  28. # The two functions below come directly from: https://github.com/Deferf/Experiments
  29. def tensor_text_to_video_metrics(sim_tensor, top_k = [1,5,10]):
  30. if not torch.is_tensor(sim_tensor):
  31. sim_tensor = torch.tensor(sim_tensor)
  32. # Permute sim_tensor so it represents a sequence of text-video similarity matrices.
  33. # Then obtain the double argsort to position the rank on the diagonal
  34. stacked_sim_matrices = sim_tensor.permute(1, 0, 2)
  35. first_argsort = torch.argsort(stacked_sim_matrices, dim = -1, descending= True)
  36. second_argsort = torch.argsort(first_argsort, dim = -1, descending= False)
  37. # Extracts ranks i.e diagonals
  38. ranks = torch.flatten(torch.diagonal(second_argsort, dim1 = 1, dim2 = 2))
  39. # Now we need to extract valid ranks, as some belong to inf padding values
  40. permuted_original_data = torch.flatten(torch.diagonal(sim_tensor, dim1 = 0, dim2 = 2))
  41. mask = ~ torch.logical_or(torch.isinf(permuted_original_data), torch.isnan(permuted_original_data))
  42. valid_ranks = ranks[mask]
  43. # A quick dimension check validates our results, there may be other correctness tests pending
  44. # Such as dot product localization, but that is for other time.
  45. #assert int(valid_ranks.shape[0]) == sum([len(text_dict[k]) for k in text_dict])
  46. if not torch.is_tensor(valid_ranks):
  47. valid_ranks = torch.tensor(valid_ranks)
  48. results = {f"R{k}": float(torch.sum(valid_ranks < k) * 100 / len(valid_ranks)) for k in top_k}
  49. results["MedianR"] = float(torch.median(valid_ranks + 1))
  50. results["MeanR"] = float(np.mean(valid_ranks.numpy() + 1))
  51. results["Std_Rank"] = float(np.std(valid_ranks.numpy() + 1))
  52. results['MR'] = results["MedianR"]
  53. return results
  54. def tensor_video_to_text_sim(sim_tensor):
  55. if not torch.is_tensor(sim_tensor):
  56. sim_tensor = torch.tensor(sim_tensor)
  57. # Code to avoid nans
  58. sim_tensor[sim_tensor != sim_tensor] = float('-inf')
  59. # Forms a similarity matrix for use with rank at k
  60. values, _ = torch.max(sim_tensor, dim=1, keepdim=True)
  61. return torch.squeeze(values).T