You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PrepareAttributes.m 3.4KB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  % PrepareAttributes  Map raw node attributes to categorical codes.
  %
  % Output coding: 0 means null/no value; real values start at 1.
  %   - Country column (input column 1): raw value + 129, then (optionally)
  %     compressed so that the codes in use are consecutive 1..K.
  %   - Every other selected column: 0 when the raw value is -1 (none),
  %     otherwise 1 (binary flag, no threshold).
  %
  % Inputs:
  %   dataFilePath     - directory of the data file (passed to LoadAsciiAttributes)
  %   dataFileName     - name of the data file (passed to LoadAsciiAttributes)
  %   numNodes         - passed through to LoadAsciiAttributes
  %   expectedAttrCols - number of attribute columns the loaded matrix must have
  %   selectedAttr     - (optional) 1-by-expectedAttrCols vector; columns whose
  %                      entry equals 1 are emitted. Default: all ones.
  %   debug            - (optional) 1 to write the result to disk. Default: 0.
  %   debugPath        - (optional) sub-directory, appended to dataFilePath,
  %                      for debug output. Default: 'debug/'.
  %
  % Outputs:
  %   outAttributes - m-by-nnz(selectedAttr) matrix of coded attributes
  %   attUpperRange - 1-by-nnz(selectedAttr) row of per-column maxima
  %
  % On invalid parameters or an unexpected column count a message is printed
  % and both outputs are returned empty.
  function [outAttributes, attUpperRange] = PrepareAttributes(dataFilePath, dataFileName, numNodes, expectedAttrCols, selectedAttr, debug, debugPath)
      outNoneValue = 0;
      inNoneValue = -1;
      countryCol = 1;
      countryCompress = 1;
      countryOffset = 129; % min value is -128 => real value starting from 1
      numThreshold = 0;    % no threshold is applied here; only used in the debug file name

      % Make sure the outputs are always defined, including on the error paths.
      outAttributes = [];
      attUpperRange = [];

      % expectedAttrCols (4th argument) is required by everything below.
      if nargin < 4
          fprintf('PrepareAttributes - Invalid parameters. expecting: dataFilePath, dataFileName, numNodes, expectedAttrCols\n');
          return;
      end
      if nargin < 5
          selectedAttr = ones(1, expectedAttrCols);
      end
      if nargin < 6
          debug = 0;
      end
      if nargin < 7
          debugPath = 'debug/';
      end

      outFullPath = strcat(dataFilePath, debugPath);
      outFullName = strcat(outFullPath, dataFileName);
      if isdir(outFullPath) == 0 && debug == 1
          mkdir(outFullPath);
      end

      attributes = LoadAsciiAttributes(dataFilePath, dataFileName, numNodes, debug, debugPath);
      m = size(attributes, 1); % num nodes/lines
      n = size(attributes, 2); % num attributes/cols

      if n ~= expectedAttrCols
          fprintf('PrepareAttributes - Invalid attributes count: expecting %d, got %d\n', expectedAttrCols, n);
          return;
      end

      outAttributes = zeros(m, nnz(selectedAttr)); % num cols according to num selectedAttr
      column = 0;

      % country (col=1)
      if selectedAttr(countryCol) == 1
          column = column + 1;
          % double() guards against integer-class input saturating when the
          % offset is added; it is a no-op for double input.
          countryValues = double(attributes(:, countryCol)) + countryOffset;
          countryValues(countryValues < 0) = outNoneValue;
          outAttributes(:, column) = countryValues;
      end

      % games/groups (all other columns): no threshold, i.e. binary value
      for inx = 2:expectedAttrCols
          if selectedAttr(inx) == 1
              column = column + 1;
              outAttributes(:, column) = double(attributes(:, inx) ~= inNoneValue);
          end
      end

      % Remove unused country codes so the codes in use become 1..K.
      % Ranking the distinct positive values closes gaps of any width in one
      % step; entries <= 0 (the none value) are left untouched.
      if countryCompress && selectedAttr(countryCol) == 1
          new_values = outAttributes(:, countryCol);
          used = new_values > 0;
          if any(used)
              [~, ~, rank] = unique(new_values(used));
              new_values(used) = rank;
          end
          outAttributes(:, countryCol) = new_values;
      end

      % Explicit dimension so a single-row matrix still yields per-column maxima.
      attUpperRange = max(outAttributes, [], 1);

      if debug == 1
          outFullName = sprintf('%s.att2%d', outFullName, numThreshold);
          save(strcat(outFullName, '.mat'), 'outAttributes');
          SaveIntMatrixToFile(strcat(outFullName, '.txt'), outAttributes);
      end