You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

PrepareAttributesTh.m 4.5KB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  function [outAttributes, attUpperRange] = PrepareAttributesTh(dataFilePath, dataFileName, numNodes, numThreshold, selectedAttr, debug, debugPath)
  % PREPAREATTRIBUTESTH  Map raw node attributes to small integer categories.
  %
  %   [outAttributes, attUpperRange] = PrepareAttributesTh(dataFilePath, ...
  %       dataFileName, numNodes, numThreshold, selectedAttr, debug, debugPath)
  %
  %   Output encoding: 0 means null/no value, 1 means unknown/private value
  %   (where such a value exists for the attribute), larger codes are real
  %   values.
  %
  %   Inputs
  %     dataFilePath  Directory prefix; forwarded to LoadAsciiAttributes and
  %                   used as the parent of the debug output directory.
  %     dataFileName  File name; forwarded to LoadAsciiAttributes and used as
  %                   the base name of the debug output files.
  %     numNodes      Forwarded unchanged to LoadAsciiAttributes
  %                   (presumably the number of rows to load - confirm there).
  %     numThreshold  (optional, default 0) Coding of the game columns 2..11:
  %                     1     -> 1 if value < gamesTh1, otherwise 2
  %                     2     -> 1 unknown (raw 0), 2 low (< gamesTh1),
  %                              3 medium (< gamesTh2), 4 high
  %                     other -> 1 for every value that is present
  %                   In all modes a raw value of -1 is coded as 0.
  %     selectedAttr  (optional, default ones(1,11)) Vector with one entry per
  %                   raw column; entries equal to 1 select that column.
  %     debug         (optional, default 0) When 1, the result is saved as
  %                   .mat and .txt under [dataFilePath debugPath].
  %     debugPath     (optional, default 'debug/') Debug sub-directory.
  %
  %   Outputs
  %     outAttributes m-by-nnz(selectedAttr) matrix of category codes, one row
  %                   per loaded row. [] when fewer than three arguments are
  %                   given; all zeros when the loaded column count is wrong.
  %     attUpperRange Row vector with the maximum code of every output column.
  %                   [] on either error path.

  expectedAttrCols = 11;
  noneValue = 0;
  countryCol = 1;
  countryOffset = 129; % min value is -128 => real value starting from 1
  %countryUnknown = -999;
  countryCompress = 1;
  gamesCol = 2:11;
  gamesTh1 = [20 10 10 10 10 10 10 10 10 10];
  gamesTh2 = [80 40 40 40 40 40 40 40 40 40];
  inGamesNone = -1;    % map to 0
  inGamesUnknown = 0;  % map to 1
  gamesUnknown = 1;
  gamesLow = 2;
  gamesMedium = 3;
  gamesHigh = 4;

  % Assign both outputs up front so the error paths below return cleanly
  % instead of raising "output argument not assigned" after the message.
  outAttributes = [];
  attUpperRange = [];

  if nargin < 3
      fprintf('PrepareAttributes - Invalid parameters. expecting: dataFilePath, dataFileName, numNodes\n');
      return;
  end

  % Defaults for the optional trailing arguments.
  if nargin < 4
      numThreshold = 0;
  end
  if nargin < 5
      selectedAttr = ones(1, expectedAttrCols);
  end
  if nargin < 6
      debug = 0;
  end
  if nargin < 7
      debugPath = 'debug/';
  end

  outFullPath = strcat(dataFilePath, debugPath);
  outFullName = strcat(outFullPath, dataFileName);
  if debug == 1 && isdir(outFullPath) == 0
      mkdir(outFullPath);
  end

  attributes = LoadAsciiAttributes(dataFilePath, dataFileName, numNodes, debug, debugPath);
  m = size(attributes, 1); % num nodes/lines
  n = size(attributes, 2); % num attributes/cols

  % One output column per selected attribute. Allocated before the column
  % check so a single-output caller still receives the all-zero matrix on a
  % column-count mismatch, as before.
  outAttributes = zeros(m, nnz(selectedAttr));

  if n ~= expectedAttrCols
      fprintf('PrepareAttributes - Invalid attributes count: expecting %d, got %d\n',expectedAttrCols,n);
      return;
  end

  column = 0; % index of the last output column written so far

  % country (col=1): shift by the offset; anything still negative is "none"
  if selectedAttr(countryCol) == 1
      column = column + 1;
      country = attributes(:, countryCol) + countryOffset;
      country(country < 0) = noneValue;
      outAttributes(:, column) = country;
  end

  % games (cols 2..11)
  % Each column is coded in one pass. Masks are applied from lowest to
  % highest priority, so a later assignment overrides an earlier one; this
  % reproduces the order of an if/elseif chain evaluated per element.
  for inx = 1:numel(gamesCol)
      gCol = gamesCol(inx);
      if selectedAttr(gCol) ~= 1
          continue;
      end
      column = column + 1;
      values = attributes(:, gCol);

      if numThreshold == 1
          coded = 2 * ones(m, 1);
          coded(values < gamesTh1(inx)) = 1;
      elseif numThreshold == 2
          coded = gamesHigh * ones(m, 1);
          coded(values < gamesTh2(inx)) = gamesMedium;
          coded(values < gamesTh1(inx)) = gamesLow;
          coded(values == inGamesUnknown) = gamesUnknown;
      else % no threshold, i.e. binary value
          coded = ones(m, 1);
      end
      coded(values == inGamesNone) = noneValue; % "none" wins in every mode

      outAttributes(:, column) = coded;
  end

  % Renumber the country codes that actually occur to the dense range 1..K,
  % keeping their order and leaving 0 (none) untouched. Ranking through
  % unique() closes gaps of any width; a single "shift down by one" pass per
  % missing code leaves holes when several consecutive codes are absent.
  if countryCompress && selectedAttr(countryCol) == 1
      countryOutCol = 1; % country, when selected, is always the first output column
      codes = outAttributes(:, countryOutCol);
      present = codes > 0;
      if any(present)
          % Only the third output (rank of each element among the sorted
          % distinct values) is needed; the first two are discarded.
          [distinctCodes, firstIdx, rank] = unique(codes(present)); %#ok<ASGLU>
          codes(present) = rank;
          outAttributes(:, countryOutCol) = codes;
      end
  end

  % Per-column maximum. The explicit dimension keeps this a row vector with
  % one entry per column even when there is only a single row.
  attUpperRange = max(outAttributes, [], 1);
  %attLowRange = min(attributes);

  if debug == 1
      outFullName = sprintf('%s.att2%d', outFullName, numThreshold);
      save(strcat(outFullName,'.mat'), 'outAttributes');
      SaveIntMatrixToFile(strcat(outFullName,'.txt'), outAttributes);
  end