% Map attribute to categories
% Use 0 for null/no value, 1 for unknown/private value (if exist) and then real values
%
% [outAttributes, attUpperRange] = PrepareAttributesTh(dataFilePath, dataFileName, numNodes, ...
%                                      numThreshold, selectedAttr, debug, debugPath)
%
% Inputs:
%   dataFilePath, dataFileName, numNodes
%                 - required; forwarded unchanged to LoadAsciiAttributes, which
%                   must return a matrix with 11 columns (1 country + 10 games).
%   numThreshold  - optional (default 0). Games columns are mapped as:
%                     1     : value <  th1 -> 1, otherwise 2
%                     2     : 0 -> 1 (unknown), < th1 -> 2 (low),
%                             < th2 -> 3 (medium), otherwise 4 (high)
%                     other : 1 (binary "has a value")
%                   In every mode an input of -1 maps to 0 (no value).
%   selectedAttr  - optional (default ones(1,11)). Column k of the input is
%                   emitted only when selectedAttr(k) == 1.
%   debug         - optional (default 0). When 1, the result is also saved as
%                   <dataFilePath><debugPath><dataFileName>.att2<numThreshold>
%                   with '.mat' and '.txt' extensions.
%   debugPath     - optional (default 'debug/'); appended to dataFilePath.
%
% Outputs:
%   outAttributes - one row per input row, one column per selected attribute.
%   attUpperRange - row vector with the per-column maximum of outAttributes.
%                   Empty when the inputs are rejected.
function [outAttributes, attUpperRange] = PrepareAttributesTh(dataFilePath, dataFileName, numNodes, numThreshold, selectedAttr, debug, debugPath)

expectedAttrCols = 11;
noneValue = 0;

countryCol = 1;
countryOffset = 129;    % min value is -128 => real value starting from 1
%countryUnknown = -999;
countryCompress = 1;

gamesCol = [2 3 4 5 6 7 8 9 10 11];
gamesTh1 = [20 10 10 10 10 10 10 10 10 10];
gamesTh2 = [80 40 40 40 40 40 40 40 40 40];

% Give both outputs a value up front so the error paths below return
% normally instead of raising "output argument not assigned".
outAttributes = [];
attUpperRange = [];

if nargin < 3
    fprintf('PrepareAttributes - Invalid parameters. expecting: dataFilePath, dataFileName, numNodes\n');
    return;
end

if nargin < 4
    numThreshold = 0;
end
if nargin < 5
    selectedAttr = ones(1, expectedAttrCols);
end
if nargin < 6
    debug = 0;
end
if nargin < 7
    debugPath = 'debug/';
end

outFullPath = strcat(dataFilePath, debugPath);
outFullName = strcat(outFullPath, dataFileName);
if isdir(outFullPath) == 0 && debug == 1
    mkdir(outFullPath);
end

attributes = LoadAsciiAttributes(dataFilePath, dataFileName, numNodes, debug, debugPath);
m = size(attributes, 1);   % num nodes/lines
n = size(attributes, 2);   % num attributes/cols
outAttributes = zeros(m, nnz(selectedAttr));   % num cols according to num selectedAttr

if n ~= expectedAttrCols
    fprintf('PrepareAttributes - Invalid attributes count: expecting %d, got %d\n', expectedAttrCols, n);
    return;
end

column = 0;

% country (col=1)
if selectedAttr(countryCol) == 1
    column = column + 1;
    % Cast to double before adding the offset: if the loader returns an
    % integer class (not visible here - TODO confirm) the addition would
    % otherwise saturate at that class's maximum.
    country = double(attributes(:, countryCol)) + countryOffset;
    country(country < 0) = noneValue;
    if countryCompress
        country = localDenseRank(country);
    end
    outAttributes(:, column) = country;
end

% games (cols 2..11)
for inx = 1:numel(gamesCol)
    gCol = gamesCol(inx);
    if selectedAttr(gCol) == 1
        column = column + 1;
        outAttributes(:, column) = localMapGames(attributes(:, gCol), ...
            numThreshold, gamesTh1(inx), gamesTh2(inx), noneValue);
    end
end

% Reduce along rows explicitly; plain max() would collapse a single-row
% result to one scalar instead of a per-column range.
attUpperRange = max(outAttributes, [], 1);
%attLowRange = min(attributes);

if debug == 1
    outFullName = sprintf('%s.att2%d', outFullName, numThreshold);
    save(strcat(outFullName, '.mat'), 'outAttributes');
    SaveIntMatrixToFile(strcat(outFullName, '.txt'), outAttributes);
end


% Map one games column to category codes (see numThreshold in the header).
function mapped = localMapGames(values, numThreshold, th1, th2, noneValue)
inGamesNone = -1;      % map to 0
inGamesUnknown = 0;    % map to 1
gamesUnknown = 1;
gamesLow = 2;
gamesMedium = 3;
gamesHigh = 4;

mapped = zeros(size(values));
% Assignments run from lowest to highest priority, so a later line
% overrides an earlier one: none > unknown > low > medium > high.
if numThreshold == 1
    mapped(:) = 2;
    mapped(values < th1) = 1;
elseif numThreshold == 2
    mapped(:) = gamesHigh;
    mapped(values < th2) = gamesMedium;
    mapped(values < th1) = gamesLow;
    mapped(values == inGamesUnknown) = gamesUnknown;
else
    % no threshold, i.e. binary value
    mapped(:) = 1;
end
mapped(values == inGamesNone) = noneValue;


% Replace every positive value by its rank among the distinct positive
% values (1..k, order preserved). Non-positive entries are left untouched.
% Unlike a single shift-down pass, this also closes runs of several
% consecutive unused codes.
function ranked = localDenseRank(values)
ranked = values;
isPositive = values > 0;
present = unique(values(isPositive));
[isFound, position] = ismember(values(isPositive), present); %#ok<ASGLU>
ranked(isPositive) = position;