% PrepareAttributes  Map raw node attributes to small non-negative category codes.
%
%   [outAttributes, attUpperRange] = PrepareAttributes(dataFilePath, ...
%       dataFileName, numNodes, expectedAttrCols, selectedAttr, debug, debugPath)
%
%   Output coding: 0 is used for null/no value, 1 for unknown/private value
%   (if it exists in the data) and the following codes for real values.
%
%   Inputs:
%     dataFilePath     - folder of the data file (also the base of the debug folder).
%     dataFileName     - name of the attributes file, passed to LoadAsciiAttributes.
%     numNodes         - passed through to LoadAsciiAttributes.
%     expectedAttrCols - number of attribute columns the loaded matrix must have.
%     selectedAttr     - (optional) per-column flags; a column is emitted when its
%                        flag equals 1. Default: all columns selected.
%     debug            - (optional) 1 to dump the result to .mat/.txt files. Default 0.
%     debugPath        - (optional) sub-folder for debug output. Default 'debug/'.
%
%   Outputs:
%     outAttributes - m-by-nnz(selectedAttr) matrix. The country column (input
%                     column 1, when selected) holds the raw value plus 129,
%                     then compressed to dense codes 1..K; every other selected
%                     column is binary: 0 when the raw value is -1, 1 otherwise.
%     attUpperRange - row vector with the maximum of each output column.
%
%   On invalid parameters or an unexpected column count a message is printed
%   and both outputs are returned empty.
function [outAttributes, attUpperRange] = PrepareAttributes(dataFilePath, dataFileName, numNodes, expectedAttrCols, selectedAttr, debug, debugPath)
    outNoneValue    = 0;    % code written for "no value"
    inNoneValue     = -1;   % marker for "no value" in the raw games/groups columns
    countryCol      = 1;    % the country is stored in the first input column
    countryCompress = 1;    % 1 => re-number used country codes densely
    countryOffset   = 129;  % min value is -128 => real value starting from 1
    %countryUnknown = -999;

    % Give the outputs a defined value so that the error paths below do not
    % fail with "output argument not assigned" when the caller requests them.
    outAttributes = [];
    attUpperRange = [];

    % expectedAttrCols (4th argument) is used unconditionally, so it is required.
    if nargin < 4
        fprintf('PrepareAttributes - Invalid parameters. expecting: dataFilePath, dataFileName, numNodes, expectedAttrCols [, selectedAttr, debug, debugPath]\n');
        return;
    end
    if nargin < 5
        selectedAttr = ones(1, expectedAttrCols);
    end
    if nargin < 6
        debug = 0;
    end
    if nargin < 7
        debugPath = 'debug/';
    end

    outFullPath = strcat(dataFilePath, debugPath);
    outFullName = strcat(outFullPath, dataFileName);
    if isdir(outFullPath) == 0 && debug == 1
        mkdir(outFullPath);
    end

    attributes = LoadAsciiAttributes(dataFilePath, dataFileName, numNodes, debug, debugPath);
    m = size(attributes, 1);    % num nodes/lines
    n = size(attributes, 2);    % num attributes/cols

    if n ~= expectedAttrCols
        fprintf('PrepareAttributes - Invalid attributes count: expecting %d, got %d\n', expectedAttrCols, n);
        return;
    end

    % num cols according to num selectedAttr
    outAttributes = zeros(m, nnz(selectedAttr));
    column = 0;                 % last output column filled so far

    % country (col=1)
    hasCountry = selectedAttr(countryCol) == 1;
    if hasCountry
        column = column + 1;
        % Widen to double before adding the offset so that the addition cannot
        % saturate if the loader returns a narrow integer class.
        % NOTE(review): the class returned by LoadAsciiAttributes is not visible here.
        countryValues = double(attributes(:, countryCol)) + countryOffset;
        countryValues(countryValues < 0) = outNoneValue;
        outAttributes(:, column) = countryValues;
    end

    % games/groups (all other columns): no threshold, i.e. binary value
    otherCols = find(selectedAttr(2:expectedAttrCols) == 1) + 1;
    if ~isempty(otherCols)
        outCols = column + (1:numel(otherCols));
        outAttributes(:, outCols) = double(attributes(:, otherCols) ~= inNoneValue);
    end

    % remove empty country indexes and shift codes
    if countryCompress && hasCountry
        codes = outAttributes(:, countryCol);
        used = codes > 0;       % code 0 (no value) is left untouched
        if any(used)
            % Rank the distinct positive codes. Unlike shifting once per empty
            % index, this also closes runs of several consecutive unused codes.
            [~, ~, denseCode] = unique(codes(used));
            codes(used) = denseCode;
        end
        outAttributes(:, countryCol) = codes;
    end

    % Reduce along dimension 1 explicitly so that a single-node (1-by-k) matrix
    % still yields one maximum per column instead of one scalar.
    attUpperRange = max(outAttributes, [], 1);
    %attLowRange = min(attributes);

    if debug == 1
        % The original referenced an undefined variable numThreshold here, which
        % made every debug run fail. This variant applies no threshold, so 0 is used.
        % NOTE(review): confirm the file-name suffix expected by downstream tools.
        numThreshold = 0;
        outFullName = sprintf('%s.att2%d', outFullName, numThreshold);
        save(strcat(outFullName, '.mat'), 'outAttributes');
        SaveIntMatrixToFile(strcat(outFullName, '.txt'), outAttributes);
    end