123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
% Map attributes to categories.
% Output coding: 0 for null/no value, then the real values starting from 1.
%
% Inputs:
%   dataFilePath     - folder prefix of the attribute file; debugPath is
%                      appended to it to build the debug output folder
%   dataFileName     - attribute file name (forwarded to LoadAsciiAttributes)
%   numNodes         - forwarded to LoadAsciiAttributes
%   expectedAttrCols - number of columns the loaded matrix must have
%   selectedAttr     - (optional) flags, one per input column; a column is
%                      exported when its flag equals 1. Default: all ones.
%   debug            - (optional) 1 = also dump the result to disk. Default: 0.
%   debugPath        - (optional) debug sub-folder. Default: 'debug/'.
%
% Outputs:
%   outAttributes - m-by-nnz(selectedAttr) matrix. Column 1 (when the country
%                   column is selected) holds the compacted country code,
%                   the remaining columns hold 0/1 flags.
%   attUpperRange - per-column maximum of outAttributes.
%   Both are [] when fewer than four arguments are supplied. When the loaded
%   matrix has an unexpected column count, outAttributes is all zeros and
%   attUpperRange is [].
function [outAttributes, attUpperRange] = PrepareAttributes(dataFilePath, dataFileName, numNodes, expectedAttrCols, selectedAttr, debug, debugPath)

    outNoneValue = 0;
    inNoneValue = -1;

    countryCol = 1;
    countryCompress = 1;
    countryOffset = 129;    % min value is -128 => real value starting from 1

    % Assign the outputs up front so that every exit path returns something
    % instead of raising "output argument not assigned".
    outAttributes = [];
    attUpperRange = [];

    % expectedAttrCols (4th argument) is mandatory: it is needed both for the
    % default selection and for the column-count validation below.
    if nargin < 4
        fprintf('PrepareAttributes - Invalid parameters. expecting at least: dataFilePath, dataFileName, numNodes, expectedAttrCols\n');
        return;
    end
    if nargin < 5
        selectedAttr = ones(1, expectedAttrCols);
    end
    if nargin < 6
        debug = 0;
    end
    if nargin < 7
        debugPath = 'debug/';
    end

    outFullPath = strcat(dataFilePath, debugPath);
    outFullName = strcat(outFullPath, dataFileName);

    if isdir(outFullPath) == 0 && debug == 1
        mkdir(outFullPath);
    end

    attributes = LoadAsciiAttributes(dataFilePath, dataFileName, numNodes, debug, debugPath);
    m = size(attributes, 1);    % num nodes/lines
    n = size(attributes, 2);    % num attributes/cols
    outAttributes = zeros(m, nnz(selectedAttr));    % num cols according to num selectedAttr

    if n ~= expectedAttrCols
        fprintf('PrepareAttributes - Invalid attributes count: expecting %d, got %d\n', expectedAttrCols, n);
        return;
    end

    % Selected columns are packed left to right; 'column' is the index of the
    % last output column written so far.
    column = 0;
    hasCountry = (selectedAttr(countryCol) == 1);

    if hasCountry
        % country (col=1): shift by the offset so that valid codes start at 1.
        % The cast keeps the addition from saturating if the loader ever
        % returns an integer-typed matrix.
        column = column + 1;
        countryValues = double(attributes(:, countryCol)) + countryOffset;
        countryValues(countryValues < 0) = outNoneValue;
        outAttributes(:, column) = countryValues;
    end

    % games/groups (all other columns): no threshold, i.e. binary value.
    gameCols = find(selectedAttr(2:expectedAttrCols) == 1) + 1;
    if ~isempty(gameCols)
        flags = repmat(outNoneValue, m, numel(gameCols));
        flags(attributes(:, gameCols) ~= inNoneValue) = 1;
        outAttributes(:, column + (1:numel(gameCols))) = flags;
    end

    % Remove empty country indexes and shift codes: every positive code is
    % replaced by its rank among the codes actually present, so the result is
    % dense (1..k) even when several consecutive codes are unused.
    % Zero (no value) is left untouched.
    if countryCompress && hasCountry
        codes = outAttributes(:, countryCol);
        used = codes > 0;
        if any(used)
            % Named dummy outputs instead of '~' to stay compatible with old releases.
            [unusedCodes, unusedFirst, ranks] = unique(codes(used));
            codes(used) = ranks;
        end
        outAttributes(:, countryCol) = codes;
    end

    % Explicit dimension: a single-row matrix must still yield one maximum
    % per column rather than one scalar for the whole row.
    attUpperRange = max(outAttributes, [], 1);

    if debug == 1
        % NOTE(review): the original code formatted an undefined variable
        % (numThreshold) here and therefore always failed in debug mode.
        % This variant applies no threshold, so 0 is used as the tag;
        % confirm the file name expected by downstream tools.
        thresholdTag = 0;
        outFullName = sprintf('%s.att2%d', outFullName, thresholdTag);
        save(strcat(outFullName, '.mat'), 'outAttributes');
        SaveIntMatrixToFile(strcat(outFullName, '.txt'), outAttributes);
    end
|