|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
% PrepareAttributesTh  Map raw node attributes to small integer categories.
%
% Category convention: 0 for null/no value, 1 for unknown/private value
% (where such a value exists) and then the real values.
%
% Inputs:
%   dataFilePath  - folder of the data file; also the parent of the debug folder.
%   dataFileName  - name of the data file; also the base name of debug output.
%   numNodes      - forwarded unchanged to LoadAsciiAttributes.
%   numThreshold  - (optional, default 0) how the game columns are bucketed:
%                     1 -> 1 (below first threshold) or 2 (otherwise)
%                     2 -> 1 unknown, 2 low, 3 medium, 4 high
%                     anything else -> 1 for every value that is not "none"
%                   In every mode the raw value -1 ("none") maps to 0.
%   selectedAttr  - (optional, default all ones) vector with one entry per raw
%                   column (11 expected); a column is emitted when its entry == 1.
%   debug         - (optional, default 0) when 1, the result is also saved to
%                   '<dataFilePath><debugPath><dataFileName>.att2<numThreshold>'
%                   with '.mat' and '.txt' extensions.
%   debugPath     - (optional, default 'debug/') sub-folder for debug output.
%
% Outputs:
%   outAttributes - one row per loaded row, one column per selected attribute,
%                   in raw-column order.
%   attUpperRange - row vector with the maximum of each output column.
%
% On invalid input a message is printed and the outputs that could not be
% computed are returned empty.
function [outAttributes, attUpperRange] = PrepareAttributesTh(dataFilePath, dataFileName, numNodes, numThreshold, selectedAttr, debug, debugPath)

    % Define both outputs up front so the early-exit paths below do not raise
    % an "output argument not assigned" error on top of the printed message.
    outAttributes = [];
    attUpperRange = [];

    expectedAttrCols = 11;

    noneValue = 0;

    countryCol = 1;
    countryOffset = 129; % min value is -128 => real value starting from 1
    countryCompress = 1; % renumber the country codes that occur to 1..k

    gamesCol = 2:11;
    gamesTh1 = [20 10 10 10 10 10 10 10 10 10];
    gamesTh2 = [80 40 40 40 40 40 40 40 40 40];
    inGamesNone = -1;    % map to 0
    inGamesUnknown = 0;  % map to 1 (only when numThreshold == 2)
    gamesUnknown = 1;
    gamesLow = 2;
    gamesMedium = 3;
    gamesHigh = 4;

    if nargin < 3
        fprintf('PrepareAttributes - Invalid parameters. expecting: dataFilePath, dataFileName, numNodes\n');
        return;
    end

    % Defaults for the optional arguments.
    if nargin < 4
        numThreshold = 0;
    end
    if nargin < 5
        selectedAttr = ones(1, expectedAttrCols);
    end
    if nargin < 6
        debug = 0;
    end
    if nargin < 7
        debugPath = 'debug/';
    end

    outFullPath = strcat(dataFilePath, debugPath);
    outFullName = strcat(outFullPath, dataFileName);

    % The debug folder is only needed (and only created) when debugging.
    if debug == 1 && exist(outFullPath, 'dir') ~= 7
        mkdir(outFullPath);
    end

    attributes = LoadAsciiAttributes(dataFilePath, dataFileName, numNodes, debug, debugPath);
    m = size(attributes, 1); % num nodes/lines
    n = size(attributes, 2); % num attributes/cols
    outAttributes = zeros(m, nnz(selectedAttr)); % one column per selected attribute

    if n ~= expectedAttrCols
        fprintf('PrepareAttributes - Invalid attributes count: expecting %d, got %d\n', expectedAttrCols, n);
        return;
    end

    column = 0;          % index of the last output column written
    countryOutCol = 0;   % output column holding the country, 0 if not selected

    % country (col=1): shift so that real values start from 1
    if selectedAttr(countryCol) == 1
        column = column + 1;
        countryOutCol = column;
        shifted = attributes(:, countryCol) + countryOffset;
        shifted(shifted < 0) = noneValue;
        outAttributes(:, column) = shifted;
    end

    % games (cols 2..11): bucket each selected column by its thresholds
    for inx = 1:numel(gamesCol)
        gCol = gamesCol(inx);
        if selectedAttr(gCol) == 1
            column = column + 1;
            raw = attributes(:, gCol);

            if numThreshold == 1
                mapped = 2 * ones(m, 1);
                mapped(raw < gamesTh1(inx)) = 1;
            elseif numThreshold == 2
                % Later assignments take precedence, so they are applied from
                % the weakest rule (high) to the strongest (unknown).
                mapped = gamesHigh * ones(m, 1);
                mapped(raw < gamesTh2(inx)) = gamesMedium;
                mapped(raw < gamesTh1(inx)) = gamesLow;
                mapped(raw == inGamesUnknown) = gamesUnknown;
            else % no threshold, i.e. binary value
                mapped = ones(m, 1);
            end

            % "none" wins over every threshold rule.
            mapped(raw == inGamesNone) = noneValue;
            outAttributes(:, column) = mapped;
        end
    end

    % Renumber the country codes that actually occur to 1..k, keeping their
    % order and leaving noneValue untouched. Ranking through unique() closes
    % gaps of any width; closing one missing code per step leaves gaps of two
    % or more unused codes only partly closed.
    if countryCompress && countryOutCol > 0
        codes = outAttributes(:, countryOutCol);
        hasCountry = codes > noneValue;
        [~, ~, rank] = unique(codes(hasCountry));
        codes(hasCountry) = rank;
        outAttributes(:, countryOutCol) = codes;
    end

    % Explicit dimension: with a single row, max(outAttributes) would reduce
    % along the row and return one scalar instead of one value per column.
    attUpperRange = max(outAttributes, [], 1);

    if debug == 1
        outFullName = sprintf('%s.att2%d', outFullName, numThreshold);
        save(strcat(outFullName, '.mat'), 'outAttributes');
        SaveIntMatrixToFile(strcat(outFullName, '.txt'), outAttributes);
    end
end
|