|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function [ data, attData, missing_nodes_mapping ] = RemoveRandomNodesWithImages( data, attData, totalAttNum, num_missing_nodes, missing_nodes_mapping, numImagesProfiles, non_neighbors_distance, missingNodesInput )
%RemoveRandomNodesWithImages Remove nodes from data, leaving placeholder nodes.
%   Two modes, selected by the number of arguments:
%
%   * Without missingNodesInput: nodes are picked by ChooseMissingNodes
%     (each paired with an image profile). For every link (value 1) between
%     a remaining node and a chosen node, a new placeholder ("UNK") node is
%     appended to data (and to attData when totalAttNum > 0) and recorded
%     in missing_nodes_mapping. The chosen nodes are then deleted.
%   * With missingNodesInput: no placeholders are created and
%     missing_nodes_mapping is returned unchanged; the nodes listed in the
%     first row of missingNodesInput are simply deleted.
%
%   Inputs:
%     data                   - link matrix; data(i,j) == 1 is treated as a
%                              link (assumed square - TODO confirm).
%     attData                - attribute matrix, one row per node; only
%                              touched when totalAttNum > 0.
%     totalAttNum            - number of attributes; 0 disables attribute
%                              handling.
%     num_missing_nodes      - total number of nodes to be removed.
%     missing_nodes_mapping  - mapping from a previous call, or []. If it
%                              has more columns than num_missing_nodes it
%                              is discarded and rebuilt.
%     numImagesProfiles      - number of image profiles to draw from.
%     non_neighbors_distance - fill value for non-links of placeholder
%                              nodes (default 0).
%     missingNodesInput      - optional; first row lists the nodes to
%                              remove (same layout as the mapping's first
%                              row is assumed - TODO confirm with callers).
%
%   Outputs:
%     data, attData          - inputs with placeholders added and the
%                              missing nodes deleted.
%     missing_nodes_mapping  - row 1: removed node, row 2: its image
%                              profile, rows 3+: indices (after removal) of
%                              the placeholders created for that node,
%                              zero padded.

if nargin < 7
    non_neighbors_distance = 0;
end

if nargin >= 8 % i.e. getting missingNodesInput
    % The removal loop below needs missing_nodes_list; it must be derived
    % from the supplied nodes, otherwise this path fails on an undefined
    % variable.
    if isempty(missingNodesInput)
        missing_nodes_list = zeros(1, 0);
    else
        missing_nodes_list = sort(missingNodesInput(1,:), 'descend');
    end
else
    numAttPerPH = 0;

    % If the mapping is larger than the number of nodes we want to remove,
    % empty it and start a new mapping. This can happen if we finished
    % looping over the number of missing nodes and started a new iteration
    % of an outer loop.
    if size(missing_nodes_mapping,2) > num_missing_nodes
        missing_nodes_mapping = [];
        num_nodes_to_remove = num_missing_nodes;
    else
        num_nodes_to_remove = num_missing_nodes - size(missing_nodes_mapping,2);
    end

    % Randomly choose the missing nodes. The result has two rows: the
    % removed node and the image profile selected for it. Uniqueness is
    % already enforced by ChooseMissingNodes.
    missing_nodes = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles);

    missing_nodes_mapping = missing_nodes;
    missing_nodes_list = sort(missing_nodes(1,:), 'descend');

    % Collect every node linked to at least one missing node.
    missing_nodes_all_neighbors = zeros(1, size(data,2));
    for curr_missing_node = missing_nodes_list
        missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_missing_node,:);
    end
    missing_nodes_all_neighbors = find(missing_nodes_all_neighbors);

    % For each such neighbor, replace every link to a missing node with a
    % link to a new placeholder node.
    for i = missing_nodes_all_neighbors
        neighbors = find(data(i,:));
        missing_neighbors = sort(intersect(neighbors, missing_nodes_list), 'descend');

        for curr_missing_neighbor = missing_neighbors
            if data(i,curr_missing_neighbor) == 1
                % append col & row for the placeholder
                data = ExpandDataByOne(data, i, non_neighbors_distance);
                % sigal 31.1.14 - support remove without attributes
                if totalAttNum > 0
                    attData = ExpandAttByOne(attData, curr_missing_neighbor, non_neighbors_distance, totalAttNum, numAttPerPH);
                end

                % Index the placeholder will have once the missing nodes
                % are deleted: e.g. starting from 1000 nodes with 5 missing
                % nodes, the first placeholder makes the graph 1001 nodes,
                % so its final index is 1001 - 5 = 996, the next 997, ...
                placeholder_idx = size(data,1) - num_missing_nodes;

                % Column of this missing node in the (unsorted) mapping.
                j = find( missing_nodes_mapping(1,:) == curr_missing_neighbor, 1);

                % Store the placeholder in the first zero slot of column j;
                % if the column is full, add a new zero row first.
                k = find(missing_nodes_mapping(:, j) == 0, 1);
                if isempty(k)
                    missing_nodes_mapping = [missing_nodes_mapping; zeros(1, size(missing_nodes_mapping,2))]; %#ok<AGROW>
                    k = size(missing_nodes_mapping,1);
                end
                missing_nodes_mapping(k, j) = placeholder_idx;
            end %if friend
        end %missing_neighbors
    end %missing_nodes_all_neighbors
end % if getting missingNodesInput

% Remove the missing nodes from the matrix. missing_nodes_list MUST be in
% descending order so that removing one node does not shift the index of
% the ones still to be removed.
for j = 1:size(missing_nodes_list,2)
    missing_node_idx = missing_nodes_list(j);
    %remove column
    data(:, missing_node_idx) = [];
    %remove row
    data(missing_node_idx, :) = [];
    % sigal 31.1.14 - support remove without attributes
    if totalAttNum > 0
        attData(missing_node_idx, :) = [];
    end
end

end %function RemoveRandomNodesWithImages
|
|
|
|
|
|
|
|
|
|
|
|
%sigal - move old implementation to function
|
|
|
|
|
|
|
|
|
|
|
|
function [missing_nodes] = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles)
%ChooseMissingNodes Randomly pick nodes to remove and an image profile for each.
%   Nodes already present in missing_nodes_mapping are kept, and
%   num_nodes_to_remove further nodes are drawn at random. A node is not
%   eligible if it is already chosen, is linked to a chosen node, has
%   exactly one edge, or (when totalAttNum > 0 and numAttPerPH > 0) has
%   fewer than numAttPerPH attributes. Each new node gets a distinct image
%   profile from 1..numImagesProfiles that is not used in the mapping.
%
%   Inputs:
%     num_nodes_to_remove   - number of additional nodes to choose.
%     data                  - link matrix (nonzero entries count as links).
%     attData               - attribute matrix, one row per node.
%     totalAttNum           - number of attributes; 0 disables the check.
%     numAttPerPH           - minimum attributes per node; 0 disables it.
%     missing_nodes_mapping - existing mapping or []; row 1 holds removed
%                             nodes, row 2 their image profiles.
%     numImagesProfiles     - number of available image profiles.
%
%   Output:
%     missing_nodes - 2-by-N matrix: row 1 the chosen nodes, row 2 the
%                     image profile of each.
%
%   Raises:
%     ChooseMissingNodes:noValidNodes   - no eligible node is left.
%     ChooseMissingNodes:noFreeProfiles - no unused image profile is left.

% 1 marks a node that may not be chosen.
missing_nodes_all_neighbors = zeros(1, size(data,2));

% Start from the nodes already recorded in the mapping, if any.
if size(missing_nodes_mapping,1) > 0
    %Sigal 23.1.14 - second row is the profile mapping
    missing_nodes = missing_nodes_mapping(1:2,:);
    % Block the already-chosen nodes and all of their neighbors.
    for curr_missing_node = missing_nodes(1,:) % first row is the removed nodes
        missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_missing_node,:);
        missing_nodes_all_neighbors(1,curr_missing_node) = 1;
    end
else
    missing_nodes = [];
end

% outlier1 - nodes with only one edge
numEdges = sum(data,1);
invalidNodes1a = (numEdges==1);
missing_nodes_all_neighbors(1,invalidNodes1a) = 1;

% outlier2 - nodes with less than numAttPerPH attributes
% sigal 31.1.14 - support remove without attributes
if totalAttNum > 0 && numAttPerPH > 0
    numAttr = sum(attData,2)';
    invalidNodes2 = (numAttr<numAttPerPH);
    missing_nodes_all_neighbors(1,invalidNodes2) = 1;
else
    invalidNodes2 = zeros(1,size(invalidNodes1a,2));
end

% outlier statistics - warn when outliers exceed two thirds of the nodes
count = nnz(invalidNodes1a|invalidNodes2);
if count*1.5 > size(data,2)
    fprintf('RemoveRandomNodes2: too many outliers nodes %d.\n',count);
end

%sigal - 23.1.14 - choose image profile: drop the profiles already in use
imagesProfiles = 1:1:numImagesProfiles;
if size(missing_nodes,1) > 0
    usedProfiles = missing_nodes(2,:);
    imagesProfiles(usedProfiles) = [];
end

for i = 1:num_nodes_to_remove
    valid_nodes = find(missing_nodes_all_neighbors~=1);
    % Fail explicitly: continuing would index valid_nodes(0) below.
    if isempty(valid_nodes)
        error('ChooseMissingNodes:noValidNodes', ...
            'Full Graph: no valid node left to remove (chose %d of %d requested).', ...
            i - 1, num_nodes_to_remove);
    end
    % Same for profiles: an empty pool would index imagesProfiles(0).
    if isempty(imagesProfiles)
        error('ChooseMissingNodes:noFreeProfiles', ...
            'No unused image profile left (numImagesProfiles = %d).', ...
            numImagesProfiles);
    end

    % rand is in the open interval (0,1), so ceil(...) is a valid index.
    inx = ceil(rand(1)*size(valid_nodes,2));
    node = valid_nodes(inx);

    %sigal - 23.1.14 - choose image profile (without replacement)
    profile = ceil(rand(1)*size(imagesProfiles,2));
    newNode = [node; imagesProfiles(profile)];
    imagesProfiles(profile) = [];

    % add selected node to missing_nodes list and block it and its
    % neighbors from being chosen later
    missing_nodes = [missing_nodes newNode]; %#ok<AGROW>
    missing_nodes_all_neighbors(1,node) = 1;
    missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(node,:);
end

end %ChooseMissingNodes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
% sigal - append col & row for the placeholder
|
|
|
|
|
|
function [data] = ExpandDataByOne(data, friend, non_neighbors_distance)
%ExpandDataByOne Append one placeholder node (a column and a row) to data.
%   The new node is linked (value 1) only to node `friend`; every other
%   entry of the new column and row is non_neighbors_distance, and the new
%   diagonal entry is 0.

% New column: one entry per existing node.
placeholder_col = zeros(size(data, 1), 1) + non_neighbors_distance;
placeholder_col(friend) = 1;
widened = [data placeholder_col];

% New row: spans the widened matrix; its last entry is the placeholder's
% own diagonal cell, which is always 0.
placeholder_row = zeros(1, size(widened, 2)) + non_neighbors_distance;
placeholder_row(friend) = 1;
placeholder_row(end) = 0;

data = [widened; placeholder_row];
end %ExpandDataByOne
|
|
|
|
|
|
|
|
|
|
|
|
% sigal - append row for the placeholder
|
|
|
|
|
|
function [attData] = ExpandAttByOne(attData, orgNode, non_neighbors_distance, totalAttNum, numAttPerPH)
%ExpandAttByOne Append one attribute row for a new placeholder node.
%   When totalAttNum > 0 and numAttPerPH > 0, the placeholder inherits up
%   to numAttPerPH of the attributes that are set (== 1) for orgNode; if
%   orgNode has more, randomly chosen ones are dropped until numAttPerPH
%   remain. Otherwise the placeholder inherits no attributes. All other
%   entries of the new row are non_neighbors_distance.
%
%   Inputs:
%     attData                - attribute matrix, one row per node.
%     orgNode                - row index of the node the placeholder
%                              stands in for.
%     non_neighbors_distance - fill value for attributes not inherited.
%     totalAttNum            - number of attributes; 0 disables inheriting.
%     numAttPerPH            - maximum attributes per placeholder; 0
%                              disables inheriting.
%
%   Output:
%     attData - input with the placeholder's row appended.

if totalAttNum>0 && numAttPerPH>0
    attIndices = find(attData(orgNode, :)==1);
    % Randomly discard attributes until at most numAttPerPH remain.
    while size(attIndices,2) > numAttPerPH
        inx = ceil(rand(1)*size(attIndices,2));
        attIndices(:,inx) = [];
    end
else
    attIndices = [];
end

new_row = ones(1,size(attData, 2)) * non_neighbors_distance;
% Set the selected attribute columns themselves. Indexing by the loop
% counter would set the first numel(attIndices) columns instead.
new_row(attIndices) = 1;
attData = [attData; new_row];
end %ExpandAttByOne
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|