123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- function [ data, missing_nodes_mapping ] = RemoveRandomNodes( data, num_missing_nodes, missing_nodes_mapping, non_neighbors_distance, missingNodesInput )
- %RemoveRandomNodes Remove num_missing_nodes from data. If some nodes are
- %removed already, provide missing_nodes_mapping
- % Detailed explanation goes here
- graph_size = size(data,1);
- max_distance = max(max(data));
-
- % if the mapping is larger than the number of nodes we want to remove, emty
- % it and start a new mapping. This can happen if we finished looping over
- % the number of missing nodes and started a new iteration of an outer loop.
- if size(missing_nodes_mapping,2) > num_missing_nodes
- missing_nodes_mapping = [];
- num_nodes_to_remove = num_missing_nodes;
- else
- num_nodes_to_remove = num_missing_nodes - size(missing_nodes_mapping,2);
- end
-
- cont = 0;
- while cont == 0
- cont = 1;
-
- if nargin < 5
- %randomize a list of nodes to remove and sort it
- if nargin == 2 || size(missing_nodes_mapping,1) == 0
- missing_nodes = sort(ceil(rand(1,num_nodes_to_remove).*graph_size), 'descend');
- else
- missing_nodes = sort( [missing_nodes_mapping(1,:) ceil(rand(1,num_nodes_to_remove).*graph_size)] , 'descend');
- end
- %check if there are doubles in the list
- sizeDiff = size(missing_nodes,2) - size(unique(missing_nodes),2);
- while sizeDiff > 0
- %selecting new nodes randomly until we have enough
- missing_nodes = sort( [unique(missing_nodes) ceil(rand(1,sizeDiff).*graph_size)] , 'descend');
- sizeDiff = size(missing_nodes,2) - size(unique(missing_nodes),2);
- %fprintf('selected duplicates...\n');
- %cont = 0;
- end
- else
- missing_nodes = missingNodesInput;
- end
-
- %check if we selected one of the nodes which is an
- %outlier, i.e. not connected to any other node
- %TODO: optimize
- %isOutlier = 1;
-
- if cont == 1
- for i = 1:size(missing_nodes,2)
- %check if all the values of this node in the adjacency matrix equal
- %the non_neighbors_distance (except for the diagonal of the matrix)
- %This should not happen if the graph is connected (was an
- %attempt to deal with a corner case)
- while sum(data(missing_nodes(i),:)) - data(missing_nodes(i),missing_nodes(i)) == non_neighbors_distance * (size(data,2) - 1)
- newNode = ceil(rand(1,sizeDiff).*graph_size);
- while ismember(newNode, missing_nodes)
- newNode = ceil(rand(1,sizeDiff).*graph_size);
- end
- missing_nodes(i) = newNode;
- %isOutlier = 1;
- %fprintf('selected outlier...\n');
- end
- % for j = 1:size(data, 2)
- % if data(missing_nodes(i),j) ~= non_neighbors_distance && missing_nodes(i) ~= j
- % isOutlier=0;
- % end
- % end
- end
- end
- missing_nodes = sort( unique(missing_nodes) , 'descend');
- % if isOutlier == 1
- % cont = 0;
- % end
- end
-
- %create a list of the new nodes that each missing node
- %is mapped to - each link to a missing node is replaced
- %by a link to a new, "UNK" node
- missing_nodes_mapping = missing_nodes;
-
- %replace each link to a missing node with a link to a new node
-
- missing_nodes_all_neighbors = zeros(1, size(data,2));
- for curr_nissing_node = missing_nodes
- missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_nissing_node,:);
- end
- missing_nodes_all_neighbors = find(missing_nodes_all_neighbors);
-
- i = 1;
- %while i <= size(data,1)
- for i = missing_nodes_all_neighbors
-
- neighbors = find(data(i,:));
- missing_neighbors = intersect(neighbors, missing_nodes);
- missing_neighbors = sort(missing_neighbors, 'descend');
- for curr_missing_neighbor = missing_neighbors
- %for j= 1:size(missing_nodes,2)
- %missing nodes is sorted descending
- %missing_node_idx = missing_nodes(j);
- if data(i,curr_missing_neighbor) == 1
- new_col = ones(size(data, 1), 1) * non_neighbors_distance;
- new_col(i) = 1;
- data = [data new_col];
- new_row = ones(1,size(data, 2)) * non_neighbors_distance;
- new_row(i) = 1;
- data = [data; new_row];
- data(size(data, 1), size(data,2)) = 0;
-
- %add the new UNK node to the missing nodes mapping
- %j is the index of the missing node
- %look for the first zero in column j of the
- %missing nodes mapping and put the new node
- %index there
- added_node = 0;
- %add it in the first position which equals zero
- j = find(missing_nodes == curr_missing_neighbor, 1);
- for k = 1 : size(missing_nodes_mapping,1)
- if missing_nodes_mapping(k, j) == 0
- %if we start with 1000 nodes, and
- %we have 5 missing nodes, after
- %adding one node at this point, the
- %size of the graph is 1001. 5 nodes
- %will be removed so the correct
- %index of the new node will be 1001 - 5 = 996.
- %The next one is 997 and so on.
- missing_nodes_mapping(k, j) = size(data,1) - num_missing_nodes;
- added_node = 1;
- break;
- end
- end
- %if all the column is non-zero, add a new row and put it there
- if added_node == 0
- missing_nodes_mapping = [missing_nodes_mapping; zeros(1, size(missing_nodes_mapping,2))];
- missing_nodes_mapping(size(missing_nodes_mapping,1), j) = size(data,1) - num_missing_nodes;
- end
- end
- end
- %i = i+1;
- end
-
-
- %remove the missing nodes from the matrix
- %(missing nodes should be sorted in descending order!!
- %so that removing one does not affect the index of the
- %others)
- for j = 1:size(missing_nodes,2)
- missing_node_idx = missing_nodes(j);
- % if missing_node_idx == 1
- % data = data(:, 2 : size(data,2));
- % data = data(2 : size(data,1), :);
- % else
- %remove column
- data(:, missing_node_idx) = [];
- %data = [data(:, 1:missing_node_idx-1) data(:,missing_node_idx+1 : size(data,2))];
- %remove row
- data(missing_node_idx, :) = [];
- %data = [data(1:missing_node_idx-1, :); data(missing_node_idx+1 : size(data,1), :)];
- % end
- end
-
- end
-
|