You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RemoveRandomNodes.m 6.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. function [ data, missing_nodes_mapping ] = RemoveRandomNodes( data, num_missing_nodes, missing_nodes_mapping, non_neighbors_distance, missingNodesInput )
  2. %RemoveRandomNodes Remove num_missing_nodes from data. If some nodes are
  3. %removed already, provide missing_nodes_mapping
  4. % Detailed explanation goes here
  5. graph_size = size(data,1);
  6. max_distance = max(max(data));
  7. % if the mapping is larger than the number of nodes we want to remove, emty
  8. % it and start a new mapping. This can happen if we finished looping over
  9. % the number of missing nodes and started a new iteration of an outer loop.
  10. if size(missing_nodes_mapping,2) > num_missing_nodes
  11. missing_nodes_mapping = [];
  12. num_nodes_to_remove = num_missing_nodes;
  13. else
  14. num_nodes_to_remove = num_missing_nodes - size(missing_nodes_mapping,2);
  15. end
  16. cont = 0;
  17. while cont == 0
  18. cont = 1;
  19. if nargin < 5
  20. %randomize a list of nodes to remove and sort it
  21. if nargin == 2 || size(missing_nodes_mapping,1) == 0
  22. missing_nodes = sort(ceil(rand(1,num_nodes_to_remove).*graph_size), 'descend');
  23. else
  24. missing_nodes = sort( [missing_nodes_mapping(1,:) ceil(rand(1,num_nodes_to_remove).*graph_size)] , 'descend');
  25. end
  26. %check if there are doubles in the list
  27. sizeDiff = size(missing_nodes,2) - size(unique(missing_nodes),2);
  28. while sizeDiff > 0
  29. %selecting new nodes randomly until we have enough
  30. missing_nodes = sort( [unique(missing_nodes) ceil(rand(1,sizeDiff).*graph_size)] , 'descend');
  31. sizeDiff = size(missing_nodes,2) - size(unique(missing_nodes),2);
  32. %fprintf('selected duplicates...\n');
  33. %cont = 0;
  34. end
  35. else
  36. missing_nodes = missingNodesInput;
  37. end
  38. %check if we selected one of the nodes which is an
  39. %outlier, i.e. not connected to any other node
  40. %TODO: optimize
  41. %isOutlier = 1;
  42. if cont == 1
  43. for i = 1:size(missing_nodes,2)
  44. %check if all the values of this node in the adjacency matrix equal
  45. %the non_neighbors_distance (except for the diagonal of the matrix)
  46. %This should not happen if the graph is connected (was an
  47. %attempt to deal with a corner case)
  48. while sum(data(missing_nodes(i),:)) - data(missing_nodes(i),missing_nodes(i)) == non_neighbors_distance * (size(data,2) - 1)
  49. newNode = ceil(rand(1,sizeDiff).*graph_size);
  50. while ismember(newNode, missing_nodes)
  51. newNode = ceil(rand(1,sizeDiff).*graph_size);
  52. end
  53. missing_nodes(i) = newNode;
  54. %isOutlier = 1;
  55. %fprintf('selected outlier...\n');
  56. end
  57. % for j = 1:size(data, 2)
  58. % if data(missing_nodes(i),j) ~= non_neighbors_distance && missing_nodes(i) ~= j
  59. % isOutlier=0;
  60. % end
  61. % end
  62. end
  63. end
  64. missing_nodes = sort( unique(missing_nodes) , 'descend');
  65. % if isOutlier == 1
  66. % cont = 0;
  67. % end
  68. end
  69. %create a list of the new nodes that each missing node
  70. %is mapped to - each link to a missing node is replaced
  71. %by a link to a new, "UNK" node
  72. missing_nodes_mapping = missing_nodes;
  73. %replace each link to a missing node with a link to a new node
  74. missing_nodes_all_neighbors = zeros(1, size(data,2));
  75. for curr_nissing_node = missing_nodes
  76. missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_nissing_node,:);
  77. end
  78. missing_nodes_all_neighbors = find(missing_nodes_all_neighbors);
  79. i = 1;
  80. %while i <= size(data,1)
  81. for i = missing_nodes_all_neighbors
  82. neighbors = find(data(i,:));
  83. missing_neighbors = intersect(neighbors, missing_nodes);
  84. missing_neighbors = sort(missing_neighbors, 'descend');
  85. for curr_missing_neighbor = missing_neighbors
  86. %for j= 1:size(missing_nodes,2)
  87. %missing nodes is sorted descending
  88. %missing_node_idx = missing_nodes(j);
  89. if data(i,curr_missing_neighbor) == 1
  90. new_col = ones(size(data, 1), 1) * non_neighbors_distance;
  91. new_col(i) = 1;
  92. data = [data new_col];
  93. new_row = ones(1,size(data, 2)) * non_neighbors_distance;
  94. new_row(i) = 1;
  95. data = [data; new_row];
  96. data(size(data, 1), size(data,2)) = 0;
  97. %add the new UNK node to the missing nodes mapping
  98. %j is the index of the missing node
  99. %look for the first zero in column j of the
  100. %missing nodes mapping and put the new node
  101. %index there
  102. added_node = 0;
  103. %add it in the first position which equals zero
  104. j = find(missing_nodes == curr_missing_neighbor, 1);
  105. for k = 1 : size(missing_nodes_mapping,1)
  106. if missing_nodes_mapping(k, j) == 0
  107. %if we start with 1000 nodes, and
  108. %we have 5 missing nodes, after
  109. %adding one node at this point, the
  110. %size of the graph is 1001. 5 nodes
  111. %will be removed so the correct
  112. %index of the new node will be 1001 - 5 = 996.
  113. %The next one is 997 and so on.
  114. missing_nodes_mapping(k, j) = size(data,1) - num_missing_nodes;
  115. added_node = 1;
  116. break;
  117. end
  118. end
  119. %if all the column is non-zero, add a new row and put it there
  120. if added_node == 0
  121. missing_nodes_mapping = [missing_nodes_mapping; zeros(1, size(missing_nodes_mapping,2))];
  122. missing_nodes_mapping(size(missing_nodes_mapping,1), j) = size(data,1) - num_missing_nodes;
  123. end
  124. end
  125. end
  126. %i = i+1;
  127. end
  128. %remove the missing nodes from the matrix
  129. %(missing nodes should be sorted in descending order!!
  130. %so that removing one does not affect the index of the
  131. %others)
  132. for j = 1:size(missing_nodes,2)
  133. missing_node_idx = missing_nodes(j);
  134. % if missing_node_idx == 1
  135. % data = data(:, 2 : size(data,2));
  136. % data = data(2 : size(data,1), :);
  137. % else
  138. %remove column
  139. data(:, missing_node_idx) = [];
  140. %data = [data(:, 1:missing_node_idx-1) data(:,missing_node_idx+1 : size(data,2))];
  141. %remove row
  142. data(missing_node_idx, :) = [];
  143. %data = [data(1:missing_node_idx-1, :); data(missing_node_idx+1 : size(data,1), :)];
  144. % end
  145. end
  146. end