You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

RemoveRandomNodesWithImages.m 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  function [ data, attData, missing_nodes_mapping ] = RemoveRandomNodesWithImages( data, attData, totalAttNum, num_missing_nodes, missing_nodes_mapping, numImagesProfiles, non_neighbors_distance, missingNodesInput )
  %RemoveRandomNodesWithImages Remove nodes from a graph, adding placeholder nodes for their links.
  %
  %   Two modes of operation:
  %
  %   1. missingNodesInput NOT given (fewer than 8 arguments):
  %      Nodes are chosen by ChooseMissingNodes until num_missing_nodes are
  %      selected (nodes already listed in missing_nodes_mapping are kept).
  %      For every link (value 1 in data) between a remaining node and a
  %      removed node, a new placeholder ("UNK") node is appended to data
  %      (and to attData when totalAttNum > 0), linked only to that
  %      remaining node. The chosen nodes are then deleted from data and
  %      attData.
  %
  %   2. missingNodesInput given (8 arguments):
  %      The nodes listed in the first row of missingNodesInput are deleted
  %      from data and attData. No placeholders are added and
  %      missing_nodes_mapping is returned unchanged.
  %      NOTE(review): assumes missingNodesInput uses the same layout as the
  %      mapping (node ids in row 1) - confirm against callers.
  %
  %   Inputs:
  %     data                   - adjacency matrix; data(i,j) == 1 marks a link.
  %     attData                - attribute matrix, one row per node; only
  %                              touched when totalAttNum > 0.
  %     totalAttNum            - number of attributes; 0 disables attribute handling.
  %     num_missing_nodes      - total number of nodes to remove.
  %     missing_nodes_mapping  - existing mapping, or [] when starting fresh.
  %     numImagesProfiles      - number of image profiles available for selection.
  %     non_neighbors_distance - fill value for non-links in placeholder
  %                              rows/columns (default 0).
  %     missingNodesInput      - optional explicit list of nodes to remove.
  %
  %   Outputs:
  %     data, attData          - matrices with placeholders appended and the
  %                              missing nodes removed.
  %     missing_nodes_mapping  - (mode 1) row 1: removed node ids, row 2: the
  %                              image profile selected for each node,
  %                              rows 3+: indices of the placeholder nodes
  %                              created for that node (zero padded), given in
  %                              the indexing that is valid AFTER the removal.

  if nargin < 7
      non_neighbors_distance = 0;
  end

  if nargin >= 8 % i.e. getting missingNodesInput
      missing_nodes = missingNodesInput;
      % Fix: the removal loop below needs missing_nodes_list in this branch
      % too; previously it was left undefined and the function errored.
      % unique() guards against a repeated id deleting an unrelated node.
      if isempty(missing_nodes)
          missing_nodes_list = zeros(1, 0);
      else
          missing_nodes_list = sort(unique(missing_nodes(1,:)), 'descend');
      end
  else
      numAttPerPH = 0;

      % If the mapping is larger than the number of nodes we want to remove,
      % empty it and start a new mapping. This can happen if we finished
      % looping over the number of missing nodes and started a new iteration
      % of an outer loop.
      if size(missing_nodes_mapping,2) > num_missing_nodes
          missing_nodes_mapping = [];
          num_nodes_to_remove = num_missing_nodes;
      else
          num_nodes_to_remove = num_missing_nodes - size(missing_nodes_mapping,2);
      end

      % Randomly choose the missing nodes. The result has two rows: the
      % removed node and the image profile selected for it. Uniqueness is
      % already guaranteed by ChooseMissingNodes.
      missing_nodes = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles);
      missing_nodes_mapping = missing_nodes;

      % Nothing selected (e.g. num_missing_nodes == 0) yields [], which
      % cannot be indexed by row.
      if isempty(missing_nodes)
          missing_nodes_list = zeros(1, 0);
      else
          missing_nodes_list = sort(missing_nodes(1,:), 'descend');
      end

      % Collect every node that is adjacent to at least one missing node.
      missing_nodes_all_neighbors = zeros(1, size(data,2));
      for curr_missing_node = missing_nodes_list
          missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_missing_node,:);
      end
      missing_nodes_all_neighbors = find(missing_nodes_all_neighbors);

      % For each such node, replace every link to a missing node with a link
      % to a fresh placeholder node.
      for i = missing_nodes_all_neighbors
          neighbors = find(data(i,:));
          missing_neighbors = intersect(neighbors, missing_nodes_list);
          missing_neighbors = sort(missing_neighbors, 'descend');
          for curr_missing_neighbor = missing_neighbors
              if data(i,curr_missing_neighbor) == 1
                  % append col & row for the placeholder
                  data = ExpandDataByOne(data, i, non_neighbors_distance);
                  % support remove without attributes
                  if totalAttNum > 0
                      attData = ExpandAttByOne(attData, curr_missing_neighbor, non_neighbors_distance, totalAttNum, numAttPerPH);
                  end

                  % Index the placeholder will have once the missing nodes
                  % are deleted. Example: starting with 1000 nodes and 5
                  % missing nodes, the first placeholder is node 1001 now and
                  % 1001 - 5 = 996 after removal; the next is 997, and so on.
                  placeholder_idx = size(data,1) - num_missing_nodes;

                  % Column of this missing node in the (unsorted) mapping.
                  j = find( missing_nodes_mapping(1,:) == curr_missing_neighbor, 1);

                  % Store the placeholder in the first zero cell of column j;
                  % if the column is full, grow the mapping by one row.
                  k = find(missing_nodes_mapping(:, j) == 0, 1);
                  if isempty(k)
                      missing_nodes_mapping = [missing_nodes_mapping; zeros(1, size(missing_nodes_mapping,2))]; %#ok<AGROW>
                      k = size(missing_nodes_mapping,1);
                  end
                  missing_nodes_mapping(k, j) = placeholder_idx;
              end % if friend
          end % missing_neighbors
      end % missing_nodes_all_neighbors
  end % if getting missingNodesInput

  % Remove the missing nodes from the matrices. The list MUST be in
  % descending order so that removing one node does not shift the index of
  % the ones still to be removed.
  for missing_node_idx = missing_nodes_list
      data(:, missing_node_idx) = []; % remove column
      data(missing_node_idx, :) = []; % remove row
      % support remove without attributes
      if totalAttNum > 0
          attData(missing_node_idx, :) = [];
      end
  end
  end % function RemoveRandomNodesWithImages
  95. %sigal - move old implementation to function
  function [missing_nodes] = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles)
  %ChooseMissingNodes Randomly pick nodes to remove and an image profile for each.
  %
  %   Returns a 2-by-N matrix: row 1 holds the selected node ids and row 2
  %   the image profile assigned to each node. Columns already present in
  %   the first two rows of missing_nodes_mapping are kept and
  %   num_nodes_to_remove new columns are appended.
  %
  %   A node is never selected when it
  %     - is already selected, or is adjacent to a selected node,
  %     - has exactly 1 edge or more than 8 edges (column sums of data),
  %     - has fewer than numAttPerPH attributes (only checked when both
  %       totalAttNum > 0 and numAttPerPH > 0).
  %   Each image profile (1..numImagesProfiles) is used at most once.
  %
  %   Raises:
  %     ChooseMissingNodes:noProfilesLeft - fewer unused image profiles than
  %                                         nodes requested.
  %     ChooseMissingNodes:noValidNodes   - no selectable node remains.

  % excluded(n) is true when node n may not be selected.
  excluded = false(1, size(data,2));

  if size(missing_nodes_mapping,1) > 0
      % first row is the removed nodes, second row is the profile mapping
      missing_nodes = missing_nodes_mapping(1:2,:);
      % exclude the already-selected nodes and all their neighbors
      for curr_missing_node = missing_nodes(1,:)
          excluded = excluded | data(curr_missing_node,:);
          excluded(1,curr_missing_node) = true;
      end
  else
      missing_nodes = [];
  end

  % outlier1 - nodes with only one edge, or with more than 8 edges
  numEdges = sum(data,1);
  invalidNodes1a = (numEdges == 1);
  invalidNodes1b = (numEdges > 8);
  excluded(1,invalidNodes1a) = true;
  excluded(1,invalidNodes1b) = true;

  % outlier2 - nodes with less than numAttPerPH attributes
  % (skipped when running without attributes)
  if totalAttNum > 0 && numAttPerPH > 0
      numAttr = sum(attData,2)';
      invalidNodes2 = (numAttr < numAttPerPH);
      excluded(1,invalidNodes2) = true;
  else
      invalidNodes2 = zeros(1,size(invalidNodes1a,2));
  end

  % outlier statistics
  count = nnz(invalidNodes1a|invalidNodes1b|invalidNodes2);
  if count*1.5 > size(data,2)
      fprintf('RemoveRandomNodes2: too many outliers nodes %d.\n',count);
  end

  % image profiles that are still free to assign
  imagesProfiles = 1:1:numImagesProfiles;
  if size(missing_nodes,1) > 0
      usedProfiles = missing_nodes(2,:);
      imagesProfiles(usedProfiles) = [];
  end

  % Fail early with a clear message instead of an index-0 subscript error.
  if num_nodes_to_remove > size(imagesProfiles,2)
      error('ChooseMissingNodes:noProfilesLeft', ...
          'Requested %d nodes but only %d unused image profiles remain.', ...
          num_nodes_to_remove, size(imagesProfiles,2));
  end

  for i = 1:num_nodes_to_remove
      valid_nodes = find(~excluded);
      if isempty(valid_nodes)
          error('ChooseMissingNodes:noValidNodes', ...
              'No valid node left to remove (selected %d of %d requested).', ...
              i - 1, num_nodes_to_remove);
      end

      % pick a node uniformly among the valid ones
      inx = ceil(rand(1)*size(valid_nodes,2));
      node = valid_nodes(inx);

      % pick an unused image profile uniformly and retire it
      profile = ceil(rand(1)*size(imagesProfiles,2));
      newNode = [node; imagesProfiles(profile)];
      imagesProfiles(profile) = [];

      % add selected node to the list and exclude it and its neighbors
      missing_nodes = [missing_nodes newNode]; %#ok<AGROW>
      excluded(1,node) = true;
      excluded = excluded | data(node,:);
  end
  end % ChooseMissingNodes
  152. % sigal - append col & row for the placeholder
  function [data] = ExpandDataByOne(data, friend, non_neighbors_distance)
  %ExpandDataByOne Append one placeholder node (a row and a column) to data.
  %   The new node is linked (value 1) only to the node "friend"; every
  %   other new entry is non_neighbors_distance and the new diagonal entry
  %   is 0.
  [rowCount, colCount] = size(data);

  % column of the placeholder, as seen from the existing nodes
  placeholderCol = repmat(non_neighbors_distance, rowCount, 1);
  placeholderCol(friend) = 1;

  % row of the placeholder, including its own (last) diagonal cell
  placeholderRow = repmat(non_neighbors_distance, 1, colCount + 1);
  placeholderRow(friend) = 1;
  placeholderRow(end) = 0;

  data = [data, placeholderCol; placeholderRow];
  end % ExpandDataByOne
  162. % sigal - append row for the placeholder
  function [attData] = ExpandAttByOne(attData, orgNode, non_neighbors_distance, totalAttNum, numAttPerPH)
  %ExpandAttByOne Append one attribute row for a placeholder node.
  %   When totalAttNum > 0 and numAttPerPH > 0, the new row copies up to
  %   numAttPerPH of the attributes set (== 1) on node orgNode; if orgNode
  %   has more than numAttPerPH attributes, randomly chosen ones are dropped
  %   until numAttPerPH remain. Otherwise no attribute is copied.
  %   All remaining cells of the new row are filled with
  %   non_neighbors_distance.
  if totalAttNum > 0 && numAttPerPH > 0
      attIndices = find(attData(orgNode, :) == 1);
      % randomly discard attributes until at most numAttPerPH are left
      while size(attIndices,2) > numAttPerPH
          inx = ceil(rand(1)*size(attIndices,2));
          attIndices(:,inx) = [];
      end
  else
      attIndices = [];
  end

  new_row = ones(1,size(attData, 2)) * non_neighbors_distance;
  % Fix: mark the selected attribute columns themselves. The previous loop
  % wrote to columns 1..numel(attIndices) regardless of which attributes
  % had been selected.
  new_row(attIndices) = 1;
  attData = [attData; new_row];
  end % ExpandAttByOne