| @@ -204,209 +204,3 @@ end %ExpandAttByOne | |||
function [ data, attData, missing_nodes_mapping ] = RemoveRandomNodesWithImages( data, attData, totalAttNum, num_missing_nodes, missing_nodes_mapping, numImagesProfiles, non_neighbors_distance, missingNodesInput )
%RemoveRandomNodesWithImages Remove num_missing_nodes nodes from data.
%   If some nodes were chosen already, provide them in missing_nodes_mapping.
%
%   Inputs:
%     data                   - node-by-node matrix; an entry equal to 1 is
%                              treated as a link between the two nodes.
%     attData                - one row per node; the rows of removed nodes are
%                              deleted. Only touched when totalAttNum > 0.
%     totalAttNum            - number of attributes; 0 means "no attributes".
%     num_missing_nodes      - total number of nodes that end up removed.
%     missing_nodes_mapping  - row 1: removed node ids, row 2: image profile
%                              chosen for each node, rows 3..: indices of the
%                              placeholder nodes created for that node.
%     numImagesProfiles      - forwarded to ChooseMissingNodes.
%     non_neighbors_distance - fill value for the non-link entries of a new
%                              placeholder row/column (default 0).
%     missingNodesInput      - optional explicit nodes to remove; its first
%                              row holds the node ids.
%
%   Outputs:
%     data, attData          - inputs with placeholders appended (random
%                              branch only) and the missing nodes deleted.
%     missing_nodes_mapping  - updated mapping (random branch), or the input
%                              mapping unchanged when missingNodesInput is given.

    if nargin < 7
        non_neighbors_distance = 0;
    end

    if nargin >= 8 % i.e. getting missingNodesInput
        % Explicit node list: nothing is chosen at random and no placeholder
        % nodes are added; the nodes are only deleted below.
        % NOTE(review): missing_nodes_mapping is returned as received in this
        % branch - confirm that this is the contract callers expect.
        missing_nodes = missingNodesInput;
        if isempty(missing_nodes)
            missing_nodes_list = zeros(1, 0);
        else
            % Previously missing_nodes_list was left undefined here, so the
            % removal loop below failed. unique() guards against deleting the
            % same index twice, which would delete an unrelated node.
            missing_nodes_list = sort(unique(missing_nodes(1,:)), 'descend');
        end
    else
        numAttPerPH = 0;

        % If the mapping is larger than the number of nodes we want to remove,
        % empty it and start a new mapping. This can happen if we finished
        % looping over the number of missing nodes and started a new iteration
        % of an outer loop.
        if size(missing_nodes_mapping,2) > num_missing_nodes
            missing_nodes_mapping = [];
            num_nodes_to_remove = num_missing_nodes;
        else
            num_nodes_to_remove = num_missing_nodes - size(missing_nodes_mapping,2);
        end

        % Randomly choose the missing nodes. The result has two rows: the
        % removed node and the profile selected for it. Duplicates are already
        % excluded by ChooseMissingNodes, so no unique() is needed here.
        missing_nodes = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles);
        missing_nodes_mapping = missing_nodes;
        missing_nodes_list = sort(missing_nodes(1,:), 'descend');

        % Collect every node that has a non-zero entry in a missing node's row.
        missing_nodes_all_neighbors = zeros(1, size(data,2));
        for curr_missing_node = missing_nodes_list
            missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_missing_node,:);
        end
        missing_nodes_all_neighbors = find(missing_nodes_all_neighbors);

        % Each link to a missing node is replaced by a link to a new
        % placeholder ("UNK") node.
        for i = missing_nodes_all_neighbors
            neighbors = find(data(i,:));
            missing_neighbors = sort(intersect(neighbors, missing_nodes_list), 'descend');
            for curr_missing_neighbor = missing_neighbors
                if data(i,curr_missing_neighbor) == 1
                    % append col & row for the placeholder
                    data = ExpandDataByOne(data, i, non_neighbors_distance);
                    % support remove without attributes
                    if totalAttNum > 0
                        attData = ExpandAttByOne(attData, curr_missing_neighbor, non_neighbors_distance, totalAttNum, numAttPerPH);
                    end

                    % If we start with 1000 nodes and have 5 missing nodes,
                    % after adding one node the graph has 1001 nodes. 5 nodes
                    % will be removed, so the final index of the new node is
                    % 1001 - 5 = 996. The next one is 997 and so on.
                    new_node_idx = size(data,1) - num_missing_nodes;

                    % Column of this missing node in the (unsorted) mapping.
                    j = find(missing_nodes_mapping(1,:) == curr_missing_neighbor, 1);

                    % Store the placeholder in the first zero slot of column j;
                    % if the whole column is non-zero, add a new row for it.
                    k = find(missing_nodes_mapping(:, j) == 0, 1);
                    if isempty(k)
                        missing_nodes_mapping = [missing_nodes_mapping; zeros(1, size(missing_nodes_mapping,2))];
                        k = size(missing_nodes_mapping,1);
                    end
                    missing_nodes_mapping(k, j) = new_node_idx;
                end %if friend
            end %missing_neighbors
        end %missing_nodes_all_neighbors
    end % if getting missingNodesInput

    % Remove the missing nodes from the matrix. missing_nodes_list MUST be in
    % descending order so that removing one does not shift the index of the
    % others.
    for j = 1:size(missing_nodes_list,2)
        missing_node_idx = missing_nodes_list(j);
        data(:, missing_node_idx) = []; % remove column
        data(missing_node_idx, :) = []; % remove row
        % support remove without attributes
        if totalAttNum > 0
            attData(missing_node_idx, :) = [];
        end
    end
end %function RemoveRandomNodesWithImages
| %sigal - move old implementation to function | |||
function [missing_nodes] = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles)
%ChooseMissingNodes Randomly pick nodes to remove and an image profile for each.
%   Returns a 2-row matrix: row 1 holds the chosen node indices, row 2 the
%   image profile assigned to each node. Columns taken from rows 1:2 of
%   missing_nodes_mapping come first, followed by num_nodes_to_remove newly
%   chosen columns.
%
%   A node is not eligible when it
%     - is already chosen, or has a non-zero entry in a chosen node's row,
%     - has a column sum of exactly 1 in data (outlier with a single edge),
%     - has fewer than numAttPerPH attributes (only checked when both
%       totalAttNum and numAttPerPH are positive).
%   Profiles are drawn without replacement from 1:numImagesProfiles, excluding
%   the profiles already used in the mapping.
%
%   Raises ChooseMissingNodes:noValidNodes when no eligible node is left and
%   ChooseMissingNodes:noProfiles when the profiles are exhausted. Previously
%   both cases fell through to an index-0 subscript error.

    blocked = zeros(1, size(data,2));

    if size(missing_nodes_mapping,1) > 0
        % First row is the removed nodes, second row is the profile mapping.
        missing_nodes = missing_nodes_mapping(1:2,:);
        % Block the already chosen nodes together with their neighbors.
        for curr_missing_node = missing_nodes(1,:)
            blocked = blocked | data(curr_missing_node,:);
            blocked(1,curr_missing_node) = 1;
        end
    else
        missing_nodes = [];
    end

    % outlier1 - nodes with only one edge
    numEdges = sum(data,1);
    invalidNodes1a = (numEdges == 1);
    blocked(1,invalidNodes1a) = 1;

    % outlier2 - nodes with less than numAttPerPH attributes
    % (skipped entirely when running without attributes)
    if totalAttNum > 0 && numAttPerPH > 0
        numAttr = sum(attData,2)';
        invalidNodes2 = (numAttr < numAttPerPH);
        blocked(1,invalidNodes2) = 1;
    else
        invalidNodes2 = zeros(1,size(invalidNodes1a,2));
    end

    % outlier statistics
    count = nnz(invalidNodes1a | invalidNodes2);
    if count*1.5 > size(data,2)
        fprintf('RemoveRandomNodes2: too many outliers nodes %d.\n',count);
    end

    % Image profiles that are still free to be assigned.
    imagesProfiles = 1:1:numImagesProfiles;
    if size(missing_nodes,1) > 0
        usedProfiles = missing_nodes(2,:);
        imagesProfiles(usedProfiles) = [];
    end

    for i = 1:num_nodes_to_remove
        valid_nodes = find(blocked ~= 1);
        if size(valid_nodes,2) < 1
            error('ChooseMissingNodes:noValidNodes', ...
                'Full Graph: no valid node left to remove (%d of %d chosen).', ...
                i - 1, num_nodes_to_remove);
        end
        if size(imagesProfiles,2) < 1
            error('ChooseMissingNodes:noProfiles', ...
                'No image profile left to assign (%d of %d chosen).', ...
                i - 1, num_nodes_to_remove);
        end

        % The draws stay ceil(rand*n), node first and profile second, so that
        % seeded runs consume the random stream exactly as before.
        inx = ceil(rand(1)*size(valid_nodes,2));
        node = valid_nodes(inx);
        profile = ceil(rand(1)*size(imagesProfiles,2));
        newNode = [node; imagesProfiles(profile)];
        imagesProfiles(profile) = [];

        % Add the selected node to the result and block it and its neighbors.
        missing_nodes = [missing_nodes newNode];
        blocked(1,node) = 1;
        blocked = blocked | data(node,:);
    end
end %ChooseMissingNodes
| % sigal - append col & row for the placeholder | |||
function [data] = ExpandDataByOne(data, friend, non_neighbors_distance)
%ExpandDataByOne Grow data by one trailing column and one trailing row.
%   The appended column and row are filled with non_neighbors_distance,
%   except at position friend, which is set to 1 in both. The new
%   bottom-right corner entry is then set to 0.

    % Column first: its height is the current number of rows.
    extraCol = ones(size(data, 1), 1) * non_neighbors_distance;
    extraCol(friend) = 1;
    data = horzcat(data, extraCol);

    % Row second: its width already includes the column appended above.
    extraRow = ones(1, size(data, 2)) * non_neighbors_distance;
    extraRow(friend) = 1;
    data = vertcat(data, extraRow);

    % The corner entry (new node against itself) is always 0.
    data(end, end) = 0;
end %ExpandDataByOne
| % sigal - append row for the placeholder | |||
function [attData] = ExpandAttByOne(attData, orgNode, non_neighbors_distance, totalAttNum, numAttPerPH)
%ExpandAttByOne Append one attribute row for a placeholder node.
%   The new row is filled with non_neighbors_distance. When both totalAttNum
%   and numAttPerPH are positive, up to numAttPerPH of the attributes that
%   equal 1 in row orgNode of attData are copied (set to 1) into the new row;
%   if orgNode has more than numAttPerPH such attributes, random ones are
%   dropped until numAttPerPH remain.

    if totalAttNum > 0 && numAttPerPH > 0
        attIndices = find(attData(orgNode, :) == 1);
        % Randomly discard attributes until at most numAttPerPH are left.
        while size(attIndices,2) > numAttPerPH
            inx = ceil(rand(1)*size(attIndices,2));
            attIndices(:,inx) = [];
        end
    else
        attIndices = [];
    end

    new_row = ones(1,size(attData, 2)) * non_neighbors_distance;
    % Mark the selected attribute columns themselves. The previous loop set
    % new_row(i) for i = 1..numel(attIndices), i.e. always the leading columns.
    new_row(attIndices) = 1;
    attData = [attData; new_row];
end %ExpandAttByOne