| LogMsg(sprintf('DumpDataset for %s%s iter %d/%d into %s%s.', dataFilePath, dataFileName, iter, i, outPath, outFile)); | LogMsg(sprintf('DumpDataset for %s%s iter %d/%d into %s%s.', dataFilePath, dataFileName, iter, i, outPath, outFile)); | ||||
| end | end | ||||
| %fprintf('Completed DumpDataset.\n'); | %fprintf('Completed DumpDataset.\n'); | ||||
| % cases we are using the GED as the main measure | % cases we are using the GED as the main measure | ||||
| fprintf('^%s', newPredictedGraph) | fprintf('^%s', newPredictedGraph) | ||||
| %remap the original data so that the known nodes match the | %remap the original data so that the known nodes match the | ||||
| %predicted data and the missing nodes match the predicted | %predicted data and the missing nodes match the predicted | ||||
| % nodes created from each cluster | % nodes created from each cluster | ||||
| small_data4 = newPredictedGraph; | small_data4 = newPredictedGraph; | ||||
| small_data4(indices_to_remove,:) = []; | small_data4(indices_to_remove,:) = []; | ||||
| small_data4(:,indices_to_remove) = []; | small_data4(:,indices_to_remove) = []; | ||||
| %%% Sigal 27.1.13 - save reduce graphs for GED | |||||
| Sigal 27.1.13 - save reduce graphs for GED | |||||
| if dumpSmallFlag == 1 | if dumpSmallFlag == 1 | ||||
| saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_calculation_type, withAttrWeight, num_missing_nodes, small_data, 1); | saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_calculation_type, withAttrWeight, num_missing_nodes, small_data, 1); | ||||
| saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_calculation_type, withAttrWeight, num_missing_nodes, small_data2, 2); | saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_calculation_type, withAttrWeight, num_missing_nodes, small_data2, 2); | ||||
| small_data2(:,indices_to_remove) = []; | small_data2(:,indices_to_remove) = []; | ||||
| %fprintf('&%s', small_data2) | %fprintf('&%s', small_data2) | ||||
| %fprintf('&&%s', small_data) | %fprintf('&&%s', small_data) | ||||
| %%% Sigal 27.1.13 - save reduce graphs for GED | |||||
| Sigal 27.1.13 - save reduce graphs for GED | |||||
| withAttrWeight = origWithAttrWeight+1000*(1-percent_known_placeholders)*10; | withAttrWeight = origWithAttrWeight+1000*(1-percent_known_placeholders)*10; | ||||
| if dumpSmallFlag == 1 | if dumpSmallFlag == 1 | ||||
| saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_calculation_type, withAttrWeight, num_missing_nodes, small_data, 1); | saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_calculation_type, withAttrWeight, num_missing_nodes, small_data, 1); | ||||
| % sigal - 29.10.13 | % sigal - 29.10.13 | ||||
| % calc only PHs affinity | % calc only PHs affinity | ||||
| function [affinity] = CalcPHsAffinity( data, affType, actual_graph_size, num_missing_nodes, num_attr_nodes, attWeight, addMissingAtt) | function [affinity] = CalcPHsAffinity( data, affType, actual_graph_size, num_missing_nodes, num_attr_nodes, attWeight, addMissingAtt) | ||||
| global affinity_calculation_shortest_path; | global affinity_calculation_shortest_path; | ||||
| global affinity_calculation_euclid; | global affinity_calculation_euclid; | ||||
| function saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_type, withAttr, missNodes, small_data, i) | function saveSmallData(dumpSmallDataPath, dataFileName, iter, affinity_type, withAttr, missNodes, small_data, i) | ||||
| %%% Sigal 24.1.13 - TODO | |||||
| Sigal 24.1.13 - TODO | |||||
| outFile = sprintf('%s_%d_%d_%d_%d_small_data_%d', dataFileName, iter, missNodes, affinity_type, withAttr, i); | outFile = sprintf('%s_%d_%d_%d_%d_small_data_%d', dataFileName, iter, missNodes, affinity_type, withAttr, i); | ||||
| if affinity_type == 9 % save instead a dummy size (1) and the best_alg | if affinity_type == 9 % save instead a dummy size (1) and the best_alg | ||||
| SaveIntMatrixToFile(strcat(dumpSmallDataPath, outFile,'_edges.txt'), small_data, 1); | SaveIntMatrixToFile(strcat(dumpSmallDataPath, outFile,'_edges.txt'), small_data, 1); | ||||
| end | end | ||||
| sumPurity = sum(crossPurity,2); | sumPurity = sum(crossPurity,2); | ||||
| end % function CalcSumPurity | end % function CalcSumPurity | ||||
| % Map attribute to categories | |||||
| % Use 0 for null/no value, 1 for unknown/private value (if exist) and then real values | |||||
| function [outAttributes, attUpperRange, selectedAttr, attStat] = PrepareAttributes5(dataFilePath, dataFileName, numNodes, expectedAttrCols, maxAttStat, inSelectedAttr, debug, debugPath) | |||||
| Map attribute to categories | |||||
| Use 0 for null/no value, 1 for unknown/private value (if exist) and then real values | |||||
| function [outAttributes, attUpperRange, selectedAttr, attStat] = PrepareAttributes5(dataFilePath, dataFileName, expectedAttrCols, maxAttStat, inSelectedAttr, debug, debugPath) | |||||
| outNoneValue = 0; | outNoneValue = 0; | ||||
| m = size(attributes,1); % num nodes/lines | m = size(attributes,1); % num nodes/lines | ||||
| n = size(attributes,2); % num attributes/cols | n = size(attributes,2); % num attributes/cols | ||||
| if n ~= expectedAttrCols || m ~= numNodes | |||||
| fprintf('PrepareAttributes - Invalid size: expecting (%dx%d), got (%dx%d)\n',numNodes,expectedAttrCols,m,n); | |||||
| if n ~= expectedAttrCols | |||||
| fprintf('PrepareAttributes - Invalid size: expecting (%d), got (%dx%d)\n',expectedAttrCols,m,n); | |||||
| return; | return; | ||||
| end | end | ||||
| for i=1:maxCountry | for i=1:maxCountry | ||||
| indices = (new_values==i); | indices = (new_values==i); | ||||
| count = sum(indices); | count = sum(indices); | ||||
| if count/numNodes > maxAttStat | |||||
| if count/m > maxAttStat | |||||
| new_values(indices) = 0; | new_values(indices) = 0; | ||||
| end | end | ||||
| end | end | ||||
| % calculate statistics and filter according to zero & maxAttStat | % calculate statistics and filter according to zero & maxAttStat | ||||
| attStat = zeros(1,n); | attStat = zeros(1,n); | ||||
| for a = 1:n | for a = 1:n | ||||
| attStat(a) = nnz(outAttributes(:,a))/numNodes; | |||||
| attStat(a) = nnz(outAttributes(:,a))/m; | |||||
| if attStat(a) == 0 | if attStat(a) == 0 | ||||
| selectedAttr(a)=0; | selectedAttr(a)=0; | ||||
| elseif selectedAttr(a)>0 && attStat(a) > maxAttStat && a>1 % don't filter country | elseif selectedAttr(a)>0 && attStat(a) > maxAttStat && a>1 % don't filter country |
| for i = missing_nodes_all_neighbors | for i = missing_nodes_all_neighbors | ||||
| neighbors = find(data(i,:)); | neighbors = find(data(i,:)); | ||||
| missing_neighbors = intersect(neighbors, missing_nodes_list); | missing_neighbors = intersect(neighbors, missing_nodes_list); | ||||
| missing_neighbors = sort(missing_neighbors, 'descend'); | missing_neighbors = sort(missing_neighbors, 'descend'); | ||||
| for curr_missing_neighbor = missing_neighbors | for curr_missing_neighbor = missing_neighbors | ||||
| if data(i,curr_missing_neighbor) == 1 | if data(i,curr_missing_neighbor) == 1 | ||||
| end %function RemoveRandomNodes3 | end %function RemoveRandomNodes3 | ||||
| %sigal - move old implementation to function | %sigal - move old implementation to function | ||||
function [missing_nodes] = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles)
%ChooseMissingNodes Randomly pick nodes to remove and pair each with an image profile.
%
%   Inputs:
%     num_nodes_to_remove   - number of additional nodes to choose
%     data                  - adjacency matrix; a non-zero entry is treated as an edge
%     attData               - node-by-attribute matrix (only read when
%                             totalAttNum > 0 and numAttPerPH > 0)
%     totalAttNum           - total number of attributes (0 disables the attribute filter)
%     numAttPerPH           - minimal number of attributes a candidate must have
%     missing_nodes_mapping - previously chosen nodes (row 1 = node index,
%                             row 2 = image profile), or [] when starting fresh
%     numImagesProfiles     - image profiles are drawn without replacement from 1..numImagesProfiles
%
%   Output:
%     missing_nodes - 2-by-K matrix: row 1 holds the chosen node indices,
%                     row 2 the image profile assigned to each of them.
%
%   A node is never chosen if it was already chosen, is adjacent to a chosen
%   node, has exactly one edge, or (when the attribute filter is active) has
%   fewer than numAttPerPH attributes.
%
%   Raises ChooseMissingNodes:noValidNodes / ChooseMissingNodes:noProfilesLeft
%   when the candidate nodes or the image profiles run out. The previous code
%   printed 'Full Graph' and then failed with an index-0 error.

    numNodes = size(data, 2);

    % blocked(k) == 1 means node k may not be chosen
    blocked = zeros(1, numNodes);

    if size(missing_nodes_mapping, 1) > 0
        %Sigal 23.1.14 - second row is the profile mapping
        if size(missing_nodes_mapping, 1) < 2
            error('ChooseMissingNodes:badMapping', ...
                'missing_nodes_mapping must have at least 2 rows (node; profile), got %d.', ...
                size(missing_nodes_mapping, 1));
        end
        missing_nodes = missing_nodes_mapping(1:2, :);
        % block every already-removed node together with all of its neighbors
        for curr_missing_node = missing_nodes(1, :)   % first row is the removed nodes
            blocked = blocked | data(curr_missing_node, :);
            blocked(1, curr_missing_node) = 1;
        end
    else
        missing_nodes = [];
    end

    % outlier1 - nodes with only one edge
    numEdges = sum(data, 1);
    invalidNodes1a = (numEdges == 1);
    blocked(1, invalidNodes1a) = 1;

    % outlier2 - nodes with less than numAttPerPH attributes
    % sigal 31.1.14 - support remove without attributes
    if totalAttNum > 0 && numAttPerPH > 0
        numAttr = sum(attData, 2)';
        invalidNodes2 = (numAttr < numAttPerPH);
        blocked(1, invalidNodes2) = 1;
    else
        invalidNodes2 = zeros(1, size(invalidNodes1a, 2));
    end

    % outlier statistics (warning only)
    count = nnz(invalidNodes1a | invalidNodes2);
    if count*1.5 > size(data, 2)
        fprintf('RemoveRandomNodes2: too many outliers nodes %d.\n', count);
    end

    %sigal - 23.1.14 - choose image profile
    % pool of profiles that are still free; used ones are taken out
    imagesProfiles = 1:1:numImagesProfiles;
    if size(missing_nodes, 1) > 0
        usedProfiles = missing_nodes(2, :);
        imagesProfiles(usedProfiles) = [];
    end

    for i = 1:num_nodes_to_remove
        valid_nodes = find(blocked ~= 1);
        if isempty(valid_nodes)
            error('ChooseMissingNodes:noValidNodes', ...
                'Full Graph: no valid node left to remove (chose %d of %d requested).', ...
                i - 1, num_nodes_to_remove);
        end
        if isempty(imagesProfiles)
            error('ChooseMissingNodes:noProfilesLeft', ...
                'No image profile left (numImagesProfiles = %d).', numImagesProfiles);
        end

        % same draw order as before: first the node, then the profile
        inx = ceil(rand(1) * size(valid_nodes, 2));
        node = valid_nodes(inx);

        %sigal - 23.1.14 - choose image profile
        profile = ceil(rand(1) * size(imagesProfiles, 2));
        newNode = [node; imagesProfiles(profile)];
        imagesProfiles(profile) = [];

        % add selected node to missing_nodes list and block it and its neighbors
        missing_nodes = [missing_nodes newNode]; %#ok<AGROW>
        blocked(1, node) = 1;
        blocked = blocked | data(node, :);
    end
end %ChooseMissingNodes
% sigal - append col & row for the placeholder
function [data] = ExpandDataByOne(data, friend, non_neighbors_distance)
%ExpandDataByOne Grow the matrix by one trailing column and one trailing row.
%   Every new entry is non_neighbors_distance, except the entries at index
%   'friend' (set to 1 in both the new column and the new row) and the new
%   corner entry, which is set to 0.

    % new trailing column: linked only to 'friend'
    extraCol = non_neighbors_distance * ones(size(data, 1), 1);
    extraCol(friend) = 1;
    data = [data, extraCol];

    % new trailing row, sized after the column was appended
    extraRow = non_neighbors_distance * ones(1, size(data, 2));
    extraRow(friend) = 1;
    data = [data; extraRow];

    % the placeholder has no link to itself
    data(end, end) = 0;
end %ExpandDataByOne
% sigal - append row for the placeholder
function [attData] = ExpandAttByOne(attData, orgNode, non_neighbors_distance, totalAttNum, numAttPerPH)
%ExpandAttByOne Append one attribute row for a new placeholder node.
%
%   Inputs:
%     attData                - node-by-attribute matrix
%     orgNode                - row index of the node whose attributes are sampled
%     non_neighbors_distance - fill value for attributes the placeholder does not get
%     totalAttNum            - total number of attributes (0 disables copying)
%     numAttPerPH            - maximal number of attributes copied (0 disables copying)
%
%   Output:
%     attData - input with one extra row. When copying is enabled, at most
%               numAttPerPH of the columns where attData(orgNode,:) == 1 are
%               kept (dropped at random) and set to 1 in the new row; all
%               other entries of the new row equal non_neighbors_distance.

    if totalAttNum > 0 && numAttPerPH > 0
        attIndices = find(attData(orgNode, :) == 1);
        % randomly drop attributes until at most numAttPerPH are left
        while size(attIndices, 2) > numAttPerPH
            inx = ceil(rand(1) * size(attIndices, 2));
            attIndices(:, inx) = [];
        end
    else
        attIndices = [];
    end

    new_row = ones(1, size(attData, 2)) * non_neighbors_distance;
    % FIX: mark the selected attribute columns themselves. The previous loop
    % did new_row(i)=1, which set the first numel(attIndices) columns instead.
    new_row(attIndices) = 1;

    attData = [attData; new_row];
end %ExpandAttByOne
function [ data, attData, missing_nodes_mapping ] = RemoveRandomNodesWithImages( data, attData, totalAttNum, num_missing_nodes, missing_nodes_mapping, numImagesProfiles, non_neighbors_distance, missingNodesInput )
%RemoveRandomNodesWithImages Remove num_missing_nodes nodes from data.
%   If some nodes are removed already, provide missing_nodes_mapping.
%
%   Inputs:
%     data                   - adjacency matrix (entry == 1 marks an edge to replace)
%     attData                - node-by-attribute matrix (only touched when totalAttNum > 0)
%     totalAttNum            - total number of attributes; 0 means "no attributes"
%     num_missing_nodes      - total number of nodes that should be missing
%     missing_nodes_mapping  - mapping from a previous call, or []
%     numImagesProfiles      - size of the image-profile pool (passed to ChooseMissingNodes)
%     non_neighbors_distance - optional fill value for non-edges of new placeholders (default 0)
%     missingNodesInput      - optional explicit nodes to remove; when given, no
%                              random choice is made and no placeholders are added
%
%   Outputs:
%     data, attData          - inputs with placeholders appended (random mode
%                              only) and the missing nodes' rows/columns removed
%     missing_nodes_mapping  - row 1: removed node, row 2: its image profile,
%                              rows 3+: indices (after removal) of the
%                              placeholders created for that node, 0-padded.
%                              Returned unchanged when missingNodesInput is used.

    if nargin < 7
        non_neighbors_distance = 0;
    end

    if nargin >= 8 % i.e. getting missingNodesInput
        missing_nodes = missingNodesInput;
        % FIX: missing_nodes_list used to be assigned only in the random
        % branch, so the removal loop below failed with an undefined variable
        % whenever missingNodesInput was supplied.
        % NOTE(review): assumes the first row of missingNodesInput holds the
        % node indices (same layout as ChooseMissingNodes output; a plain row
        % vector also works) - TODO confirm against callers.
        if isempty(missing_nodes)
            missing_nodes_list = zeros(1, 0);
        else
            missing_nodes_list = sort(missing_nodes(1,:), 'descend');
        end
    else
        numAttPerPH = 0;

        % if the mapping is larger than the number of nodes we want to remove, empty
        % it and start a new mapping. This can happen if we finished looping over
        % the number of missing nodes and started a new iteration of an outer loop.
        if size(missing_nodes_mapping,2) > num_missing_nodes
            missing_nodes_mapping = [];
            num_nodes_to_remove = num_missing_nodes;
        else
            num_nodes_to_remove = num_missing_nodes - size(missing_nodes_mapping,2);
        end

        % randomly choose missing nodes
        missing_nodes = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles);

        %Sigal 23.1.14 - missing_nodes is now matrix with two rows:
        % first the removed node and second the selected profile.
        % no need to call unique as validation is already done in ChooseMissingNodes
        missing_nodes_mapping = missing_nodes;
        missing_nodes_list = sort(missing_nodes(1,:),'descend');

        %replace each link to a missing node with a link to a new node
        %find all missing node neighbors
        missing_nodes_all_neighbors = zeros(1, size(data,2));
        for curr_missing_node = missing_nodes_list
            missing_nodes_all_neighbors = missing_nodes_all_neighbors | data(curr_missing_node,:);
        end
        missing_nodes_all_neighbors = find(missing_nodes_all_neighbors);

        %for each node in missing_nodes_all_neighbors add edges to placeholder
        for i = missing_nodes_all_neighbors
            neighbors = find(data(i,:));
            missing_neighbors = intersect(neighbors, missing_nodes_list);
            missing_neighbors = sort(missing_neighbors, 'descend');
            for curr_missing_neighbor = missing_neighbors
                if data(i,curr_missing_neighbor) == 1
                    % append col & row for the placeholder
                    data = ExpandDataByOne(data, i, non_neighbors_distance);
                    % sigal 31.1.14 - support remove without attributes
                    if totalAttNum > 0
                        attData = ExpandAttByOne(attData, curr_missing_neighbor, non_neighbors_distance, totalAttNum, numAttPerPH);
                    end

                    %add the new UNK node to the missing nodes mapping: j is the column of the
                    %missing node; look for the first zero in column j and put the new node
                    %index there
                    added_node = 0;
                    %sigal 23.1.14 - find index according to actual structure (not sorted)
                    j = find( missing_nodes_mapping(1,:) == curr_missing_neighbor, 1);
                    for k = 1 : size(missing_nodes_mapping,1)
                        if missing_nodes_mapping(k, j) == 0
                            %if we start with 1000 nodes, and we have 5 missing nodes, after
                            %adding one node at this point, the size of the graph is 1001. 5 nodes
                            %will be removed so the correct index of the new node will be 1001 - 5 = 996.
                            %The next one is 997 and so on.
                            missing_nodes_mapping(k, j) = size(data,1) - num_missing_nodes;
                            added_node = 1;
                            break;
                        end
                    end

                    %if all the column is non-zero, add a new row and put it there
                    if added_node == 0
                        missing_nodes_mapping = [missing_nodes_mapping; zeros(1, size(missing_nodes_mapping,2))]; %#ok<AGROW>
                        missing_nodes_mapping(size(missing_nodes_mapping,1), j) = size(data,1) - num_missing_nodes;
                    end
                end %if friend
            end %missing_neighbors
        end %missing_nodes_all_neighbors
    end % if getting missingNodesInput

    %remove the missing nodes from the matrix (missing nodes MUST be sorted in descending order!!
    %so that removing one does not affect the index of the others)
    for j = 1:size(missing_nodes_list,2)
        missing_node_idx = missing_nodes_list(j);
        %remove column
        data(:, missing_node_idx) = [];
        %remove row
        data(missing_node_idx, :) = [];
        % sigal 31.1.14 - support remove without attributes
        if totalAttNum > 0
            attData(missing_node_idx, :) = [];
        end
    end
end %function RemoveRandomNodesWithImages
| %sigal - move old implementation to function | |||||
| function [missing_nodes] = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles) | function [missing_nodes] = ChooseMissingNodes(num_nodes_to_remove, data, attData, totalAttNum, numAttPerPH, missing_nodes_mapping, numImagesProfiles) | ||||
| missing_nodes_all_neighbors = zeros(1, size(data,2)); | missing_nodes_all_neighbors = zeros(1, size(data,2)); | ||||
| missing_nodes_all_neighbors(1,invalidNodes1a) = 1; | missing_nodes_all_neighbors(1,invalidNodes1a) = 1; | ||||
| %invalidNodes1b = (numEdges>7); %% 6.13 (mem issues) use 7 | %invalidNodes1b = (numEdges>7); %% 6.13 (mem issues) use 7 | ||||
| %invalidNodes1b = (numEdges>15); %%25); %%(numEdges==1); %% sigal - 6.2.13 max=15 (sarit) | %invalidNodes1b = (numEdges>15); %%25); %%(numEdges==1); %% sigal - 6.2.13 max=15 (sarit) | ||||
| invalidNodes1b = (numEdges>8); %%15); %% sigal/sarit - 9.12.13 max=8 | |||||
| missing_nodes_all_neighbors(1,invalidNodes1b) = 1; | |||||
| %invalidNodes1b = (numEdges>8); %%15); %% sigal/sarit - 9.12.13 max=8 | |||||
| %missing_nodes_all_neighbors(1,invalidNodes1b) = 1; | |||||
| % outlier2 - nodes with less than numAttPerPH attributes | % outlier2 - nodes with less than numAttPerPH attributes | ||||
| % sigal 31.1.14 - support remove without attributes | % sigal 31.1.14 - support remove without attributes | ||||
| if totalAttNum > 0 && numAttPerPH > 0 | if totalAttNum > 0 && numAttPerPH > 0 | ||||
| invalidNodes2 = zeros(1,size(invalidNodes1a,2)); | invalidNodes2 = zeros(1,size(invalidNodes1a,2)); | ||||
| end | end | ||||
| % outlier statistics | % outlier statistics | ||||
| count = nnz(invalidNodes1a|invalidNodes1b|invalidNodes2); | |||||
| count = nnz(invalidNodes1a|invalidNodes2); | |||||
| if count*1.5 > size(data,2) | if count*1.5 > size(data,2) | ||||
| fprintf('RemoveRandomNodes2: too many outliers nodes %d.\n',count); | fprintf('RemoveRandomNodes2: too many outliers nodes %d.\n',count); | ||||
| end | end | ||||
| for i=1:num_nodes_to_remove | for i=1:num_nodes_to_remove | ||||
| valid_nodes = find(missing_nodes_all_neighbors~=1); | valid_nodes = find(missing_nodes_all_neighbors~=1); | ||||
| inx = ceil(rand(1)*size(valid_nodes,2)); | |||||
| if(size(valid_nodes,2) < 1) | |||||
| fprintf('Full Graph') | |||||
| end | |||||
| inx = ceil(rand(1)*size(valid_nodes,2)); | |||||
| node = valid_nodes(inx); | node = valid_nodes(inx); | ||||
| %sigal - 23.1.14 - choose image profile | %sigal - 23.1.14 - choose image profile | ||||
| profile = ceil(rand(1)*size(imagesProfiles,2)); | profile = ceil(rand(1)*size(imagesProfiles,2)); | ||||
| end %ExpandAttByOne | end %ExpandAttByOne | ||||
| function [] = RunExpSrv8s(iterStartStr, iterEndStr, netSizeStr, addAttPercStr) | |||||
| %function [] = RunExpSrv8s(iterStartStr, iterEndStr, netSizeStr, normF1, normF2, normF3, addAttPercStr) | |||||
| function [] = RunExpSrv8s(iterStartStr, iterEndStr, netSizeStr, normF1, normF2, normF3, addAttPercStr) | |||||
| function [] = RunExpSrv8s(iterStartStr, iterEndStr, addAttPercStr) | |||||
| %affinity calculation types | %affinity calculation types | ||||
| % affinity_calculation_shortest_path = 0; | % affinity_calculation_shortest_path = 0; | ||||
| % ds_100k = [20000 25000 50000 75000 100000]; | % ds_100k = [20000 25000 50000 75000 100000]; | ||||
| % ds_100km = [100001]; | % ds_100km = [100001]; | ||||
| % ds_Train = [2001 2049]; | % ds_Train = [2001 2049]; | ||||
| ds_GridTrain = [12]; | |||||
| ds_GridTrain = [45]; | |||||
| % input netSize | % input netSize | ||||
| netSize = str2num(netSizeStr); | |||||
| % if find(ds_10k==netSize) | % if find(ds_10k==netSize) | ||||
| % ds_str = 'Datasets_10K/'; | % ds_str = 'Datasets_10K/'; | ||||
| % num_missing_nodes_arr = [10 20 30 50 70 100];%[10 100 150]; | % num_missing_nodes_arr = [10 20 30 50 70 100];%[10 100 150]; | ||||
| % fprintf('RunExpSrv8s:Invalid netSize %d.\n',netSize); | % fprintf('RunExpSrv8s:Invalid netSize %d.\n',netSize); | ||||
| % return; | % return; | ||||
| % end | % end | ||||
| ds_str = 'graph_production/produced_graphs/'; | |||||
| num_missing_nodes_arr = [1 2 3]; | |||||
| ds_str = 'in/'; | |||||
| num_missing_nodes_arr = [2]; | |||||
| fprintf('RunExpSrv8s: netSize %d, dataset %s\n',netSize,ds_str); | |||||
| rootDir = '../'; | rootDir = '../'; | ||||
| %rootDir = '/Users/armin/Desktop/DML/projects/graphgenproj/'; %Facebook/'; | %rootDir = '/Users/armin/Desktop/DML/projects/graphgenproj/'; %Facebook/'; | ||||
| %rootDir = 'D:/__SN_Jan14_FF75/'; %Facebook/'; | %rootDir = 'D:/__SN_Jan14_FF75/'; %Facebook/'; | ||||
| filePrefix = 'testgraph_*'; % 'facebook_sparse_'; % | filePrefix = 'testgraph_*'; % 'facebook_sparse_'; % | ||||
| netSizes = netSize; %%[2048 4096 5000 8192 10000 16384 32768]; | |||||
| %Sigal - 13.2.14 - images data | %Sigal - 13.2.14 - images data | ||||
| imagesDir = strcat(rootDir,'Images/'); | imagesDir = strcat(rootDir,'Images/'); | ||||
| datasetDir = strcat(rootDir,ds_str); %'Datasets_10K/'); %'Facebook/Datasets_10K/'); %'Traing_16K/'); %% TODO sigal - change rootDir path before EXE build | datasetDir = strcat(rootDir,ds_str); %'Datasets_10K/'); %'Facebook/Datasets_10K/'); %'Traing_16K/'); %% TODO sigal - change rootDir path before EXE build | ||||
| factor_str = sprintf('F%d%d%d_',normFactorVec); | factor_str = sprintf('F%d%d%d_',normFactorVec); | ||||
| images_str = sprintf('I%dP%dM%d_',imgSimType,imgSimProbDiff*10,imgMissProb*10); | images_str = sprintf('I%dP%dM%d_',imgSimType,imgSimProbDiff*10,imgMissProb*10); | ||||
| results_dir = strcat(datasetDir,'testImg_noTh_',netSizeStr,'/',factor_str,images_str,'Iter_',iterStartStr,iterEndStr,'/'); | |||||
| results_dir = strcat(datasetDir,'testImg_noTh_','/',factor_str,images_str,'Iter_',iterStartStr,iterEndStr,'/'); | |||||
| fprintf('RunExpSrv8b: results_dir %s\n',results_dir); | fprintf('RunExpSrv8b: results_dir %s\n',results_dir); | ||||
| runAlgFlag = 1; | runAlgFlag = 1; | ||||
| % end | % end | ||||
| % LogMsg(sprintf('Select %d attributes out of %d ...', sum(attSelected), size(attSelected,2))); | % LogMsg(sprintf('Select %d attributes out of %d ...', sum(attSelected), size(attSelected,2))); | ||||
| for nodes = netSizes | |||||
| fprintf('----------!!!!%d---------', nodes); | |||||
| if size(strfind(filePrefix, 'facebook'),1)>0 | |||||
| prefix = sprintf('%s%s%d_%s',datasetDir,filePrefix,nodes,'*.mat'); % '0*.txt.mat'); | |||||
| else | |||||
| prefix = sprintf('%s%s%d_%s',datasetDir,filePrefix,nodes,'*.txt.mat'); % '0*.txt.mat'); | |||||
| end | |||||
| prefix = sprintf('%s%s_%s',datasetDir,filePrefix,'*.txt.mat'); % '0*.txt.mat'); | |||||
| files = dir(prefix); | files = dir(prefix); | ||||
| firstIter = 1; | firstIter = 1; | ||||
| end | end | ||||
| for i = 1:size(files,1) % loop over the list of networks | for i = 1:size(files,1) % loop over the list of networks | ||||
| try | |||||
| file = files(i).name; | |||||
| file = files(i).name; | |||||
| if size(strfind(filePrefix, 'facebook'),1)>0 | |||||
| LogMsg(sprintf('facebook netwrok, skipping attributes ...')); | |||||
| attributes = []; | |||||
| attUpperRange = []; | |||||
| else | |||||
| % sigal 12/6/13 - use binary attribute mat file | % sigal 12/6/13 - use binary attribute mat file | ||||
| attFile = strrep(file, '.txt.mat', '.usr.mat'); | attFile = strrep(file, '.txt.mat', '.usr.mat'); | ||||
| [attributes, attUpperRange, attSelected, attStat] = PrepareAttributes5(datasetDir, attFile, nodes, numAttrCols, maxAttStat, attSelected); | |||||
| [attributes, attUpperRange, attSelected, ~] = PrepareAttributes5(datasetDir, attFile, numAttrCols, maxAttStat, attSelected); | |||||
| LogMsg(sprintf('Select %d attributes out of %d ...', sum(attSelected), size(attSelected,2))); | LogMsg(sprintf('Select %d attributes out of %d ...', sum(attSelected), size(attSelected,2))); | ||||
| end | |||||
| if runAlgFlag == 1 | |||||
| date_now = clock; | |||||
| date_now = strcat(num2str(date_now(1)),'_',num2str(date_now(2)),'_', num2str(date_now(3)),'_', num2str(date_now(4)), num2str(date_now(5)),'_', num2str(date_now(6))); | |||||
| % make sure dump & results directories exist | |||||
| if (firstIter == 1 && i == 1) | |||||
| firstIter = 0; | |||||
| if isdir(results_dir) == 0 | |||||
| mkdir(results_dir); | |||||
| if runAlgFlag == 1 | |||||
| date_now = clock; | |||||
| date_now = strcat(num2str(date_now(1)),'_',num2str(date_now(2)),'_', num2str(date_now(3)),'_', num2str(date_now(4)), num2str(date_now(5)),'_', num2str(date_now(6))); | |||||
| % make sure dump & results directories exist | |||||
| if (firstIter == 1 && i == 1) | |||||
| firstIter = 0; | |||||
| if isdir(results_dir) == 0 | |||||
| mkdir(results_dir); | |||||
| end | |||||
| dumpFilePath = sprintf('%sdumpKronEM_%s/', results_dir, date_now); | |||||
| if (dumpKronEM == 1) | |||||
| mkdir(dumpFilePath); | |||||
| end | |||||
| dump_data_dir = sprintf('%sdumpData_%s/', results_dir, date_now); | |||||
| if dumpGED == 1 && isdir(dump_data_dir) == 0 | |||||
| mkdir(dump_data_dir) | |||||
| end | |||||
| end | end | ||||
| dumpFilePath = sprintf('%sdumpKronEM_%s/', results_dir, date_now); | |||||
| if (dumpKronEM == 1) | |||||
| mkdir(dumpFilePath); | |||||
| % run algorithm (file load is done internaly) | |||||
| [rand_score,purity,p_triads,missing_nodes_mapping,removed_nodes] = MissingNodes_S8b(datasetDir, file, ... | |||||
| attributes, attUpperRange, attWeight, addMissingAtt, normFactorVec, affinities, num_missing_nodes_arr, attAffinityThreshold, ... | |||||
| imagesData, numImagesProfiles, imgMissProb, imgSimType, imgSimProbDiff, percentKnownPHsVec, dumpGED, dump_data_dir, iter); | |||||
| %[rand_score,purity,p_triads,missing_nodes_mapping,removed_nodes] = MissingNodes_Sparse(datasetDir, file, affinities, 1); | |||||
| % dump graph data for KronEM runs | |||||
| if dumpKronEM == 1 | |||||
| DumpDataset(datasetDir, file, iter, removed_nodes, dumpFilePath); | |||||
| end | end | ||||
| dump_data_dir = sprintf('%sdumpData_%s/', results_dir, date_now); | |||||
| if dumpGED == 1 && isdir(dump_data_dir) == 0 | |||||
| mkdir(dump_data_dir) | |||||
| % save results | |||||
| out_file = sprintf('%sres_%s_%s.mat', results_dir, file, date_now); | |||||
| save(out_file); | |||||
| file_name = sprintf('%s_%s','../output/mine/',file); | |||||
| if(iter == 0) | |||||
| copyfile('../output/graphed_0.mat',file_name); | |||||
| end | end | ||||
| end | |||||
| % run algorithm (file load is done internaly) | |||||
| [rand_score,purity,p_triads,missing_nodes_mapping,removed_nodes] = MissingNodes_S8b(datasetDir, file, ... | |||||
| attributes, attUpperRange, attWeight, addMissingAtt, normFactorVec, affinities, num_missing_nodes_arr, attAffinityThreshold, ... | |||||
| imagesData, numImagesProfiles, imgMissProb, imgSimType, imgSimProbDiff, percentKnownPHsVec, dumpGED, dump_data_dir, iter); | |||||
| %[rand_score,purity,p_triads,missing_nodes_mapping,removed_nodes] = MissingNodes_Sparse(datasetDir, file, affinities, 1); | |||||
| % dump graph data for KronEM runs | |||||
| if dumpKronEM == 1 | |||||
| DumpDataset(datasetDir, file, iter, removed_nodes, dumpFilePath); | |||||
| end | |||||
| % save results | |||||
| out_file = sprintf('%sres_%s_%s.mat', results_dir, file, date_now); | |||||
| save(out_file); | |||||
| LogMsg(sprintf('Results for file %s,iter %d at %s',file,iter,out_file)); | |||||
| %fprintf('Completed RunExperiment cycle - results at %s.\n',out_file); | |||||
| LogMsg(sprintf('Results for file %s,iter %d at %s',file,iter,out_file)); | |||||
| %fprintf('Completed RunExperiment cycle - results at %s.\n',out_file); | |||||
| end | |||||
| catch | |||||
| fprintf('An Error Occured!!!!!') | |||||
| end | end | ||||
| % beep; | % beep; | ||||
| end | end | ||||
| end | |||||
| end | end | ||||
| date_now = clock; | date_now = clock; | ||||
| date_now = strcat(num2str(date_now(1)),'_',num2str(date_now(2)),'_', num2str(date_now(3)),'_', num2str(date_now(4)), num2str(date_now(5)),'_', num2str(date_now(6))); | date_now = strcat(num2str(date_now(1)),'_',num2str(date_now(2)),'_', num2str(date_now(3)),'_', num2str(date_now(4)), num2str(date_now(5)),'_', num2str(date_now(6))); | ||||
| LogMsg(sprintf('%s Completed RunExpSrv8s RunExperiment (random %d).',date_now,randSeed)); | LogMsg(sprintf('%s Completed RunExpSrv8s RunExperiment (random %d).',date_now,randSeed)); | ||||
| end | |||||
| end |
| % calc GED vs. the original graph | % calc GED vs. the original graph | ||||
| % end | % end | ||||
| %graphDist = []; | %graphDist = []; | ||||
| rootDir = '/Users/armin/Desktop/DML/projects/graphgenproj/graph_production/' ; | |||||
| rootDir = 'C:\Users\Iraj\Desktop\DML\' ; | |||||
| datasetDir = strcat(rootDir,'produced_graphs/'); %' '; % sigal 28.8.12 | datasetDir = strcat(rootDir,'produced_graphs/'); %' '; % sigal 28.8.12 | ||||
| resultsDir = strcat(rootDir,'results_ged/'); | resultsDir = strcat(rootDir,'results_ged/'); | ||||
| file1Iter = []; % networks options [1:1:10] - selection iter | file1Iter = []; % networks options [1:1:10] - selection iter |