function [] = RunExpSrv8b(iterStartStr, iterEndStr, netSizeStr, addAttPercStr)
%RUNEXPSRV8B Run the missing-nodes experiment for one network size.
%
%   RunExpSrv8b(iterStartStr, iterEndStr, netSizeStr)
%   RunExpSrv8b(iterStartStr, iterEndStr, netSizeStr, addAttPercStr)
%
%   All arguments are character strings (the function is written to be
%   called from a command line / compiled EXE):
%     iterStartStr  - first iteration number (numeric text)
%     iterEndStr    - last iteration number (numeric text)
%     netSizeStr    - network size; must be one of the sizes listed in the
%                     ds_* tables below, which also select the dataset
%                     sub-directory and the missing-node counts
%     addAttPercStr - optional; value passed on as addMissingAtt, clamped
%                     to the range [0,1] (default 1)
%
%   For every dataset file matching the size, and for every iteration, the
%   function prepares attributes (PrepareAttributes5), runs
%   MissingNodes_S8b and saves the complete workspace to a result file
%   under results_dir. Progress is reported through LogMsg.
%
%   NOTE: save(out_file) is called without a variable list, so every
%   variable in this workspace ends up in the result file. Variable names
%   here are therefore part of the output format - do not rename or remove
%   them without checking the scripts that read the result files.

    % affinity calculation types
    % affinity_calculation_shortest_path = 0;
    % affinity_calculation_euclid = 1;
    % affinity_calculation_common_friends = 2;
    % affinity_calculation_random_clustering = 3;
    % affinity_calculation_adamic_adar = 4;
    % affinity_calculation_katz_beta_0_5 = 5;
    % affinity_calculation_katz_beta_0_05 = 6;
    % affinity_calculation_katz_beta_0_005 = 7;
    % affinity_calculation_boost = 9;   % sigal 12.3.13 add BOOST option

    expParams = 3;
    if nargin < expParams
        % The newline lives in the format string: escape sequences inside a
        % %s argument are printed literally by fprintf.
        usageStr = 'Usage: RunExpSrv8b iterStart iterEnd netSize [addAttPerc]';
        fprintf('RunExpSrv8b:Invalid usage: expected at least %d parameters.\n%s\n', expParams, usageStr);
        return;
    end

    % input_iter
    % str2double never evaluates its input (str2num does) and yields NaN
    % for non-numeric text, which is rejected explicitly here.
    iterStart = str2double(iterStartStr);
    iterEnd = str2double(iterEndStr);
    if isnan(iterStart) || isnan(iterEnd)
        fprintf('RunExpSrv8b:Invalid iteration range (iterStart and iterEnd must be numeric).\n');
        return;
    end

    % input_factors
    normFactorVec = [9 2 2];

    % input addAttPerc
    addAttPerc = 1; %[1 0.7];
    if nargin > expParams
        addAttPerc = str2double(addAttPercStr);
        if isnan(addAttPerc)
            fprintf('RunExpSrv8b:Invalid addAttPerc (must be numeric).\n');
            return;
        end
        % clamp to [0,1]
        addAttPerc = min(max(addAttPerc, 0), 1);
    end

    affinities = 2; %[2,4,3,9]; %,6]; %3,4];
    %num_missing_nodes_arr = [10 20 30 50 70 100]; %[11 21 31 41 50]; %[30 50 100 150 200];
    percentKnownPHsVec = 1;

    % Accepted network sizes, grouped by dataset directory. The "m" and
    % Train groups use size+1 as a marker: the real size is netSize-1 and a
    % different set of missing-node counts is used.
    ds_10k = [2000 5000 10000];
    ds_10km = [10001];
    ds_32k = [2048 4096 8192 16384 32768];
    ds_100k = [20000 25000 50000 75000 100000];
    ds_100km = [100001];
    ds_Train = [2001 2049];

    % input netSize
    % A non-numeric netSizeStr becomes NaN, matches no table and is
    % reported by the else branch below.
    netSize = str2double(netSizeStr);
    if any(ds_10k == netSize)
        ds_str = 'Datasets_10K/';
        num_missing_nodes_arr = [10 20 30 50 70 100]; %[10 100 150];
    elseif any(ds_10km == netSize)
        ds_str = 'Datasets_10K/';
        netSize = netSize-1;
        num_missing_nodes_arr = [50 100 150 200 250]; %[10 100 150];
    elseif any(ds_32k == netSize)
        ds_str = 'Datasets_32K/'; %'Train/';
        % NOTE(review): set explicitly so this branch never leaves
        % num_missing_nodes_arr undefined; values are the ones given on
        % this line in the original file - confirm they are the intended
        % counts for the 32K datasets.
        num_missing_nodes_arr = [11 21 31 41 50]; % 100]; %Train
    elseif any(ds_100k == netSize)
        ds_str = 'Datasets_100K/';
        num_missing_nodes_arr = [50 100 200 300 500];
    elseif any(ds_100km == netSize)
        ds_str = 'Datasets_100K/';
        netSize = netSize-1;
        num_missing_nodes_arr = [200 400 600 800 1000];
    elseif any(ds_Train == netSize)
        ds_str = 'Train/';
        netSize = netSize-1;
        num_missing_nodes_arr = [10 30 50 70 100 150]; %[11 21 31 41 50 100 150];
    else
        fprintf('RunExpSrv8b:Invalid netSize %d.\n',netSize);
        return;
    end

    fprintf('RunExpSrv8b: netSize %d, dataset %s\n',netSize,ds_str);

    %rootDir = 'C:/_SN_Jan14_FF75/'; %% TODO sigal - change rootDir path before EXE build
    rootDir = 'D:/__SN_Jan14_FF75/'; %Facebook/';
    filePrefix = 'Steam_*'; % 'facebook_sparse_';
    netSizes = netSize; %%[2048 4096 5000 8192 10000 16384 32768];

    %Sigal - 13.2.14 - images data
    imagesDir = strcat(rootDir,'Images/');
    imagesFile = 'ImagesMatchA.csv';
    imagesCount = 200000;
    imagesData = []; % LoadAsciiImagesMatch(imagesDir, imagesFile, imagesCount, debugFlag);
    numImagesProfiles = 200; % 50;
    imgMissProb = 0; %0.2; %%no images
    imgSimProbDiff = 0.1; %%0.2;
    imgSimType = 1; %% 0=realData, 1=rand(uniform distribution), 2=randn(normal distribution)

    datasetDir = strcat(rootDir,ds_str); %% TODO sigal - change rootDir path before EXE build
    factor_str = sprintf('F%d%d%d_',normFactorVec);
    images_str = sprintf('I%dP%dM%d_',imgSimType,imgSimProbDiff*10,imgMissProb*10);
    % The directory name uses the original netSizeStr (before any -1
    % adjustment) and joins iterStartStr and iterEndStr with no separator.
    results_dir = strcat(datasetDir,'testImg_noTh_',netSizeStr,'/',factor_str,images_str,'Iter_',iterStartStr,iterEndStr,'/');
    fprintf('RunExpSrv8b: results_dir %s\n',results_dir);

    runAlgFlag = 1;
    debugFlag = 0;
    dumpKronEM = 0;
    dumpGED = 0;

    numThreshold = 0;
    maxAttStat = 1.35; %1.20; % population threshold - use this attribute only if it appears less than this percentage
    numAttrCols = 60; %21; %%50; %%11; %40;
    attSelected = ones(1,numAttrCols);
    attWeight = [0.3]; %[0.2 0.3 0.4 0.5 0.6 0.7 0.8];
    addMissingAtt = addAttPerc;
    attAffinityThreshold = 0.15; % noise threshold

    % skipAtt = [6 2 20 26 22 17 8 14 3 1]; % top 10 PercC
    % attSelected(skipAtt) = 0;

    date_now = clock;
    randSeed = round((date_now(5)+date_now(6)*11)*iterStart+31);
    % Draws and discards randSeed values, which advances the random stream;
    % it does not set a seed.
    rand(1,randSeed);
    date_now = formatTimestamp(date_now);
    LogMsg(sprintf('%s Start RunExpSrv8b RunExperiment (random %d, addMissingAtt %.2f)...',date_now,randSeed,addMissingAtt));

    % attSelected = zeros(1,numAttrCols);
    % % for i = [1:4,6:8,12:20,22,24,26:30] %% top 23 threshold
    % % for i = [1,3,17,14,8,26,22,18,6,16] %% top 5/10 sum
    % % for i = [1,14,3,8,22,26,17,19,18,20] %% top 5/10 one
    % for i = [1,3,17,14,18,8,6,16,26,10] %% top 5/10 two
    %     attSelected(i) = 1;
    % end
    % LogMsg(sprintf('Select %d attributes out of %d ...', sum(attSelected), size(attSelected,2)));

    for nodes = netSizes
        if ~isempty(strfind(filePrefix, 'facebook'))
            prefix = sprintf('%s%s%d_%s',datasetDir,filePrefix,nodes,'*.mat'); % '0*.txt.mat');
        else
            prefix = sprintf('%s%s%d_%s',datasetDir,filePrefix,nodes,'*.txt.mat'); % '0*.txt.mat');
        end

        files = dir(prefix);
        if isempty(files)
            % Report once and move on instead of repeating the message for
            % every iteration.
            fprintf('*** ERROR: RunExpSrv8b no file were found (prefix %s)\n',prefix);
            continue;
        end

        firstIter = 1;
        for iter = iterStart:iterEnd % loop over same network with different missing nodes
            for i = 1:size(files,1) % loop over the list of networks
                file = files(i).name;

                if ~isempty(strfind(filePrefix, 'facebook'))
                    LogMsg(sprintf('facebook netwrok, skipping attributes ...'));
                    attributes = [];
                    attUpperRange = [];
                else
                    % sigal 12/6/13 - use binary attribute mat file
                    attFile = strrep(file, '.txt.mat', '.usr.mat');
                    % NOTE(review): attSelected is both an input and an
                    % output here, so the selection returned for one file
                    % is fed into the next call - confirm this carry-over
                    % is intended.
                    [attributes, attUpperRange, attSelected, attStat] = PrepareAttributes5(datasetDir, attFile, nodes, numAttrCols, maxAttStat, attSelected);
                    LogMsg(sprintf('Select %d attributes out of %d ...', sum(attSelected), size(attSelected,2)));
                end

                if runAlgFlag == 1
                    date_now = formatTimestamp(clock);

                    % make sure dump & results directories exist
                    % (done once per network size; the dump paths keep the
                    % timestamp of the first run)
                    if (firstIter == 1 && i == 1)
                        firstIter = 0;
                        if exist(results_dir, 'dir') ~= 7
                            mkdir(results_dir);
                        end
                        dumpFilePath = sprintf('%sdumpKronEM_%s/', results_dir, date_now);
                        if (dumpKronEM == 1)
                            mkdir(dumpFilePath);
                        end
                        dump_data_dir = sprintf('%sdumpData_%s/', results_dir, date_now);
                        if dumpGED == 1 && exist(dump_data_dir, 'dir') ~= 7
                            mkdir(dump_data_dir)
                        end
                    end

                    % run algorithm (file load is done internaly)
                    [rand_score,purity,p_triads,missing_nodes_mapping,removed_nodes] = MissingNodes_S8b(datasetDir, file, ...
                        attributes, attUpperRange, attWeight, addMissingAtt, normFactorVec, affinities, num_missing_nodes_arr, attAffinityThreshold, ...
                        imagesData, numImagesProfiles, imgMissProb, imgSimType, imgSimProbDiff, percentKnownPHsVec, dumpGED, dump_data_dir, iter);
                    %[rand_score,purity,p_triads,missing_nodes_mapping,removed_nodes] = MissingNodes_Sparse(datasetDir, file, affinities, 1);

                    % dump graph data for KronEM runs
                    if dumpKronEM == 1
                        DumpDataset(datasetDir, file, iter, removed_nodes, dumpFilePath);
                    end

                    % save results (the whole workspace - see header note)
                    out_file = sprintf('%sres_%s_%s.mat', results_dir, file, date_now);
                    save(out_file);
                    LogMsg(sprintf('Results for file %s,iter %d at %s',file,iter,out_file));
                    %fprintf('Completed RunExperiment cycle - results at %s.\n',out_file);
                end
                % beep;
            end
        end
    end

    date_now = formatTimestamp(clock);
    LogMsg(sprintf('%s Completed RunExpSrv8b RunExperiment (random %d).',date_now,randSeed));
end

function stamp = formatTimestamp(clockVec)
%FORMATTIMESTAMP Build the timestamp text used in log lines and file names.
%   clockVec is a date vector as returned by clock. The result is
%   year_month_day_<hour><minute>_second; hour and minute are joined with
%   no separator or zero padding, and the seconds field is whatever
%   num2str produces for the (fractional) seconds value.
    stamp = strcat(num2str(clockVec(1)),'_',num2str(clockVec(2)),'_', num2str(clockVec(3)),'_', ...
        num2str(clockVec(4)), num2str(clockVec(5)),'_', num2str(clockVec(6)));
end