123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
% CheckProperties (script)
%
% For every network file matching <datasetDir><filePrefix><nodes>_*.mat this
% script loads the adjacency matrix stored in the variable 'data', prepares
% the node attributes via PrepareAttributes2 and, for each attribute column,
% calls CalcOneAttributeSimilarity and counts how many edge entries of the
% returned matrix equal 1, 2 and 0. One tab-separated row per
% (file, attribute) is appended to
%   <results_dir>PropertiesStat_<timestamp>.txt
%
% External helpers used (defined elsewhere in the project):
%   LogMsg, PrepareAttributes2, CalcOneAttributeSimilarity
clear;
clc;

%affinity calculation types
% affinity_calculation_shortest_path = 0;
% affinity_calculation_euclid = 1;
% affinity_calculation_common_friends = 2;
% affinity_calculation_random_clustering = 3;
% affinity_calculation_adamic_adar = 4;
% affinity_calculation_katz_beta_0_5 = 5;
% affinity_calculation_katz_beta_0_05 = 6;
% affinity_calculation_katz_beta_0_005 = 7;

% NOTE(review): the next three settings are not referenced anywhere else in
% the visible script; they are kept because a script's variables live in
% the caller's workspace and other code may read them - confirm.
affinities = 3;              % alternatives tried: [2,3,4,6]
num_missing_nodes_arr = 11;  % alternatives tried: [11 21 31 41 50], 5:5:30, 10:10:50
percentKnownPHsVec = 1;

datasetDir  = 'D:/SocialNets/Steam/Exp_March13/Train/';
results_dir = [datasetDir 'Stat/'];
filePrefix  = 'Steam_*';
netSizes    = 10000;         % alternatives tried: 2048
runAlgFlag  = 1;
debugFlag   = 0;             % not referenced in the visible script
dumpFlag    = 0;             % not referenced in the visible script
numThreshold = 0;            % not referenced in the visible script
%%maxAttStat = 0.25; % use this attribute only if it appears less than this percentage
numAttrCols = 60;            % alternatives tried: 21, 40, 50
attWeight   = 0.3;           % not referenced in the visible script
addMissingAtt = 0;           % not referenced in the visible script

% Zero-padded timestamp. The previous hand-built string concatenated hour and
% minute without padding (1:23 and 12:03 both became '123') and embedded the
% fractional seconds, dot included, in the output file name.
timestamp = @() datestr(now, 'yyyy_mm_dd_HHMM_SS');

date_now = timestamp();
LogMsg(sprintf('%s Starting CheckProperties ...', date_now));

outFile = sprintf('%sPropertiesStat_%s.txt', results_dir, date_now);
fileID  = 0;   % 0 means "results file not opened yet"

try
    for nodes = netSizes

        prefix = sprintf('%s%s%d_%s', datasetDir, filePrefix, nodes, '*.mat');
        files  = dir(prefix);

        for iter = 1 % loop over same network with different missing nodes (was 1:2, 100)

            for i = 1:size(files,1) % loop over the list of networks

                file = files(i).name;

                fprintf('reading network information from file %s%s ...\n', datasetDir, file);
                loaded = load([datasetDir file], 'data');
                data   = sparse(loaded.data);

                % Depends only on the network, so compute once per file
                % rather than once per attribute.
                edgesInd = find(data);
                edges    = nnz(data);
                nonedges = nodes*nodes - nodes - edges;

                for maxAttStat = 1 % alternatives tried: [0.25 0.3 0.4 0.5 0.75 1]

                    attSelected = ones(1, numAttrCols);
                    %attSelected(1) = 0; %skip country
                    %skip all games
                    % for g=2:1:11
                    %     attSelected(g) = 0;
                    % end
                    %skip all groups
                    % for g=12:1:50
                    %     attSelected(g) = 0;
                    % end
                    attSelectedNum = nnz(attSelected);

                    attFile = strrep(file, '.txt.mat', '.usr.txt');
                    [attributes, attUpperRange, attSelected, attStat] = PrepareAttributes2(datasetDir, attFile, nodes, numAttrCols, maxAttStat, attSelected);

                    if runAlgFlag == 1

                        % Open the results file exactly once, on first use.
                        % The old per-network-size flag reopened it with 'w'
                        % for every entry of netSizes, truncating earlier
                        % rows and leaking the previous handle.
                        if fileID == 0
                            if exist(results_dir, 'dir') ~= 7
                                mkdir(results_dir);
                            end
                            fileID = fopen(outFile, 'w');
                            if fileID < 0
                                error('CheckProperties:openFailed', ...
                                      'Cannot open results file %s for writing.', outFile);
                            end
                            fprintf(fileID,'\tfile\ti\titer\tnodes\tedges\tnonedges\tmaxAttStat\tattSelectedNum\tnumAtt\tAttId\tp\tp^2\t(1-p)^2\t2p(1-p)');
                            fprintf(fileID,'\tedgesAttOne\tpercAttOne\tedgesAttZero\tpercAttZero\tedgesAttTwo\tpercAttTwo');
                            %fprintf(fileID,'\tnonedgesAttOne\tpercAttOne\tnonedgesAttZero\tpercAttZero\tnonedgesAttTwo\tpercAttTwo');
                            fprintf(fileID,'\n');
                        end

                        % attSelected as returned by PrepareAttributes2
                        numAtt = nnz(attSelected); %%size(attributes,2);

                        for attId = 1:numAttrCols
                            % Calc Attributes Affinity - similarity score
                            fprintf('calculating attributes similarity matrix for attId=%d ...\n', attId);
                            attSimilarity = CalcOneAttributeSimilarity(data, attributes, attId, 1);

                            % Row identification columns. Possibly fractional
                            % values use %.7g explicitly instead of relying on
                            % fprintf silently replacing %d with %e.
                            fprintf(fileID, '\t%s\t%d\t%d\t%d\t%d\t%d\t%.7g\t%d\t%d', ...
                                    file, i, iter, nodes, edges, nonedges, ...
                                    maxAttStat, attSelectedNum, numAtt);

                            % presumably attStat(attId) is the fraction of
                            % nodes having the attribute - TODO confirm
                            % against PrepareAttributes2
                            p = attStat(attId);
                            fprintf(fileID, '\t%d\t%.7g\t%.7g\t%.7g\t%.7g', ...
                                    attId, p, p*p, (1-p)*(1-p), 2*p*(1-p));

                            %Note - Sigal, replace 0 and 2 so we can have sparse matrix
                            % edges properties: "Two" counts stored value 0
                            % and "Zero" counts stored value 2, per the note
                            % above.
                            edgeSim      = attSimilarity(edgesInd);
                            edgesAttOne  = nnz(edgeSim == 1);
                            edgesAttTwo  = nnz(edgeSim == 0);
                            edgesAttZero = nnz(edgeSim == 2);

                            fprintf(fileID, '\t%d\t%.7g', edgesAttOne,  edgesAttOne/edges);
                            fprintf(fileID, '\t%d\t%.7g', edgesAttZero, edgesAttZero/edges);
                            fprintf(fileID, '\t%d\t%.7g', edgesAttTwo,  edgesAttTwo/edges);

                            % nonedges properties (disabled)
                            % nonedgesInd = find(data==0);
                            % nonedgesAttOne = size(find(attSimilarity(nonedgesInd)==1),1)-nodes;
                            % nonedgesAttTwo = size(find(attSimilarity(nonedgesInd)==0),1);
                            % nonedgesAttZero = size(find(attSimilarity(nonedgesInd)==2),1);
                            %
                            % fprintf(fileID,'\t%d\t%d',nonedgesAttOne,nonedgesAttOne/nonedges);
                            % fprintf(fileID,'\t%d\t%d',nonedgesAttZero,nonedgesAttZero/nonedges);
                            % fprintf(fileID,'\t%d\t%d',nonedgesAttTwo,nonedgesAttTwo/nonedges);

                            fprintf(fileID, '\n');
                        end
                    end
                end

                % beep;
            end
        end
    end
catch err
    % Do not leave the results file open when anything above fails.
    if fileID > 0
        fclose(fileID);
    end
    rethrow(err);
end

if fileID > 0
    fclose(fileID);
end

% Stamp the completion message with the time the run actually finished.
LogMsg(sprintf('%s Completed CheckProperties.', timestamp()));
|