123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
% Facebook_Crawl
%
% Crawls the anonymized Facebook MHRW social-graph file starting from a
% randomly chosen node, expanding nodes in FIFO (breadth-first) order, and
% saves a snapshot every time the number of collected adjacency entries hits
% one of the sizes in graph_sizes, plus a final snapshot per crawl.
%
% External helpers used (defined elsewhere in the project):
%   FindFacebookNeighbors_binary(fileName, node) - neighbor lookup; this
%       script treats a scalar -1 result as "nothing to add"
%       (presumably "node not found" - confirm against the helper).
%   BuildFacebookData(graph) - converts the collected cell array into the
%       structure stored in the snapshot.
%   LogMsg(msg) - logging.

clc;
clear all;

max_queue_size = 200000;        % hard cap on the number of pending nodes
max_facebook_node = 957359;     % neighbor ids above this value are discarded
%D:\SocialNets\Facebook\Data
fileName = 'd:\\SocialNets\\Facebook\\Data\\mhrw-socialgraph-anonymized.txt';
dataset_out_dir = 'd:\\SocialNets\\DataSets6\\Facebook_June13\\';

% exist(...,'dir') returns 7 for folders; unlike isdir it is not deprecated
% and is available in every MATLAB release.
if exist(dataset_out_dir, 'dir') ~= 7
    mkdir(dataset_out_dir);
end

%graph_size = 16384;
graph_size = 100000;            % stop a crawl once this many entries exist
graph_sizes = [2000,5000,10000,15000,20000,25000,30000,35000,40000,50000,60000,70000,75000,80000,90000]; %,100000];
debugFlag = 0;
numIter = 10; %0;
% Start nodes that were already used; new crawls must not reuse them.
startNodes = [779983 867168 121571 874428 605394 93381 266622 523561 916677 923744];
bfs_start_node = floor(rand(1) * max_facebook_node); %some node from the mhrw dataset

for iter = 4 : numIter-1

    % Re-draw until the start node differs from every previously used one.
    while any(startNodes == bfs_start_node)
        bfs_start_node = floor(rand(1) * max_facebook_node); %some node from the mhrw dataset
    end
    startNodes = [startNodes bfs_start_node];

    %bfs_start_node =21632297; %most connected node in the uni dataset
    %bfs_start_node =72261595;

    date_now = clock;
    date_now = strcat(num2str(date_now(1)),'_',num2str(date_now(2)),'_', num2str(date_now(3)),'_', num2str(date_now(4)), num2str(date_now(5)),'_', num2str(date_now(6)));
    LogMsg(sprintf('%s Start Facebook_Crawl (%d of %d) starting from node %d ...',date_now, iter+1, numIter, bfs_start_node));

    curr_node = bfs_start_node;
    queue = [];                 % pending nodes, FIFO, no duplicates
    developed_nodes = [];       % nodes whose neighbors were already requested
    graph = [];                 % becomes a cell array of neighbor lists
    curr_graph_size = 0;

    while curr_graph_size < graph_size
        %fprintf('Current Node: %d\n', curr_node);
        developed_nodes = [developed_nodes, curr_node];
        curr_neighbors = FindFacebookNeighbors_binary(fileName, curr_node);

        % Same truth value as the original "if curr_neighbors ~= -1":
        % MATLAB treats an array condition as true only when it is
        % non-empty and every element is non-zero.
        if ~isempty(curr_neighbors) && all(curr_neighbors(:) ~= -1)
            curr_neighbors = curr_neighbors(curr_neighbors <= max_facebook_node);
            for j = 1 : size(curr_neighbors,1)
                %queue = union(curr_neighbors(j,:), queue); %add the new nodes to the queue but make it exclusive (no repetitions)
                % Append only ids that are not queued yet.
                queue = [queue, setdiff(curr_neighbors(j,:),queue)];
                %queue = [queue, curr_neighbors(j,:)]; %BFS
                if size(queue,2) > max_queue_size
                    queue = queue(1:max_queue_size);
                end
                curr_graph_size = curr_graph_size+1;
                graph{curr_graph_size} = curr_neighbors;

                % save incremental networks
                % if curr_graph_size == 1024 || curr_graph_size==2048 || curr_graph_size==4096 || curr_graph_size==8192
                if any(graph_sizes == curr_graph_size)
                    data = BuildFacebookData(graph);
                    networkName = sprintf('%sfacebook_sparse_%d_%d_%d', dataset_out_dir,curr_graph_size,iter,bfs_start_node);
                    LogMsg(sprintf('Saving %s',networkName));
                    % NOTE(review): save without a variable list writes the
                    % whole workspace (graph, queue, developed_nodes, ...),
                    % not only data - confirm this is intended.
                    save(networkName);
                    clear data;
                end
            end
        end
        %queue = setdiff(queue, developed_nodes); %remove nodes that were already developed from the queue - also sorts the queue

        if debugFlag == 1
            fprintf('graph size: %d, queue size: %d\n' , size(graph,2), size(queue,2));
        end

        % Pop queued nodes until one that has not been developed is found.
        % The emptiness test must be isempty: after "queue(1) = []" an
        % exhausted queue is 1x0, so size(queue,1) is still 1 and the old
        % "size(queue, 1) == 0" check never fired, letting queue(1) raise
        % an index-out-of-bounds error.
        while ismember(curr_node, developed_nodes) && ~isempty(queue)
            curr_node = queue(1);
            queue(1) = [];
        end
        if ismember(curr_node, developed_nodes)
            % Queue ran dry without yielding an undeveloped node.
            fprintf('queue is empty');
            break;
        end
        %curr_node = queue(ceil(rand(1) * size(queue,2))); %select a random node from the queue

    end

    clear queue;
    % pack;

    % save final network
    data = BuildFacebookData(graph);
    networkName = sprintf('%sfacebook_sparse_%d_%d_%d', dataset_out_dir,curr_graph_size,iter,bfs_start_node);
    LogMsg(sprintf('Saving %s',networkName));
    save(networkName);
    clear data;

    LogMsg(strcat('startNodes ', sprintf(' %d ',startNodes)));
    % pack;

end
-
-
|