% Facebook_Crawl
% Crawls the MHRW Facebook social-graph sample starting from randomly chosen
% nodes and stores the crawled neighborhoods as datasets.
%
% For every iteration the script:
%   1. picks a random start node that is not already listed in startNodes,
%   2. expands nodes in FIFO order (front of the queue first), asking
%      FindFacebookNeighbors_binary for the neighbors of each node,
%   3. saves a snapshot whenever the number of collected neighbor lists
%      matches one of the values in graph_sizes,
%   4. saves the final network once graph_size lists were collected or the
%      queue ran dry.
%
% External helpers used (defined elsewhere): FindFacebookNeighbors_binary,
% BuildFacebookData, LogMsg.

clc;
clear all;

% ---- Configuration -------------------------------------------------------
max_queue_size    = 200000;   % the pending-node queue is truncated to this length
max_facebook_node = 957359;   % neighbor ids above this value are discarded

% Input data lives under D:\SocialNets\Facebook\Data
fileName        = 'd:\\SocialNets\\Facebook\\Data\\mhrw-socialgraph-anonymized.txt';
dataset_out_dir = 'd:\\SocialNets\\DataSets6\\Facebook_June13\\';
if ~isdir(dataset_out_dir)
    mkdir(dataset_out_dir);
end

% graph_size = 16384;
graph_size  = 100000;         % stop crawling after this many neighbor lists
% Sizes at which an intermediate snapshot is written (100000 is covered by
% the final save at the end of each iteration).
graph_sizes = [2000,5000,10000,15000,20000,25000,30000,35000,40000,50000,60000,70000,75000,80000,90000];
debugFlag   = 0;
numIter     = 10;

% Start nodes that must not be picked again; every newly drawn start node is
% appended to this list.
startNodes = [779983 867168 121571 874428 605394 93381 266622 523561 916677 923744];

% Random node from the MHRW dataset.
% NOTE(review): floor(rand * max) can produce 0 and never produces
% max_facebook_node itself -- confirm that 0 is a valid node id.
bfs_start_node = floor(rand(1) * max_facebook_node);

for iter = 4 : numIter-1
    % Redraw until the start node is one that has not been used before.
    while any(startNodes == bfs_start_node)
        bfs_start_node = floor(rand(1) * max_facebook_node);
    end
    startNodes = [startNodes bfs_start_node];
    % Alternative fixed start nodes used in earlier experiments:
    % bfs_start_node = 21632297; % most connected node in the uni dataset
    % bfs_start_node = 72261595;

    % Timestamp for the log line, built from clock's
    % [year month day hour minute seconds] vector.
    % NOTE(review): hour and minute are concatenated without a separator and
    % the seconds field is fractional; kept as-is so the log format is unchanged.
    date_now = clock;
    date_now = strcat(num2str(date_now(1)),'_',num2str(date_now(2)),'_', num2str(date_now(3)),'_', num2str(date_now(4)), num2str(date_now(5)),'_', num2str(date_now(6)));
    LogMsg(sprintf('%s Start Facebook_Crawl (%d of %d) starting from node %d ...',date_now, iter+1, numIter, bfs_start_node));

    curr_node       = bfs_start_node;
    queue           = [];   % row vector of nodes waiting to be expanded
    developed_nodes = [];   % nodes that were already expanded
    graph           = {};   % graph{k} holds the k-th collected neighbor list
    curr_graph_size = 0;

    while curr_graph_size < graph_size
        % fprintf('Current Node: %d\n', curr_node);
        developed_nodes = [developed_nodes, curr_node];
        curr_neighbors  = FindFacebookNeighbors_binary(fileName, curr_node);

        % Same semantics as the implicit array test "if curr_neighbors ~= -1":
        % the result must be non-empty and no element may equal -1.
        % (presumably -1 is the helper's "node not found" marker -- confirm)
        has_neighbors = ~isempty(curr_neighbors) && all(curr_neighbors(:) ~= -1);
        if has_neighbors
            curr_neighbors = curr_neighbors(curr_neighbors <= max_facebook_node);
            for j = 1 : size(curr_neighbors,1)
                % Append only nodes that are not queued yet (setdiff also
                % sorts and de-duplicates the new entries).
                % Alternatives tried before:
                %   queue = union(curr_neighbors(j,:), queue);
                %   queue = [queue, curr_neighbors(j,:)]; % plain BFS
                queue = [queue, setdiff(curr_neighbors(j,:),queue)];
                if size(queue,2) > max_queue_size
                    queue = queue(1:max_queue_size);
                end

                curr_graph_size = curr_graph_size+1;
                graph{curr_graph_size} = curr_neighbors;

                % Save incremental networks at the configured sizes.
                if any(graph_sizes == curr_graph_size)
                    data = BuildFacebookData(graph);
                    networkName = sprintf('%sfacebook_sparse_%d_%d_%d', dataset_out_dir,curr_graph_size,iter,bfs_start_node);
                    LogMsg(sprintf('Saving %s',networkName));
                    save(networkName);   % stores the entire workspace, not only data
                    clear data;
                end
            end
        end
        % queue = setdiff(queue, developed_nodes); % would drop expanded nodes but also sorts the queue

        if debugFlag == 1
            fprintf('graph size: %d, queue size: %d\n' , size(graph,2), size(queue,2));
        end

        % Pop queued nodes until one is found that was not expanded yet.
        % isempty is required here: a drained row vector is 1x0, so testing
        % size(queue,1) == 0 would miss it and queue(1) would then raise an
        % index-out-of-bounds error.
        found_next = false;
        while ~isempty(queue)
            candidate = queue(1);
            queue(1)  = [];
            if ~ismember(candidate, developed_nodes)
                curr_node  = candidate;
                found_next = true;
                break;
            end
        end
        if ~found_next
            fprintf('queue is empty');
            break;
        end
        % curr_node = queue(ceil(rand(1) * size(queue,2))); % alternative: pick a random queued node
    end

    clear queue;
    % pack;

    % Save the final network of this iteration.
    data = BuildFacebookData(graph);
    networkName = sprintf('%sfacebook_sparse_%d_%d_%d', dataset_out_dir,curr_graph_size,iter,bfs_start_node);
    LogMsg(sprintf('Saving %s',networkName));
    save(networkName);   % stores the entire workspace, not only data
    clear data;
    LogMsg(strcat('startNodes ', sprintf(' %d ',startNodes)));
    % pack;
end