You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Facebook_Crawl1.m 2.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  % Facebook_Crawl1
  % Runs ten independent crawls over the Facebook social-graph dataset in
  % fileName. Each crawl starts from a randomly drawn node id, repeatedly
  % "develops" the current node (looks up its neighbors), appends unseen
  % neighbors to a FIFO queue, and records the neighbor list in the cell
  % array `graph`. Snapshots are saved to 'facebook_sparse_<size>_<iter>'
  % when the graph holds exactly 1024, 2048, 4096 or 8192 entries, and once
  % more when the crawl finishes.
  %
  % Depends on two project functions that are not defined in this file:
  %   FindFacebookNeighbors_binary(fileName, node)
  %   BuildFacebookData(graph)
  %
  % NOTE(review): save() is called without a variable list, so every
  % workspace variable (not just `data`) is written to each snapshot file.
  clc;
  clear all;

  max_queue_size = 200000;     % upper bound on the number of pending nodes
  graph_size = 16384;          % number of graph entries collected per crawl
  %fileName = 'C:\\MissingNodes\\DataSets\\Facebook\\uni-socialgraph-anonymized.txt';
  max_facebook_node = 957359;  % neighbors with a larger id are discarded
  fileName = 'd:\\MissingNodes\\DataSets\\Facebook\\mhrw-socialgraph-anonymized.txt';

  for iter = 1 : 10
      % Draws an integer in [0, max_facebook_node - 1].
      % NOTE(review): 0 is a possible start id - confirm the dataset is 0-based.
      bfs_start_node = floor(rand(1) * max_facebook_node); %some node from the mhrw dataset
      %bfs_start_node =21632297; %most connected node in the uni dataset
      %bfs_start_node =72261595;
      curr_node = bfs_start_node;
      queue = [];
      developed_nodes = [];
      graph = [];               % becomes a cell array on the first graph{...} assignment
      curr_graph_size = 0;

      while curr_graph_size < graph_size
          % Intermediate snapshot at the power-of-two checkpoints.
          if curr_graph_size == 1024 || curr_graph_size==2048 || curr_graph_size==4096 || curr_graph_size==8192
              data = BuildFacebookData(graph);
              save(sprintf('facebook_sparse_%d_%d', curr_graph_size, iter))
              clear data;
          end

          %fprintf('Current Node: %d\n', curr_node);
          developed_nodes = [developed_nodes, curr_node];
          curr_neighbors = FindFacebookNeighbors_binary(fileName, curr_node);

          % Explicit form of the original array-valued test
          % `if curr_neighbors ~= -1`: true only for a non-empty result in
          % which no element equals -1.
          % NOTE(review): -1 is presumably the "node not found" sentinel of
          % FindFacebookNeighbors_binary - confirm against that function.
          if ~isempty(curr_neighbors) && all(curr_neighbors(:) ~= -1)
              curr_neighbors = curr_neighbors(curr_neighbors <= max_facebook_node);
              for j = 1 : size(curr_neighbors,1)
                  % Append only nodes that are not already queued (no repetitions).
                  queue = [queue, setdiff(curr_neighbors(j,:),queue)];
                  %queue = [queue, curr_neighbors(j,:)]; %BFS
                  if size(queue,2) > max_queue_size
                      queue = queue(1:max_queue_size);
                  end
                  curr_graph_size = curr_graph_size+1;
                  graph{curr_graph_size} = curr_neighbors;
              end
          end

          fprintf('graph size: %d\n' , size(graph,2));
          fprintf('queue size: %d\n', size(queue,2));

          % Pop from the front of the queue until an undeveloped node is
          % found. The isempty guard matters: once the last element has been
          % removed the queue is a 1x0 row, which the previous
          % size(queue, 1) == 0 test did not recognise as empty, so
          % queue(1) raised an index-out-of-bounds error.
          while ismember(curr_node, developed_nodes) && ~isempty(queue)
              curr_node = queue(1);
              queue(1) = [];
          end

          % Still on a developed node: the queue ran dry, so stop this crawl.
          if ismember(curr_node, developed_nodes)
              fprintf('queue is empty');
              break;
          end
          %curr_node = queue(ceil(rand(1) * size(queue,2))); %select a random node from the queue
      end

      clear queue;
      pack
      % Final snapshot; the file name uses the target graph_size even if the
      % crawl stopped early because the queue ran dry.
      data = BuildFacebookData(graph);
      save(sprintf('facebook_sparse_%d_%d', graph_size, iter))
      clear data;
      pack
  end