
%% Compute statistics for all our datasets

%% Experimental setup
% This experiment should be run from the rapr/experiments/datastats directory
% Warn when the script is not started from rapr/experiments/datastats:
% the relative paths used by this script (e.g. '../../data') assume that
% working directory.
cwd = pwd;
dirtail = 'experiments/datastats'; 
% Compare using forward slashes so the check also works on platforms
% where filesep is not '/'.
cwdnorm = strrep(cwd, filesep, '/');
ntail = length(dirtail);
% Test the length first: taking the last ntail characters of a shorter
% path would raise an index-out-of-bounds error instead of a warning.
if length(cwdnorm) < ntail || ~strcmp(cwdnorm(end-ntail+1:end), dirtail)
    warning('%s should be executed from rapr/%s\n', mfilename, dirtail);
end

% --- Inputs (paths are relative to the experiment directory) ---
datadir  = '../../data';            % directory holding the graph files

% --- External programs run via system() for the large graphs ---
wg_cores = '../../wg++/wg_cores';   % writes core numbers to stdout
wg_scc   = '../../wg++/wg_scc';     % writes component labels to stdout

% --- Matlab packages added to the path further below ---
libbvg_path     = '../../libbvg';
matlab_bgl_path = '~/dev/matlab/matlab_bgl';

% Scratch file that receives the redirected output of the wg_* programs
% and is then read back with load().
matlab_wg_file = 'matlab_bg_data';

%%
% List the graph names in two sets.  The first is a set of mat files, 
% and the second is a set of .graph files in the bvgraph format.  The
% two wg_* strings give the path to the wg_scc and wg_cores programs
% to compute the statistics for really large graphs without bringing
% the graph into Matlab directly.
% Graphs stored as <name>.mat files in datadir.
matdata = { ...
    'wb-cs.stanford', ...
    'uk-2006-05.hostgraph', ...
    'uk-2007-05.hostgraph', ...
    'generank' ...
};

% Graphs stored in the bvgraph format in datadir.
bvgdata = { ...
    'uk-2006-05', ...
    'uk-2007-05', ...
    'cnr-2000', ...
    'eu-2005', ...
    'in-2004', ...
    'nz2006', ...
    'uk-2005', ...
    'us2004scc' ...
};

% Edge-count threshold: graphs with more edges than this are processed
% with the external wg_* programs instead of in-memory Matlab functions.
largedata = 7e7;

%%
% This experiment requires the MatlabBGL library, specifically the 3.0
% release, which provides the core_numbers function.  An official 3.0
% (non-beta) release will (hopefully) be available soon.  Without it, the
% matdata section cannot run; the bvgdata section still works if you set
% largedata=0 so that every graph is processed by the wg_* programs.
% Put the required packages on the Matlab search path, one at a time and
% in this order (each addpath call prepends to the path).
extra_paths = {matlab_bgl_path, libbvg_path};
for pk = 1:numel(extra_paths)
    addpath(extra_paths{pk});
end
clear extra_paths pk;

%% Compute for matdata

% For every graph in matdata, load its matrix P from <datadir>/<name>.mat
% and record size, degree, component and core-number statistics (plus the
% wall-clock time of each computation) in one element of a struct array.
% The array is saved to matresults.mat as the variable matresults.
results = [];
ngraphs = length(matdata);
for gi=1:ngraphs
    g = matdata{gi};
    results(gi).name = g;

    % Load only P, and load it into a struct.  A bare load() would drop
    % every variable stored in the file into this workspace, where it could
    % overwrite results/gi/etc.; and if a file had no P, the P left over
    % from the previous graph would silently be reused.
    S = load([datadir filesep g '.mat'], 'P');
    if ~isfield(S, 'P')
        error('datastats:missingP', ...
            'File %s.mat does not contain the variable P', g);
    end
    P = S.P;        % transition probabilities
    clear S;
    A = spones(P);  % keep only the nonzero pattern (all stored values -> 1)
    
    results(gi).nverts = size(A,1);
    results(gi).nedges = nnz(A);
    
    % Column sums of the 0/1 pattern (labelled indegree).
    tic; 
    x = sum(A,1); % x = indegree
    results(gi).timeid = toc;
    results(gi).maxid = max(x);
    results(gi).nzeroid = sum(x==0);

    % Row sums of the 0/1 pattern (labelled outdegree).
    tic; 
    x = sum(A,2); % x = outdegree
    results(gi).timeod = toc;
    results(gi).maxod = max(x);
    results(gi).nzerood = sum(x==0);
    
    % Component labels; x(i) is the 1-based label of vertex i, so
    % accumarray(x,1) counts the vertices in each component.
    tic; 
    x = components(A);
    results(gi).timecomps = toc;
    ccs = accumarray(x,1);
    results(gi).ncomp = max(x);
    results(gi).maxcompsize = max(ccs);
    results(gi).nisocomp = sum(ccs==1);   % components with a single vertex
    clear ccs;
    
    % Core numbers start at 0, hence the +1 shift when counting how many
    % vertices have each core number.
    tic;
    x = core_numbers(A);
    results(gi).timecores = toc;
    cns = accumarray(x+1,1);
    results(gi).maxcorenum = max(x);
    results(gi).zerocoresize = sum(x==0);
    results(gi).onecoresize = sum(x==1);
    [cs,cn] = max(cns);
    % [vertex count of the most populated core number, that core number]
    results(gi).maxcore = [cs cn-1];
    clear cns cs cn;
    
    % Echo this graph's record to the command window.
    results(gi), fprintf('\n');
end

matresults = results;
save 'matresults' matresults;

%% Compute for bvgdata

% For every graph in bvgdata, open <datadir>/<name> as an offline bvgraph
% and record the same statistics as for the .mat graphs.  Graphs with more
% than largedata edges get their component and core data from the external
% wg_scc / wg_cores programs; smaller graphs are converted to a Matlab
% sparse matrix and processed in memory.  The struct array is saved to
% bvgresults.mat as the variable bvgresults.
results = [];
ngraphs = length(bvgdata);
for gi=1:ngraphs
    g = bvgdata{gi}; graphbase = [datadir filesep g];
    results(gi).name = g;
    
    A = bvgraph(graphbase,struct('load_type','offline')); % don't load into mem
    
    % nnz(A) comes back negative for graphs with more than 2^31 edges
    % (a wrapped signed 32-bit count).  Adding 2^32 recovers the true
    % count as long as it is below 2^32; larger counts cannot be detected
    % here.
    ne = double(nnz(A));
    if ne < 0
        ne = ne + 2^32;
    end
    
    results(gi).nverts = size(A,1);
    results(gi).nedges = ne;
    
    tic; 
    x = sum(A,1); % x = indegree
    results(gi).timeid = toc;
    results(gi).maxid = max(x);
    results(gi).nzeroid = sum(x==0);
    tic; 
    x = sum(A,2); % x = outdegree
    results(gi).timeod = toc;
    results(gi).maxod = max(x);
    results(gi).nzerood = sum(x==0);
    
    % ne is already corrected for the wrap-around, so the previously
    % negative counts fall into the large-graph branch as before.
    if ne > largedata
        % run with wg codes: stdout of each program is redirected to the
        % scratch file matlab_wg_file and read back with load()
        tic;
        status = system(sprintf('%s %s > %s',wg_scc,graphbase,matlab_wg_file));
        results(gi).timecomps = toc;
        if status ~= 0
            % Report the failure instead of ignoring it; the results for
            % this graph should not be trusted.
            warning('datastats:wgfailed', ...
                '%s exited with status %d for %s', wg_scc, status, g);
        end
        x = load(matlab_wg_file);
        x = x+1;   % shift labels by one so they are valid accumarray subscripts
        
        ccs = accumarray(x,1);
        results(gi).ncomp = max(x);
        results(gi).maxcompsize = max(ccs);
        results(gi).nisocomp = sum(ccs==1);
        clear x ccs;
        
        tic;
        status = system(sprintf('%s %s > %s',wg_cores,graphbase,matlab_wg_file));
        results(gi).timecores= toc;
        if status ~= 0
            warning('datastats:wgfailed', ...
                '%s exited with status %d for %s', wg_cores, status, g);
        end
        x = load(matlab_wg_file);
        
        % Core numbers start at 0, hence the +1 shift for the histogram.
        cns = accumarray(x+1,1);
        results(gi).maxcorenum = max(x);
        results(gi).zerocoresize = sum(x==0);
        results(gi).onecoresize = sum(x==1);
        [cs,cn] = max(cns);
        % [vertex count of the most populated core number, that core number]
        results(gi).maxcore = [cs cn-1];
        clear cns cs cn status;
        
    else
        % load as a Matlab graph
        A = sparse(A);
        
        tic; 
        x = components(A);
        results(gi).timecomps = toc;
        ccs = accumarray(x,1);
        results(gi).ncomp = max(x);
        results(gi).maxcompsize = max(ccs);
        results(gi).nisocomp = sum(ccs==1);
        clear ccs;
        
        tic;
        x = core_numbers(A);
        results(gi).timecores = toc;
        cns = accumarray(x+1,1);
        results(gi).maxcorenum = max(x);
        results(gi).zerocoresize = sum(x==0);
        results(gi).onecoresize = sum(x==1);
        [cs,cn] = max(cns);
        results(gi).maxcore = [cs cn-1];
        clear cns cs cn;
    end
    
    % Echo this graph's record to the command window.
    results(gi), fprintf('\n');
end

bvgresults = results;
save 'bvgresults' bvgresults;


ans = 

            name: 'wb-cs.stanford'
          nverts: 9914
          nedges: 36854
          timeid: 4.5000e-04
           maxid: 340
         nzeroid: 699
          timeod: 5.0900e-04
           maxod: 277
         nzerood: 2861
       timecomps: 0.0093
           ncomp: 4391
     maxcompsize: 2759
        nisocomp: 4207
       timecores: 0.0069
      maxcorenum: 13
    zerocoresize: 892
     onecoresize: 4357
         maxcore: [4357 1]



ans = 

            name: 'uk-2006-05.hostgraph'
          nverts: 11402
          nedges: 730774
          timeid: 0.0020
           maxid: 2750
         nzeroid: 260
          timeod: 0.0042
           maxod: 5994
         nzerood: 2434
       timecomps: 0.0858
           ncomp: 2935
     maxcompsize: 7945
        nisocomp: 2838
       timecores: 0.0897
      maxcorenum: 67
    zerocoresize: 355
     onecoresize: 655
         maxcore: [2491 19]



ans = 

            name: 'uk-2007-05.hostgraph'
          nverts: 114529
          nedges: 1836441
          timeid: 0.0083
           maxid: 3518
         nzeroid: 4766
          timeod: 0.0199
           maxod: 51692
         nzerood: 49379
       timecomps: 0.3452
           ncomp: 54822
     maxcompsize: 59160
        nisocomp: 54384
       timecores: 0.3358
      maxcorenum: 259
    zerocoresize: 4885
     onecoresize: 10710
         maxcore: [18933 5]



ans = 

            name: 'generank'
          nverts: 4047
          nedges: 339596
          timeid: 0.0011
           maxid: 493
         nzeroid: 0
          timeod: 0.0017
           maxod: 493
         nzerood: 0
       timecomps: 0.0379
           ncomp: 10
     maxcompsize: 4026
        nisocomp: 0
       timecores: 0.0380
      maxcorenum: 129
    zerocoresize: 0
     onecoresize: 38
         maxcore: [211 110]


Attaching outdegree ibs..
All done.
num vertices = 77741046
num edges = 2965197340
num components = 10789143
largest component size(s) = 49710330 339092 338297 290431 264104 
Attaching outdegree ibs..
All done.
num vertices = 77741046
num edges = 2965197340

ans = 

            name: 'uk-2006-05'
          nverts: 77741046
          nedges: -1.3298e+09
          timeid: 72.5990
           maxid: 4070239
         nzeroid: 994082
          timeod: 64.7232
           maxod: 20178
         nzerood: 8439830
       timecomps: 3.5774e+03
           ncomp: 10789143
     maxcompsize: 49710330
        nisocomp: 10662714
       timecores: 3.2727e+03
      maxcorenum: 4986
    zerocoresize: 1103487
     onecoresize: 34780373
         maxcore: [34780373 1]


Attaching outdegree ibs..
All done.
num vertices = 105896555
num edges = 3738733648
num components = 21398038
largest component size(s) = 68582555 235228 139204 117787 58766 
Attaching outdegree ibs..
All done.
num vertices = 105896555
num edges = 3738733648

ans = 

            name: 'uk-2007-05'
          nverts: 105896555
          nedges: -556233648
          timeid: 89.6180
           maxid: 975418
         nzeroid: 6554828
          timeod: 80.1578
           maxod: 15402
         nzerood: 12949672
       timecomps: 6.1916e+03
           ncomp: 21398038
     maxcompsize: 68582555
        nisocomp: 21204368
       timecores: 4.1982e+03
      maxcorenum: 5664
    zerocoresize: 7070454
     onecoresize: 49760746
         maxcore: [49760746 1]



ans = 

            name: 'cnr-2000'
          nverts: 325557
          nedges: 3216152
          timeid: 0.2058
           maxid: 18235
         nzeroid: 0
          timeod: 0.1253
           maxod: 2716
         nzerood: 78056
       timecomps: 0.2203
           ncomp: 100977
     maxcompsize: 112023
        nisocomp: 98756
       timecores: 0.2497
      maxcorenum: 81
    zerocoresize: 0
     onecoresize: 165919
         maxcore: [165919 1]



ans = 

            name: 'eu-2005'
          nverts: 862664
          nedges: 19235140
          timeid: 0.7288
           maxid: 68922
         nzeroid: 0
          timeod: 0.6754
           maxod: 6985
         nzerood: 71675
       timecomps: 1.4857
           ncomp: 90768
     maxcompsize: 752725
        nisocomp: 89607
       timecores: 1.8415
      maxcorenum: 378
    zerocoresize: 0
     onecoresize: 215580
         maxcore: [215580 1]



ans = 

            name: 'in-2004'
          nverts: 1382908
          nedges: 16917053
          timeid: 0.6033
           maxid: 21866
         nzeroid: 86
          timeod: 0.5545
           maxod: 7753
         nzerood: 282306
       timecomps: 1.1032
           ncomp: 367675
     maxcompsize: 593687
        nisocomp: 351033
       timecores: 1.4324
      maxcorenum: 473
    zerocoresize: 363
     onecoresize: 672031
         maxcore: [672031 1]



ans = 

            name: 'nz2006'
          nverts: 604913
          nedges: 3777080
          timeid: 0.1408
           maxid: 20864
         nzeroid: 0
          timeod: 0.1271
           maxod: 3281
         nzerood: 435563
       timecomps: 0.3227
           ncomp: 455317
     maxcompsize: 144020
        nisocomp: 455141
       timecores: 0.3530
      maxcorenum: 81
    zerocoresize: 0
     onecoresize: 492870
         maxcore: [492870 1]


Attaching outdegree ibs..
All done.
num vertices = 39459925
num edges = 936364282
num components = 5811041
largest component size(s) = 25711307 130913 65948 36731 34665 
Attaching outdegree ibs..
All done.
num vertices = 39459925
num edges = 936364282

ans = 

            name: 'uk-2005'
          nverts: 39459925
          nedges: 936364282
          timeid: 24.7944
           maxid: 1776852
         nzeroid: 15661
          timeod: 21.5671
           maxod: 5213
         nzerood: 4407986
       timecomps: 1.4916e+03
           ncomp: 5811041
     maxcompsize: 25711307
        nisocomp: 5587959
       timecores: 1.3081e+03
      maxcorenum: 585
    zerocoresize: 40444
     onecoresize: 16807640
         maxcore: [16807640 1]



ans = 

            name: 'us2004scc'
          nverts: 1084200
          nedges: 11554007
          timeid: 0.5384
           maxid: 45309
         nzeroid: 0
          timeod: 0.4750
           maxod: 12479
         nzerood: 0
       timecomps: 0.9180
           ncomp: 1
     maxcompsize: 1084200
        nisocomp: 0
       timecores: 1.2360
      maxcorenum: 143
    zerocoresize: 0
     onecoresize: 607287
         maxcore: [607287 1]


