Source code for absorbing_centrality.algorithms

"""
    algorithms
    ~~~~~~~~~~

    Provides the following algorithms to pick a team of high
    absorbing centrality:
    * greedy_team
        Implements the greedy algorithm.

    :copyright: 2015 Charalampos Mavroforakis, <cmav@bu.edu> and contributors
    :license: ISC

"""


from __future__ import division

from sys import maxint
from time import clock

from networkx.algorithms.link_analysis import pagerank_scipy
from numpy import arange, zeros

from .graph_helpers import (add_supernode, canonical_relabel_nodes,
                            compute_fundamental_matrix,
                            compute_personalized_transition_matrix,
                            compute_transition_matrix, is_canonical,
                            keep_largest_component)


[docs]def greedy_team(G, k, query=None, candidates=None, fast_select=False, return_times=False, with_restarts=False, alpha=0.85): """Selects a team of nodes according to the greedy algorithm. Parameters ---------- G : Networkx graph The graph from which the team will be selected. k : int The size of the team. query : list, optional If provided, the distance is measured with respect to the nodes in `query`. candidates : list, optional If provided, the team is picked only among the nodes in `candidates`. fast_select : bool, optional If True, the greedy algorithm will only consider candidates in a smart way, by examining their gain at each round (default is False). with_restarts : bool, optional If True, the greedy algorithm is based on the transition matrix w/ restarts to the supernode (default is False). alpha : float, optional If the transition matrix has restarts, `alpha` is the probability for the random surfer to continue (default is 0.85). Returns ------- scores : list The scores of all the greedy teams of size up to `k`. teams : list The list of greedy times of size up to `k`. times : list The time to compute each team. Returned only if `return_times` is True. """ best_scores = zeros((k, 1)) best_solutions = [] times = [] solution_set = [] if not is_canonical(G): G = canonical_relabel_nodes(keep_largest_component(G)) if candidates is None or not candidates: candidates = list(G.nodes()) else: candidates = [G.graph['canonical_map'][c] for c in candidates] if query is None or not query: query = G.nodes() else: query = [G.graph['canonical_map'][q] for q in query] ##################################################### # find the pagerank of the nodes before adding the supernode personalization = None if with_restarts: personalization = {v: 1 if v in query else 0 for v in G} pagerank_centrality = pagerank_scipy(G, personalization=personalization) G = add_supernode(G, query) n = G.number_of_nodes() k = min(k, len(candidates)) round_start = clock() if with_restarts: P = compute_personalized_transition_matrix(G, alpha) else: P = compute_transition_matrix(G) # The first node added to the solution will be the node with highest pagerank score ####### # We start by finding the first node of the team. Instead of doing n inversions # we will select the node with the highest pagerank. current_round = 0 candidate_values = [(c, pagerank_centrality[c]) for c in candidates] pagerank_candidates = [c for c, value in sorted(candidate_values, key=lambda tup: tup[1], reverse=True)[:5]] round_min = maxint round_best = pagerank_candidates[0] for pagerank_candidate in pagerank_candidates: non_absorbing_nodes = [i for i in arange(n) if i + 1 not in [pagerank_candidate]] P_abs = P[non_absorbing_nodes, :][:, non_absorbing_nodes] F = compute_fundamental_matrix(P_abs, fast=True) row_sums = F.sum(axis=1) score = row_sums[-1].sum() - F[-1, -1] if score < round_min: round_min = score round_best = pagerank_candidate best_candidate = round_best solution_set.append(G.graph['label_map'][best_candidate]) times.append(clock() - round_start) non_absorbing_nodes = [i for i in arange(n) if i + 1 != best_candidate] P_abs = P[non_absorbing_nodes, :][:, non_absorbing_nodes] F = compute_fundamental_matrix(P_abs, fast=True) best_scores[current_round] = round_min best_solutions.append(solution_set) current_round += 1 # We will take advantage of the submodularity of the problem # At each round, we remember ,for ech node, the gain that adding this node has # At the next round, we compute a running round_best_gain (e.g. from the first node in the loop) # and for every next node, we first check if that node's gain in the previous round was smaller # than the current round_best_gain. The submodularity property suggests that if this is the case, # then in the previous values is an upper bound for this round's gain for that node, so we skip checking it round_gain = {} nodes_to_check = candidates absorbing_nodes = [best_candidate] while len(solution_set) < k: round_min = maxint round_best_member = -1 round_start = clock() for c in nodes_to_check: if G.graph['label_map'][c] in solution_set: continue if fast_select and c in round_gain: gain_upper_bound = round_gain[c] if gain_upper_bound < best_scores[current_round - 1] - round_min: # the node's gain cannot exceed its upper bound continue non_absorbing_nodes = [i for i in arange(n) if i + 1 not in absorbing_nodes + [c]] P_abs = P[non_absorbing_nodes, :][:, non_absorbing_nodes] F_new = compute_fundamental_matrix(P_abs) # Now that we inverted P, we need to compute the absorbing time. row_sums = F_new.sum(axis=1) total_steps = row_sums[-1].sum() - F_new[-1, -1] round_gain[c] = best_scores[current_round - 1] - total_steps if total_steps < round_min: round_best_member = c round_min = total_steps absorbing_nodes.append(round_best_member) solution_set = solution_set + [G.graph['label_map'][round_best_member]] best_scores[current_round] = round_min best_solutions.append(solution_set) if fast_select: # order the candidates by decreasing gain, # so that we increase the chances to find the next node faster nodes_to_check = sorted(round_gain, key=round_gain.get, reverse=True) times.append(clock() - round_start) current_round = current_round + 1 if return_times: return (best_scores, best_solutions, times) else: return (best_scores, best_solutions)