Package biana :: Package GraphUtilities :: Module GraphUtilities
[hide private]
[frames] | no frames]

Source Code for Module biana.GraphUtilities.GraphUtilities

  1  """ 
  2      BIANA: Biologic Interactions and Network Analysis 
  3      Copyright (C) 2009  Javier Garcia-Garcia, Emre Guney, Baldo Oliva 
  4   
  5      This program is free software: you can redistribute it and/or modify 
  6      it under the terms of the GNU General Public License as published by 
  7      the Free Software Foundation, either version 3 of the License, or 
  8      (at your option) any later version. 
  9   
 10      This program is distributed in the hope that it will be useful, 
 11      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13      GNU General Public License for more details. 
 14   
 15      You should have received a copy of the GNU General Public License 
 16      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 17   
 18  """ 
 19   
 20   
 21  import networkx 
 22  import sets, random 
 23   
 24  #MIN_NUMBER_OF_PERTURBATION = 25 
 25  MAX_NUMBER_OF_TRIAL = 6 
 26   
27 -def create_graph_with_same_type(G):
28 return create_empty_copy(G)
29
30 -def create_graph():
31 """ 32 Creates & returns a graph 33 """ 34 35 return networkx.Graph()
36 37
38 -def get_number_of_distinct_edges(G):
39 edge_list = G.edges() 40 edge_set = sets.Set() 41 for id1, id2, data in edge_list: 42 edge_set.add((id1, id2)) 43 return len(edge_set)
44
45 -def get_shortest_path_between(G, source_id, target_id):
46 return networkx.shortest_path(G, source_id, target_id)
47
48 -def get_all_paths_from(G, source_id):
49 """ 50 get all paths from source node to all possible nodes in a dictionary 51 """ 52 return networkx.dijkstra_path(G, source_id) 53 54
55 -def get_path_network(G, listNodes, path_length_cutoff=10000):
56 """ 57 Returns a subgraph containing only nodes in listNodes 58 59 Nodes are connected if exist a path between them, and weight of the edge consists in the length of shortest path 60 61 If the shortest between two nodes includes another node in the list, this edge is not added 62 """ 63 64 # First, check all nodes in listNodes are in network 65 new_graph = networkx.MultiGraph() 66 67 for x in xrange(len(listNodes)): 68 for y in xrange(x): 69 sp = networkx.shortest_path(G, listNodes[x], listNodes[y]) 70 if sp: 71 if (len(sp)-1)<=path_length_cutoff: 72 if len(sets.Set(listNodes).intersection(sets.Set(sp[1:-1])))==0: 73 new_graph.add_edge(listNodes[x],listNodes[y],len(sp)-1) 74 75 return new_graph
76 77 78
79 -def get_connected_components(G, return_as_graph_list=True):
80 """ 81 Finds (strongly in the case of directed network) connected components of graph 82 returnAsGraphList: returns list of graph objects corresponding to connected components (from larger to smaller) 83 otherwise returns list of node list corresponding nodes in connected components 84 """ 85 result_list = [] 86 87 if return_as_graph_list: 88 result_list = networkx.connected_component_subgraphs(G) 89 else: 90 result_list = network.connected_components(G) 91 92 return result_list
93
94 -def randomize_graph(graph, randomization_type):
95 """ 96 Creates a random network from given network as a networkx graph 97 randomization_type: 98 - "random": add same number of edges randomly between nodes of original graph 99 - "preserve_topology": keep edges, shuffle nodes of original graph 100 - "preserve_topology_and_node_degree": keep edges, shuffle nodes of original graph with the nodes of same degree 101 - "preserve_degree_distribution": remove an edge between two random nodes with degrees k, l then add to two nodes with degrees k-1 & l-1, then shuffle nodes 102 - "preserve_degree_distribution_and_node_degree": remove 2 random edges between a-b and c-d where degree(a)=degree(c) and degree(b)=degree(d) then add 2 edges between a-d and b-c, then shuffle nodes with the same degree 103 """ 104 #if use_existing: 105 # new_graph = graph # problem 106 #else: 107 new_graph = networkx.create_empty_copy(graph) 108 109 n_edge = graph.number_of_edges() 110 111 new_graph.add_nodes_from(graph.nodes()) 112 113 if randomization_type == "random": 114 #for i in xrange(n_edge): 115 for edge in graph.edges(): 116 source_id = random.choice(new_graph.nodes()) 117 target_id = random.choice(new_graph.nodes()) 118 while new_graph.has_edge(source_id, target_id): 119 source_id = random.choice(new_graph.nodes()) 120 target_id = random.choice(new_graph.nodes()) 121 new_graph.add_edge(source_id, target_id, graph.get_edge(edge[0],edge[1])) 122 123 elif randomization_type=="preserve_topology": # shuffle_nodes 124 nodes = graph.nodes() 125 random_nodes = graph.nodes() 126 random.shuffle(random_nodes) 127 equivalences = dict([(nodes[i],random_nodes[i]) for i in xrange(len(nodes))]) 128 new_graph.add_edges_from([ (equivalences[current_edge[0]],equivalences[current_edge[1]],graph.get_edge(current_edge[0],current_edge[1])) for current_edge in graph.edges() ]) 129 130 elif randomization_type=="preserve_topology_and_node_degree": # shuffle_nodes_within_same_degree 131 nodes_by_degree = dict( (degree,[]) for degree in graph.degree() ) 132 graph_degree = graph.degree(with_labels=True) 133 [ nodes_by_degree[graph_degree[node]].append(node) for node in graph_degree ] 134 equivalences = {} 135 for current_degree in nodes_by_degree.keys(): 136 nodes = nodes_by_degree[current_degree] 137 random_nodes = list(nodes) 138 random.shuffle(random_nodes) 139 equivalences.update(dict([(nodes[i],random_nodes[i]) for i in xrange(len(nodes))])) 140 new_graph.add_edges_from([ (equivalences[current_edge[0]],equivalences[current_edge[1]], graph.get_edge(current_edge[0],current_edge[1])) for current_edge in graph.edges() ]) 141 142 elif randomization_type=="preserve_degree_distribution": 143 ## add edges as well 144 for current_node1, current_node2 in graph.edges(): 145 new_graph.add_edge(current_node1, current_node2, graph.get_edge(current_node1, current_node2)) 146 max_degree = sorted(graph.degree())[-1] 147 #nodes_by_degree = dict( (degree,{}) for degree in graph.degree() ) 148 nodes_by_degree = dict( (degree,{}) for degree in xrange(max_degree+1) ) 149 graph_degree = graph.degree(with_labels=True) 150 [ nodes_by_degree[graph_degree[node]].setdefault(node) for node in graph_degree ] 151 #print new_graph.nodes(), new_graph.edges() 152 #print nodes_by_degree 153 #if n_edge < MIN_NUMBER_OF_PERTURBATION: 154 # n_perturbation = random.randint(n_edge/2, n_edge) 155 #else: 156 # n_perturbation = random.randint(MIN_NUMBER_OF_PERTURBATION, n_edge) 157 n_perturbation = random.randint(n_edge/2, n_edge) 158 for i in xrange(n_perturbation): 159 n_trial = 0 160 while True: 161 n_trial += 1 162 if n_trial > MAX_NUMBER_OF_TRIAL: 163 #print "Warning: Max number of trials exceeded in perturbation ", i 164 break 165 source_id = random.choice(new_graph.nodes()) 166 source_degree = new_graph.degree(source_id) 167 while source_degree < 1: 168 source_id = random.choice(new_graph.nodes()) 169 source_degree = new_graph.degree(source_id) 170 target_id = random.choice(new_graph.neighbors(source_id)) 171 target_degree = new_graph.degree(target_id) 172 del nodes_by_degree[source_degree][source_id] 173 nodes_by_degree[source_degree-1].setdefault(source_id) 174 del nodes_by_degree[target_degree][target_id] 175 nodes_by_degree[target_degree-1].setdefault(target_id) 176 ## not very important to check for cases where new_source = source (v.v. for targets) 177 new_source_id = random.choice(nodes_by_degree[source_degree-1].keys()) 178 new_target_id = random.choice(nodes_by_degree[target_degree-1].keys()) 179 #print source_id, target_id, " / ", new_source_id, new_target_id 180 ## check if going to add an existing edge or self edge 181 if new_graph.has_edge(new_source_id, new_target_id) or new_source_id == new_target_id: 182 del nodes_by_degree[source_degree-1][source_id] 183 nodes_by_degree[source_degree].setdefault(source_id) 184 del nodes_by_degree[target_degree-1][target_id] 185 nodes_by_degree[target_degree].setdefault(target_id) 186 continue 187 #print "rm %d %d" % (source_id, target_id) 188 edge_data = new_graph.get_edge(source_id, target_id) 189 new_graph.delete_edge(source_id, target_id) 190 #print "add %d %d" % (new_source_id, new_target_id) 191 new_graph.add_edge(new_source_id, new_target_id, edge_data) 192 del nodes_by_degree[source_degree-1][new_source_id] 193 nodes_by_degree[source_degree].setdefault(new_source_id) 194 del nodes_by_degree[target_degree-1][new_target_id] 195 nodes_by_degree[target_degree].setdefault(new_target_id) 196 break 197 #self.randomize_graph(new_graph, "preserve_topology") 198 199 elif randomization_type=="preserve_degree_distribution_and_node_degree": 200 ## add edges as well 201 for current_node1, current_node2 in graph.edges(): 202 new_graph.add_edge(current_node1, current_node2, graph.get_edge(current_node1, current_node2)) 203 nodes_by_degree = dict( (degree,{}) for degree in graph.degree() ) 204 graph_degree = graph.degree(with_labels=True) 205 [ nodes_by_degree[graph_degree[node]].setdefault(node) for node in graph_degree ] 206 207 #if n_edge < MIN_NUMBER_OF_PERTURBATION: 208 # n_perturbation = random.randint(1, n_edge) 209 #else: 210 # n_perturbation = random.randint(MIN_NUMBER_OF_PERTURBATION, n_edge) 211 n_perturbation = random.randint(n_edge/2, n_edge) 212 for i in xrange(n_perturbation): 213 source_id = random.choice(new_graph.nodes()) 214 source_degree = new_graph.degree(source_id) 215 ## find a node for which another node with the same degree exists 216 #available_neighbors = [] 217 n_trial = 0 218 while True: #(len(nodes_by_degree[source_degree]) < 2 or len(available_neighbors) < 1): 219 n_trial += 1 220 if n_trial > MAX_NUMBER_OF_TRIAL: 221 #print "Warning: Max number of trials exceeded in perturbation ", i 222 break 223 source_id = random.choice(new_graph.nodes()) 224 source_degree = new_graph.degree(source_id) 225 if len(nodes_by_degree[source_degree]) < 2: 226 continue 227 available_neighbors = [] 228 ## find a neighbor for which another node with the same degree exists 229 for neighbor_id in new_graph.neighbors_iter(source_id): 230 if source_degree == new_graph.degree(neighbor_id): 231 if len(nodes_by_degree[new_graph.degree(neighbor_id)]) > 2: 232 available_neighbors.append(neighbor_id) 233 else: 234 if len(nodes_by_degree[new_graph.degree(neighbor_id)]) > 1: 235 available_neighbors.append(neighbor_id) 236 if len(available_neighbors) < 1: 237 continue 238 target_id = random.choice(available_neighbors) 239 target_degree = new_graph.degree(target_id) 240 ## select a new source node with different id 241 while True: 242 new_source_id = random.choice(nodes_by_degree[source_degree].keys()) 243 while new_source_id == source_id: 244 new_source_id = random.choice(nodes_by_degree[source_degree].keys()) 245 new_available_neighbors = [] 246 ## find a neighbor as new target node for which id is different from target and has an id equivalent to target 247 for neighbor_id in new_graph.neighbors_iter(new_source_id): 248 if target_degree == new_graph.degree(neighbor_id): 249 new_available_neighbors.append(neighbor_id) 250 if len(new_available_neighbors) < 1: 251 continue 252 new_target_id = random.choice(new_available_neighbors) 253 if len(new_available_neighbors) > 1: 254 while new_target_id == target_id: 255 new_target_id = random.choice(new_available_neighbors) 256 #print new_available_neighbors, new_target_id 257 else: 258 new_target_id = new_available_neighbors[0] 259 break 260 #print source_id, target_id, " / ", new_source_id, new_target_id 261 if source_id == new_target_id or new_source_id == target_id: 262 continue 263 if new_graph.has_edge(source_id, new_target_id) or new_graph.has_edge(new_source_id, target_id): 264 continue 265 #print "rm %d %d" % (source_id, target_id) 266 #print "rm %d %d" % (new_source_id, new_target_id) 267 edge_data_1 = new_graph.get_edge(source_id, target_id) 268 edge_data_2 = new_graph.get_edge(new_source_id, new_target_id) 269 new_graph.delete_edge(source_id, target_id) 270 new_graph.delete_edge(new_source_id, new_target_id) 271 #print "add %d %d" % (source_id, new_target_id) 272 #print "add %d %d" % (new_source_id, target_id) 273 new_graph.add_edge(source_id, new_target_id, edge_data_1) 274 new_graph.add_edge(new_source_id, target_id, edge_data_2) 275 276 else: 277 raise Exception("Unknown randomization type %s" % randomization_type) 278 279 return new_graph
280 281 if __name__ == "__main__": 282 283 test_network = networkx.Graph() 284 test_network.add_nodes_from([1,2,3,4,5,6,7,8,9]) 285 test_network.add_edges_from([(1,2),(1,3),(1,5),(2,6),(3,4),(5,6),(4,7),(7,8)]) # (1,4) 286 287 print "original network:" 288 print test_network.edges() 289 290 print "preserve topology:" 291 random_network = randomize_graph(graph=test_network, randomization_type="preserve_topology") 292 print random_network.edges() 293 294 print "preserve topology and node degrees:" 295 random_network = randomize_graph(graph=test_network, randomization_type="preserve_topology_and_node_degree") 296 print random_network.edges() 297 298 print "preserve degree distribution:" 299 randomn = randomize_graph(graph=test_network, randomization_type="preserve_degree_distribution") 300 print randomn.edges() 301 302 print "preserve degree distribution and node degree:" 303 randomn = randomize_graph(graph=test_network, randomization_type="preserve_degree_distribution_and_node_degree") 304 print randomn.edges() 305 306 print "creating big graph..." 307 test_power_graph = networkx.Graph() 308 test_power_graph.add_nodes_from(range(100000)) 309 test_power_graph.add_edges_from([ (random.randint(0,99999),random.randint(0,99999)) for x in xrange(1000000) ]) 310 print "randomizing big network by preserve_topology..." 311 randomn = randomize_graph(graph=test_power_graph, randomization_type="preserve_topology") 312 print "randomizing by preserve_topology_and_node_degree..." 313 randomn = randomize_graph(graph=test_power_graph, randomization_type="preserve_topology_and_node_degree") 314 print "randomizing by preserve_degree_distribution..." 315 randomn = randomize_graph(graph=test_power_graph, randomization_type="preserve_degree_distribution") 316