1 """
2 BIANA: Biologic Interactions and Network Analysis
3 Copyright (C) 2009 Javier Garcia-Garcia, Emre Guney, Baldo Oliva
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 """
19
20
21 import networkx
22 import sets, random
23
24
25 MAX_NUMBER_OF_TRIAL = 6
26
28 return create_empty_copy(G)
29
31 """
32 Creates & returns a graph
33 """
34
35 return networkx.Graph()
36
37
39 edge_list = G.edges()
40 edge_set = sets.Set()
41 for id1, id2, data in edge_list:
42 edge_set.add((id1, id2))
43 return len(edge_set)
44
46 return networkx.shortest_path(G, source_id, target_id)
47
49 """
50 get all paths from source node to all possible nodes in a dictionary
51 """
52 return networkx.dijkstra_path(G, source_id)
53
54
56 """
57 Returns a subgraph containing only nodes in listNodes
58
59 Nodes are connected if exist a path between them, and weight of the edge consists in the length of shortest path
60
61 If the shortest between two nodes includes another node in the list, this edge is not added
62 """
63
64
65 new_graph = networkx.MultiGraph()
66
67 for x in xrange(len(listNodes)):
68 for y in xrange(x):
69 sp = networkx.shortest_path(G, listNodes[x], listNodes[y])
70 if sp:
71 if (len(sp)-1)<=path_length_cutoff:
72 if len(sets.Set(listNodes).intersection(sets.Set(sp[1:-1])))==0:
73 new_graph.add_edge(listNodes[x],listNodes[y],len(sp)-1)
74
75 return new_graph
76
77
78
80 """
81 Finds (strongly in the case of directed network) connected components of graph
82 returnAsGraphList: returns list of graph objects corresponding to connected components (from larger to smaller)
83 otherwise returns list of node list corresponding nodes in connected components
84 """
85 result_list = []
86
87 if return_as_graph_list:
88 result_list = networkx.connected_component_subgraphs(G)
89 else:
90 result_list = network.connected_components(G)
91
92 return result_list
93
95 """
96 Creates a random network from given network as a networkx graph
97 randomization_type:
98 - "random": add same number of edges randomly between nodes of original graph
99 - "preserve_topology": keep edges, shuffle nodes of original graph
100 - "preserve_topology_and_node_degree": keep edges, shuffle nodes of original graph with the nodes of same degree
101 - "preserve_degree_distribution": remove an edge between two random nodes with degrees k, l then add to two nodes with degrees k-1 & l-1, then shuffle nodes
102 - "preserve_degree_distribution_and_node_degree": remove 2 random edges between a-b and c-d where degree(a)=degree(c) and degree(b)=degree(d) then add 2 edges between a-d and b-c, then shuffle nodes with the same degree
103 """
104
105
106
107 new_graph = networkx.create_empty_copy(graph)
108
109 n_edge = graph.number_of_edges()
110
111 new_graph.add_nodes_from(graph.nodes())
112
113 if randomization_type == "random":
114
115 for edge in graph.edges():
116 source_id = random.choice(new_graph.nodes())
117 target_id = random.choice(new_graph.nodes())
118 while new_graph.has_edge(source_id, target_id):
119 source_id = random.choice(new_graph.nodes())
120 target_id = random.choice(new_graph.nodes())
121 new_graph.add_edge(source_id, target_id, graph.get_edge(edge[0],edge[1]))
122
123 elif randomization_type=="preserve_topology":
124 nodes = graph.nodes()
125 random_nodes = graph.nodes()
126 random.shuffle(random_nodes)
127 equivalences = dict([(nodes[i],random_nodes[i]) for i in xrange(len(nodes))])
128 new_graph.add_edges_from([ (equivalences[current_edge[0]],equivalences[current_edge[1]],graph.get_edge(current_edge[0],current_edge[1])) for current_edge in graph.edges() ])
129
130 elif randomization_type=="preserve_topology_and_node_degree":
131 nodes_by_degree = dict( (degree,[]) for degree in graph.degree() )
132 graph_degree = graph.degree(with_labels=True)
133 [ nodes_by_degree[graph_degree[node]].append(node) for node in graph_degree ]
134 equivalences = {}
135 for current_degree in nodes_by_degree.keys():
136 nodes = nodes_by_degree[current_degree]
137 random_nodes = list(nodes)
138 random.shuffle(random_nodes)
139 equivalences.update(dict([(nodes[i],random_nodes[i]) for i in xrange(len(nodes))]))
140 new_graph.add_edges_from([ (equivalences[current_edge[0]],equivalences[current_edge[1]], graph.get_edge(current_edge[0],current_edge[1])) for current_edge in graph.edges() ])
141
142 elif randomization_type=="preserve_degree_distribution":
143
144 for current_node1, current_node2 in graph.edges():
145 new_graph.add_edge(current_node1, current_node2, graph.get_edge(current_node1, current_node2))
146 max_degree = sorted(graph.degree())[-1]
147
148 nodes_by_degree = dict( (degree,{}) for degree in xrange(max_degree+1) )
149 graph_degree = graph.degree(with_labels=True)
150 [ nodes_by_degree[graph_degree[node]].setdefault(node) for node in graph_degree ]
151
152
153
154
155
156
157 n_perturbation = random.randint(n_edge/2, n_edge)
158 for i in xrange(n_perturbation):
159 n_trial = 0
160 while True:
161 n_trial += 1
162 if n_trial > MAX_NUMBER_OF_TRIAL:
163
164 break
165 source_id = random.choice(new_graph.nodes())
166 source_degree = new_graph.degree(source_id)
167 while source_degree < 1:
168 source_id = random.choice(new_graph.nodes())
169 source_degree = new_graph.degree(source_id)
170 target_id = random.choice(new_graph.neighbors(source_id))
171 target_degree = new_graph.degree(target_id)
172 del nodes_by_degree[source_degree][source_id]
173 nodes_by_degree[source_degree-1].setdefault(source_id)
174 del nodes_by_degree[target_degree][target_id]
175 nodes_by_degree[target_degree-1].setdefault(target_id)
176
177 new_source_id = random.choice(nodes_by_degree[source_degree-1].keys())
178 new_target_id = random.choice(nodes_by_degree[target_degree-1].keys())
179
180
181 if new_graph.has_edge(new_source_id, new_target_id) or new_source_id == new_target_id:
182 del nodes_by_degree[source_degree-1][source_id]
183 nodes_by_degree[source_degree].setdefault(source_id)
184 del nodes_by_degree[target_degree-1][target_id]
185 nodes_by_degree[target_degree].setdefault(target_id)
186 continue
187
188 edge_data = new_graph.get_edge(source_id, target_id)
189 new_graph.delete_edge(source_id, target_id)
190
191 new_graph.add_edge(new_source_id, new_target_id, edge_data)
192 del nodes_by_degree[source_degree-1][new_source_id]
193 nodes_by_degree[source_degree].setdefault(new_source_id)
194 del nodes_by_degree[target_degree-1][new_target_id]
195 nodes_by_degree[target_degree].setdefault(new_target_id)
196 break
197
198
199 elif randomization_type=="preserve_degree_distribution_and_node_degree":
200
201 for current_node1, current_node2 in graph.edges():
202 new_graph.add_edge(current_node1, current_node2, graph.get_edge(current_node1, current_node2))
203 nodes_by_degree = dict( (degree,{}) for degree in graph.degree() )
204 graph_degree = graph.degree(with_labels=True)
205 [ nodes_by_degree[graph_degree[node]].setdefault(node) for node in graph_degree ]
206
207
208
209
210
211 n_perturbation = random.randint(n_edge/2, n_edge)
212 for i in xrange(n_perturbation):
213 source_id = random.choice(new_graph.nodes())
214 source_degree = new_graph.degree(source_id)
215
216
217 n_trial = 0
218 while True:
219 n_trial += 1
220 if n_trial > MAX_NUMBER_OF_TRIAL:
221
222 break
223 source_id = random.choice(new_graph.nodes())
224 source_degree = new_graph.degree(source_id)
225 if len(nodes_by_degree[source_degree]) < 2:
226 continue
227 available_neighbors = []
228
229 for neighbor_id in new_graph.neighbors_iter(source_id):
230 if source_degree == new_graph.degree(neighbor_id):
231 if len(nodes_by_degree[new_graph.degree(neighbor_id)]) > 2:
232 available_neighbors.append(neighbor_id)
233 else:
234 if len(nodes_by_degree[new_graph.degree(neighbor_id)]) > 1:
235 available_neighbors.append(neighbor_id)
236 if len(available_neighbors) < 1:
237 continue
238 target_id = random.choice(available_neighbors)
239 target_degree = new_graph.degree(target_id)
240
241 while True:
242 new_source_id = random.choice(nodes_by_degree[source_degree].keys())
243 while new_source_id == source_id:
244 new_source_id = random.choice(nodes_by_degree[source_degree].keys())
245 new_available_neighbors = []
246
247 for neighbor_id in new_graph.neighbors_iter(new_source_id):
248 if target_degree == new_graph.degree(neighbor_id):
249 new_available_neighbors.append(neighbor_id)
250 if len(new_available_neighbors) < 1:
251 continue
252 new_target_id = random.choice(new_available_neighbors)
253 if len(new_available_neighbors) > 1:
254 while new_target_id == target_id:
255 new_target_id = random.choice(new_available_neighbors)
256
257 else:
258 new_target_id = new_available_neighbors[0]
259 break
260
261 if source_id == new_target_id or new_source_id == target_id:
262 continue
263 if new_graph.has_edge(source_id, new_target_id) or new_graph.has_edge(new_source_id, target_id):
264 continue
265
266
267 edge_data_1 = new_graph.get_edge(source_id, target_id)
268 edge_data_2 = new_graph.get_edge(new_source_id, new_target_id)
269 new_graph.delete_edge(source_id, target_id)
270 new_graph.delete_edge(new_source_id, new_target_id)
271
272
273 new_graph.add_edge(source_id, new_target_id, edge_data_1)
274 new_graph.add_edge(new_source_id, target_id, edge_data_2)
275
276 else:
277 raise Exception("Unknown randomization type %s" % randomization_type)
278
279 return new_graph
280
281 if __name__ == "__main__":
282
283 test_network = networkx.Graph()
284 test_network.add_nodes_from([1,2,3,4,5,6,7,8,9])
285 test_network.add_edges_from([(1,2),(1,3),(1,5),(2,6),(3,4),(5,6),(4,7),(7,8)])
286
287 print "original network:"
288 print test_network.edges()
289
290 print "preserve topology:"
291 random_network = randomize_graph(graph=test_network, randomization_type="preserve_topology")
292 print random_network.edges()
293
294 print "preserve topology and node degrees:"
295 random_network = randomize_graph(graph=test_network, randomization_type="preserve_topology_and_node_degree")
296 print random_network.edges()
297
298 print "preserve degree distribution:"
299 randomn = randomize_graph(graph=test_network, randomization_type="preserve_degree_distribution")
300 print randomn.edges()
301
302 print "preserve degree distribution and node degree:"
303 randomn = randomize_graph(graph=test_network, randomization_type="preserve_degree_distribution_and_node_degree")
304 print randomn.edges()
305
306 print "creating big graph..."
307 test_power_graph = networkx.Graph()
308 test_power_graph.add_nodes_from(range(100000))
309 test_power_graph.add_edges_from([ (random.randint(0,99999),random.randint(0,99999)) for x in xrange(1000000) ])
310 print "randomizing big network by preserve_topology..."
311 randomn = randomize_graph(graph=test_power_graph, randomization_type="preserve_topology")
312 print "randomizing by preserve_topology_and_node_degree..."
313 randomn = randomize_graph(graph=test_power_graph, randomization_type="preserve_topology_and_node_degree")
314 print "randomizing by preserve_degree_distribution..."
315 randomn = randomize_graph(graph=test_power_graph, randomization_type="preserve_degree_distribution")
316