Skip to content

Commit 03e9cba

Browse files
author
Hamid Gasmi
committed
#187 is completed
1 parent f345696 commit 03e9cba

File tree

11 files changed

+99
-0
lines changed

11 files changed

+99
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
import sys
2+
3+
class De_Bruijn_Graph:
    """De Bruijn graph built from a collection of equal-length k-mers.

    Every k-mer contributes one directed edge going from its (k-1)-character
    prefix to its (k-1)-character suffix. A k-mer that occurs several times
    in the input produces that many parallel edges.

    Attributes:
        nodes: list of distinct (k-1)-mers; the index of an entry is its
            node number. Nodes are numbered in order of first appearance.
        adjacency_list: ``adjacency_list[i]`` is the list of node numbers
            reachable from node ``i``, in input order (duplicates kept).
    """

    def __init__(self, patterns):
        """Build the graph from ``patterns``.

        Args:
            patterns: non-empty sequence of non-empty strings that all have
                the same length.

        Raises:
            ValueError: if ``patterns`` is empty, if the first pattern is
                empty, or if the patterns do not all share one length.
        """
        # Explicit exceptions instead of ``assert``: assertions are removed
        # when Python runs with -O, which would let bad input through.
        if len(patterns) == 0:
            raise ValueError("patterns must contain at least one k-mer")

        self._pattern_size = len(patterns[0])
        if self._pattern_size == 0:
            raise ValueError("k-mers must not be empty")

        self._build_graph(patterns)

    def _build_graph(self, patterns):
        """Populate ``nodes`` and ``adjacency_list`` from ``patterns``.

        Raises:
            ValueError: if a pattern's length differs from the first one's.
        """
        self.nodes = []
        self.adjacency_list = []
        # Maps a (k-1)-mer to its node number so lookups stay O(1).
        node_kmer_no_dict = {}
        size = self._pattern_size

        for pattern in patterns:
            if len(pattern) != size:
                raise ValueError(
                    "all k-mers must have length %d, got %r" % (size, pattern)
                )

            # The prefix is registered before the suffix; this fixes the
            # order in which new nodes receive their numbers.
            prefix_node_no = self.get_node_no(pattern[:size - 1], node_kmer_no_dict)
            suffix_node_no = self.get_node_no(pattern[1:], node_kmer_no_dict)

            self.adjacency_list[prefix_node_no].append(suffix_node_no)

    # Original (misspelled) name kept so existing references keep working.
    _build_gragh = _build_graph

    def get_node_no(self, kmer, node_kmer_no_dict):
        """Return the node number of ``kmer``, creating the node if needed.

        Args:
            kmer: node label to look up.
            node_kmer_no_dict: dict mapping labels to node numbers; updated
                in place when a new node is created.

        Returns:
            The index of ``kmer`` in ``self.nodes``.
        """
        node_no = node_kmer_no_dict.get(kmer)
        if node_no is None:
            # New label: append it together with an empty adjacency row so
            # both lists stay index-aligned.
            node_no = len(self.nodes)
            self.nodes.append(kmer)
            self.adjacency_list.append([])
            node_kmer_no_dict[kmer] = node_no

        return node_no

    def str_adjacency_list(self):
        """Return the graph as text, one ``SRC -> DST1,DST2`` line per node.

        Nodes without outgoing edges are omitted. Lines follow node-number
        order and are separated by newlines (no trailing newline).
        """
        result_list = []
        for label, adjacents in zip(self.nodes, self.adjacency_list):
            if not adjacents:
                continue

            targets = ','.join(self.nodes[node_no] for node_no in adjacents)
            result_list.append(label + ' -> ' + targets)

        return '\n'.join(result_list)
55+
56+
if __name__ == "__main__":
    # Read one k-mer per line from standard input. Surrounding whitespace is
    # stripped and blank lines are skipped so that stray spaces or empty
    # lines do not cause a length-mismatch failure in the constructor.
    patterns = [
        line.strip()
        for line in sys.stdin.read().splitlines()
        if line.strip()
    ]

    de_bruijn_graph = De_Bruijn_Graph(patterns)

    # Emit the adjacency list in "PREFIX -> SUFFIX1,SUFFIX2" form.
    print(de_bruijn_graph.str_adjacency_list())
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
GAG -> AGG
2+
CAG -> AGG,AGG
3+
GGG -> GGG,GGA
4+
AGG -> GGG
5+
GGA -> GAG
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
GAGG
2+
CAGG
3+
GGGG
4+
GGGA
5+
CAGG
6+
AGGG
7+
GGAG
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
GCAA -> CAAG
2+
CAGC -> AGCT
3+
TGAC -> GACG
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
GCAAG
2+
CAGCT
3+
TGACG
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
AGG -> GGT,GGC
2+
GGC -> GCT
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
AGGT
2+
GGCT
3+
AGGC
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
TTC -> TCT,TCT
2+
GGC -> GCT,GCT
3+
AAG -> AGT
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
TTCT
2+
GGCT
3+
AAGT
4+
GGCT
5+
TTCT
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
C -> A,A,A,A,C,A
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
CA
2+
CA
3+
CA
4+
CA
5+
CC
6+
CA

0 commit comments

Comments
 (0)