import igraph as ig
from igraph import Graph
from Bio import SeqIO
import logging
from logging import debug, info, warning, error
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
class DBGraph(Graph):
newid = 0 # allow for a fixed id per node
k = 4 # we need to know the k-mer size (note: the length of the sequence per node is k-1)
#igraph = None
name = ""
compacted = False
contigs = []
def __init__(self, name="", k=4):
self.name = name
self.k = k
super().__init__(directed=True)
# utility function to quickly retrieve the edge between two igraph nodes
# this function should exist in igraph itself
def edge(self, n1, n2):
gl = self
return gl.es[gl.get_eid(n1.index, n2.index)]
"""
utility functions: adds the node sequences to the graph
This one initialized a fresh node if the sequence is not already there.
"""
def find_or_fresh_node(self, s):
new_node = None
g = self
try:
new_node = g.vs.find(sequence=s)
except:
if not new_node:
new_node = g.add_vertex(name=s)
new_node['id'] = self.newid # we need to keep a fixed id to reference nodes if other nodes are deleted
new_node['sequence'] = s
new_node['label'] = str(new_node.index) +':'+ s
new_node['visited'] = False
new_node['compacted'] = False
new_node['coverage'] = 1
new_node['sum'] = 0
new_node['dist'] = 0
assert new_node, 'Failed to create new node!'
self.newid += 1
return new_node
def add(self, s1, s2):
if self.compacted:
raise Exception("New nodes cannot be added to a compacted graph.")
g = self
n1 = self.find_or_fresh_node(s1)
n2 = self.find_or_fresh_node(s2)
i1 = n1.index
i2 = n2.index
debug ("adding edge"+str(i1)+" --> "+str(i2))
eid = None
try: # the edge could be there already
eid = g.get_eid(i1,i2)
g.es[eid]["coverage"] += 1 ## in a true DeBrujn Graph the edges carry the multiplicity!
except: # the edge is new
g.add_edges([(i1,i2)])
eid = g.get_eid(i1,i2)
g.es[eid]["coverage"] = 1
def add_kmer(self,kmer):
assert len(kmer) == self.k, "kmer sequence must have length k"
self.add(kmer[0:self.k-1], kmer[1:self.k])
def add_from_file(self, file):
for record in SeqIO.parse(file, 'fasta'):
i = 0
while i < (len(record.seq) - self.k + 1):
self.add_kmer(record.seq[i:i+self.k])
i += 1
def get_node0(self):
return Node(graph=self, id=0)
"""
compact the graph by compressing linear stretches of nodes into a single node
and deleting the redundant nodes
conserves coverage by computing the average coverage for the compacted nodes
and stores it in the node
That leads to a much more compact representation and reduces the effort of
subsequenct filtering and contig generation
Note that no more k-mers can be added after compaction, because the original
k-mers can no longer be found by dictionary lookup
"""
def compact(self):
if self.compacted:
raise Exception("Compaction can only be run once per graph")
self.compacted = True
## look for simple 0 start nodes first
## only nodes with outdegree can be compacted
# for ni in nset0: print(ni["name"])
i = 0
# update set of not yet compacted nodes after each iteration
while True: # this needs to be done this way, otherwise the set is not updated correctly
nset0 = self.vs.select(_outdegree_eq = 1, compacted = False)
debug(str (i) + ': len(nset0)' + str(len(nset0)) )
if len(nset0) < 1:
info('no more compactable stretches')
break
n = nset0[0]
i += 1
### we want to walk to the left-most position, before engaging with the compaction
n['visited'] = True
while (n.indegree() == 1 and n.predecessors()[0].outdegree() == 1 and not n.predecessors()[0]['visited']):
# move left
n = n.predecessors()[0]
n['visited'] = True # avoid infinite loop
debug('seeking left' + n['sequence'] )
for noob in self.vs: noob['visited'] = False # set it back
debug ("Start compacting: " + str(n) + str(n.outdegree()))
assert n.outdegree() == 1 and (not n['compacted']), "We hit an already compacted node!" # this should never happen because of select statement
first_time = True
# don't take this node again
# we are going to simply delete the neighbors, there is only one dircet neighbor
# but we also need to check if the next node is balanced
n['compacted'] = True # mark this node as compacted, so it won't come up again
n['sum'] = 0 # compacted nodes store their coverage themselves, sum of coverages
n['dist'] = 0 # distance forward to next "junction" ahead
## control id, keeping the originally assigned ids in the nodes
cid = n["id"]
## This is the actual compaction for node n
while True:
n = self.vs.select(id = cid)[0]
## when deletng nodes, weird things kan happen, so make sure we got the same node as before
assert n, "node must stay the same under one iteration"
## exit condition: we don't have exactly 1 successor, or the next node has
## multiple incoming nodes
if (n.outdegree() != 1 or n.successors()[0].indegree() != 1):
debug ('-------------- finished with node ' + str(n))
break
succ = n.successors()[0] # node is linear, so we are sure
### don't follow only self-reference:
if n == succ:
debug('------------------ hit a self reference, finished')
break
if succ['visited']:
debug ('------------------ hit a loop here')
break
if n in n.successors() or n in succ.successors():
debug('------------------- we hit a loop, finished')
break
## Only the very first node in our path needs to keep the full k-mer, that is a node with in_degree 0
## All other need to be clipped now
if n.indegree() > 0 and first_time:
# otherwise we would artificially prolong the sequence
# indegree 0 nodes always keep their sequence
n['sequence'] = n['sequence'][self.k-2:]
first_time = False
else:
first_time = False # but only once, otherwise we'd chop off the whole sequence
debug("really compacting " + n['sequence'] + ' --> ' + succ['sequence'] + str(n))
n['visited'] = True
eid = self.get_eid(n.index, succ.index) # geet the edge id
## add edges to all succ.successors to this node
for s in succ.successors():
self.add_edges([(n.index,s.index)]) ## add a new edge,
new_eid = self.get_eid(n.index, s.index) ## and get the index of the edge we just made
## we need to copy the coverage attribute over,
## so get the edge connecting
## successor to its next successor
succ_eid = self.get_eid(succ.index, s.index)
self.es[new_eid]['coverage'] = self.es[succ_eid]['coverage'] # copy coverage
## get the successors single character and add to node sequence
if succ["compacted"]:
## add the sequence of the compacted node, it is already compacted
n["sequence"] += succ["sequence"]
n["sum"] += succ["sum"]
n['dist'] += succ['dist']
else:
n["sequence"] += succ["sequence"][-1:]
if n['sum'] == None:
error ('this should never happen: ' + str(n))
assert self.es[eid]['coverage'], "Every edge should have a coverage attribute"
assert n['sum'] != None, "we set the coverage attribute already"
n['sum'] += self.es[eid]['coverage'] # here the coverage is in the edge
n['dist'] += 1 # now we have made only a single step
n["name"] = n["sequence"]
n['label'] = '['+ str(n.index) +'] '+ n['sequence']
## now delete the successor and the edge
eid = self.get_eid(n.index, succ.index)
self.delete_edges(eid)
debug ("deleting: " + succ['name'])
self.delete_vertices(succ.index)
## Now the compaction is complete, but we still have sum and dist per node, but not coverage, so we need to update
## the compacted nodes in a last sweep over all compacted nodes
for n in self.vs: n['visited'] = False
for n in self.vs.select(compacted = True):
if (n['dist'] == 0):
n['coverage'] = 1
n['compacted'] = False ## these nodes were not really compacted, there were just marked
else:
n['coverage'] = n['sum']/n['dist']
def viz(self):
g = self
layout = g.layout("kk")
color_dict = {True: "lightgreen", False: "tomato"}
colors = [color_dict[c] for c in g.vs["compacted"]]
shapes = []
visual_style = {}
visual_style["edge_width"] = [0.5 * int(coverage) for coverage in g.es["coverage"]]
visual_style["vertex_size"] = [20 * int(coverage) for coverage in g.vs["coverage"]]
for s in g.vs.indegree():
if s == 0:
shapes.append("rectangle")
else:
shapes.append("circle")
g.vs['label'] = list(map(lambda x: str(x.index)+':'+x['sequence'], g.vs))
return(ig.plot(g, bbox=(0, 0, 400, 400), vertex_color=colors,
vertex_shape=shapes, vertex_label_dist = -0.5, layout=layout, **visual_style ))
## utility function to get the best node, strategy:
## ignore self reference
## 0. if no further node: None
## 1. if outdegree == 1, get the node
## 2. if outdegree > 0, prioritize:
## 1) get not visited node with highest coverage
## the user is responsible to mark node as visited if it is used
## we don't want to hit on an infinite loop, but if a visited node has an unvisited next node
## it could still be part of an Eulerian walk
"""
TODO: This function has much room for improvement. It may break up a perfectly Eulerian
walk through the graph. Find an example of such a Eulerian graph topology. How could this be
improved?
"""
def get_next_node(self, n):
if (n.outdegree() < 1): return None
if (n.outdegree() == 1):
nn = n.successors()[0] ## there is only one, return it, but only if it is not self referential
if n == nn: # or nn['visited']: # self reference or already visited
### this is sub-optimal, we cannot solve graph named "double loop" below
### like this properly, at least it protects against infinite loop
### in order to resolve double loops and alternative paths, we have to return
### nn in case it has an edge that we haven't visited yet
return None
if not all(list(map(lambda y: y['visited'] or nn == y, nn.successors()))):
# checks, if of successor's successors has not been visited yet
# slightly better, but still not optimal, one would want to define a
# coverage threshold, at this point. However, such threshold should be
# adaptive, depending on global or local coverage
# Idea: calculate local coverage at the environment,
# to use a node twice, each edge to traverse should have at least similar (e.g. 1/2?) coverage
# compared to the local environment
return nn
else:
return None
nset = n.successors()
tmp = list(filter (lambda x: not x['visited'], nset))
if len(tmp) > 0:
nn = sorted(tmp, key=lambda node: (n.outdegree(), node['coverage'], self.edge(n, node)['coverage'], len(node['sequence'] )), reverse=True)[0]
if n == nn: # self reference
return None
else:
return nn
else:
tmp = nset
tmp = list(filter(lambda x: (not all(list(map(lambda y: y['visited'] or x == y, x.successors())))), tmp))
if len(tmp) < 1:
return None
else:
debug ('has unvisited next: ' + str(tmp))
tmp = sorted(tmp, key=lambda node: (node['coverage'], self.edge(n, node)['coverage'], len(node['sequence'] )), reverse=True)
nn = tmp[0]
if n == nn: # self reference
return None
else:
return nn
"""
make a walk through the graph, given the start node
"""
def walk(self, n):
sequence = n['sequence']
n['visited'] = True
n1 = n
debug('walk: ' + n1['name'] + n1['sequence'])
while self.get_next_node(n1):
n1 = self.get_next_node(n1)
debug('walk: ' + n1['name'])
n1['visited'] = True
if n1['compacted']:
sequence += n1['sequence']
debug ('seq: '+n1['sequence'])
else:
sequence += n1['sequence'][self.k-2]
debug ('seq: '+n1['sequence'][self.k-2])
info ('Done')
return sequence
### The gredy assembly is the simplest possible heuristics and works on the compacted graph
### With greedy bubble correction and at every junction, follow the path of highest coverage
### No variants will be detected this way
### Dangling short ends will be cut off if they are shorter than min_contig_len, and there is another
### path > 2*k ahead, even if they have higher coverage
### If a repeat (a node that has already been visited) is hit with distance < min_contig_len, the contig will be dropped
# - get all start nodes, easy ones first until all nodes have been visited
def assemble(self):
contigs = []
min_contig_len = self.k ## very lenient, could be increased but ok for toy examples
# get optimal start nodes first: optimal start nodes are those with indegree 0 and not visited
counter = 0
nset0 = self.vs.select( visited = False, _indegree = 0 )
while counter <= len(nset0): # protect against infinite loop
nset = self.vs.select(visited = False, _indegree = 0)
# we could also sort the nodeset to refine our strategy further but we don't need that now
if len(nset) < 1: break
contig = self.walk(nset[0])
if len(contig) >= min_contig_len:
contigs.append(contig)
# do the same for remaining other unvisited nodes
nset0 = self.vs.select( visited = False)
while counter <= len(nset0): # protect against infinite loop
nset = self.vs.select(visited = False)
# we could also sort the nodeset to refine our strategy further
# as we might end up being somewhere in a loop, we could as well start with
# the longest node
nset = sorted(nset, key = lambda x: len(x['sequence']), reverse = True)
if len(nset) < 1: break
contig = self.walk(nset[0])
if len(contig) >= min_contig_len:
contigs.append(contig)
self.contigs = contigs
return contigs
"""
utility funciton to calculate the coverage per graph
assume there is a real path between nodes
"""
def get_path_coverage(self, path):
pathsum = 0
pathlen = 0
for i in range(len(path)):
n = self.vs[path[i]]
if (n['compacted']):
pathsum += n['sum']
pathlen += n['dist']
else:
if i < len(path)-1:
pathsum += self.edge(self.vs[path[i]], self.vs[path[i+1]])['coverage']
pathlen += 1
if (pathlen > 0):
return pathsum/pathlen
else:
return 0
## simple bubble removal strategy:
## a bubble starts at a node with outdegree > 1
## it ends if the paths join again
## if the paths join at all
## if there are alternative paths to the confluent node, AND all nodes in a path have outdegree == 1
## then we have a real bubble: remove the nodes in the path(s) with lowest coverage
## however, we ignore the case for now that there could be divergent paths emerging from a lower
## coverage bubble.
def remove_bubbles(self):
max_look_ahead = 100 # maximum number of nodes to look ahead for a junction
# first check if there is a bubble at all
# we heavily use graph and set operations here
nset0 = self.vs.select(_outdegree_gt = 1) # the maximum number of nodes to visit
counter = 0
while (counter < len(nset0)): # max number of iterations possible because we only delete nodes
counter = counter + 1
## we have to update our nodes each time, because nodes could have been deleted already
nset = self.vs.select(_outdegree_gt = 1)
if len (nset) < 1: break
n = nset[0]
nhoods = self.neighborhood(n.successors(), order=max_look_ahead, mode="out") ## get nodeset in neighborhoods
for i in range(len(nhoods)):
for j in range(i+1, len(nhoods)):
## if the intersections are non-empty, there is a confluent node
confluent_node = isect(nhoods[i],nhoods[j])
if (confluent_node):
info (str(counter) + ':' +' bubble detected:' + n['name'] +' ---> ' + self.vs[confluent_node]['name'])
paths = list(map(lambda x: self.get_shortest_paths(x,confluent_node)[0], n.successors()))
paths = sorted(paths, key= lambda x: (g.get_path_coverage(x),len(x))) # order paths by coverage, then by length
debug(str(paths))
keep_this = paths.pop()
for path in paths:
if n.index in path: continue ## this is a loop, we cannot deal with this case yet
del_me = set(path) - set(keep_this) # get a set with shared nodes removed
# this guarantees also that we do not accidentially delete nodes that are still needed
# one could decide to only delete linear nodes
del_me = list(filter(lambda x: self.vs[x].outdegree() == 1 , del_me))
## if the graph was compacted already this would delete only compacted nodes
# del_me = list(filter(lambda x: self.vs[x].indegree() == 1 , del_me))
info ("deleting the following nodes: "+ str(del_me))
self.delete_vertices(del_me) # that will also clean the edges
def isect(lst1, lst2):
lst3 = [value for value in lst1 if value in lst2]
if len(lst3) < 1:
return None
else:
return lst3[0]
class Node:
id = 0
graph = None
inode = None
def __init__(self, graph, id=None, sequence=None):
self.graph = graph
if sequence:
self.inode = graph.vs.find(sequence=sequence)
else:
if id != None:
self.inode = graph.vs[id]
self.id = self.inode['id']
def indegree(self):
return self.inode.indegree()
def outdegree(self):
return self.inode.outdegree()
g = DBGraph('hey joe')
#g.add_kmer('tttt')
## bubble graph
g.add('abb','bbb')
DEBUG:root:adding edge0 --> 1
g = DBGraph('hey joe')
#g.add_kmer('tttt')
## bubble graph
g.add('abb','bbb')
g.add('bbb','bba')
g.add('bba','bas')
g.add('bas','asu')
g.add('bas','ast')
g.add('ast','stu')
g.add('stu','tup')
g.add('stu','tus')
g.add('tus','usu')
g.add('usu','suu')
g.add('asu','suu')
g.add('bbb','bbA')
g.add('bbb','bbA')
g.add('bbb','bbA')
g.add('bbA','bAs')
g.add('bbA','bAs')
g.add('bAs','Asu')
g.add('bAs','Asu')
g.add('Asu','suu')
g.add('suu','uuu')
## alternative start and end nodes
## g.add("NAT","ATG")
# g.add("NAT","ATG")
# g.add("NAT","ATG")
# g.add("XAT","ATG")
# g.add("ATG", "TGA")
# g.add("ATG", "TGA")
# g.add("ATG", "TGA")
# g.add("ATG", "TGA")
# g.add("ATG", "TGA")
g.viz()
DEBUG:root:adding edge0 --> 1 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge2 --> 3 DEBUG:root:adding edge3 --> 4 DEBUG:root:adding edge3 --> 5 DEBUG:root:adding edge5 --> 6 DEBUG:root:adding edge6 --> 7 DEBUG:root:adding edge6 --> 8 DEBUG:root:adding edge8 --> 9 DEBUG:root:adding edge9 --> 10 DEBUG:root:adding edge4 --> 10 DEBUG:root:adding edge1 --> 11 DEBUG:root:adding edge1 --> 11 DEBUG:root:adding edge1 --> 11 DEBUG:root:adding edge11 --> 12 DEBUG:root:adding edge11 --> 12 DEBUG:root:adding edge12 --> 13 DEBUG:root:adding edge12 --> 13 DEBUG:root:adding edge13 --> 10 DEBUG:root:adding edge10 --> 14
g.compact()
g.viz()
DEBUG:root:0: len(nset0)10 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abb', 'id': 0, 'sequence': 'abb', 'label': '0:abb', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting abb --> bbbigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abb', 'id': 0, 'sequence': 'abb', 'label': '0:abb', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: bbb DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abbb', 'id': 0, 'sequence': 'abbb', 'label': '[0] abbb', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:1: len(nset0)9 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'bba', 'id': 2, 'sequence': 'bba', 'label': '2:bba', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting a --> basigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'bba', 'id': 2, 'sequence': 'a', 'label': '2:bba', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: bas DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'as', 'id': 2, 'sequence': 'as', 'label': '[1] as', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:2: len(nset0)8 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 2, {'name': 'asu', 'id': 4, 'sequence': 'asu', 'label': '4:asu', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 2, {'name': 'asu', 'id': 4, 'sequence': 'asu', 'label': '4:asu', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:3: len(nset0)7 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'ast', 'id': 5, 'sequence': 'ast', 'label': '5:ast', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting t --> stuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'ast', 'id': 5, 'sequence': 't', 'label': '5:ast', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: stu DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'tu', 'id': 5, 'sequence': 'tu', 'label': '[3] tu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:4: len(nset0)6 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'tus', 'id': 8, 'sequence': 'tus', 'label': '8:tus', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting s --> usuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'tus', 'id': 8, 'sequence': 's', 'label': '8:tus', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: usu DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'su', 'id': 8, 'sequence': 'su', 'label': '[5] su', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:5: len(nset0)4 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'suu', 'id': 10, 'sequence': 'suu', 'label': '10:suu', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting u --> uuuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'suu', 'id': 10, 'sequence': 'u', 'label': '10:suu', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: uuu DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'uu', 'id': 10, 'sequence': 'uu', 'label': '[6] uu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:6: len(nset0)3 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'bbA', 'id': 11, 'sequence': 'bbA', 'label': '11:bbA', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting A --> bAsigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'bbA', 'id': 11, 'sequence': 'A', 'label': '11:bbA', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: bAs DEBUG:root:really compacting As --> Asuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'As', 'id': 11, 'sequence': 'As', 'label': '[7] As', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1}) DEBUG:root:deleting: Asu DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'Asu', 'id': 11, 'sequence': 'Asu', 'label': '[7] Asu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 2}) DEBUG:root:7: len(nset0)0 INFO:root:no more compactable stretches
g = DBGraph("cycle")
# cyclic
g.add("ZAP","APZ")
g.add("APZ","PZA")
g.add("PZA","ZAP")
## this must not give into infinite loop
g.compact()
g.assemble() # output: ['ZAPZA']
DEBUG:root:adding edge0 --> 1 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge2 --> 0 DEBUG:root:0: len(nset0)3 DEBUG:root:seeking leftPZA DEBUG:root:seeking leftAPZ DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 1, {'name': 'APZ', 'id': 1, 'sequence': 'APZ', 'label': '1:APZ', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting Z --> PZAigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 1, {'name': 'APZ', 'id': 1, 'sequence': 'Z', 'label': '1:APZ', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: PZA DEBUG:root:------------------- we hit a loop, finished DEBUG:root:1: len(nset0)1 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 0, {'name': 'ZAP', 'id': 0, 'sequence': 'ZAP', 'label': '0:ZAP', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:------------------- we hit a loop, finished DEBUG:root:2: len(nset0)0 INFO:root:no more compactable stretches DEBUG:root:walk: ZAPZAP DEBUG:root:walk: ZA DEBUG:root:seq: ZA INFO:root:Done
['ZAPZA']
# test construction from file
g = DBGraph('file')
g.add_from_file('test.fasta')
g.viz()
DEBUG:root:adding edge0 --> 1 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge2 --> 3 DEBUG:root:adding edge3 --> 4 DEBUG:root:adding edge4 --> 5 DEBUG:root:adding edge5 --> 6 DEBUG:root:adding edge6 --> 7 DEBUG:root:adding edge7 --> 8 DEBUG:root:adding edge8 --> 9 DEBUG:root:adding edge9 --> 10 DEBUG:root:adding edge10 --> 11 DEBUG:root:adding edge12 --> 13 DEBUG:root:adding edge13 --> 14 DEBUG:root:adding edge14 --> 3 DEBUG:root:adding edge3 --> 4 DEBUG:root:adding edge4 --> 5 DEBUG:root:adding edge5 --> 6 DEBUG:root:adding edge6 --> 7 DEBUG:root:adding edge7 --> 8 DEBUG:root:adding edge8 --> 9 DEBUG:root:adding edge9 --> 10 DEBUG:root:adding edge10 --> 11 DEBUG:root:adding edge15 --> 16 DEBUG:root:adding edge16 --> 17 DEBUG:root:adding edge17 --> 18 DEBUG:root:adding edge18 --> 19 DEBUG:root:adding edge19 --> 20 DEBUG:root:adding edge20 --> 21 DEBUG:root:adding edge21 --> 22 DEBUG:root:adding edge22 --> 23 DEBUG:root:adding edge23 --> 24 DEBUG:root:adding edge24 --> 25 DEBUG:root:adding edge25 --> 26 DEBUG:root:adding edge26 --> 27 DEBUG:root:adding edge27 --> 28 DEBUG:root:adding edge28 --> 29 DEBUG:root:adding edge29 --> 30 DEBUG:root:adding edge30 --> 31 DEBUG:root:adding edge31 --> 32 DEBUG:root:adding edge32 --> 33 DEBUG:root:adding edge33 --> 34 DEBUG:root:adding edge34 --> 35 DEBUG:root:adding edge35 --> 36 DEBUG:root:adding edge36 --> 37 DEBUG:root:adding edge37 --> 38 DEBUG:root:adding edge38 --> 39 DEBUG:root:adding edge39 --> 40 DEBUG:root:adding edge40 --> 41 DEBUG:root:adding edge41 --> 42 DEBUG:root:adding edge42 --> 43 DEBUG:root:adding edge43 --> 44 DEBUG:root:adding edge44 --> 45 DEBUG:root:adding edge45 --> 15 DEBUG:root:adding edge15 --> 16 DEBUG:root:adding edge16 --> 46 DEBUG:root:adding edge46 --> 47 DEBUG:root:adding edge47 --> 48 DEBUG:root:adding edge48 --> 49 DEBUG:root:adding edge49 --> 50 DEBUG:root:adding edge50 --> 51 DEBUG:root:adding edge51 --> 52 DEBUG:root:adding edge52 --> 53
g.compact()
g.viz()
DEBUG:root:0: len(nset0)51 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abc'), 'id': 0, 'sequence': Seq('abc'), 'label': Seq('0:abc'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting abc --> bcdigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abc'), 'id': 0, 'sequence': Seq('abc'), 'label': Seq('0:abc'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: bcd DEBUG:root:really compacting abcd --> cdeigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abcd'), 'id': 0, 'sequence': Seq('abcd'), 'label': Seq('[0] abcd'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: cde DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abcde'), 'id': 0, 'sequence': Seq('abcde'), 'label': Seq('[0] abcde'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:1: len(nset0)48 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('def'), 'id': 3, 'sequence': Seq('def'), 'label': Seq('3:def'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting f --> efgigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('def'), 'id': 3, 'sequence': Seq('f'), 'label': Seq('3:def'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: efg DEBUG:root:really compacting fg --> fghigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fg'), 'id': 3, 'sequence': Seq('fg'), 'label': Seq('[1] fg'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1}) DEBUG:root:deleting: fgh DEBUG:root:really compacting fgh --> ghiigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fgh'), 'id': 3, 'sequence': Seq('fgh'), 'label': Seq('[1] fgh'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 2}) DEBUG:root:deleting: ghi DEBUG:root:really compacting fghi --> hijigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghi'), 'id': 3, 'sequence': Seq('fghi'), 'label': Seq('[1] fghi'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 3}) DEBUG:root:deleting: hij DEBUG:root:really compacting fghij --> ijkigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghij'), 'id': 3, 'sequence': Seq('fghij'), 'label': Seq('[1] fghij'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 8, 'dist': 4}) DEBUG:root:deleting: ijk DEBUG:root:really compacting fghijk --> jkligraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijk'), 'id': 3, 'sequence': Seq('fghijk'), 'label': Seq('[1] fghijk'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 10, 'dist': 5}) DEBUG:root:deleting: jkl DEBUG:root:really compacting fghijkl --> klmigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijkl'), 'id': 3, 'sequence': Seq('fghijkl'), 'label': Seq('[1] fghijkl'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 12, 'dist': 6}) DEBUG:root:deleting: klm DEBUG:root:really compacting fghijklm --> lmnigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijklm'), 'id': 3, 'sequence': Seq('fghijklm'), 'label': Seq('[1] fghijklm'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 14, 'dist': 7}) DEBUG:root:deleting: lmn DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijklmn'), 'id': 3, 'sequence': Seq('fghijklmn'), 'label': Seq('[1] fghijklmn'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 16, 'dist': 8}) DEBUG:root:2: len(nset0)40 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abd'), 'id': 12, 'sequence': Seq('abd'), 'label': Seq('12:abd'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting abd --> bddigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abd'), 'id': 12, 'sequence': Seq('abd'), 'label': Seq('12:abd'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: bdd DEBUG:root:really compacting abdd --> ddeigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abdd'), 'id': 12, 'sequence': Seq('abdd'), 'label': Seq('[2] abdd'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: dde DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abdde'), 'id': 12, 'sequence': Seq('abdde'), 'label': Seq('[2] abdde'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:3: len(nset0)37 DEBUG:root:seeking left_th DEBUG:root:seeking leftr_t DEBUG:root:seeking lefter_ DEBUG:root:seeking leftver DEBUG:root:seeking leftove DEBUG:root:seeking left_ov DEBUG:root:seeking lefts_o DEBUG:root:seeking leftps_ DEBUG:root:seeking leftmps DEBUG:root:seeking leftump DEBUG:root:seeking leftjum DEBUG:root:seeking left_ju DEBUG:root:seeking leftx_j DEBUG:root:seeking leftox_ DEBUG:root:seeking leftfox DEBUG:root:seeking left_fo DEBUG:root:seeking leftn_f DEBUG:root:seeking leftwn_ DEBUG:root:seeking leftown DEBUG:root:seeking leftrow DEBUG:root:seeking leftbro DEBUG:root:seeking left_br DEBUG:root:seeking leftk_b DEBUG:root:seeking leftck_ DEBUG:root:seeking leftick DEBUG:root:seeking leftuic DEBUG:root:seeking leftqui DEBUG:root:seeking left_qu DEBUG:root:seeking lefte_q DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_q'), 'id': 17, 'sequence': Seq('e_q'), 'label': Seq('17:e_q'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting q --> _quigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_q'), 'id': 17, 'sequence': Seq('q'), 'label': Seq('17:e_q'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: _qu DEBUG:root:really compacting qu --> quiigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('qu'), 'id': 17, 'sequence': Seq('qu'), 'label': Seq('[5] qu'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: qui DEBUG:root:really compacting qui --> uicigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('qui'), 'id': 17, 'sequence': Seq('qui'), 'label': Seq('[5] qui'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:deleting: uic DEBUG:root:really compacting quic --> ickigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quic'), 'id': 17, 'sequence': Seq('quic'), 'label': Seq('[5] quic'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3}) DEBUG:root:deleting: ick DEBUG:root:really compacting quick --> ck_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick'), 'id': 17, 'sequence': Seq('quick'), 'label': Seq('[5] quick'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 4}) DEBUG:root:deleting: ck_ DEBUG:root:really compacting quick_ --> k_bigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_'), 'id': 17, 'sequence': Seq('quick_'), 'label': Seq('[5] quick_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 5}) DEBUG:root:deleting: k_b DEBUG:root:really compacting quick_b --> _brigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_b'), 'id': 17, 'sequence': Seq('quick_b'), 'label': Seq('[5] quick_b'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 6}) DEBUG:root:deleting: _br DEBUG:root:really compacting quick_br --> broigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_br'), 'id': 17, 'sequence': Seq('quick_br'), 'label': Seq('[5] quick_br'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 7, 'dist': 7}) DEBUG:root:deleting: bro DEBUG:root:really compacting quick_bro --> rowigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_bro'), 'id': 17, 'sequence': Seq('quick_bro'), 'label': Seq('[5] quick_bro'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 8, 'dist': 8}) DEBUG:root:deleting: row DEBUG:root:really compacting quick_brow --> ownigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brow'), 'id': 17, 'sequence': Seq('quick_brow'), 'label': Seq('[5] quick_brow'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 9, 'dist': 9}) DEBUG:root:deleting: own DEBUG:root:really compacting quick_brown --> wn_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown'), 'id': 17, 'sequence': Seq('quick_brown'), 'label': Seq('[5] quick_brown'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 10, 'dist': 10}) DEBUG:root:deleting: wn_ DEBUG:root:really compacting quick_brown_ --> n_figraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_'), 'id': 17, 'sequence': Seq('quick_brown_'), 'label': Seq('[5] quick_brown_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 11, 'dist': 11}) DEBUG:root:deleting: n_f DEBUG:root:really compacting quick_brown_f --> _foigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_f'), 'id': 17, 'sequence': Seq('quick_brown_f'), 'label': Seq('[5] quick_brown_f'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 12, 'dist': 12}) DEBUG:root:deleting: _fo DEBUG:root:really compacting quick_brown_fo --> foxigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fo'), 'id': 17, 'sequence': Seq('quick_brown_fo'), 'label': Seq('[5] quick_brown_fo'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 13, 'dist': 13}) DEBUG:root:deleting: fox DEBUG:root:really compacting quick_brown_fox --> ox_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox'), 'id': 17, 'sequence': Seq('quick_brown_fox'), 'label': Seq('[5] quick_brown_fox'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 14, 'dist': 14}) DEBUG:root:deleting: ox_ DEBUG:root:really compacting quick_brown_fox_ --> x_jigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_'), 'id': 17, 'sequence': Seq('quick_brown_fox_'), 'label': Seq('[5] quick_brown_fox_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 15, 'dist': 15}) DEBUG:root:deleting: x_j DEBUG:root:really compacting quick_brown_fox_j --> _juigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_j'), 'id': 17, 'sequence': Seq('quick_brown_fox_j'), 'label': Seq('[5] quick_brown_fox_j'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 16, 'dist': 16}) DEBUG:root:deleting: _ju DEBUG:root:really compacting quick_brown_fox_ju --> jumigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_ju'), 'id': 17, 'sequence': Seq('quick_brown_fox_ju'), 'label': Seq('[5] quick_brown_fox_ju'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 17, 'dist': 17}) DEBUG:root:deleting: jum DEBUG:root:really compacting quick_brown_fox_jum --> umpigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jum'), 'id': 17, 'sequence': Seq('quick_brown_fox_jum'), 'label': Seq('[5] quick_brown_fox_jum'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 18, 'dist': 18}) DEBUG:root:deleting: ump DEBUG:root:really compacting quick_brown_fox_jump --> mpsigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jump'), 'id': 17, 'sequence': Seq('quick_brown_fox_jump'), 'label': Seq('[5] quick_brown_fox_jump'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 19, 'dist': 19}) DEBUG:root:deleting: mps DEBUG:root:really compacting quick_brown_fox_jumps --> ps_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps'), 'label': Seq('[5] quick_brown_fox_jumps'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 20, 'dist': 20}) DEBUG:root:deleting: ps_ DEBUG:root:really compacting quick_brown_fox_jumps_ --> s_oigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_'), 'label': Seq('[5] quick_brown_fox_jumps_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 21, 'dist': 21}) DEBUG:root:deleting: s_o DEBUG:root:really compacting quick_brown_fox_jumps_o --> _ovigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_o'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_o'), 'label': Seq('[5] quick_brown_fox_jumps_o'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 22, 'dist': 22}) DEBUG:root:deleting: _ov DEBUG:root:really compacting quick_brown_fox_jumps_ov --> oveigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_ov'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_ov'), 'label': Seq('[5] quick_brown_fox_jumps_ov'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 23, 'dist': 23}) DEBUG:root:deleting: ove DEBUG:root:really compacting quick_brown_fox_jumps_ove --> verigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_ove'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_ove'), 'label': Seq('[5] quick_brown_fox_jumps_ove'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 24, 'dist': 24}) DEBUG:root:deleting: ver DEBUG:root:really compacting quick_brown_fox_jumps_over --> er_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over'), 'label': Seq('[5] quick_brown_fox_jumps_over'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 25, 'dist': 25}) DEBUG:root:deleting: er_ DEBUG:root:really compacting quick_brown_fox_jumps_over_ --> r_tigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_'), 'label': Seq('[5] quick_brown_fox_jumps_over_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 26, 'dist': 26}) DEBUG:root:deleting: r_t DEBUG:root:really compacting quick_brown_fox_jumps_over_t --> _thigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_t'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_t'), 'label': Seq('[5] quick_brown_fox_jumps_over_t'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 27, 'dist': 27}) DEBUG:root:deleting: _th DEBUG:root:really compacting quick_brown_fox_jumps_over_th --> theigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_th'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_th'), 'label': Seq('[5] quick_brown_fox_jumps_over_th'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 28, 'dist': 28}) DEBUG:root:deleting: the DEBUG:root:------------------- we hit a loop, finished DEBUG:root:4: len(nset0)7 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_l'), 'id': 46, 'sequence': Seq('e_l'), 'label': Seq('46:e_l'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting l --> _laigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_l'), 'id': 46, 'sequence': Seq('l'), 'label': Seq('46:e_l'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: _la DEBUG:root:really compacting la --> lazigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('la'), 'id': 46, 'sequence': Seq('la'), 'label': Seq('[5] la'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: laz DEBUG:root:really compacting laz --> azyigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('laz'), 'id': 46, 'sequence': Seq('laz'), 'label': Seq('[5] laz'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:deleting: azy DEBUG:root:really compacting lazy --> zy_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy'), 'id': 46, 'sequence': Seq('lazy'), 'label': Seq('[5] lazy'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3}) DEBUG:root:deleting: zy_ DEBUG:root:really compacting lazy_ --> y_digraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_'), 'id': 46, 'sequence': Seq('lazy_'), 'label': Seq('[5] lazy_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 4}) DEBUG:root:deleting: y_d DEBUG:root:really compacting lazy_d --> _doigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_d'), 'id': 46, 'sequence': Seq('lazy_d'), 'label': Seq('[5] lazy_d'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 5}) DEBUG:root:deleting: _do DEBUG:root:really compacting lazy_do --> dogigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_do'), 'id': 46, 'sequence': Seq('lazy_do'), 'label': Seq('[5] lazy_do'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 6}) DEBUG:root:deleting: dog DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_dog'), 'id': 46, 'sequence': Seq('lazy_dog'), 'label': Seq('[5] lazy_dog'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 7, 'dist': 7}) DEBUG:root:5: len(nset0)0 INFO:root:no more compactable stretches
g.remove_bubbles()
g.assemble() ## [Seq('12345678'), Seq('abcdefghijklmnopqrstuvwXYZ')]
# output: [Seq('abcdefghijklmn'),
# Seq('abdde'),
# Seq('quick_brown_fox_jumps_over_the_lazy_dog')]
# no bubble will be deleted because it is indeed a loop
INFO:root:1: bubble detected:he_ ---> lazy_dog DEBUG:root:[[5], [4, 3, 5]] INFO:root:deleting the following nodes: [] DEBUG:root:walk: abcdeabcde DEBUG:root:walk: fghijklmn DEBUG:root:seq: fghijklmn INFO:root:Done DEBUG:root:walk: abddeabdde INFO:root:Done DEBUG:root:walk: quick_brown_fox_jumps_over_thequick_brown_fox_jumps_over_the DEBUG:root:walk: he_ DEBUG:root:seq: _ DEBUG:root:walk: lazy_dog DEBUG:root:seq: lazy_dog INFO:root:Done
[Seq('abcdefghijklmn'), Seq('abdde'), Seq('quick_brown_fox_jumps_over_the_lazy_dog')]
g = DBGraph('test more stuff')
g.add("TGA","GAA")
g.add("GAA","AAT")
g.add("GAA","AAT")
g.add("GAA","AAT")
g.add("GAA","AAT")
g.add("AAT","ATT")
g.add("AAT","ATC")
# short linear graph
g.add("POP","OPA")
g.add('POP','OPA')
g.add("OPA","PAM")
## self repeat + cyclic graph
g.add("XXX","XXX")
g.add("XXX","XXX")
g.add("XXX","XXX")
g.add("XXX","XXX")
g.add("XXX","XXX")
g.add("XXX","XXY")
g.add("XXX","XXY")
g.add("XXY","XYX")
g.add("XYX","YXX")
g.add("YXX","XXX")
g.viz()
DEBUG:root:adding edge0 --> 1 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge2 --> 3 DEBUG:root:adding edge2 --> 4 DEBUG:root:adding edge5 --> 6 DEBUG:root:adding edge5 --> 6 DEBUG:root:adding edge6 --> 7 DEBUG:root:adding edge8 --> 8 DEBUG:root:adding edge8 --> 8 DEBUG:root:adding edge8 --> 8 DEBUG:root:adding edge8 --> 8 DEBUG:root:adding edge8 --> 8 DEBUG:root:adding edge8 --> 9 DEBUG:root:adding edge8 --> 9 DEBUG:root:adding edge9 --> 10 DEBUG:root:adding edge10 --> 11 DEBUG:root:adding edge11 --> 8
g.compact()
g.assemble() # should give: ['TGAATT', 'POPAM', 'XXXYXX']
DEBUG:root:0: len(nset0)7 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGA', 'id': 0, 'sequence': 'TGA', 'label': '0:TGA', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting TGA --> GAAigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGA', 'id': 0, 'sequence': 'TGA', 'label': '0:TGA', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: GAA DEBUG:root:really compacting TGAA --> AATigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGAA', 'id': 0, 'sequence': 'TGAA', 'label': '[0] TGAA', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: AAT DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGAAT', 'id': 0, 'sequence': 'TGAAT', 'label': '[0] TGAAT', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 2}) DEBUG:root:1: len(nset0)5 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POP', 'id': 5, 'sequence': 'POP', 'label': '5:POP', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting POP --> OPAigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POP', 'id': 5, 'sequence': 'POP', 'label': '5:POP', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: OPA DEBUG:root:really compacting POPA --> PAMigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POPA', 'id': 5, 'sequence': 'POPA', 'label': '[3] POPA', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1}) DEBUG:root:deleting: PAM DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POPAM', 'id': 5, 'sequence': 'POPAM', 'label': '[3] POPAM', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 2}) DEBUG:root:2: len(nset0)3 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'XXY', 'id': 9, 'sequence': 'XXY', 'label': '9:XXY', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting Y --> XYXigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'XXY', 'id': 9, 'sequence': 'Y', 'label': '9:XXY', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: XYX DEBUG:root:really compacting YX --> YXXigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'YX', 'id': 9, 'sequence': 'YX', 'label': '[5] YX', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: YXX DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'YXX', 'id': 9, 'sequence': 'YXX', 'label': '[5] YXX', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:3: len(nset0)0 INFO:root:no more compactable stretches DEBUG:root:walk: TGAATTGAAT DEBUG:root:walk: ATT DEBUG:root:seq: T INFO:root:Done DEBUG:root:walk: POPAMPOPAM INFO:root:Done DEBUG:root:walk: ATCATC INFO:root:Done DEBUG:root:walk: XXXXXX DEBUG:root:walk: YXX DEBUG:root:seq: YXX INFO:root:Done
['TGAATT', 'POPAM', 'XXXYXX']
g = DBGraph('hey joe')
g.add('123','234')
g.add('234','345')
g.add('345','456')
g.viz()
DEBUG:root:adding edge0 --> 1 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge2 --> 3
g.assemble() # simple graphs can be assembled directly: 123456
DEBUG:root:walk: 123123 DEBUG:root:walk: 234 DEBUG:root:seq: 4 DEBUG:root:walk: 345 DEBUG:root:seq: 5 DEBUG:root:walk: 456 DEBUG:root:seq: 6 INFO:root:Done
['123456']
g.compact()
g.viz()
DEBUG:root:0: len(nset0)3 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123', 'id': 0, 'sequence': '123', 'label': '0:123', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting 123 --> 234igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123', 'id': 0, 'sequence': '123', 'label': '0:123', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: 234 DEBUG:root:really compacting 1234 --> 345igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '1234', 'id': 0, 'sequence': '1234', 'label': '[0] 1234', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: 345 DEBUG:root:really compacting 12345 --> 456igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '12345', 'id': 0, 'sequence': '12345', 'label': '[0] 12345', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:deleting: 456 DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123456', 'id': 0, 'sequence': '123456', 'label': '[0] 123456', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3}) DEBUG:root:1: len(nset0)0 INFO:root:no more compactable stretches
g = DBGraph('double loop')
g.add('0ab','abc')
g.add('abc','bcd')
g.add('bcd','cde')
g.add('bcd','cdE')
g.add('cde','dea')
g.add('cdE','dEa')
g.add('dea','eab')
g.add('dEa','Eab')
g.add('eab','abc')
g.add('Eab','abc')
g.viz()
DEBUG:root:adding edge0 --> 1 DEBUG:root:adding edge1 --> 2 DEBUG:root:adding edge2 --> 3 DEBUG:root:adding edge2 --> 4 DEBUG:root:adding edge3 --> 5 DEBUG:root:adding edge4 --> 6 DEBUG:root:adding edge5 --> 7 DEBUG:root:adding edge6 --> 8 DEBUG:root:adding edge7 --> 1 DEBUG:root:adding edge8 --> 1
g.compact()
g.viz()
DEBUG:root:0: len(nset0)8 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 0, {'name': '0ab', 'id': 0, 'sequence': '0ab', 'label': '0:0ab', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 0, {'name': '0ab', 'id': 0, 'sequence': '0ab', 'label': '0:0ab', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:1: len(nset0)7 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'abc', 'id': 1, 'sequence': 'abc', 'label': '1:abc', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting c --> bcdigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'abc', 'id': 1, 'sequence': 'c', 'label': '1:abc', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: bcd DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '[1] cd', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:2: len(nset0)6 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'cde', 'id': 3, 'sequence': 'cde', 'label': '3:cde', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting e --> deaigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'cde', 'id': 3, 'sequence': 'e', 'label': '3:cde', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: dea DEBUG:root:really compacting ea --> eabigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'ea', 'id': 3, 'sequence': 'ea', 'label': '[2] ea', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: eab DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'eab', 'id': 3, 'sequence': 'eab', 'label': '[2] eab', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:3: len(nset0)3 DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'cdE', 'id': 4, 'sequence': 'cdE', 'label': '4:cdE', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1 DEBUG:root:really compacting E --> dEaigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'cdE', 'id': 4, 'sequence': 'E', 'label': '4:cdE', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0}) DEBUG:root:deleting: dEa DEBUG:root:really compacting Ea --> Eabigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'Ea', 'id': 4, 'sequence': 'Ea', 'label': '[3] Ea', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1}) DEBUG:root:deleting: Eab DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'Eab', 'id': 4, 'sequence': 'Eab', 'label': '[3] Eab', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2}) DEBUG:root:4: len(nset0)0 INFO:root:no more compactable stretches
g.remove_bubbles()
g.viz()
INFO:root:1: bubble detected:cd ---> eab DEBUG:root:[[2], [3, 1, 2]] INFO:root:deleting the following nodes: []
g.assemble() # should give: ['0abcdeabcdEab']
DEBUG:root:walk: 0ab0ab DEBUG:root:walk: cd DEBUG:root:seq: cd DEBUG:root:walk: eab DEBUG:root:seq: eab DEBUG:root:has unvisited next: [igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '1:cd', 'visited': True, 'compacted': True, 'coverage': 1.0, 'sum': 1, 'dist': 1})] DEBUG:root:has unvisited next: [igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '1:cd', 'visited': True, 'compacted': True, 'coverage': 1.0, 'sum': 1, 'dist': 1})] DEBUG:root:walk: cd DEBUG:root:seq: cd DEBUG:root:walk: Eab DEBUG:root:seq: Eab INFO:root:Done
['0abcdeabcdEab']