#!/usr/bin/env python
# coding: utf-8
"""Merge overlapping or adjacent start/end intervals and print them.

Based on https://www.biostars.org/p/9513063/#9513063

Run as a script, this writes the sample rows in ``s`` to ``data.txt``, loads
them with pandas, expands every (start, end) pair into its individual
positions, de-duplicates and sorts those positions, and prints the resulting
consecutive runs as ``low:high`` lines.
"""

import pandas as pd

# Sample input: one "e-value start end" record per line, space separated.
s = '''1.83155e-63 8 25
7.37596e-64 10 20
9.14344e-65 7 24
6.86568e-72 30 50
6.45089e-69 24679455 24680333
4.49086e-56 24679455 24680312
1.78896e-52 35167152 35167547
2.57611e-51 35167209 35167547'''


def range_extract(lst):
    """Yield runs of consecutive integers found in *lst*.

    Interval-making code modified from
    https://www.rosettacode.org/wiki/Range_extraction#Python

    Args:
        lst: Indexable sequence of increasing integers.

    Yields:
        ``(low, high)`` for every run of two or more consecutive integers,
        and ``(value,)`` for an integer with no consecutive neighbour.
    """
    # NOTE(review): the inner loop and the `hi` assignment were missing from
    # the copy of this file under review (the text between `<` and `>` had
    # been stripped). They are restored here from the Rosetta Code algorithm
    # referenced above -- confirm against the original notebook.
    lenlst = len(lst)
    i = 0
    while i < lenlst:
        low = lst[i]
        # Advance to the last element of the run that starts at `low`.
        while i < lenlst - 1 and lst[i] + 1 == lst[i + 1]:
            i += 1
        hi = lst[i]
        if hi - low >= 1:  # <---MAIN DIFFERENCE: a 2-element run is a range
            yield (low, hi)
        else:
            yield (low,)
        i += 1


def printr(ranges):
    """Print each range on its own line.

    Args:
        ranges: Iterable of 2-tuples (printed as ``low:high``) and 1-tuples
            (printed as the bare value), as yielded by ``range_extract``.
    """
    print(
        '\n'.join(
            ('%i:%i' % r) if len(r) == 2 else '%i' % r
            for r in ranges
        )
    )


def expand_all_ranges_to_each_position(the_min, the_max):
    """Return every position from *the_min* to *the_max*, both inclusive.

    Args:
        the_min: Lowest position of the interval.
        the_max: Highest position of the interval.

    Returns:
        A list of all integer positions in between, including both
        boundaries.
    """
    return list(range(the_min, the_max + 1))


if __name__ == "__main__":
    # Plain file write instead of the IPython-only `%store s >data.txt`
    # magic, so the script also runs under the interpreter named in the
    # shebang line.
    with open("data.txt", "w", encoding="utf-8") as data_file:
        data_file.write(s + "\n")

    df = pd.read_csv("data.txt", sep=" ", names=["e-value", "start", "end"])

    # Notebook preview of the first rows; has no visible effect in a script.
    df.head()

    # Collect every covered position; the set ensures each position is kept
    # only once even when intervals overlap.
    unique_positions = set()
    for row in df.itertuples():
        unique_positions.update(
            expand_all_ranges_to_each_position(
                min(row.start, row.end), max(row.start, row.end)
            )
        )
    all_positions = sorted(unique_positions)

    printr(range_extract(all_positions))