#!/usr/bin/env python
# coding: utf-8
#
# Panel widgets exploiting MGnify API

# In[ ]:

import sys
import os
import io

if 'google.colab' in str(get_ipython()):
    # clone the momics-demos repository to use the utils module from there
    # TODO: eventually utils from momics will be used for that
    # NOTE: os.system() does not raise when the command fails; it returns the
    # command's status (0 on success), so that is what gets checked here.
    clone_status = os.system('git clone https://github.com/palec87/momics-demos.git')
    if clone_status == 0:
        print("Repository cloned")
    else:
        print(f"An error occurred while cloning the repository: status {clone_status}")

    # The path is added regardless of the clone result, as before.
    sys.path.insert(0, '/content/momics-demos')
else:
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from utils import init_setup, get_notebook_environment
init_setup()

# Initialize the environment variable
notebook_environment = 'unknown'

# Determine the notebook environment
env = get_notebook_environment()
print(f"Environment: {env}")


# In[2]:

# Connection to MGnify API
import os

# this repo is not maintained, or less than jsonapi-requests
# consider a dep change
from jsonapi_client import Session as APISession
from jsonapi_client import Modifier
import requests
import panel as pn

# Dataframes and display
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Data transformation
from functools import reduce

# Plots
import matplotlib.pyplot as plt
import seaborn as sns
# import plotly.graph_objects as go
get_ipython().run_line_magic('matplotlib', 'inline')

import momics.plotting as pl

# Create signature of MAGs for comparison against database
# import sourmash
# import glob
# import time
# from pathlib import PurePath as pp
# from Bio import SeqIO

# Warning verbosity
import warnings
warnings.filterwarnings(action="ignore")


# In[ ]:

pn.extension()

## Query and show endpoints ##
##############################
select_endpoint = pn.widgets.Select(
    name="MGnify endpoints",
    value="",
    options=[],
    description="Select endpoint to query",
)

button0 = pn.widgets.Button(name='Get', button_type='primary')


def update_endpoints(event):
    """Fill the endpoint selector with the endpoints listed by the MGnify API root.

    Bound to ``button0`` through ``pn.bind``; does nothing when ``event`` is
    falsy.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    if not event:
        return

    # A timeout keeps the callback from hanging forever on a stalled connection.
    r = requests.get("https://www.ebi.ac.uk/metagenomics/api/v1/", timeout=30)
    # Fail with a clear HTTP error instead of a KeyError on the payload below.
    r.raise_for_status()
    endpoint_dict = r.json()['data']
    select_endpoint.options = list(endpoint_dict)


pn.bind(update_endpoints, button0, watch=True)

pn.Row(button0, select_endpoint)


# In[ ]:

## Query genomes ##
###################
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

pn.extension('tabulator')

button1 = pn.widgets.Button(name='Query', button_type='primary')
text_input = pn.widgets.TextInput(name='Text Input', placeholder='Enter a string here...')
atable = pn.widgets.Tabulator(sizing_mode="stretch_both", name="Data View")


def query_genomes(event):
    """Query the selected MGnify endpoint and show/save the result.

    Uses ``text_input.value`` as a ``taxon_lineage`` filter on the endpoint
    chosen in ``select_endpoint``, puts the normalized result into ``atable``
    and writes it to ``data/<endpoint>_<text>.parquet``.

    Bound to ``button1`` through ``pn.bind``; does nothing when ``event`` is
    falsy.
    """
    if not event:
        return

    with APISession("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
        search_filter = Modifier(f"taxon_lineage={text_input.value}")
        # Materialize the records while the session is still open instead of
        # handing a lazy iterator around.
        resources = [
            r.json
            for r in mgnify.iterate(select_endpoint.value, filter=search_filter)
        ]
        resources_df = pd.json_normalize(resources)

    print('Queried', text_input.value, "from", select_endpoint.value)

    # update table view
    atable.value = resources_df

    # create data folder if it doesn't exist (portable, no shell involved)
    os.makedirs("data", exist_ok=True)

    # save to parquet
    resources_df.to_parquet(
        os.path.join("data", f"{select_endpoint.value}_{text_input.value}.parquet")
    )

    # this is alert for the dashboard, TODO: needs to be tested
    pn.pane.Alert('## Data saved to data folder ##', alert_type='success', width=500).servable()


pn.bind(query_genomes, button1, watch=True)

tabs = pn.Tabs(
    ('select', pn.Row(button1, text_input)),
    ('view', atable),
    styles=styles,
    sizing_mode="stretch_width",
    height=500,
    margin=10,
)
tabs


# In[ ]:

# Data transformation
from functools import reduce


def mgn_split_taxonomy(df):
    """Split the ``attributes.taxon-lineage`` column into one column per rank.

    Args:
        df: DataFrame with an ``attributes.taxon-lineage`` column holding
            ``;``-separated lineage strings.

    Returns:
        A tuple ``(features, all_genomes_tax_df)`` where ``features`` is the
        list of rank column names and ``all_genomes_tax_df`` is ``df`` with
        those columns appended.
    """
    features = ['domain', 'phylum', 'class', 'order', 'family', 'genus', 'species']

    # Split the 'attributes.taxon-lineage' column and create new columns.
    # ``n`` caps the number of splits so there are never more pieces than
    # ranks (any surplus stays in the last column).
    lineage_split = df['attributes.taxon-lineage'].str.split(
        ';', n=len(features) - 1, expand=True
    )
    # Pad with empty columns when every lineage is shorter than the full set
    # of ranks; otherwise assigning the column names below raises ValueError.
    lineage_split = lineage_split.reindex(columns=range(len(features)))
    lineage_split.columns = features

    # Concatenate the original DataFrame with the new columns
    all_genomes_tax_df = pd.concat([df, lineage_split], axis=1)
    return features, all_genomes_tax_df


# NOTE: reads the table filled by query_genomes, so a query has to be run
# (button "Query") before executing this cell.
features, taxonomy_df = mgn_split_taxonomy(atable.value)
sankey_df = taxonomy_df.groupby(features).size().reset_index(name='count')
sankey_df.head()


# In[ ]:

pn.extension()

# panel plot widget with plotly sankey
sankey = pl.get_sankey(sankey_df, cat_cols=features[0:6], value_cols='count', title='Taxon Lineage')

# panel pane for plotly sankey
sankey_pane = pn.pane.Plotly(sankey, config={'displayModeBar': False})
sankey_pane


# In[ ]: