Notebook

Panel widgets exploiting the MGnify API

In [ ]:

import sys
import os
import io

if 'google.colab' in str(get_ipython()):
    # On Colab, clone the momics-demos repository so its utils module can be imported.
    # TODO: eventually utils from momics will be used for that
    if os.path.isdir('/content/momics-demos'):
        # Re-running the cell: the checkout already exists, cloning again would fail.
        print("Repository already present, skipping clone")
    else:
        # os.system does not raise when the command fails; it returns the exit
        # status, so the status has to be checked explicitly.
        clone_status = os.system('git clone https://github.com/palec87/momics-demos.git')
        if clone_status == 0:
            print("Repository cloned")
        else:
            print(f"An error occurred while cloning the repository: exit status {clone_status}")

    sys.path.insert(0,'/content/momics-demos')

else:
    # Outside Colab: make the parent of the current working directory importable.
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from utils import get_notebook_environment, init_setup

init_setup()

# Placeholder label; nothing in this cell updates it.
# NOTE(review): the detected value is stored in `env`, not here — confirm which
# name later code is expected to read.
notebook_environment = 'unknown'

# Ask the utils helper where the notebook is running and report it.
env = get_notebook_environment()
print(f"Environment: {env}")

In [2]:

# Connection to MGnify API


import os
# NOTE: jsonapi_client appears to be unmaintained (or at least less actively
# maintained than jsonapi-requests); consider switching the dependency.
from jsonapi_client import Session as APISession
from jsonapi_client import Modifier
import requests
import panel as pn

# Dataframes and display
import pandas as pd

# Lift the display limits on column count and column width when rendering DataFrames.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Data transformation
from functools import reduce

# Plots
import matplotlib.pyplot as plt
import seaborn as sns
# import plotly.graph_objects as go
%matplotlib inline

import momics.plotting as pl

# Create signature of MAGs for comparison against database
# import sourmash
# import glob
# import time
# from pathlib import PurePath as pp
# from Bio import SeqIO

# Warning verbosity: ignore every warning raised after this point.
# NOTE(review): this also hides deprecation warnings from the imported
# libraries — consider narrowing the filter.
import warnings 
warnings.filterwarnings(action="ignore")

In [ ]:

# Load the Panel notebook extension before creating any widgets.
pn.extension()
## Query and show endpoints ##
##############################
# Dropdown of endpoint names. It starts empty; update_endpoints fills its
# options once the 'Get' button is clicked.
select_endpoint = pn.widgets.Select(
    name="MGnify endpoints",
    value="",
    options=[],
    description="Select endpoint to query",
)

# Button whose clicks trigger the endpoint lookup.
button0 = pn.widgets.Button(name='Get', button_type='primary')
def update_endpoints(event):
    """Fetch the MGnify API root and list its endpoints in ``select_endpoint``.

    Used as the callback bound to ``button0``.

    Args:
        event: Value supplied by the binding for the button; a falsy value
            means there is nothing to do.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    if not event:
        return
    # A timeout keeps a stalled connection from blocking the callback forever;
    # raise_for_status reports HTTP errors directly instead of failing later
    # while parsing the body.
    response = requests.get("https://www.ebi.ac.uk/metagenomics/api/v1/", timeout=30)
    response.raise_for_status()
    # The keys of the 'data' mapping are offered as the selectable endpoints.
    select_endpoint.options = list(response.json()['data'])

# Call update_endpoints whenever button0 changes (i.e. is clicked).
pn.bind(update_endpoints, button0, watch=True)
# Cell output: the button next to the endpoint dropdown.
pn.Row(button0, select_endpoint)

In [ ]:

## Query genomes ##
###################
# Inline CSS passed to the Tabs container below: drop shadow, rounded corners, padding.
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}
# Load the 'tabulator' extension (presumably required by pn.widgets.Tabulator below).
pn.extension('tabulator')
# Button whose clicks trigger query_genomes.
button1 = pn.widgets.Button(name='Query', button_type='primary')
# Free-text box; query_genomes uses its value as the taxon_lineage filter.
text_input = pn.widgets.TextInput(name='Text Input', placeholder='Enter a string here...')
# Table view; query_genomes assigns the query result to atable.value.
atable = pn.widgets.Tabulator(sizing_mode="stretch_both", name="Data View")

def query_genomes(event):
    """Query the selected MGnify endpoint by taxon lineage, then show and save the result.

    Reads the endpoint from ``select_endpoint`` and the lineage filter from
    ``text_input``, puts the flattened records into ``atable`` and writes them
    to ``data/<endpoint>_<lineage>.parquet``.

    Args:
        event: Value supplied by the binding for the button; a falsy value
            means there is nothing to do.
    """
    if not event:
        return

    # Read the widget values once so the query, the log line and the file name agree.
    endpoint = select_endpoint.value
    lineage = text_input.value
    if not endpoint:
        # Nothing to query until an endpoint has been fetched and selected.
        print("No endpoint selected; fetch the endpoints and select one first.")
        return

    with APISession("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
        search_filter = Modifier(f"taxon_lineage={lineage}")
        # Materialise the records while the session is still open.
        records = [resource.json for resource in mgnify.iterate(endpoint, filter=search_filter)]
    resources_df = pd.json_normalize(records)

    print('Queried', lineage, "from", endpoint)

    # update table view
    atable.value = resources_df

    # create data folder if it doesn't exist (portable, no shell involved)
    os.makedirs("data", exist_ok=True)
    # save to parquet
    resources_df.to_parquet(os.path.join("data", f"{endpoint}_{lineage}.parquet"))

    # this is an alert for the dashboard, TODO: needs to be tested
    pn.pane.Alert('## Data saved to data folder ##', alert_type='success', width=500).servable()


# Call query_genomes whenever button1 changes (i.e. is clicked).
pn.bind(query_genomes, button1, watch=True)
# Two tabs: 'select' holds the query controls, 'view' holds the result table.
tabs = pn.Tabs(
    ('select', pn.Row(button1, text_input)),
    ('view', atable),
    styles=styles, sizing_mode="stretch_width", height=500, margin=10
)

# Cell output: the tabbed layout.
tabs

In [ ]:

# Data transformation
from functools import reduce

def mgn_split_taxonomy(df):
    """Split the ``attributes.taxon-lineage`` column into one column per rank.

    Args:
        df: DataFrame with a string column ``attributes.taxon-lineage`` whose
            values are ';'-separated lineages.

    Returns:
        tuple: ``(features, all_genomes_tax_df)``. ``features`` is the list of
        rank column names; ``all_genomes_tax_df`` is ``df`` with one extra
        column per rank appended. ``df`` itself is not modified.

    Raises:
        KeyError: If ``df`` has no ``attributes.taxon-lineage`` column.
    """
    features = ['domain', 'phylum', 'class', 'order', 'family', 'genus', 'species']

    lineage_split = (
        df['attributes.taxon-lineage']
        # Cap the number of splits so a lineage with extra separators keeps
        # its tail in the last rank instead of producing extra columns.
        .str.split(';', n=len(features) - 1, expand=True)
        # Pad with empty columns so shorter lineages still yield one column
        # per rank; otherwise the rename below fails on a length mismatch.
        .reindex(columns=range(len(features)))
    )
    lineage_split.columns = features

    # Concatenate the original DataFrame with the new rank columns
    all_genomes_tax_df = pd.concat([df, lineage_split], axis=1)
    return features, all_genomes_tax_df

# Works on whatever is currently held by the table widget, so a query must
# have been run in the cell above first.
# NOTE(review): atable.value is presumably unset before the first query — confirm and guard if needed.
features, taxonomy_df = mgn_split_taxonomy(atable.value)
# Count the rows for each distinct combination of rank values.
# NOTE(review): groupby excludes rows with a missing key by default (dropna=True),
# so incomplete lineages would not be counted — confirm this is intended.
sankey_df = taxonomy_df.groupby(features).size().reset_index(name='count')

sankey_df.head()

In [ ]:

# The Plotly pane only renders in a notebook when the 'plotly' extension is loaded.
pn.extension('plotly')

# Build the Sankey figure from the lineage counts: the first six rank columns
# (domain to genus) are passed as cat_cols and 'count' as value_cols.
sankey = pl.get_sankey(sankey_df, cat_cols=features[0:6],
                       value_cols='count', title='Taxon Lineage')

# Wrap the figure in a Panel pane with the Plotly mode bar hidden.
sankey_pane = pn.pane.Plotly(sankey, config={'displayModeBar': False})
sankey_pane

In [ ]: