#!/usr/bin/env python
# coding: utf-8
"""Notebook front end for the Infrafrontier strains Solr API.

The application shows three tabs:

* MP terms by gene symbol,
* gene symbols by MP term id,
* MP terms for a list of strain names (plus an optional, currently
  disabled, heatmap of the MP terms shared between strains).

Every tab renders its result as an HTML table and offers the same table as
a CSV download.
"""

# In[10]:

import base64
import re
from operator import itemgetter

import bokeh.palettes
import ipywidgets as widgets
import numpy as np
import pandas as pd
import requests
from bokeh.io import output_notebook, show
from bokeh.models import CategoricalColorMapper, ColorBar, ColumnDataSource
from bokeh.plotting import figure
from bokeh.transform import transform
from hublib.ui import Download
from IPython.display import HTML, clear_output, display

output_notebook(hide_banner=True)

# API urls
api_url = 'http://85.215.208.224:8983/solr/strainAPI/select?q=allele_symbol:'
api_url_two = 'http://85.215.208.224:8983/solr/strainAPI/select?q=mp_ids:"'
api_url_three = 'http://85.215.208.224:8983/solr/strainAPI/select?q=name:'
api_url_end = '&rows=2000'

# Seconds to wait for the API before giving up, so that an unreachable
# server cannot leave a tab waiting forever with its buttons disabled.
REQUEST_TIMEOUT = 60

# Every tab exports its table under this file name.
RESULT_FILE = "result_table.csv"

# Characters left behind by str(list) that are removed for display.
_LIST_MARKUP = re.compile(r"['\[\]]")

# Anchor embedding the CSV as a base64 data URI, so the download works
# without the notebook server having to serve the file.
_DOWNLOAD_LINK = (
    '<a download="{filename}" href="data:text/csv;base64,{payload}" '
    'target="_blank">{title}</a>'
)


# In[12]:

class App():
    """Tabbed ipywidgets application querying the strains API.

    The assembled widget tree is available as ``final_container``. After a
    successful query the raw result tables are kept in ``df`` (first tab),
    ``df_two`` (second tab) and ``df_t`` (third tab) until the matching
    Reset button is pressed.
    """

    def __init__(self):
        self.tab = widgets.Tab()
        self.tab.children = []

        # first tab
        self.table_container = widgets.Output()
        self.send_button, send_box = self.create_send_button()
        self.res_button = self.create_reset_button()
        self.container = widgets.VBox([
            send_box,
            widgets.HBox([self.send_button, self.res_button]),
        ])
        self.down_container = widgets.HBox([])
        self.tab_one = widgets.VBox([
            self.container,
            self.table_container,
            self.down_container,
        ])

        # second tab
        self.table_container_two = widgets.Output()
        self.send_button_two, send_box_two = self.create_send_button_two()
        self.res_button_two = self.create_reset_button_two()
        self.container_two = widgets.VBox([
            send_box_two,
            widgets.HBox([self.send_button_two, self.res_button_two]),
        ])
        self.down_container_two = widgets.HBox([])
        self.tab_two = widgets.VBox([
            self.container_two,
            self.table_container_two,
            self.down_container_two,
        ])

        # third tab
        self.table_container_three = widgets.Output()
        self.plot_container = widgets.Output()
        self.send_button_three, send_box_three = self.create_send_button_three()
        self.plot_button = self.create_plot_button()
        self.res_button_three = self.create_reset_button_three()
        self.container_three = widgets.VBox([
            send_box_three,
            widgets.HBox([self.send_button_three, self.res_button_three]),
        ])
        self.down_container_three = widgets.HBox([])
        self.slider_container = widgets.HBox()
        self.tab_three = widgets.VBox([
            self.container_three,
            self.table_container_three,
            self.down_container_three,
            self.slider_container,
            self.plot_container,
        ])

        # final view
        self.tab.children = [self.tab_one, self.tab_two, self.tab_three]
        self.tab.set_title(0, 'MP terms by gene symbols')
        self.tab.set_title(1, 'gene symbols by MP terms')
        self.tab.set_title(2, 'common mp terms by strains')
        self.final_container = widgets.VBox([
            widgets.HTML(
                '<h1>Infrafrontier Strains API</h1>',
                layout=widgets.Layout(margin='0 0 5em 0'),
            ),
            self.tab,
        ])

    # SHARED HELPERS

    @staticmethod
    def _build_input(label_text, tooltip, on_submit, on_text):
        """Create a labelled text area with its (initially disabled) Submit button.

        Returns a ``(text_area, button, box)`` tuple where ``box`` lays out
        the label next to the text area.
        """
        label = widgets.Label(label_text)
        text_area = widgets.Textarea(placeholder='')
        button = widgets.Button(description='Submit', tooltip=tooltip,
                                disabled=True)
        button.on_click(on_submit)
        text_area.observe(on_text, names=['value'])
        return text_area, button, widgets.HBox([label, text_area])

    @staticmethod
    def _build_reset_button(tooltip, on_reset):
        """Create an initially disabled Reset button wired to *on_reset*."""
        button = widgets.Button(description='Reset', tooltip=tooltip,
                                disabled=True)
        button.on_click(on_reset)
        return button

    def _reset_tab(self, frame_attr, text_area, send_button, reset_button,
                   output, containers):
        """Drop a tab's stored result and return its widgets to the start state.

        *frame_attr* names the attribute holding the tab's DataFrame. It may
        be absent (for example when the only query so far found nothing),
        which is not an error.
        """
        if hasattr(self, frame_attr):
            delattr(self, frame_attr)
        for container in containers:
            stale = container.children
            container.children = ()
            for child in stale:
                child.close()
        text_area.value = ''
        reset_button.disabled = True
        send_button.disabled = True
        output.clear_output(wait=False)

    def _query(self, url):
        """Fetch *url* and return the matching Solr documents.

        Returns a (possibly empty) list of documents, or ``None`` when the
        request failed; in that case the reason has already been printed.
        Meant to be called inside the tab's ``Output`` context so that the
        message lands in the tab.
        """
        try:
            reply = requests.get(url, timeout=REQUEST_TIMEOUT)
            reply.raise_for_status()
            body = reply.json()["response"]
            found = body["numFound"]
        except (requests.RequestException, ValueError, KeyError) as err:
            print("The request to the strains API failed: {}".format(err))
            return None
        return body["docs"] if found != 0 else []

    @staticmethod
    def _strip_list_markup(frame, space_commas=False):
        """Return a copy of *frame* with every cell rendered as plain text.

        Each cell is converted with ``str`` and the quote/bracket characters
        of list reprs are removed. With *space_commas* every comma is first
        followed by a space. *frame* itself is left untouched.
        """
        def clean(cell):
            text = str(cell)
            if space_commas:
                text = text.replace(",", ", ")
            return _LIST_MARKUP.sub("", text)

        return frame.apply(lambda column: column.map(clean))

    @staticmethod
    def _space_first_entry(ids):
        """Return a new list whose first entry has a space after each comma.

        A new list is built on purpose: DataFrame.copy() shares the Python
        objects stored in its cells, so editing the list in place would
        also alter the table that gets exported.
        """
        if not isinstance(ids, list) or not ids:
            return ids
        return [ids[0].replace(",", ", ")] + ids[1:]

    def _publish_table(self, table, shown, down_container):
        """Display *shown*, export *table* as CSV and append a download link.

        Returns whatever ``DataFrame.to_csv`` returns (``None`` when writing
        to a path).
        """
        display(HTML(shown.to_html(justify="left", index=False)))
        exported = table.to_csv(RESULT_FILE, index=False)
        link = widgets.HTML(value="")
        self.create_download_link(RESULT_FILE, link)
        down_container.children = tuple(down_container.children) + (link,)
        return exported

    # FUNCTIONS FOR FIRST TAB PAGE

    def create_send_button(self):
        """Build the gene-symbol input; returns ``(submit_button, input_box)``."""
        self.text_area, send_b, sub_box = self._build_input(
            'Gene Symbol:', 'submit your gene symbol',
            self.on_change_s, self.on_change_text)
        return (send_b, sub_box)

    def create_reset_button(self):
        """Build the Reset button of the first tab."""
        return self._build_reset_button(
            'reset the environment to generate a new table', self.on_click_r)

    def on_change_text(self, _):
        """Enable Submit only while the text area holds non-blank text."""
        self.send_button.disabled = not self.text_area.value.strip()

    def on_click_r(self, _):
        """Reset the first tab."""
        self._reset_tab('df', self.text_area, self.send_button,
                        self.res_button, self.table_container,
                        [self.down_container])

    def on_change_s(self, _):
        """Query the API by gene symbol and show strains with their MP terms."""
        with self.table_container:
            self.table_container.clear_output(wait=False)
            self.send_button.disabled = True
            try:
                gene = self.text_area.value.strip()
                docs = self._query(api_url + gene + api_url_end)
                if docs:
                    # Fields are read with .get so that a document lacking a
                    # field yields an empty cell instead of a KeyError.
                    # NOTE(review): whether the API ever omits these fields
                    # is an assumption - confirm against the Solr schema.
                    self.df = pd.DataFrame(
                        [{"Gene Symbol": doc.get("allele_symbol"),
                          "Strain name": doc.get("name"),
                          "MP term ids": doc.get("mp_ids"),
                          "MP term names": doc.get("mp_terms")}
                         for doc in docs],
                        columns=["Gene Symbol", "Strain name",
                                 "MP term ids", "MP term names"])
                    # work on a copy of the dataframe to better display the table
                    shown = self.df.copy()
                    shown["MP term ids"] = [
                        self._space_first_entry(ids)
                        for ids in self.df["MP term ids"]
                    ]
                    self.csv_file = self._publish_table(
                        self.df, self._strip_list_markup(shown),
                        self.down_container)
                elif docs is not None:
                    print("No data found for gene " + gene)
            finally:
                # Always give the user a way out, even after a failure.
                self.res_button.disabled = False

    def create_download_link(self, filename, htmlWidget):
        """Fill *htmlWidget* with a link that downloads *filename* as CSV.

        The file content is embedded in the link as base64.
        """
        title = "Click here to download the table in csv format"
        with open(filename, "rb") as handle:
            payload = base64.b64encode(handle.read()).decode()
        htmlWidget.value = _DOWNLOAD_LINK.format(
            payload=payload, title=title, filename=filename)

    # FUNCTIONS FOR SECOND TAB PAGE

    def create_send_button_two(self):
        """Build the MP-term input; returns ``(submit_button, input_box)``."""
        self.text_area_two, send_b, sub_box = self._build_input(
            'MP term id:', 'submit your MP term id',
            self.on_change_s_two, self.on_change_text_two)
        return (send_b, sub_box)

    def create_reset_button_two(self):
        """Build the Reset button of the second tab."""
        return self._build_reset_button(
            'reset the environment to generate a new table',
            self.on_click_r_two)

    def on_change_text_two(self, _):
        """Enable Submit only while the text area holds non-blank text."""
        self.send_button_two.disabled = not self.text_area_two.value.strip()

    def on_click_r_two(self, _):
        """Reset the second tab."""
        self._reset_tab('df_two', self.text_area_two, self.send_button_two,
                        self.res_button_two, self.table_container_two,
                        [self.down_container_two])

    def on_change_s_two(self, _):
        """Query the API by MP term id and show the distinct gene symbols."""
        with self.table_container_two:
            self.table_container_two.clear_output(wait=False)
            self.send_button_two.disabled = True
            try:
                term = self.text_area_two.value.strip()
                docs = self._query(api_url_two + term + '"' + api_url_end)
                if docs:
                    # dict.fromkeys removes duplicates while keeping the
                    # order in which the symbols were returned.
                    genes = list(dict.fromkeys(
                        doc["allele_symbol"][0]
                        for doc in docs if doc.get("allele_symbol")))
                    # One row whose "Genes" cell holds the whole list.
                    self.df_two = pd.DataFrame(
                        [{"MP term id": term, "Genes": genes}],
                        columns=["MP term id", "Genes"])
                    self.csv_file_two = self._publish_table(
                        self.df_two, self._strip_list_markup(self.df_two),
                        self.down_container_two)
                elif docs is not None:
                    print("No gene found with the MP term id " + term)
            finally:
                self.res_button_two.disabled = False

    def create_download_link_two(self, filename, htmlWidget):
        """Same as :meth:`create_download_link`; kept for existing callers."""
        self.create_download_link(filename, htmlWidget)

    # FUNCTIONS FOR THIRD TAB

    def create_send_button_three(self):
        """Build the strain-list input; returns ``(submit_button, input_box)``."""
        self.text_area_three, send_b, sub_box = self._build_input(
            'Strain names list:', 'submit your list',
            self.on_change_s_three, self.on_change_text_three)
        return (send_b, sub_box)

    def create_reset_button_three(self):
        """Build the Reset button of the third tab."""
        return self._build_reset_button(
            'reset the environment to generate new results',
            self.on_click_r_three)

    def on_change_text_three(self, _):
        """Enable Submit only while the text area holds non-blank text."""
        self.send_button_three.disabled = not self.text_area_three.value.strip()

    def on_click_r_three(self, _):
        """Reset the third tab, including the heatmap slider if one exists."""
        # del(self.matr)
        self._reset_tab('df_t', self.text_area_three, self.send_button_three,
                        self.res_button_three, self.table_container_three,
                        [self.down_container_three, self.slider_container])

    def on_change_s_three(self, _):
        """Query the API by strain name and show the strains' MP terms."""
        with self.table_container_three:
            self.table_container_three.clear_output(wait=False)
            self.send_button_three.disabled = True
            try:
                strains = self.text_area_three.value.strip()
                # Commented parts regard using the strain id instead of names
                # (here for reference, remove in final release)
                # s = strains.split(",")
                # self.df_t["Strain id"] = s
                # for i in s:
                #     try:
                #         x = requests.get(api_url_three+i+api_url_end).json()["response"]["docs"][0]
                #         self.df_t.loc[self.df_t['Strain id'] == i, "Strain name"] = x["name"]
                #         self.df_t.loc[self.df_t['Strain id'] == i, "MP term ids"] = x["mp_ids"]
                #         self.df_t.loc[self.df_t['Strain id'] == i, "MP term names"] = x["mp_terms"]
                #         del(x)
                #     except IndexError:
                #         self.df_t.loc[self.df_t['Strain id'] == i, "Strain name"] = "nan"
                #         self.df_t.loc[self.df_t['Strain id'] == i, "MP term ids"] = "nan"
                #         self.df_t.loc[self.df_t['Strain id'] == i, "MP term names"] = "nan"
                docs = self._query(api_url_three + strains + api_url_end)
                if docs:
                    # .get: a document lacking a field yields an empty cell.
                    # NOTE(review): assumes fields can be absent - confirm.
                    self.df_t = pd.DataFrame(
                        [{"Strain id": doc.get("strain_id"),
                          "Strain name": doc.get("name"),
                          "MP term ids": doc.get("mp_ids"),
                          "MP term names": doc.get("mp_terms")}
                         for doc in docs],
                        columns=["Strain id", "Strain name",
                                 "MP term ids", "MP term names"])
                    shown = self._strip_list_markup(self.df_t,
                                                    space_commas=True)
                    self.csv_file_three = self._publish_table(
                        self.df_t, shown, self.down_container_three)
                    # Parts to allow the plotting of the heatmap.
                    # self._slider, self.slider_box = self.create_slider()
                    # self.slider_container.children = tuple(list(self.slider_container.children) + [self.slider_box])
                    # self.update_app()
                elif docs is not None:
                    print("No strains found with the name " + strains)
            finally:
                self.res_button_three.disabled = False

    def create_download_link_three(self, filename, htmlWidget):
        """Same as :meth:`create_download_link`; kept for existing callers."""
        self.create_download_link(filename, htmlWidget)

    def create_slider(self):
        """Build the heatmap threshold slider; returns ``(slider, slider_box)``."""
        slider_label = widgets.Label('Threshold: ')
        slider = widgets.IntSlider(value=0, min=0, max=0, step=1,
                                   orientation='horizontal', readout=True,
                                   readout_format="d")
        slider.observe(self.on_change, names=['value'])
        slider_box = widgets.HBox([slider_label, slider])
        return (slider, slider_box)

    def on_change(self, _):
        """Redraw the heatmap when the threshold slider moves."""
        self.update_app()

    def update_app(self):
        """Refresh the slider range and redraw the heatmap.

        The strain-by-strain matrix is built on first use and cached in
        ``self.matr``.
        """
        if not hasattr(self, "matr"):
            self.matr = self.create_matrix()
        # Each cell is a (count, terms) tuple; the slider tops out at the
        # largest count.
        self._slider.max = max(list(self.matr.max()), key=itemgetter(0))[0]
        threshold = self._slider.value
        try:
            with self.plot_container:
                p = self.create_plot(threshold)
                self.plot_container.clear_output(wait=True)
                show(p, notebook_handle=True)
        except (NameError, AttributeError):
            # NOTE(review): plotting failures are silently ignored here, as
            # before; confirm whether they should be reported instead.
            pass

    def create_matrix(self):
        """Build the strain-by-strain matrix of shared MP term ids.

        Each cell holds a ``(count, ids)`` tuple: the number of MP term ids
        the two strains have in common and those ids joined into one string.
        NOTE(review): the "MP term ids" cells are split on "," and therefore
        must be strings here - confirm against the data stored in ``df_t``.
        """
        df = self.df_t.copy().dropna()
        names = df["Strain name"].values
        t = pd.DataFrame(columns=names, index=names)
        for i in t.index:
            x = df.where(df["Strain name"] == i).dropna()["MP term ids"].values[0].split(",")
            for j in t.columns:
                y = df.where(df["Strain name"] == j).dropna()["MP term ids"].values[0].split(",")
                common = np.intersect1d(x, y)
                s = _LIST_MARKUP.sub("", str(list(common)))
                t.loc[i, j] = (len(common), s)
        t.columns.name = "strain_col"
        t.index.name = "strain_ind"
        return (t)

    def create_plot(self, threshold):
        """Return a bokeh heatmap of the MP terms shared between strains.

        With a non-zero *threshold*, strains whose highest count is below it
        are removed from both axes.
        """
        tem_mat = self.matr.copy()
        rem = []
        if threshold != 0:
            for i in tem_mat.index:
                if tem_mat.loc[i].max()[0] < threshold:
                    rem.append(i)
            tem_mat.drop(rem, inplace=True, axis=0)
            tem_mat.drop(rem, inplace=True, axis=1)
        # Create a custom palette and add a specific mapper to map color with
        # values, we are converting them to strings to create a categorical
        # color mapper to include only the values that we have in the matrix
        # and retrieve a better representation
        tmp = tem_mat.stack(dropna=False).rename("value").reset_index()
        # Split the (count, ids) tuples into two columns.
        cells = tmp["value"]
        tmp["terms"] = cells.map(itemgetter(1)).astype(str)
        tmp["value"] = cells.map(itemgetter(0))
        fact = tmp["value"].unique()
        fact.sort()  # sort numerically, before the counts become strings
        fact = fact.astype(str)
        tmp["value"] = tmp["value"].astype(str)
        mapper = CategoricalColorMapper(
            palette=bokeh.palettes.inferno(len(tmp.value.unique())),
            factors=fact,
            nan_color='gray')
        # Define a figure
        p = figure(
            width=1280,
            height=800,
            x_range=list(tmp.strain_col.drop_duplicates()),
            y_range=list(tmp.strain_ind.drop_duplicates()[::-1]),
            tooltips=[('number of common MP terms ', '@value'),
                      ('MP term ids', '@terms')],
            x_axis_location="above",
            output_backend="webgl",
            toolbar_location="right",
            tools="pan,wheel_zoom,box_zoom,reset,save")
        # Create rectangles for heatmap
        p.rect(
            x="strain_col",
            y="strain_ind",
            width=1,
            height=1,
            source=ColumnDataSource(tmp),
            fill_color=transform('value', mapper))
        p.xaxis.major_label_orientation = 45
        p.yaxis.major_label_orientation = 45
        # Add legend
        color_bar = ColorBar(
            color_mapper=mapper,
            label_standoff=6,
            border_line_color=None)
        p.add_layout(color_bar, 'right')
        return (p)

    def create_plot_button(self):
        """Build the (currently unwired) heatmap button."""
        plot_b = widgets.Button(
            description='Common MP heatmap',
            tooltip='Create an heatmap showing the common MP terms between strains',
            disabled=True)
        # plot_b.on_click(self.on_click_p)
        return (plot_b)


app = App()
app.final_container