Testing code for plotting results with mhcpredict. We use matplotlib and bokeh.
from bokeh.io import output_notebook, show
output_notebook()
import os, types
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import Figure
from bokeh.models import Grid, Range1d, ColumnDataSource, HoverTool
from bokeh.models.renderers import GlyphRenderer
from epitopepredict import base, sequtils, analysis
genbankfile = 'testing/zaire-ebolavirus.gb'
df = sequtils.genbank2Dataframe(genbankfile, cds=True)
reload(base)
P = base.getPredictor('tepitope')
savepath1 = 'tepitope'
#run prediction for several alleles and save results to savepath
alleles = ["HLA-DRB1*0101", "HLA-DRB1*0108", "HLA-DRB1*0305", "HLA-DRB1*0401",
"HLA-DRB1*0404", "HLA-DRB3*0101", "HLA-DRB4*0104"]
#P.predictProteins(df,length=11,alleles=alleles,save=True,path=savepath1)
#iedb mhcI
P2 = base.getPredictor('iedbmhc1')
savepath2 = 'iedbmhc1'
mhc1alleles = ["HLA-A*01:01","HLA-A*68:02"]
mhc1alleles = pd.read_csv('testing/mhc1_common.csv').allele[:3]
P2.predictProteins(df,length=11,alleles=mhc1alleles,save=True,path=savepath2)
predictions done for 9 proteins in 3 alleles
filename = 'tepitope/ZEBOVgp5.mpk'
filename2 = 'iedbmhc1/ZEBOVgp5.mpk'
P.data = pd.read_msgpack(filename)
P2.data = pd.read_msgpack(filename2)
colormaps={'tepitope':'Greens','netmhciipan':'Oranges','iedbmhc2':'Pinks',
'threading':'Purples','iedbmhc1':'Blues'}
colors = {'tepitope':'green','netmhciipan':'orange',
'iedbmhc1':'blue','iedbmhc2':'pink','threading':'purple'}
def plotTracks(predictors, title='', alleles=2, width=900, height=None,
seqdepot=None, bcell=None, exp=None, tools=True):
"""Plot binding predictions in multiple alleles for a single protein.
predictors: a dictionary of Predictor objects
with their predicted binder data usually for a single protein. If data from
multiple proteins is provided the first one is used
alleles: the minimum number of alleles for a binder to be shown
"""
from collections import OrderedDict
from bokeh.palettes import Spectral3
if type(predictors) is not types.ListType:
predictors = [predictors]
if tools == True:
tools="xpan, xwheel_zoom, resize, hover, reset, save"
else:
tools=''
#get title from the dataframe?
alls=1
n = alleles
for p in predictors:
alls += len(p.data.groupby('allele'))
if height==None:
height = 130+10*alls
yrange = Range1d(start=0, end=alls+3)
plot = Figure(title=title,title_text_font_size="11pt",plot_width=width,
plot_height=height, y_range=yrange,
y_axis_label='allele',
tools=tools,
background_fill_color="#FAFAFA",
toolbar_location="below")
h=3
'''if bcell != None:
plotBCell(plot, bcell, alls)
if seqdepot != None:
plotAnnotations(plot,seqdepot)
if exp is not None:
plotExp(plot, exp)'''
#lists for hover data
#we plot all rects at once
x=[];y=[];allele=[];widths=[];clrs=[];peptide=[]
predictor=[];position=[];score=[];leg=[]
l=80
for pred in predictors:
m = pred.name
print m, pred
df = pred.data
sckey = pred.scorekey
pb = pred.getPromiscuousBinders(data=df,n=n)
if len(pb) == 0:
continue
l = pred.getLength()
grps = df.groupby('allele')
alleles = grps.groups.keys()
if len(pb)==0:
continue
c=colors[m]
leg.append(m)
for a,g in grps:
b = pred.getBinders(data=g)
b = b[b.pos.isin(pb.pos)] #only promiscuous
b.sort_values('pos',inplace=True)
scores = b[sckey].values
score.extend(scores)
pos = b['pos'].values
position.extend(pos)
x.extend(pos+(l/2.0)) #offset as coords are rect centers
widths.extend([l for i in scores])
clrs.extend([c for i in scores])
y.extend([h+0.5 for i in scores])
alls = [a for i in scores]
allele.extend(alls)
peptide.extend(list(b.peptide.values))
predictor.extend([m for i in scores])
h+=1
source = ColumnDataSource(data=dict(x=x,y=y,allele=allele,peptide=peptide,
predictor=predictor,position=position,score=score))
plot.rect(x,y, width=widths, height=0.8,
#x_range=Range1d(start=1, end=seqlen+l),
color=clrs,line_color='gray',alpha=0.7,source=source)
hover = plot.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
("allele", "@allele"),
("position", "@position"),
("peptide", "@peptide"),
("score", "@score"),
("predictor", "@predictor"),
])
seqlen = pred.data.pos.max()+l
plot.set(x_range=Range1d(start=0, end=seqlen+1))#, bounds=(0, seqlen+1)))
plot.xaxis.major_label_text_font_size = "8pt"
plot.xaxis.major_label_text_font_style = "bold"
plot.ygrid.grid_line_color = None
plot.yaxis.major_label_text_font_size = '0pt'
plot.xaxis.major_label_orientation = np.pi/4
return plot
plot = plotTracks([P,P2],alleles=3)
show(plot)
def plotStackedArea(pred, title=None):
from bokeh.charts import Area,Line
from bokeh.palettes import brewer, Spectral11
tools="xpan, xwheel_zoom, resize, hover, reset, save"
#plot = Figure(plot_width=800, plot_height=400)
l = pred.getLength()
seqlen = pred.data.pos.max()+l
if title == None:
title = list(pred.data.head(1).name)[0]
#calculate plot data
df = pred.data
#b = pred.getBinders(data=df,n=n)
#l = pred.getLength()
grps = df.groupby('allele')
#colors = brewer["Spectral"][len(grps)]
#print t
data = {}
scores = []; pos=[]
for i,g in grps:
#get running mean instead?
y = g.sort_values('pos')[pred.scorekey]
y = y.clip(lower=0)
#y = pd.rolling_mean(y, window=l, center=True).fillna(0)
data[i] = y
scores.extend(y.values)
pos.extend(g.pos.values)
#source = ColumnDataSource(data=dict(x=t.index,y=t.values))
p = Area(data, title=title, legend="top_left", width=900, height=400, palette=Spectral11,
stack=True, xlabel='position', ylabel='score', tools=tools)
grid = p.select(type=Grid)
grid.grid_line_color = None
p.set(x_range=Range1d(start=0, end=seqlen+1, bounds=(0, seqlen+1)))
#glyphs = p.select(dict(type=GlyphRenderer))
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
("x", "@x"),
("y", "@y"),
#("score", "@scores"),
])
return p
plot=plotStackedArea(P)
show(plot)
<Bokeh Notebook handle for In[96]>
def plotLine(pred, title=None):
from bokeh.charts import Line
from bokeh.palettes import brewer, Spectral11
l = pred.getLength()
seqlen = pred.data.pos.max()#+l
if title == None:
title = list(pred.data.head(1).name)[0]
#calculate plot data
df = pred.data
df = df.sort_values(['allele','pos'])
#l = pred.getLength()
t = df.groupby('pos').agg({pred.scorekey:np.sum})
print len(t)
t = t.clip(lower=0)
t = pd.rolling_mean(t, window=l, center=True)#.fillna(0)
print df.ix[22].mean()
peptides = df.peptide.unique()
print len(t)
tools="xpan, xwheel_zoom, resize, hover, reset, save"
p = Figure(title=title,title_text_font_size="11pt",plot_width=900,
plot_height=400,
y_axis_label=pred.scorekey,
tools=tools,
background_fill_color="#FAFAFA",
toolbar_location="below")
source = ColumnDataSource(data=dict(x=t.index,y=t.values,peptide=peptides))
p.line(x=t.index, y=t[pred.scorekey], line_width=2, source=source)
p.set(x_range=Range1d(start=0, end=seqlen+1, bounds=(0, seqlen+1)))
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
("pos", "@x"),
("total score", "@y"),
("peptide", "@peptide")
])
return p
plot=plotLine(P2)
show(plot)
278 ann_ic50 28077.928571 ann_rank 66.750000 ic50 29122.865000 length 11.000000 netmhcpan_ic50 43751.975000 percentile_rank 65.066667 pos 22.000000 rank 176.066667 smm_ic50 49592.912362 smm_rank 58.357143 dtype: float64 278
<Bokeh Notebook handle for In[148]>
def plotHeatMap(pred, title=None):
from bokeh.charts import HeatMap
from bokeh.palettes import YlOrRd9, GnBu9, OrRd9
tools="xpan, xwheel_zoom, resize, hover, reset, save"
l = pred.getLength()
seqlen = pred.data.pos.max()
if title == None:
title = list(pred.data.head(1).name)[0]
#calculate plot data
df = pred.data.reset_index()
df['score'] = df[pred.scorekey]#.clip(lower=-2)
df = df.sort_values(['allele','pos'])
scores = list(df.sort_values(['allele','pos']).score)
peptides = df.peptide.unique()
#print df[:3]
p = HeatMap(df, x='pos', y='allele', values='score', title=title,
width=800, height=400, tools=tools, stat=None, palette=OrRd9)
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
("pos", "@x"),
("allele", "@y"),
("score", "@scores"),
])
p.set(x_range=Range1d(start=0, end=seqlen+1, bounds=(0, seqlen+1)))
return p
plot=plotHeatMap(P)
show(plot)
<Bokeh Notebook handle for In[108]>
from bokeh.charts import HeatMap, bins
from bokeh.sampledata.autompg import autompg
data = {'fruit': ['apples', 'apples', 'apples', 'apples', 'apples',
'pears', 'pears', 'pears', 'pears', 'pears',
'bananas', 'bananas', 'bananas', 'bananas', 'bananas'],
'sample': [1, 2, 3]*5,
'fruit_count': [4, 5, 8, 12, 4, 6, 5, 4, 8, 7, 1, 2, 4, 8, 12],
'year': [2009, 2010, 2011, 2012, 2013, 2009, 2010, 2011, 2012, 2013, 2009, 2010,
2011, 2012, 2013]}
values = data['fruit_count']
hm = HeatMap(data,x='year', y='sample', values='fruit_count', stat=None, tools='hover')
hover = hm.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
("x", "@x"),
("y", "@y"),
("count", '@values')
])
print hm.select(dict(type=GlyphRenderer))
show(hm)
[<bokeh.models.renderers.GlyphRenderer object at 0x7fd4ae4e0210>, <bokeh.models.renderers.GlyphRenderer object at 0x7fd4ae4e0290>, <bokeh.models.renderers.GlyphRenderer object at 0x7fd4ae4e0d10>, <bokeh.models.renderers.GlyphRenderer object at 0x7fd4ae4e0d90>, <bokeh.models.renderers.GlyphRenderer object at 0x7fd4ae4e0690>]
<Bokeh Notebook handle for In[145]>