Here we conduct a general exploration of TP53 mutations within the HNSCC discovery cohort. While we try to remain unbiased in our screen for molecular correlates of survival, we do have much more information on TP53 mutations than most others.
In Poeta, a TP53 mutation is labeled as disruptive if it is either a stop mutation, or if it is located at a binding site and induces a change in polarity of the encoded amino acid. Interestingly, we found that the polarity of the substitution had little effect on prognosis and that patients with a mutation to the L2 binding site had worse outcomes than patients with a mutation to the L3 binding site. In addition, within the context of the framework we set forth for biomarker discovery, we chose to ignore the classification of mutations (past silent/non-silent) in order to keep sample size high at the risk of false positives. For these reasons we elected to simply display the functional assignment of the mutations in Figure 1 rather than obscure these results with a classification scheme.
import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory populating namespace with data
# Per-patient TP53 values: the 'TP53' row of mut.df restricted to the
# keepers_o patients, with missing entries dropped and values cast to int.
p53_mut = mut.df.ix['TP53'].ix[keepers_o].dropna().astype(int)
# Survival summary grouped by the TP53 value. `order` is passed straight to
# the project helper (presumably the display order of groups -- TODO confirm).
survival_and_stats(p53_mut, surv, figsize=(5,4), order=[2,1,0])
# Screen mutated (value > 0) vs. not against the processed clinical table
# using kruskal_pandas, and show the first rows of the result.
screen_feature(p53_mut>0, kruskal_pandas, clinical.processed.T).head()
H | p | q | |
---|---|---|---|
spread_inferred | 7.65 | 0.01 | 0.06 |
smoker_inferred | 7.39 | 0.01 | 0.06 |
drinker_inferred | 6.45 | 0.01 | 0.07 |
invasion_inferred | 4.91 | 0.03 | 0.12 |
post_2000 | 0.69 | 0.41 | 1.00 |
# Clinical column recording pathological nodal extracapsular spread.
ecs = clinical.clinical.presenceofpathologicalnodalextracapsularspread
# Give the series a readable name; crosstab uses it as the axis label.
ecs.name = 'Extra Capsular Spread'
# Cross-tabulate TP53 mutated (value > 0) against the annotation, transposed
# so the annotation categories form the bar groups.
pd.crosstab(p53_mut>0, ecs).T.plot(kind='bar', rot=15)
<matplotlib.axes.AxesSubplot at 0x79a5f50>
It is important to note that here a patient with multiple mutations is counted multiple times.
import re as re
def get_nums(s):
    """Return every run of decimal digits in *s* as a list of strings."""
    return re.findall(r'\d+', s)


def is_disruptive(v):
    """Assign one TP53 mutation record to a coarse functional class.

    Despite its name this does not return a boolean; it returns a label.

    Parameters
    ----------
    v : record exposing ``Variant_Classification`` and ``Protein_Change``
        attributes (e.g. one row of the MAF table).

    Returns
    -------
    str
        * ``'InDel'`` if the classification contains ``'Ins'`` or ``'Del'``.
        * For any other non-missense classification, the text before its
          first underscore (``'Nonsense_Mutation'`` -> ``'Nonsense'``,
          ``'Silent'`` -> ``'Silent'``).
        * For missense mutations, ``'L2'`` when the first number in the
          protein change lies in 163-195, ``'L3'`` when it lies in 236-251,
          and ``'other'`` otherwise.

    A missense record whose protein change holds no number (empty or
    missing value) is reported as ``'other'`` rather than raising, so one
    malformed row does not abort classification of the whole table.
    """
    c = v.Variant_Classification
    if c != 'Missense_Mutation':
        if 'Ins' in c or 'Del' in c:
            return 'InDel'
        return c.split('_')[0]

    try:
        # The first number in e.g. 'p.R175H' is the residue position.
        aa = int(get_nums(v.Protein_Change)[0])
    except (IndexError, TypeError):
        # IndexError: no digits present; TypeError: value is not a string
        # (e.g. NaN). Either way the position is unknown.
        return 'other'

    if 163 <= aa <= 195:
        return 'L2'
    if 236 <= aa <= 251:
        return 'L3'
    return 'other'
# TP53 rows returned by the project helper FH.get_submaf (all fields requested).
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']

# Functional class per record; records classified 'Silent' are dropped.
dd = p53.apply(is_disruptive, 1)
dd = dd.replace('Silent',nan).dropna()
p53 = p53.ix[dd.index]

# Patients in keepers_o (and present in mut.df) with no remaining TP53 record
# are treated as wild type.
others = keepers_o.diff(p53.Tumor_Sample_Barcode.ix[dd.index]).intersection(mut.df.columns)

# Re-key the classes by tumour barcode, prepend the wild-type patients and
# keep only barcodes in keepers_o. Barcodes repeat when a patient carries
# several mutations.
dd.index = p53.Tumor_Sample_Barcode.ix[dd.index]
dd = pd.concat([pd.Series('WT', others), dd])
dd = dd[[i in keepers_o for i in dd.index]]

# Same construction for the protein-change strings.
pc = pd.Series(list(p53.Protein_Change), index=p53.Tumor_Sample_Barcode)
pc = pd.concat([pd.Series('WT', others), pc])
pc = pc[[i in keepers_o for i in pc.index]]

# Survival rows looked up per record and then re-keyed by position, because
# the barcode index is not unique.
s2 = surv.unstack().ix[dd.index]
s2.index = range(len(dd))
s2 = s2.stack()

pats = pd.Series(dd.index, range(len(dd)))
dd.index = range(len(dd))
pc.index = range(len(dd))

# Fix: the data series are listed in the same order as the keys. Previously
# `pc` (protein change) was exported under 'Functional Class' and `dd`
# (functional class) under the protein-change header.
# The spelling of the 'Protien Change' header is left untouched so the CSV
# header itself does not change.
df = pd.concat([pats, dd, pc, s2[:,'days'], s2[:,'event']],
               keys=['patient ID','Functional Class','Protien Change',
                     'Days to Death/Censoring', 'Death Indicator'],
               axis=1).sort(['patient ID'])
df = df.set_index('patient ID')
df.to_csv(FIGDIR + 'fig2b.csv')
# Survival curves per functional class, saved as fig2b.pdf.
fig, ax = subplots(figsize=(3.5,2.7))
# Colour assigned to each functional class; wild type is drawn in grey.
c={'WT': 'grey', 'Splice':colors[0], 'other': colors[5], 'L3': colors[1], 'L2':colors[2],
'Nonsense': colors[3], 'InDel': colors[4]}
draw_survival_curve(dd, s2, colors=c, ax=ax)
# The legend is hidden on the saved figure.
ax.legend().set_visible(False)
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'fig2b.pdf', transparent=True)
# Survival summary across all classes including WT; 'grey' is spliced into
# the palette at position 6.
survival_and_stats(dd, s2, colors=colors[:6] + ['grey'] + colors[6:], figsize=(4.5,6))
# Same survival data with the WT rows removed, i.e. mutated records only.
get_surv_fit_lr(s2, dd[dd!='WT'])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
11.9 | 0.036 | |||||||||
other | 78 | 30 | 4 | 2.5 | NaN | 0.476 | 0.351 | 0.647 | ||
InDel | 48 | 19 | 3.53 | 1.5 | NaN | 0.421 | 0.262 | 0.676 | ||
Nonsense | 34 | 18 | 1.6 | 1.25 | NaN | 0.333 | 0.19 | 0.585 | ||
L3 | 31 | 12 | 2.16 | 1.5 | NaN | 0.229 | 0.0518 | 1 | ||
L2 | 30 | 18 | 1.08 | 0.986 | NaN | 0.251 | 0.124 | 0.512 | ||
Splice | 17 | 10 | 1.43 | 0.767 | NaN | 0.247 | 0.0842 | 0.724 |
Bar Plot of Hazard Ratios for Supplement
# Relabel 'WT' as 'aWT' -- presumably so it sorts first and becomes the
# reference level of the Cox model (TODO confirm).
dd = dd.replace('WT', 'aWT')
f = get_cox_ph(s2, dd, interactions=False)
# Element 7 of R's summary() result converted to pandas; the columns used
# below ('exp(coef)', 'lower .95', 'upper .95') are read from it.
ci = convert_robj(robjects.r.summary(f)[7])
# Drop the first 7 characters of every row label (presumably the 'feature'
# prefix that R prepends to the level name -- TODO confirm).
ci.index = map(lambda s: s[7:], ci.index)
# Append a row of ones labelled 'WT': hazard ratio 1 with a zero-width
# interval for the wild-type group.
n = ci.ix[0]*0 +1
n.name = 'WT'
ci = ci.append(n)
fig, ax = subplots(figsize=(7,4))
# Bars are ordered by increasing hazard ratio.
ci = ci.sort('exp(coef)')
haz = ci['exp(coef)']
# Asymmetric error bars from the 95% bounds. NOTE(review): the colour list is
# positional, so it matches the classes only for the bar order produced by
# the sort above.
b = haz.plot(kind='bar', ax=ax,
yerr=[haz - ci['lower .95'], ci['upper .95'] - haz], ecolor='black',
rot=0, color=['grey', colors[5], colors[4], colors[0], colors[3],
colors[2], colors[1]])
prettify_ax(ax)
ax.set_ylabel('Hazard Ratio')
<matplotlib.text.Text at 0x11358990>
P-values for Bar Comparisons
from itertools import combinations
# For every pair of classes, run get_cox_ph_ms on the records belonging to
# that pair and keep the 'LR' entry of its result (presumably the
# likelihood-ratio p-value -- TODO confirm against the helper).
# NOTE(review): no multiple-testing adjustment is applied in this cell.
sig = pd.Series({c: get_cox_ph_ms(s2, dd[dd.isin(c)], interactions=False)['LR']
for c in combinations(dd.unique(),2)})
# Display the values in ascending order.
sig.order()
aWT L2 4.06e-05 Nonsense 1.83e-03 Splice 2.61e-03 L2 other 4.88e-03 aWT L3 1.18e-02 InDel 1.68e-02 other 2.74e-02 InDel L2 3.58e-02 L3 L2 4.35e-02 Splice other 8.19e-02 Nonsense other 1.26e-01 L2 1.57e-01 InDel Splice 2.21e-01 Splice L3 2.73e-01 InDel Nonsense 3.87e-01 L3 other 4.57e-01 Nonsense Splice 5.59e-01 L3 5.66e-01 InDel other 6.00e-01 Splice L2 6.11e-01 InDel L3 8.93e-01 dtype: float64
# Amino-acid property table indexed by the file's second column. The
# `Polarity` column is looked up by single residue characters elsewhere in
# this file, so that column presumably holds one-letter codes (TODO confirm).
lo = pd.read_csv('../Extra_Data/amino_acids.csv', index_col=1)
# Collapse repeated index labels to one row each (first value per column).
lo = lo.groupby(level=0).first()
def is_disruptive(s):
    """Return True when protein-change string *s* counts as disruptive.

    Rules, applied in order:

    * ends with ``'*'``                      -> True
    * ends with ``'splice'`` or contains ``'fs'`` -> False
    * otherwise *s* is read as a three-character prefix, a residue number
      and a final one-character residue (e.g. ``'p.R175H'``). The result is
      True only when the number lies in 163-195 or 236-251 and the
      ``Polarity`` entries of ``s[2]`` and ``s[-1]`` in the module-level
      table ``lo`` differ.

    Strings whose position cannot be parsed, and residues missing from
    ``lo``, yield False.

    Only the expected failures (``ValueError`` from parsing, ``KeyError``
    from the table lookup) are handled. The previous bare ``except`` also
    hid the ``TypeError`` raised by ``range(...) + range(...)`` on Python 3,
    which made every substitution silently return False.
    """
    if s.endswith('*'):
        return True
    if s.endswith('splice') or 'fs' in s:
        return False

    try:
        aa = int(s[3:-1])
    except ValueError:
        # Not of the form <prefix><number><residue>.
        return False

    if not (163 <= aa <= 195 or 236 <= aa <= 251):
        return False

    try:
        if lo.Polarity[s[2]] != lo.Polarity[s[-1]]:
            return True
    except KeyError:
        # Residue character not present in the polarity table.
        pass
    return False
# Reload the TP53 rows (all fields) from the project helper.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# combine() is a project helper; the comparison with 'both' below relies on
# it labelling records where both inputs hold (disruptive AND non-silent).
status = pd.concat([combine(p53.Protein_Change.map(is_disruptive), p53.is_silent==0),
p53.Tumor_Sample_Barcode], axis=1,
keys=['status','barcode'])
status = status.set_index('barcode')['status']
# Per barcode: 1 if any of its records is 'both', otherwise 0.
status = (status == 'both').groupby(level=0).sum().clip_upper(1.)
# Barcodes in mut.df without any TP53 record become -1 and map to 'WT'.
# NOTE(review): a barcode whose only TP53 records are silent gets 0 here and
# is therefore labelled 'Non-Disruptive', not 'WT'.
status = status.ix[mut.df.columns].fillna(-1).map({-1:'WT',0:'Non-Disruptive',1:'Disruptive'})
status = status.ix[keepers_o]
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,5))
# Restrict the comparison to Non-Disruptive vs. WT.
get_surv_fit_lr(surv, status[status.isin(['Non-Disruptive', 'WT'])])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
8.12 | 0.00437 | |||||||||
Non-Disruptive | 140 | 61 | 2.58 | 1.71 | NaN | 0.4 | 0.299 | 0.534 | ||
WT | 45 | 10 | NaN | 4.71 | NaN | 0.664 | 0.494 | 0.893 |
def is_disruptive_mod(s):
    """Return True when protein-change string *s* is disruptive (modified rule).

    Rules, applied in order:

    * ends with ``'*'`` or ``'splice'`` -> True
    * contains ``'fs'``                 -> False
    * otherwise *s* is read as a three-character prefix, a residue number
      and one trailing character (e.g. ``'p.R175H'``); True when the number
      lies in 163-195 or 236-251, regardless of the substituted residue.

    Strings whose position cannot be parsed yield False.

    Only ``ValueError`` from the integer parse is handled. The previous bare
    ``except`` also hid the ``TypeError`` raised by
    ``range(...) + range(...)`` on Python 3, which made every substitution
    silently return False.
    """
    if s.endswith(('*', 'splice')):
        return True
    if 'fs' in s:
        return False

    try:
        aa = int(s[3:-1])
    except ValueError:
        # Not of the form <prefix><number><residue>.
        return False

    return 163 <= aa <= 195 or 236 <= aa <= 251
# Reload the TP53 rows (all fields) from the project helper.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# Same per-record labelling as the Poeta-style cell, but using the modified
# rule is_disruptive_mod; 'both' marks records that are disruptive AND
# non-silent according to the project helper combine().
status = pd.concat([combine(p53.Protein_Change.map(is_disruptive_mod), p53.is_silent==0),
p53.Tumor_Sample_Barcode], axis=1,
keys=['status','barcode'])
status = status.set_index('barcode')['status']
# Per barcode: 1 if any of its records is 'both', otherwise 0.
status = (status == 'both').groupby(level=0).sum().clip_upper(1.)
# Barcodes in mut.df without any TP53 record become -1 and map to 'WT'.
status = status.ix[mut.df.columns].fillna(-1).map({-1:'WT',0:'Non-Disruptive',1:'Disruptive'})
status = status.ix[keepers_o]
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,5))
# Cox fit on Non-Disruptive vs. WT only; the feature is the boolean
# "is Non-Disruptive". The trailing semicolon is kept from the original cell.
f = get_cox_ph(surv, status[status.isin(['Non-Disruptive', 'WT'])]=='Non-Disruptive', interactions=False,
print_desc=True);
coef exp(coef) se(coef) z p feature 0.79 2.2 0.353 2.24 0.025 Likelihood ratio test=5.81 on 1 df, p=0.0159 n= 150, number of events= 52
# Hazard ratio exp(coef) and its distance down to exp(coef - se(coef)), with
# coef=0.79 and se(coef)=0.353 typed in by hand from the printed model fit.
exp(.79), exp(.79) - exp(.79 - .353)
(2.2033964262559369, 0.65534034919959683)
# Protein-change string per TP53 record, keyed by tumour barcode.
# NOTE(review): `p53` here is the freshly reloaded table, so silent records
# are not removed and rows are not restricted to keepers_o in this cell.
cc = p53.set_index('Tumor_Sample_Barcode').Protein_Change
# Prepend the `others` barcodes (computed in an earlier cell) labelled 'WT'.
cc = pd.concat([pd.Series('WT', others), cc])
# Keep only labels occurring more than 5 times. true_index is a project
# helper (presumably returns the labels where the mask is True -- TODO confirm).
cc = cc[cc.isin(true_index(cc.value_counts() > 5))]
# Look up survival per record, then re-key both objects by position
# (presumably because barcodes can repeat -- TODO confirm).
s2 = surv.unstack().ix[cc.index]
s2.index = range(len(cc))
s2 = s2.stack()
cc.index = range(len(cc))
survival_and_stats(cc, s2, colors=['grey'] + colors, figsize=(7,5))