import math
import pandas as pd
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, output_file
from bokeh.charts import Bar
from bokeh.charts.attributes import cat
from bokeh.models import HoverTool, ColumnDataSource, FixedTicker
from bokeh.palettes import Category10, Set1, Spectral
output_notebook()
from zefram import get_session, get_table, framework, Framework, RingSize
The data is retrieved as a pandas.DataFrame,
allowing convenient manipulations and visualizations.
# load the 'frameworks' table and print a summary of its columns
df = get_table('frameworks')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 229 entries, 0 to 228 Data columns (total 45 columns): id 229 non-null int64 code 229 non-null object atoms 204 non-null float64 cages 204 non-null float64 channels 204 non-null float64 connections 204 non-null float64 junctions 204 non-null float64 lcd 204 non-null float64 name 204 non-null object pld 201 non-null float64 portals 204 non-null float64 tpw_abs 204 non-null float64 tpw_pct 204 non-null float64 url_zeomics 204 non-null object a 229 non-null float64 b 229 non-null float64 c 229 non-null float64 alpha 229 non-null float64 beta 229 non-null float64 gamma 229 non-null float64 accessible_area 225 non-null float64 accessible_area_m2pg 225 non-null float64 accessible_volume 225 non-null float64 accessible_volume_pct 225 non-null float64 channel_dim 229 non-null int64 cif 229 non-null object framework_density 229 non-null float64 isdisordered 229 non-null bool isinterrupted 229 non-null bool maxdsd_a 225 non-null float64 maxdsd_b 225 non-null float64 maxdsd_c 225 non-null float64 maxdsi 225 non-null float64 occupiable_area 225 non-null float64 occupiable_area_m2pg 225 non-null float64 occupiable_volume 225 non-null float64 occupiable_volume_pct 225 non-null float64 rdls 224 non-null float64 sbu 229 non-null object specific_accessible_area 225 non-null float64 specific_occupiable_area 225 non-null float64 td10 229 non-null float64 topological_density 176 non-null float64 url_iza 229 non-null object _spacegroup_id 229 non-null int64 dtypes: bool(2), float64(34), int64(3), object(6) memory usage: 77.5+ KB
def assign_label(rings):
    '''
    Assign the usual zeolite pore size label to a collection of ring sizes.

    The label is decided by the largest ring and lists, from large to
    small, which of the 12-, 10- and 8-membered rings are present, using
    'l' for 12, 'm' for 10 and 's' for 8 (e.g. 'lms', 'lm', 'ls', 'ms').

    Args:
        rings (list of int) : ring sizes of a framework

    Returns:
        str or None : the label, or None when ``rings`` is empty or when
        its largest ring is not 8, 10 or 12
    '''
    sizes = set(rings)
    if not sizes:
        # max() of an empty collection raises ValueError; nothing to label
        return None

    largest = max(sizes)
    if largest not in (8, 10, 12):
        return None

    # No ring larger than `largest` can be present, so testing all three
    # sizes from large to small yields exactly the label of that class.
    return ''.join(tag for size, tag in ((12, 'l'), (10, 'm'), (8, 's'))
                   if size in sizes)
def are_all_even(lst):
    'Return True when no element of the list leaves a remainder modulo 2.'
    for number in lst:
        if number % 2 != 0:
            return False
    return True
# ring sizes of every framework, looked up through its code
df.loc[:, 'Rings'] = [
    [ring.size for ring in framework(code).ring_sizes]
    for code in df['code']
]
# size of the largest ring
df.loc[:, 'max_ring'] = df['Rings'].apply(max)
# string label derived from the ring sizes
df.loc[:, 'ring_label'] = df['Rings'].apply(assign_label)
# True when every ring size is even
df.loc[:, 'alpo_possible'] = df['Rings'].apply(are_all_even)
# switch the abbreviated diameter columns to descriptive names
df.rename(
    columns={
        'lcd': 'Largest cavity dia.',
        'pld': 'Pore limiting dia.',
        'maxdsi': 'Max dia. of inc. sphere',
    },
    inplace=True,
)
Now we can plot a histogram depicting the distribution of frameworks depending on the largest channel.
# count the frameworks per largest ring size, grouped by channel dimensionality
p = Bar(df, 'max_ring', values='max_ring', agg='count', group='channel_dim',
        xlabel='Largest channel [#T atoms]', ylabel='Number of frameworks',
        legend='top_right', plot_width=760)
show(p)
# columns carried over into the per-dimensionality tables
gencols = [
    'code',
    'channel_dim',
    'Largest cavity dia.',
    'Pore limiting dia.',
    'Max dia. of inc. sphere',
    'Rings',
    'ring_label',
    'alpo_possible',
]
# one independent copy per channel dimensionality (0, 1, 2 and 3)
df0d, df1d, df2d, df3d = [
    df.loc[df['channel_dim'] == ndim, gencols].copy() for ndim in range(4)
]
import numpy as np
from sklearn.utils.extmath import cartesian
def show_plot(dataframe, width=750, height=300, radius=0.4, alpha=0.6, title=None, showit=True):
    '''
    Draw the frameworks of a table as a grid of discs, each labelled with
    the framework code and coloured according to its ring label.

    Frameworks are placed row by row, ten per row, with the first row at
    the top. The disc of a framework depends on its ``ring_label``:

    * no label      : white disc
    * one letter    : disc in the colour of that letter
    * two letters   : two half discs, one per letter
    * three letters : three equal wedges in the 's', 'm' and 'l' colours

    Rows with a true ``alpo_possible`` get an extra ring drawn around the
    disc. A small key ('8', '10', '12', 'AlPO') is drawn in the last row.
    Hovering a disc shows the diameters, dimensionality and ring sizes.

    Args:
        dataframe (pd.DataFrame) : frameworks to draw; the columns 'code',
            'ring_label' and 'alpo_possible' are required, the tooltips
            read 'Pore limiting dia.', 'Largest cavity dia.',
            'Max dia. of inc. sphere', 'channel_dim' and 'Rings'.
            The table itself is not modified.
        width (int) : passed to the figure as ``plot_width``
        height (int) : passed to the figure as ``plot_height``
        radius (float) : disc radius in grid units (cells are 1 apart)
        alpha (float) : opacity of the coloured fills
        title (str) : plot title
        showit (bool) : show the figure if True, otherwise return it

    Returns:
        the bokeh figure when ``showit`` is false, otherwise None
    '''
    df = dataframe.copy()

    colors = {'s': Spectral[4][0], 'm': Spectral[4][1], 'l': Spectral[4][2], 'alpo': Spectral[4][3]}

    # Row-major grid positions. Pure integer arithmetic keeps the
    # coordinates integral for every table size (true division of the
    # item count would turn them into floats).
    ncols = 10
    nitems = df.shape[0]
    position = np.arange(nitems)
    df['x'] = position % ncols
    df['y'] = position // ncols

    source = ColumnDataSource(data=df)

    # Field names containing spaces have to be wrapped in braces, and the
    # names must match the (renamed, case-sensitive) columns of the table.
    hover = HoverTool(
        tooltips=[
            ("Pore limiting diameter", "@{Pore limiting dia.}"),
            ("Largest cavity D.", "@{Largest cavity dia.}"),
            ("Max. D. Sph. I.", "@{Max dia. of inc. sphere}"),
            ("Dimensionality", "@channel_dim"),
            ("Rings", "@Rings")
        ]
    )

    # y range is reversed so that the first row ends up at the top
    p = figure(title=title,
               x_range=(df.x.min() - 0.5, df.x.max() + 0.5),
               y_range=(df.y.max() + 0.5, df.y.min() - 0.5),
               plot_width=width, plot_height=height, tools='save')

    # number of letters in each label; NaN where there is no label
    label_len = df.ring_label.str.len()

    # empty circles: frameworks without a label
    mask = label_len.isnull()
    x = df.loc[mask, 'x'].values
    y = df.loc[mask, 'y'].values
    p.circle(x, y, radius=radius, color='#ffffff', fill_alpha=1.0, line_color='#888888')

    # full circles: single-letter labels
    mask = label_len == 1
    x = df.loc[mask, 'x'].values
    y = df.loc[mask, 'y'].values
    c = [colors[k] for k in df.loc[mask, 'ring_label']]
    p.circle(x, y, radius=radius, color=c, fill_alpha=alpha, line_color='#888888')

    # half circles/wedges: two-letter labels, one half per letter
    mask = label_len == 2
    x = df.loc[mask, 'x'].values
    y = df.loc[mask, 'y'].values
    c1 = [colors[k] for k in df.loc[mask, 'ring_label'].str[0]]
    c2 = [colors[k] for k in df.loc[mask, 'ring_label'].str[1]]
    p.wedge(x, y, radius=radius, start_angle=math.pi/4, end_angle=5*math.pi/4,
            color=c1, alpha=alpha, direction="anticlock")
    p.wedge(x, y, radius=radius, start_angle=5*math.pi/4, end_angle=math.pi/4,
            color=c2, alpha=alpha, direction="anticlock")

    # third circles/wedges: three-letter labels, fixed 's', 'm', 'l' colours
    mask = label_len == 3
    x = df.loc[mask, 'x'].values
    y = df.loc[mask, 'y'].values
    p.wedge(x, y, radius=radius, start_angle=math.pi/2, end_angle=7*math.pi/6,
            color=colors['s'], alpha=alpha, direction="anticlock")
    p.wedge(x, y, radius=radius, start_angle=7*math.pi/6, end_angle=11*math.pi/6,
            color=colors['m'], alpha=alpha, direction="anticlock")
    p.wedge(x, y, radius=radius, start_angle=11*math.pi/6, end_angle=math.pi/2,
            color=colors['l'], alpha=alpha, direction="anticlock")

    # outer ring around the frameworks flagged as alpo_possible
    x = df.loc[df.alpo_possible, 'x'].values
    y = df.loc[df.alpo_possible, 'y'].values
    p.circle(x, y, radius=radius + 0.02, color='#ffffff', fill_alpha=0.0,
             line_color=colors['alpo'], line_width=5.0, line_alpha=0.6)

    # colour key, drawn in the last row towards the right edge
    maxx = df['x'].max()
    maxy = df['y'].max()
    p.circle([maxx-3.2, maxx-2.2, maxx-1.2, maxx-0.2], [maxy] * 4,
             color=[colors['s'], colors['m'], colors['l'], colors['alpo']], radius=0.1, alpha=alpha)
    p.text([maxx-3, maxx-2, maxx-1, maxx-0.05], [maxy] * 4, text=['8', '10', '12', 'AlPO'],
           text_alpha=0.7, text_align='left', text_baseline='middle')

    # The visible discs are drawn from plain arrays and therefore carry no
    # table columns. An invisible circle backed by `source` is used as the
    # hover target so the tooltips can resolve their fields.
    hit_target = p.circle('x', 'y', radius=radius, source=source,
                          fill_alpha=0.0, line_alpha=0.0)
    hover.renderers = [hit_target]
    p.add_tools(hover)

    # framework codes, centred on the discs
    text_props = {
        "text_font": "times",
        "text_font_style": "bold",
        "text_font_size": "13pt",
        "source": source,
        "angle": 0,
        "color": "black",
        "text_alpha": 0.7,
        "text_align": "center",
        "text_baseline": "middle"
    }
    p.text(x="x", y="y", text="code", **text_props)

    # strip grid, axes and outline: only the discs should remain
    p.grid.grid_line_color = None
    p.axis[0].ticker.num_minor_ticks = 0
    p.axis[1].ticker.num_minor_ticks = 0
    p.axis.major_label_text_font_size = '0pt'
    p.axis.major_tick_line_color = None
    p.border_fill_color = 'white'
    p.outline_line_width = 0
    p.axis.visible = False
    # NOTE(review): no glyph above is given a legend entry, so these two
    # settings presumably have no visible effect - confirm before removing
    p.legend.location = "bottom_right"
    p.legend.orientation = "horizontal"

    if showit:
        show(p)
        return None
    return p
# one grid per channel dimensionality: (table, plot height, title)
for frame, plot_height, plot_title in [
    (df0d, 250, 'Zeolite Topologies'),
    (df1d, 530, 'Unidirectional Zeolite Topologies'),
    (df2d, 450, 'Bidirectional Zeolite Topologies'),
    (df3d, 750, 'Tridirectional Zeolite Topologies'),
]:
    show_plot(frame, width=760, height=plot_height, radius=0.44, title=plot_title)
def diameters_bar_plot(data, category, title=None, showit=True):
    '''
    Plot a grouped bar chart of the characteristic diameters of all
    frameworks carrying a given ring label.

    Args:
        data (pd.DataFrame) : frameworks table with the columns 'code',
            'ring_label', 'Max dia. of inc. sphere',
            'Largest cavity dia.' and 'Pore limiting dia.'
        category (str) : ring label to select: 's', 'm', 'l', 'ms',...
        title (str) : chart title
        showit (bool) : show the chart if True, otherwise return it

    Returns:
        the chart when ``showit`` is false, otherwise None
    '''
    diameter_cols = ['Max dia. of inc. sphere', 'Largest cavity dia.', 'Pore limiting dia.']

    selected = data.loc[data.ring_label == category, ['code'] + diameter_cols]

    # Sort before melting so the frameworks come in order of increasing
    # largest cavity diameter (sort=False below presumably keeps that
    # order on the axis - TODO confirm).
    melted = pd.melt(selected.sort_values(by='Largest cavity dia.'),
                     id_vars=['code'], value_vars=diameter_cols)

    p = Bar(melted, label=cat('code', sort=False), values="value", group="variable",
            legend="top_left", xlabel='Framework code', ylabel='Length [Angstrom]',
            plot_width=750, title=title)

    if showit:
        show(p)
        return None
    return p
# one bar chart per ring label: (label, title)
for category, plot_title in [
    ('s', 'Small pore frameworks (8MR)'),
    ('m', 'Medium pore frameworks (10MR)'),
    ('l', 'Large pore frameworks (12MR)'),
    ('ms', 'Small-medium pore frameworks (8 and 10 MR)'),
    ('lm', 'Medium-large pore frameworks (10 and 12 MR)'),
    ('ls', 'Small-large pore frameworks (8 and 12 MR)'),
    ('lms', 'Small-medium-large pore frameworks (8, 10 and 12 MR)'),
]:
    diameters_bar_plot(df, category, title=plot_title)
%version_information bokeh, pandas, numpy, zefram
Software | Version |
---|---|
Python | 3.6.1 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] |
IPython | 6.0.0 |
OS | Linux 3.16.0 4 amd64 x86_64 with debian 8.7 |
bokeh | 0.12.5 |
pandas | 0.19.2 |
numpy | 1.12.1 |
zefram | 0.1.2 |
Mon May 08 00:27:01 2017 CEST |