from bokeh.io import output_notebook
output_notebook()
from bokeh.plotting import figure, output_file, show
from bokeh.layouts import gridplot
import numpy as np
from freud import parallel, box, density
from ipywidgets import IntProgress
from IPython.display import display
parallel.setNumThreads(4)
def default_bokeh(p):
p.title.text_font_size = "18pt"
p.title.align = "center"
p.xaxis.axis_label_text_font_size = "14pt"
p.yaxis.axis_label_text_font_size = "14pt"
p.xaxis.major_tick_in = 10
p.xaxis.major_tick_out = 0
p.xaxis.minor_tick_in = 5
p.xaxis.minor_tick_out = 0
p.yaxis.major_tick_in = 10
p.yaxis.major_tick_out = 0
p.yaxis.minor_tick_in = 5
p.yaxis.minor_tick_out = 0
p.xaxis.major_label_text_font_size = "12pt"
p.yaxis.major_label_text_font_size = "12pt"
If you have never programmed in C++ before (or even if you have), you may not be familiar with pointers. While it's beyond the scope of this tutorial to cover what pointers are and how they are used, this tutorial will briefly cover some of the issues you may have in Freud which involve pointers. If you are interested in pointers, or if you plan on developing freud, the C++ tutorial on pointers is a good place to start.
Run the following code and take a guess what the value of b[0]
will be:
a = [1]
b = a
a[0] = 2
print(b[0])
2
In Python, this is an example of assignment by reference. b
is not created as its own list. Rather, b is created as a reference to a
, so by changing a
, we also change b
. In C++ you can do the same thing (create/pass variables by reference) but you can also do it by pointer, which is the memory address of the value. While there is a difference, the end result is the same: by changing the value of a
you change the value of b
. There are many pros to doing things this way, including lower memory usage and faster performance, but it's easy to inadvertently overwrite data.
In Freud we decided to create our NumPy arrays by passing a pointer from C++. This was the fastest, most efficient way to do this, but can result in data being changed or overwritten, so take care when performing calculations, and create copies where necessary.
The example below "overwrites" data stored in r_avg
and y_avg
. Read the code and see the output.
# create the RDF object
rdf = density.RDF(rmax=10.0, dr=0.1)
# load the example data
data_path = "ex_data/phi065"
box_data = np.load("{}/box_data.npy".format(data_path))
pos_data = np.load("{}/pos_data.npy".format(data_path))
quat_data = np.load("{}/quat_data.npy".format(data_path))
n_frames = pos_data.shape[0]
# for all frames except the first (your syntax will vary based on your reader)
myProgressBar = IntProgress(min=1,max=n_frames)
display(myProgressBar)
for frame in range(1, n_frames):
myProgressBar.value = frame
# read box, position data
l_box = box_data[frame]
l_pos = pos_data[frame]
# create the freud box object
fbox = box.Box(Lx=l_box["Lx"], Ly=l_box["Ly"], is2D=True)
# compute
rdf.accumulate(fbox, l_pos, l_pos)
# get the center of the histogram bins
r_avg = rdf.getR()
# get the value of the histogram bins
y_avg = rdf.getRDF()
# do the same thing, but only for the last frame
# read box, position data
l_box = box_data[-1]
l_pos = pos_data[-1]
# create the freud box object
fbox = box.Box(Lx=l_box["Lx"], Ly=l_box["Ly"], is2D=True)
# compute; reset is not necessary, called automatically
rdf.compute(fbox, l_pos, l_pos)
# get the center of the histogram bins
r = rdf.getR()
# get the value of the histogram bins
y = rdf.getRDF()
# create bokeh plot
p0 = figure(title="RDF", x_axis_label='r', y_axis_label='g(r)')
p0.circle(r, y, legend="Compute")
p0.line(r, y, legend="Compute", line_width=2)
p0.square(r_avg, y_avg, legend="Accumulate", fill_color=None, line_color="red")
p0.line(r_avg, y_avg, legend="Accumulate", line_dash=[4,4], line_width=2, line_color="red")
default_bokeh(p0)
p1 = figure(title="RDF", x_axis_label='r', y_axis_label='g(r)')
p1.line(r, y, legend="Compute", line_width=2)
p1.line(r_avg, y_avg, legend="Accumulate", line_width=2, line_color="red")
default_bokeh(p1)
grid = gridplot([p0, p1], ncols=2, plot_width=400, plot_height=400)
show(grid)
rdf = density.RDF(rmax=10.0, dr=0.1)
data_path = "ex_data/phi065"
box_data = np.load("{}/box_data.npy".format(data_path))
pos_data = np.load("{}/pos_data.npy".format(data_path))
quat_data = np.load("{}/quat_data.npy".format(data_path))
# reset the rdf; required if not using compute
n_frames = pos_data.shape[0]
# for all frames except the first (your syntax will vary based on your reader)
myProgressBar = IntProgress(min=1,max=n_frames)
display(myProgressBar)
for frame in range(1, n_frames):
myProgressBar.value = frame
# read box, position data
l_box = box_data[frame]
l_pos = pos_data[frame]
# create the freud box object
fbox = box.Box(Lx=l_box["Lx"], Ly=l_box["Ly"], is2D=True)
# compute
rdf.accumulate(fbox, l_pos, l_pos)
# get the center of the histogram bins
r_avg = np.copy(rdf.getR())
# get the value of the histogram bins
y_avg = np.copy(rdf.getRDF())
# read box, position data
l_box = box_data[-1]
l_pos = pos_data[-1]
# create the freud box object
fbox = box.Box(Lx=l_box["Lx"], Ly=l_box["Ly"], is2D=True)
# compute
rdf.compute(fbox, l_pos, l_pos)
# get the center of the histogram bins
r = rdf.getR()
# get the value of the histogram bins
y = rdf.getRDF()
# create bokeh plot
p0 = figure(title="RDF", x_axis_label='r', y_axis_label='g(r)')
p0.circle(r, y, legend="Compute")
p0.line(r, y, legend="Compute", line_width=2)
p0.square(r_avg, y_avg, legend="Accumulate", fill_color=None, line_color="red")
p0.line(r_avg, y_avg, legend="Accumulate", line_dash=[4,4], line_width=2, line_color="red")
default_bokeh(p0)
p1 = figure(title="RDF", x_axis_label='r', y_axis_label='g(r)')
p1.line(r, y, legend="Compute", line_width=2)
p1.line(r_avg, y_avg, legend="Accumulate", line_width=2, line_color="red")
default_bokeh(p1)
grid = gridplot([p0, p1], ncols=2, plot_width=400, plot_height=400)
show(grid)
While unlikely, it is possible for freud objects to pass out of scope and be garbage collected, so the pointer ends up pointing to other data, resulting in a segmentation fault. This is a very rare occurrence that can be avoided by using copies.
np.copy()
can be used to avoid this scenarionp.copy()
can be used to avoid this.