We will now revisit the first example of this tutorial with an example that is better suited to the numerical scheme used in Devito. As a reminder, the governing equation is:
$$\frac{\partial u}{\partial t}+c\frac{\partial u}{\partial x} + c\frac{\partial u}{\partial y} = 0$$We then discretized this using forward differences in time and backward differences in space:
$$u_{i,j}^{n+1} = u_{i,j}^n-c \frac{\Delta t}{\Delta x}(u_{i,j}^n-u_{i-1,j}^n)-c \frac{\Delta t}{\Delta y}(u_{i,j}^n-u_{i,j-1}^n)$$In the previous example, the system was initialised with a hat function. As easy as this example seems, it actually highlights a few limitations of finite differences and related methods:
In the remainder of this example, we will reproduce the results from the previous example, only this time with a smooth initial condition. This lets us observe Devito in a setting for which it is better equipped.
from examples.cfd import plot_field, init_hat, init_smooth
import numpy as np
%matplotlib inline
# Some variable declarations
nx = 81
ny = 81
nt = 100
c = 1.
dx = 2. / (nx - 1)
dy = 2. / (ny - 1)
sigma = .2
dt = sigma * dx
Let us now initialise the field with an infinitely smooth bump, as given by [J.A. Krakos (2012): Unsteady Adjoint Analysis for Output Sensitivity and Mesh Adaptation, PhD thesis, p. 68] as $$ f(r)= \begin{cases} \frac{1}{A}e^{-1/(r-r^2)} &\text{ for } 0 < r < 1,\\ 0 &\text{ else.} \end{cases} $$ We use this with $A=100$, and define the initial condition in two dimensions as $$u^0(x,y)=1+f\left(\frac{2}{3}x\right)*f\left(\frac{2}{3}y\right).$$
#NBVAL_IGNORE_OUTPUT
# Create field and assign initial conditions
u = np.empty((nx, ny))
init_smooth(field=u, dx=dx, dy=dy)
# Plot initial condition
plot_field(u, zmax=4)
Solving this will move the bump again.
# Repeat initialisation, so we can re-run the cell
init_smooth(field=u, dx=dx, dy=dy)
for n in range(nt + 1):
# Copy previous result into a new buffer
un = u.copy()
# Update the new result with a 3-point stencil
u[1:, 1:] = (un[1:, 1:] - (c * dt / dx * (un[1:, 1:] - un[1:, :-1])) -
(c * dt / dy * (un[1:, 1:] - un[:-1, 1:])))
# Apply boundary conditions
u[0, :] = 1.
u[-1, :] = 1.
u[:, 0] = 1.
u[:, -1] = 1.
#NBVAL_IGNORE_OUTPUT
# A small sanity check for auto-testing
assert (u[45:55, 45:55] > 1.8).all()
u_ref = u.copy()
plot_field(u, zmax=4.)
Hooray, the wave moved! It looks like the solver works much better for this example: The wave has not noticeably changed its shape.
Again, we can re-create this via a Devito operator. Let's fill the initial buffer with smooth data and look at it:
#NBVAL_IGNORE_OUTPUT
from devito import Grid, TimeFunction
grid = Grid(shape=(nx, ny), extent=(2., 2.))
u = TimeFunction(name='u', grid=grid)
init_smooth(field=u.data[0], dx=dx, dy=dy)
plot_field(u.data[0])
We create again the discretized equation as shown below. Note that the equation is still the same, only the initial condition has changed.
from devito import Eq
eq = Eq(u.dt + c*u.dxl + c*u.dyl)
print(eq)
Eq(1.0*Derivative(u(t, x, y), x) + 1.0*Derivative(u(t, x, y), y) + Derivative(u(t, x, y), t), 0)
SymPy can re-organise this equation just like in the previous example.
from devito import solve
stencil = solve(eq, u.forward)
print(stencil)
1.0*(-dt*h_x*u(t, x, y) + dt*h_x*u(t, x, y - h_y) - dt*h_y*u(t, x, y) + dt*h_y*u(t, x - h_x, y) + h_x*h_y*u(t, x, y))/(h_x*h_y)
We can now use this stencil expression to create an operator to apply to our data object:
#NBVAL_IGNORE_OUTPUT
from devito import Operator
# Reset our initial condition in both buffers.
# This is required to avoid 0s propagating into
# our solution, which has a background value of 1.
init_smooth(field=u.data[0], dx=dx, dy=dy)
init_smooth(field=u.data[1], dx=dx, dy=dy)
# Apply boundary conditions
u.data[:, 0, :] = 1.
u.data[:, -1, :] = 1.
u.data[:, :, 0] = 1.
u.data[:, :, -1] = 1.
# Create an Operator that updates the forward stencil
# point in the interior subdomain only.
op = Operator(Eq(u.forward, stencil, subdomain=grid.interior))
# Apply the operator for a number of timesteps
op(time=nt, dt=dt)
plot_field(u.data[0, :, :])
# Some small sanity checks for the testing framework
assert (u.data[0, 45:55, 45:55] > 1.8).all()
assert np.allclose(u.data[0], u_ref, rtol=3.e-2)
Operator `Kernel` run in 0.02 s
Again, this looks just like the result from NumPy. Since this example is just like the one before, the low-level treatment of boundaries is also unchanged.
#NBVAL_IGNORE_OUTPUT
# Reset our data field and ICs in both buffers
init_smooth(field=u.data[0], dx=dx, dy=dy)
init_smooth(field=u.data[1], dx=dx, dy=dy)
# For defining BCs, we generally to explicitly set rows/columns
# in our field using an expression. We can use Devito's "indexed"
# notation to do this:
x, y = grid.dimensions
t = grid.stepping_dim
bc_left = Eq(u[t + 1, 0, y], 1.)
bc_right = Eq(u[t + 1, nx-1, y], 1.)
bc_top = Eq(u[t + 1, x, ny-1], 1.)
bc_bottom = Eq(u[t + 1, x, 0], 1.)
# Now combine the BC expressions with the stencil to form operator.
expressions = [Eq(u.forward, stencil, subdomain=grid.interior)]
expressions += [bc_left, bc_right, bc_top, bc_bottom]
op = Operator(expressions=expressions, dle=None, dse=None) # <-- Turn off performance optimisations
op(time=nt, dt=dt)
plot_field(u.data[0, :, :])
# Some small sanity checks for the testing framework
assert (u.data[0, 45:55, 45:55] > 1.8).all()
assert np.allclose(u.data[0], u_ref, rtol=3.e-2)
Operator `Kernel` run in 0.01 s
The C code of the Kernel is also still the same.
print(op.ccode)
#define _POSIX_C_SOURCE 200809L #include "stdlib.h" #include "math.h" #include "sys/time.h" struct dataobj { void *restrict data; int * size; int * npsize; int * dsize; int * hsize; int * hofs; int * oofs; } ; struct profiler { double section0; double section1; double section2; } ; int Kernel(const float dt, const float h_x, const float h_y, struct dataobj *restrict u_vec, const int time_M, const int time_m, struct profiler * timers, const int x_M, const int x_m, const int xi_ltkn, const int xi_rtkn, const int y_M, const int y_m, const int yi_ltkn, const int yi_rtkn) { float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data; for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2)) { struct timeval start_section0, end_section0; gettimeofday(&start_section0, NULL); /* Begin section0 */ for (int xi = x_m + xi_ltkn; xi <= x_M - xi_rtkn; xi += 1) { for (int yi = y_m + yi_ltkn; yi <= y_M - yi_rtkn; yi += 1) { u[t1][xi + 1][yi + 1] = 1.0F*(dt*h_x*u[t0][xi + 1][yi] - dt*h_x*u[t0][xi + 1][yi + 1] + dt*h_y*u[t0][xi][yi + 1] - dt*h_y*u[t0][xi + 1][yi + 1] + h_x*h_y*u[t0][xi + 1][yi + 1])/(h_x*h_y); } } /* End section0 */ gettimeofday(&end_section0, NULL); timers->section0 += (double)(end_section0.tv_sec-start_section0.tv_sec)+(double)(end_section0.tv_usec-start_section0.tv_usec)/1000000; struct timeval start_section1, end_section1; gettimeofday(&start_section1, NULL); /* Begin section1 */ for (int y = y_m; y <= y_M; y += 1) { u[t1][1][y + 1] = 1.00000000000000F; u[t1][81][y + 1] = 1.00000000000000F; } /* End section1 */ gettimeofday(&end_section1, NULL); timers->section1 += (double)(end_section1.tv_sec-start_section1.tv_sec)+(double)(end_section1.tv_usec-start_section1.tv_usec)/1000000; struct timeval start_section2, end_section2; gettimeofday(&start_section2, NULL); /* Begin section2 */ for (int x = x_m; x <= x_M; x += 1) { u[t1][x + 1][81] = 1.00000000000000F; u[t1][x + 1][1] = 1.00000000000000F; } /* End section2 */ gettimeofday(&end_section2, NULL); timers->section2 += (double)(end_section2.tv_sec-start_section2.tv_sec)+(double)(end_section2.tv_usec-start_section2.tv_usec)/1000000; } return 0; }