#!/usr/bin/env python # coding: utf-8 # In[1]: name = '2015-10-12-cython_nogil' title = "Update on optimizing code for iso-surfaces using cython" # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') import warnings warnings.simplefilter("ignore") import os from datetime import datetime from IPython.core.display import HTML with open('creative_commons.txt', 'r') as f: html = f.read() hour = datetime.utcnow().strftime('%H:%M') comments="true" date = '-'.join(name.split('-')[:3]) slug = '-'.join(name.split('-')[3:]) metadata = dict(title=title, date=date, hour=hour, comments=comments, slug=slug, name=name) markdown = """Title: {title} date: {date} {hour} comments: {comments} slug: {slug} {{% notebook {name}.ipynb cells[2:] %}} """.format(**metadata) content = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir, '{}.md'.format(name))) with open('{}'.format(content), 'w') as f: f.writelines(markdown) html = '''

This post was written as an IPython notebook. It is available for download or as a static html.

%s''' % (name, name, html) # This post is an update on of the previous iso-surface [post](https://ocefpaf.github.io/python4oceanographers/blog/2015/10/05/isosurfaces/). # # After reading a little bit more on cython I found out that, # when using memory views, # we can release the [GIL](https://wiki.python.org/moin/GlobalInterpreterLock) using the [nogil](http://docs.cython.org/src/userguide/memoryviews.html) annotation. # I wonder if releasing the GIL can get the cython performance closer to numba's. # # First let's set the same data as before up to run some tests. # In[3]: import numpy as np p = np.linspace(-100, 0, 30)[:, None, None] * np.ones((50, 70)) x, y = np.mgrid[0:20:50j, 0:20:70j] q = np.sin(x) + p p0 = -50. # The two cells below will compiled two versions of the `zslice` function. # The first with the GIL and the second releasing during the loop. # In[4]: get_ipython().run_line_magic('load_ext', 'Cython') # In[5]: get_ipython().run_cell_magic('cython', '', "\ncimport cython\nimport numpy as np\ncimport numpy as np\n\nNaN = np.NaN\n\n@cython.boundscheck(False)\n@cython.wraparound(False)\ndef gil_zslice(double[:, :, ::1] q,\n double[:, :, ::1] p,\n double p0,\n mask_val=NaN):\n cdef int L = q.shape[2]\n cdef int M = q.shape[1]\n cdef int N = q.shape[0]\n cdef double dp, dq, dq0\n cdef int i, j, k\n \n cdef np.ndarray[double, ndim=2, mode='c'] q_iso = np.empty((M, L), dtype=np.float64)\n \n for i in range(L):\n for j in range(M):\n q_iso[j, i] = mask_val\n for k in range(N-1):\n if (((p[k, j, i] < p0) and (p[k+1, j, i] > p0)) or\n ((p[k, j, i] > p0) and (p[k+1, j, i] < p0))):\n dp = p[k+1, j, i] - p[k, j, i]\n dp0 = p0 - p[k, j, i]\n dq = q[k+1, j, i] - q[k, j, i]\n q_iso[j, i] = q[k, j, i] + dq*dp0/dp\n return q_iso\n") # In[6]: get_ipython().run_cell_magic('cython', '', "\ncimport cython\n\nimport numpy as np\ncimport numpy as np\n\nNaN = np.NaN\n\n\n@cython.boundscheck(False)\n@cython.wraparound(False)\ndef nogil_zslice_2D(double[:, ::1] q,\n double[:, ::1] p,\n double p0,\n double mask_val=NaN):\n\n cdef int IJ = q.shape[1]\n cdef int K = q.shape[0]\n cdef int ij, k\n\n cdef np.ndarray[double, ndim=1, mode='c'] q_iso = np.empty(IJ, dtype=np.float64)\n\n with nogil:\n for ij in range(IJ):\n q_iso[ij] = mask_val\n for k in range(K-1):\n if (((p[k, ij] < p0) and (p[k+1, ij] > p0)) or\n ((p[k, ij] > p0) and (p[k+1, ij] < p0))):\n q_iso[ij] = (q[k, ij] +\n (q[k+1, ij] - q[k, ij]) * # dq\n (p0 - p[k, ij]) / # dp0\n (p[k+1, ij] - p[k, ij])) # dp\n return q_iso\n") # Note that cython won't allow any conversion to Python objects without GIL. # That is why we had to eliminate the temporary variables (`dq`, `dp0`, and `dp`). # Hopefully we are sacrificing readability to gain some speed. # # The second difference has nothing to do with the GIL! # I modified the code to work with 2D arrays (depth, y_x_space) instead of 3D # arrays (depth, y, x). # The reason is to make this code more # [UGRID](http://ocefpaf.github.io/ugrid-conventions/) friendly. # # Again we will use our poor man's benchmarking. # In[7]: K, J, I = q.shape qr = q.reshape(K, -1) pr = p.reshape(K, -1) # In[8]: gil = get_ipython().run_line_magic('timeit', '-n1000 -o gil_zslice(q, p, p0)') # In[9]: nogil = get_ipython().run_line_magic('timeit', '-n1000 -o nogil_zslice_2D(qr, pr, p0)') # In[10]: gil.best / nogil.best # Note that, under the same test conditions, we got a slightly better performance # than numba (175 µs). # # # Due to the the input shape change and the fact that the cython code expects only `np.float64` we need to wrap the cython function to prepare the input. # That is a small price to pay to have a single code that can deal with any ocean model grid. # # The two functions below should take care of the input for us. # In[11]: def _save_typecast(arr): if arr.dtype is not 'float64' and np.can_cast(arr, np.float64): arr = arr.astype(np.float64) return arr def zslice(q, p, p0): q = _save_typecast(q) p = _save_typecast(p) p0 = -abs(p0) if q.ndim == 3: K, J, I = q.shape iso = nogil_zslice_2D(q.reshape(K, -1), p.reshape(K, -1), p0) return iso.reshape(J, I) elif q.ndim == 2: return nogil_zslice_2D(q, p, p0) else: msg = "Expected 2 (ugrid) or 3 (r-/s-grid) dimensions. Got {}." raise ValueError(msg(q.ndim)) # All that is left is to test the nogil and UGRID friendly version against real # data. First some UGRID (FVCOM) data. # In[12]: import iris url = ('http://crow.marine.usf.edu:8080/thredds/dodsC/' 'FVCOM-Nowcast-Agg.nc') cubes = iris.load_raw(url) # Last time step. temp = cubes.extract_strict('sea_water_potential_temperature')[-1, ...] p = temp.coord('sea_surface_height_above_reference_ellipsoid').points q = temp.data # (In the future iris will use `pyugrid` and the next cell won't be necessary.) # In[13]: import pyugrid ugrid = pyugrid.UGrid.from_ncfile(url) lon = ugrid.nodes[:, 0] lat = ugrid.nodes[:, 1] triangles = ugrid.faces[:] # In[14]: import matplotlib.tri as tri triang = tri.Triangulation(lon, lat, triangles=triangles) # In[15]: import matplotlib.pyplot as plt import cartopy.crs as ccrs from cartopy.io import shapereader from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER cmap = plt.cm.viridis def make_map(projection=ccrs.PlateCarree()): fig, ax = plt.subplots(figsize=(9, 13), subplot_kw=dict(projection=projection)) gl = ax.gridlines(draw_labels=True) gl.xlabels_top = gl.ylabels_right = False gl.xformatter = LONGITUDE_FORMATTER gl.yformatter = LATITUDE_FORMATTER ax.coastlines('50m') return fig, ax # Let's compute a slice of potential temperature at -25 meters. # # I could not figure out how to use masked array and `tricontourf`. # The code was segfaulting! # I ended up filling the data with `-999` and specifying the contour levels instead. # In[16]: import numpy.ma as ma temp_slice = zslice(q, p, -25) mask = temp_slice = ma.masked_invalid(temp_slice) vmin, vmax = temp_slice.min(), temp_slice.max() temp_slice = temp_slice.filled(fill_value=-999) # In[17]: fig, ax = make_map() extent = [lon.min(), lon.max(), lat.min(), lat.max()] ax.set_extent(extent) levels = np.arange(vmin, vmax, 0.5) kw = dict(cmap=cmap, alpha=0.9, levels=levels) cs = ax.tricontourf(triang, temp_slice, **kw) kw = dict(shrink=0.5, orientation='vertical') cbar = fig.colorbar(cs, **kw) # UGRID seems to be OK. Let's try some SGRID (ROMS) now. # In[18]: url = ('http://tds.marine.rutgers.edu/thredds/dodsC/roms/espresso/2013_da/avg/' 'ESPRESSO_Real-Time_v2_Averages_Best') cubes = iris.load_raw(url) salt = cubes.extract_strict('sea_water_salinity')[-1, ...] # Last time step. lon = salt.coord(axis='X').points lat = salt.coord(axis='Y').points p = salt.coord('sea_surface_height_above_reference_ellipsoid').points q = salt.data # In[19]: salt_slice = ma.masked_invalid(zslice(q, p, 300)) fig, ax = make_map() extent = [lon.min(), lon.max(), lat.min(), lat.max()] ax.set_extent(extent) cs = ax.pcolormesh(lon, lat, salt_slice, cmap=cmap) kw = dict(shrink=0.5, orientation='vertical', extend='both') cbar = fig.colorbar(cs, **kw) # I known we still have a bug when requesting levels where the data will fall in the top or bottom levels. # I am working to find a proper way to return a valid slice in those places. # However, we are pretty close to a code that works with any ocean model grid! # In[20]: HTML(html)