#!/usr/bin/env python
# coding: utf-8

# In[ ]:

# The IPython shell only exists when this file is executed as a notebook;
# skip the inline-backend magic when it is run as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# # Binning
#
# ## Binning2D
#
# Statistical data binning is a way to group several more or less
# continuous values into a smaller number of *bins*. For example, if you
# have irregularly distributed data over the oceans, you can organize
# these observations into a lower number of geographical intervals (for
# example, by grouping them every five degrees in latitude and
# longitude).
#
# In this example, we will calculate drifter velocity statistics on the
# Black Sea over a period of 9 years.

# In[ ]:

import cartopy.crs
import matplotlib
import matplotlib.pyplot
import numpy
import pyinterp
import pyinterp.backends.xarray
import pyinterp.tests

# The first step is to load the data into memory:

# In[ ]:

ds = pyinterp.tests.load_aoml()

# Let's start by calculating the norm of the velocity vector (u, v).

# In[ ]:

norm = (ds.ud**2 + ds.vd**2)**0.5

# Now, we will describe the grid used to calculate our
# [binned](https://pangeo-pyinterp.readthedocs.io/en/latest/generated/pyinterp.Binning2D.html#pyinterp.Binning2D)
# statistics.

# In[ ]:

binning = pyinterp.Binning2D(
    pyinterp.Axis(numpy.arange(27, 42, 0.3), is_circle=True),
    pyinterp.Axis(numpy.arange(40, 47, 0.3)))
binning

# We push the loaded data into the different defined bins using [simple
# binning](https://pangeo-pyinterp.readthedocs.io/en/latest/generated/pyinterp.Binning2D.push.html#bilinear-binning).

# In[ ]:

binning.clear()
binning.push(ds.lon, ds.lat, norm, simple=True)

# It is possible to retrieve other statistical
# [variables](https://pangeo-pyinterp.readthedocs.io/en/latest/generated/pyinterp.Binning2D.variable.html#pyinterp.Binning2D.variable)
# such as variance, minimum, maximum, etc.

# In[ ]:

nearest = binning.variable('mean')

# Then, we push the loaded data into the different defined bins using
# [linear binning](https://pangeo-pyinterp.readthedocs.io/en/latest/generated/pyinterp.Binning2D.push.html#bilinear-binning)

# In[ ]:

# Clear the bins first so that the samples pushed for the simple binning
# are not mixed into the linear-binning statistics.
binning.clear()
binning.push(ds.lon, ds.lat, norm, simple=False)
linear = binning.variable('mean')

# We visualize our result

# In[ ]:

# Both panels share the same grid, so the mesh is built only once.
lon, lat = numpy.meshgrid(binning.x, binning.y, indexing='ij')

fig = matplotlib.pyplot.figure(figsize=(10, 8))
ax1 = fig.add_subplot(211, projection=cartopy.crs.PlateCarree())
pcm = ax1.pcolormesh(lon,
                     lat,
                     nearest,
                     cmap='jet',
                     shading='auto',
                     vmin=0,
                     vmax=1,
                     transform=cartopy.crs.PlateCarree())
ax1.coastlines()
ax1.set_title("Simple binning.")

ax2 = fig.add_subplot(212, projection=cartopy.crs.PlateCarree())
pcm = ax2.pcolormesh(lon,
                     lat,
                     linear,
                     cmap='jet',
                     shading='auto',
                     vmin=0,
                     vmax=1,
                     transform=cartopy.crs.PlateCarree())
ax2.coastlines()
ax2.set_title("Linear binning.")
fig.colorbar(pcm, ax=[ax1, ax2], shrink=0.8)
fig.show()

# ## Histogram2D
#
# This class, like the previous one, allows calculating a binning using
# histograms. In addition, this approach calculates the quantiles of the
# distribution and obtains the median value of the pixels.
#
# Note that the algorithm used defines a maximum size of the number of bins
# handled by each histogram. If the number of observations is greater than the
# capacity of the histogram, the histogram will be compressed to best present
# this distribution in limited memory size. The description of the exact
# algorithm is in the article [A Streaming Parallel Decision Tree
# Algorithm](http://jmlr.org/papers/v11/ben-haim10a.html).

# In[ ]:

hist2d = pyinterp.Histogram2D(
    pyinterp.Axis(numpy.arange(27, 42, 0.3), is_circle=True),
    pyinterp.Axis(numpy.arange(40, 47, 0.3)))
hist2d

# We push the loaded data into the different defined bins using the method
# `push`.

# In[ ]:

hist2d.push(ds.lon, ds.lat, norm)

# We visualize the mean vs median of the distribution.

# In[ ]:

# Build the mesh from the histogram's own axes so that this section does not
# depend on the Binning2D object created earlier.
lon, lat = numpy.meshgrid(hist2d.x, hist2d.y, indexing='ij')

fig = matplotlib.pyplot.figure(figsize=(10, 8))
ax1 = fig.add_subplot(211, projection=cartopy.crs.PlateCarree())
pcm = ax1.pcolormesh(lon,
                     lat,
                     hist2d.variable("mean"),
                     cmap='jet',
                     shading='auto',
                     vmin=0,
                     vmax=1,
                     transform=cartopy.crs.PlateCarree())
ax1.coastlines()
ax1.set_title("Mean")

ax2 = fig.add_subplot(212, projection=cartopy.crs.PlateCarree())
pcm = ax2.pcolormesh(lon,
                     lat,
                     hist2d.variable("quantile", 0.5),
                     cmap='jet',
                     shading='auto',
                     vmin=0,
                     vmax=1,
                     transform=cartopy.crs.PlateCarree())
ax2.coastlines()
ax2.set_title("Median")
fig.colorbar(pcm, ax=[ax1, ax2], shrink=0.8)
fig.show()