import xarray as xr
import numpy as np
import pandas as pd
# Toy 1-D array over dimension 'x' carrying two extra coordinates, 'y' and
# 'z', that are both defined along 'x'. The RandomState is seeded with 0.
a = xr.DataArray(
    np.random.RandomState(0).randn(3),
    dims=['x'],
    coords=dict(
        y=('x', [0, 1, 1]),
        z=('x', ['a', 'a', 'b']),
    ),
)
# Both keys lie along 'x'; the recorded result has a single 'grouped_x_y'
# dimension whose labels are (x, y) tuples.
a.groupby(['x', 'y']).mean()
<xarray.DataArray (grouped_x_y: 3)> array([ 1.76405235, 0.40015721, 0.97873798]) Coordinates: * grouped_x_y (grouped_x_y) object (0, 0) (1, 1) (2, 1)
# Identity apply: the recorded result keeps the original 'x' dimension and
# values, and gains a 'grouped_y_z' coordinate of (y, z) tuples along 'x'.
a.groupby(['y', 'z']).apply(lambda x: x)
<xarray.DataArray (x: 3)> array([ 1.76405235, 0.40015721, 0.97873798]) Coordinates: y (x) int64 0 1 1 * x (x) int64 0 1 2 z (x) <U1 'a' 'a' 'b' grouped_y_z (x) object (0, 'a') (1, 'a') (1, 'b')
# 4x4 grid of 0..15 over dims ('x', 'y'); 'a' labels the rows (along 'x')
# and 'b' labels the columns (along 'y'), each with the values 0, 0, 1, 1.
coords = {
    'a': ('x', [0, 0, 1, 1]),
    'b': ('y', [0, 0, 1, 1]),
}
square = xr.DataArray(
    np.arange(16).reshape(4, 4),
    dims=['x', 'y'],
    coords=coords,
)
square
<xarray.DataArray (x: 4, y: 4)> array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15]]) Coordinates: a (x) int64 0 0 1 1 b (y) int64 0 0 1 1 * x (x) int64 0 1 2 3 * y (y) int64 0 1 2 3
# Keys on different dimensions ('a' on 'x', 'b' on 'y'): the recorded result
# is a 2-D (a, b) array with one mean per key combination.
square.groupby(['a', 'b']).mean()
<xarray.DataArray (a: 2, b: 2)> array([[ 2.5, 4.5], [ 10.5, 12.5]]) Coordinates: * a (a) int64 0 1 * b (b) int64 0 1
# Grouping by both index coordinates: the recorded result has the same
# values as `square` (as floats) on (x, y), without the 'a'/'b' coordinates.
square.groupby(['x', 'y']).mean()
<xarray.DataArray (x: 4, y: 4)> array([[ 0., 1., 2., 3.], [ 4., 5., 6., 7.], [ 8., 9., 10., 11.], [ 12., 13., 14., 15.]]) Coordinates: * x (x) int64 0 1 2 3 * y (y) int64 0 1 2 3
# Subtract each (a, b) group's mean from its members. The recorded result
# keeps the (x, y) shape; 'a' and 'b' appear as 2-D (x, y) coordinates and a
# 'grouped_a_b' coordinate of (a, b) tuples is added.
square.groupby(['a', 'b']).apply(lambda x: x - x.mean())
<xarray.DataArray (x: 4, y: 4)> array([[-2.5, -1.5, -2.5, -1.5], [ 1.5, 2.5, 1.5, 2.5], [-2.5, -1.5, -2.5, -1.5], [ 1.5, 2.5, 1.5, 2.5]]) Coordinates: a (x, y) int64 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 b (x, y) int64 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 grouped_a_b (x, y) object (0, 0) (0, 0) (0, 1) (0, 1) (0, 0) (0, 0) ... * x (x) int64 0 1 2 3 * y (y) int64 0 1 2 3
# Both keys lie along 'x'; the recorded result has one 'grouped_a_x'
# dimension labelled by (a, x) tuples, with one mean per row of `square`.
square.groupby(['a', 'x']).mean()
<xarray.DataArray (grouped_a_x: 4)> array([ 1.5, 5.5, 9.5, 13.5]) Coordinates: * grouped_a_x (grouped_a_x) object (0, 0) (0, 1) (1, 2) (1, 3)
# Non-index key 'a' (on 'x') combined with index 'y': the recorded result
# is a 2-D (a, y) array.
square.groupby(['a', 'y']).mean()
<xarray.DataArray (a: 2, y: 4)> array([[ 2., 3., 4., 5.], [ 10., 11., 12., 13.]]) Coordinates: * a (a) int64 0 1 * y (y) int64 0 1 2 3
# Index 'x' combined with non-index key 'b' (on 'y').
# NOTE(review): the recorded result has dims (b, x), not the key order
# (x, b) -- confirm whether that ordering is intended.
square.groupby(['x', 'b']).mean()
<xarray.DataArray (b: 2, x: 4)> array([[ 0.5, 4.5, 8.5, 12.5], [ 2.5, 6.5, 10.5, 14.5]]) Coordinates: * b (b) int64 0 1 * x (x) int64 0 1 2 3
# Subtract each (x, b) group's mean from its members; the recorded result
# keeps the (x, y) shape and adds a 'grouped_x_b' coordinate.
# NOTE(review): the tuples shown in 'grouped_x_b' look like they are ordered
# (b, x) rather than (x, b) -- confirm against the implementation.
square.groupby(['x', 'b']).apply(lambda x: x - x.mean())
<xarray.DataArray (x: 4, y: 4)> array([[-0.5, 0.5, -0.5, 0.5], [-0.5, 0.5, -0.5, 0.5], [-0.5, 0.5, -0.5, 0.5], [-0.5, 0.5, -0.5, 0.5]]) Coordinates: a (x, y) int64 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 b (x, y) int64 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 grouped_x_b (x, y) object (0, 0) (0, 0) (1, 0) (1, 0) (0, 1) (0, 1) ... * x (x) int64 0 1 2 3 * y (y) int64 0 1 2 3
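Open question: should the iterator interface unstack each group automatically? As shown below, iterating currently yields groups stacked along `stacked_x_y`.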
# Iterating yields (key tuple, group) pairs; in the recorded output each
# group is 1-D along a 'stacked_x_y' dimension labelled by (x, y) tuples.
list(square.groupby(['a', 'b']))
[((0, 0), <xarray.DataArray (stacked_x_y: 4)> array([0, 1, 4, 5]) Coordinates: a (stacked_x_y) int64 0 0 0 0 b (stacked_x_y) int64 0 0 0 0 * stacked_x_y (stacked_x_y) object (0, 0) (0, 1) (1, 0) (1, 1)), ((0, 1), <xarray.DataArray (stacked_x_y: 4)> array([2, 3, 6, 7]) Coordinates: a (stacked_x_y) int64 0 0 0 0 b (stacked_x_y) int64 1 1 1 1 * stacked_x_y (stacked_x_y) object (0, 2) (0, 3) (1, 2) (1, 3)), ((1, 0), <xarray.DataArray (stacked_x_y: 4)> array([ 8, 9, 12, 13]) Coordinates: a (stacked_x_y) int64 1 1 1 1 b (stacked_x_y) int64 0 0 0 0 * stacked_x_y (stacked_x_y) object (2, 0) (2, 1) (3, 0) (3, 1)), ((1, 1), <xarray.DataArray (stacked_x_y: 4)> array([10, 11, 14, 15]) Coordinates: a (stacked_x_y) int64 1 1 1 1 b (stacked_x_y) int64 1 1 1 1 * stacked_x_y (stacked_x_y) object (2, 2) (2, 3) (3, 2) (3, 3))]
# 3-D array over dims ('x', 'y', 'z') with a 2-D string coordinate 'xy'
# defined on ('x', 'y'). The RandomState is seeded with 0.
b = xr.DataArray(
    np.random.RandomState(0).randn(2, 3, 4),
    dims=['x', 'y', 'z'],
    coords={
        'xy': (('x', 'y'), [['a', 'b', 'c'], ['b', 'c', 'c']]),
    },
)
b
<xarray.DataArray (x: 2, y: 3, z: 4)> array([[[ 1.76405235, 0.40015721, 0.97873798, 2.2408932 ], [ 1.86755799, -0.97727788, 0.95008842, -0.15135721], [-0.10321885, 0.4105985 , 0.14404357, 1.45427351]], [[ 0.76103773, 0.12167502, 0.44386323, 0.33367433], [ 1.49407907, -0.20515826, 0.3130677 , -0.85409574], [-2.55298982, 0.6536186 , 0.8644362 , -0.74216502]]]) Coordinates: xy (x, y) <U1 'a' 'b' 'c' 'b' 'c' 'c' * x (x) int64 0 1 * y (y) int64 0 1 2 * z (z) int64 0 1 2 3
# Grouping by both 'x' and 'y' leaves only 'z' inside each group; the
# recorded values equal those of b.sum('z'), but the 'xy' coordinate is
# absent from this result.
b.groupby(['x', 'y']).sum()
<xarray.DataArray (x: 2, y: 3)> array([[ 5.38384074, 1.68901132, 1.90569673], [ 1.6602503 , 0.74789277, -1.77710004]]) Coordinates: * x (x) int64 0 1 * y (y) int64 0 1 2
# Reference computation for the grouped sum above: a plain reduction over
# 'z', which retains the 'xy' coordinate in the recorded output.
b.sum('z')
<xarray.DataArray (x: 2, y: 3)> array([[ 5.38384074, 1.68901132, 1.90569673], [ 1.6602503 , 0.74789277, -1.77710004]]) Coordinates: xy (x, y) <U1 'a' 'b' 'c' 'b' 'c' 'c' * x (x) int64 0 1 * y (y) int64 0 1 2
# Subtract each (x, y) group's mean from its members. The recorded result
# has dims (z, x, y) -- 'z' moves to the front -- and adds a 'grouped_x_y'
# coordinate; values match b - b.mean(['z']) up to that transposition.
b.groupby(['x', 'y']).apply(lambda x: x - x.mean())
<xarray.DataArray (z: 4, x: 2, y: 3)> array([[[ 0.41809216, 1.44530516, -0.57964303], [ 0.34597515, 1.30710588, -2.10871481]], [[-0.94580298, -1.39953071, -0.06582568], [-0.29338756, -0.39213146, 1.09789361]], [[-0.3672222 , 0.52783559, -0.33238061], [ 0.02880066, 0.12609451, 1.30871121]], [[ 0.89493301, -0.57361004, 0.97784932], [-0.08138825, -1.04106893, -0.29789001]]]) Coordinates: * z (z) int64 0 1 2 3 xy (x, y) <U1 'a' 'b' 'c' 'b' 'c' 'c' grouped_x_y (x, y) object (0, 0) (0, 1) (0, 2) (1, 0) (1, 1) (1, 2) * x (x) int64 0 1 * y (y) int64 0 1 2
# Reference computation for the grouped demeaning above, keeping the
# original (x, y, z) dimension order.
b - b.mean(['z'])
<xarray.DataArray (x: 2, y: 3, z: 4)> array([[[ 0.41809216, -0.94580298, -0.3672222 , 0.89493301], [ 1.44530516, -1.39953071, 0.52783559, -0.57361004], [-0.57964303, -0.06582568, -0.33238061, 0.97784932]], [[ 0.34597515, -0.29338756, 0.02880066, -0.08138825], [ 1.30710588, -0.39213146, 0.12609451, -1.04106893], [-2.10871481, 1.09789361, 1.30871121, -0.29789001]]]) Coordinates: xy (x, y) <U1 'a' 'b' 'c' 'b' 'c' 'c' * x (x) int64 0 1 * y (y) int64 0 1 2 * z (z) int64 0 1 2 3
# Index 'x' combined with the 2-D coordinate 'xy': the recorded result has
# dims (xy, x), with NaN where a combination does not occur ('a' at x=1).
b.groupby(['x', 'xy']).mean()
<xarray.DataArray (xy: 3, x: 2)> array([[ 1.34596018, nan], [ 0.42225283, 0.41506258], [ 0.47642418, -0.12865091]]) Coordinates: * xy (xy) object 'a' 'b' 'c' * x (x) int64 0 1
# Single key that is a 2-D coordinate: the recorded result holds one mean
# per distinct 'xy' label.
b.groupby('xy').mean()
<xarray.DataArray (xy: 3)> array([ 1.34596018, 0.4186577 , 0.07304079]) Coordinates: * xy (xy) object 'a' 'b' 'c'
# Reduce only over 'z' within each 'xy' group; the recorded result comes
# back on the original (x, y) grid with the 'xy' coordinate retained.
b.groupby('xy').mean('z')
<xarray.DataArray (x: 2, y: 3)> array([[ 1.34596018, 0.42225283, 0.47642418], [ 0.41506258, 0.18697319, -0.44427501]]) Coordinates: xy (x, y) <U1 'a' 'b' 'c' 'b' 'c' 'c' * x (x) int64 0 1 * y (y) int64 0 1 2
# Reduce only over the stacked dimension 'stacked_x_y' (the dimension the
# groups carry when iterated); the recorded result has dims (z, xy).
b.groupby('xy').mean('stacked_x_y')
<xarray.DataArray (z: 4, xy: 3)> array([[ 1.76405235, 1.31429786, -0.38737653], [ 0.40015721, -0.42780143, 0.28635294], [ 0.97873798, 0.69697583, 0.44051582], [ 2.2408932 , 0.09115856, -0.04732908]]) Coordinates: * z (z) int64 0 1 2 3 * xy (xy) object 'a' 'b' 'c'
# Iterating yields (label, group) pairs; in the recorded output each group
# has dims (z, stacked_x_y), with stacked lengths 1, 2 and 3 for 'a', 'b'
# and 'c'.
list(b.groupby('xy'))
[('a', <xarray.DataArray (z: 4, stacked_x_y: 1)> array([[ 1.76405235], [ 0.40015721], [ 0.97873798], [ 2.2408932 ]]) Coordinates: xy (stacked_x_y) <U1 'a' * z (z) int64 0 1 2 3 * stacked_x_y (stacked_x_y) object (0, 0)), ('b', <xarray.DataArray (z: 4, stacked_x_y: 2)> array([[ 1.86755799, 0.76103773], [-0.97727788, 0.12167502], [ 0.95008842, 0.44386323], [-0.15135721, 0.33367433]]) Coordinates: xy (stacked_x_y) <U1 'b' 'b' * z (z) int64 0 1 2 3 * stacked_x_y (stacked_x_y) object (0, 1) (1, 0)), ('c', <xarray.DataArray (z: 4, stacked_x_y: 3)> array([[-0.10321885, 1.49407907, -2.55298982], [ 0.4105985 , -0.20515826, 0.6536186 ], [ 0.14404357, 0.3130677 , 0.8644362 ], [ 1.45427351, -0.85409574, -0.74216502]]) Coordinates: xy (stacked_x_y) <U1 'c' 'c' 'c' * z (z) int64 0 1 2 3 * stacked_x_y (stacked_x_y) object (0, 2) (1, 1) (1, 2))]
# Manually unstack each group back onto (x, y). In the recorded output every
# group becomes a full (z, x, y) array, with NaN at positions outside the
# group and 'xy' turned into an object coordinate containing NaN.
[(k, v.unstack('stacked_x_y')) for k, v in b.groupby('xy')]
[('a', <xarray.DataArray (z: 4, x: 2, y: 3)> array([[[ 1.76405235, nan, nan], [ nan, nan, nan]], [[ 0.40015721, nan, nan], [ nan, nan, nan]], [[ 0.97873798, nan, nan], [ nan, nan, nan]], [[ 2.2408932 , nan, nan], [ nan, nan, nan]]]) Coordinates: xy (x, y) object 'a' nan nan nan nan nan * z (z) int64 0 1 2 3 * x (x) int64 0 1 * y (y) int64 0 1 2), ('b', <xarray.DataArray (z: 4, x: 2, y: 3)> array([[[ nan, 1.86755799, nan], [ 0.76103773, nan, nan]], [[ nan, -0.97727788, nan], [ 0.12167502, nan, nan]], [[ nan, 0.95008842, nan], [ 0.44386323, nan, nan]], [[ nan, -0.15135721, nan], [ 0.33367433, nan, nan]]]) Coordinates: xy (x, y) object nan 'b' nan 'b' nan nan * z (z) int64 0 1 2 3 * x (x) int64 0 1 * y (y) int64 0 1 2), ('c', <xarray.DataArray (z: 4, x: 2, y: 3)> array([[[ nan, nan, -0.10321885], [ nan, 1.49407907, -2.55298982]], [[ nan, nan, 0.4105985 ], [ nan, -0.20515826, 0.6536186 ]], [[ nan, nan, 0.14404357], [ nan, 0.3130677 , 0.8644362 ]], [[ nan, nan, 1.45427351], [ nan, -0.85409574, -0.74216502]]]) Coordinates: xy (x, y) object nan nan 'c' nan 'c' 'c' * z (z) int64 0 1 2 3 * x (x) int64 0 1 * y (y) int64 0 1 2)]