xnd is a module that implements a container type for mapping all Python values relevant to scientific computing directly to memory. Its typed-memory features are a superset of those found in similar libraries such as NumPy and Apache Arrow.
from xnd import xnd
import numpy as np
import sys
# Print the version string of the running Python interpreter.
print(f'Python {sys.version}')
Python 3.7.3 | packaged by conda-forge | (default, Mar 27 2019, 23:01:00) [GCC 7.3.0]
xnd([1, 2, 3, 4, 5])  # xnd: the type '5 * int64' is inferred from the values
xnd([1, 2, 3, 4, 5], type='5 * int64')
np.array([1, 2, 3, 4, 5])  # numpy equivalent; its repr shows only the values
array([1, 2, 3, 4, 5])
xnd([[1., 1.5], [-1.5, 1.]])  # xnd: nested lists give the type '2 * 2 * float64'
xnd([[1.0, 1.5], [-1.5, 1.0]], type='2 * 2 * float64')
np.array([[1, 1.5], [-1.5, 1]])  # numpy: the mixed ints and floats all come back as floats
array([[ 1. , 1.5], [-1.5, 1. ]])
Some differences from NumPy are already visible at this level: for example, xnd includes the array's dimensions in the type.
xnd(['this', 'is', 'a', 'test', 'notebook'])  # xnd: Python strings get the type '5 * string'
xnd(['this', 'is', 'a', 'test', 'notebook'], type='5 * string')
np.array(['this', 'is', 'a', 'test', 'notebook'])  # numpy: the strings get dtype '<U8'
array(['this', 'is', 'a', 'test', 'notebook'], dtype='<U8')
xnd([[1, 5, 2], [1], [7, 9, 10, 20, 13]])  # xnd: rows of different lengths give 'var * var * int64'
xnd([[1, 5, 2], [1], [7, 9, 10, 20, 13]], type='var * var * int64')
# numpy: rows of different lengths can only be held as Python list objects.
# dtype=object is spelled out because NumPy >= 1.24 raises ValueError when a
# ragged nested sequence is passed without it (older versions only warned).
np.array([[1, 5, 2], [1], [7, 9, 10, 20, 13]], dtype=object)
array([list([1, 5, 2]), list([1]), list([7, 9, 10, 20, 13])], dtype=object)
# The seven categories the values are allowed to take.
levels = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet']
# Nine values: every level once, in order, followed by 'red' and 'green' again.
rainbow = xnd(levels + ['red', 'green'], levels=levels)
rainbow
xnd(['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet', 'red', 'green'], type='9 * categorical('red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet')')
rainbow.type  # the container's type, returned as an ndt object
ndt("9 * categorical('red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet')")
rainbow.value  # the contents as plain Python objects (here a list of str)
['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet', 'red', 'green']
# One dict per talk; every dict has the same three keys in the same order.
data = [
    {
        'title': 'Introduction to Digital Signal Processing',
        'speaker': 'Allen Downey',
        'room': 10,
    },
    {
        'title': 'Making Art with Python',
        'speaker': 'Emily Xie',
        'room': 16,
    },
    {
        'title': 'Foundations of Numerical Computing in Python',
        'speaker': 'Scott Sanderson',
        'room': 20,
    },
    {
        'title': 'Exploratory Data Visualization with Vega, Vega-Lite, and Altair',
        'speaker': 'Jake VanderPlas',
        'room': 21,
    },
]
# Each dict becomes one record; the inferred type is
# '4 * {title : string, speaker : string, room : int64}'.
x = xnd(data)
x
xnd([{'title': 'Introduction to Digital Signal Processing', 'speaker': 'Allen Downey', 'room': 10}, {'title': 'Making Art with Python', 'speaker': 'Emily Xie', 'room': 16}, {'title': 'Foundations of Numerical Computing in Python', 'speaker': 'Scott Sanderson', 'room': 20}, {'title': 'Exploratory Data Visualization with Vega, Vega-Lite, and Altair', 'speaker': 'Jake VanderPlas', 'room': 21}], type='4 * {title : string, speaker : string, room : int64}')
x[0]  # indexing the outer dimension returns one record, still wrapped in xnd
xnd({'title': 'Introduction to Digital Signal Processing', 'speaker': 'Allen Downey', 'room': 10}, type='{title : string, speaker : string, room : int64}')
x[1, 0]  # second record, first field ('title')
xnd('Making Art with Python', type='string')
# Rebind `data` to a 3 x 4 x 5 array of random floats drawn from [0, 1).
data = np.random.random((3, 4, 5))
xnd.from_buffer(data)  # the resulting type, '3 * 4 * 5 * float64', mirrors the array
xnd([[[0.11817033651791786, 0.6839453361584004, 0.2536092535496538, 0.5055059760011743, 0.5067576432750356], [0.2407058584752727, 0.917665583250738, 0.8704926424782194, 0.7343222993465005, 0.5910180874913052], [0.5784732883146575, 0.24073382655422526, 0.4994625009464535, 0.9768528225906218, 0.6219891558285271], [0.6283109904322957, 0.6100928008157002, 0.09671910142133089, 0.9730523719507609, 0.40202043302233015]], [[0.8123631051139746, 0.2764564198782935, 0.5577962501643701, 0.34262584837570087, 0.6934284288379087], [0.8883322974424327, 0.34710528373845473, 0.5859331417671697, 0.9287901094069656, 0.7268583870243034], [0.717240914249425, 0.023725909478674367, 0.8919150596382612, 0.9975249827738114, 0.6919546876794813], [0.11544921395164576, 0.8551688052855212, 0.9800429375894515, 0.0999515741040673, 0.0964583576381497]], [[0.24356407513913303, 0.5536004544475481, 0.3720522474923057, 0.630410357936431, 0.849146997258151], [0.299330177070638, 0.4173690236773434, 0.8213706636491637, 0.268687077192604, 0.059057608602548384], [0.6632819719439912, 0.09713680970610383, 0.1583002524139403, 0.4984218536455166, 0.6383858635378117], [0.4485071912623374, 0.2522084331523653, 0.7343169569711295, 0.6571516222532012, 0.42948442645729346]]], type='3 * 4 * 5 * float64')
# Two records, each with a 6-byte string field 'foo' and a nested record
# 'bar' holding the integer fields 'A' and 'B'.
record_array = np.rec.array(
    [
        ('Hello', (1, 2)),
        ('World', (3, 4)),
    ],
    dtype=[
        ('foo', 'S6'),
        ('bar', [('A', int), ('B', int)]),
    ],
)
xnd.from_buffer(record_array)  # nested dtype becomes a nested record; 'S6' becomes fixed_bytes(size=6)
xnd([{'foo': b'Hello\x00', 'bar': {'A': 1, 'B': 2}}, {'foo': b'World\x00', 'bar': {'A': 3, 'B': 4}}], type='2 * {foo : fixed_bytes(size=6), bar : {A : int64, B : int64}}')
Creating an xnd container with an explicit type has significant performance advantages for large arrays, because xnd does not have to infer the type from each element. In the timings below, passing the type roughly halves the construction time.
# Number of elements in the benchmark input (one million).
N = 10 ** 6
# A plain Python list holding N copies of the int 1.
a_list = N * [1]
%%timeit
# Baseline: no type is given, so xnd has to infer it from the list.
xnd(a_list)
22.5 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
%%timeit
types = f'{N} * int64'
xnd(a_list, type=types)
11.3 ms ± 266 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)