#!/usr/bin/env python
# coding: utf-8

# # Custom serialization in IPython parallel
#
# IPython parallel uses a `canning` module to prepare objects for serialization.

# In[3]:


from ipyparallel.serialize import canning
import numpy as np


# Canning is a no-op for simple or unrecognized types:

# In[45]:


canning.can(5)


# In[49]:


class C:
    pass


c = C()
canning.can(c)


# Canning provides special CannedObject wrappers for custom serialization of particular types:

# In[65]:


A = np.ones(2048)
cannedA = canning.can(A)
cannedA


# This accomplishes two goals:
#
# 1. enable serialization of a few things pickle doesn't support (moot if you use dill/cloudpickle)
# 2. allow optimized zero-copy serialization of objects we recognize (e.g. numpy arrays)
#
# The main optimization feature of a CannedObject is that it has a `.buffers`
# attribute, which is a list of buffer-API-providing objects (`bytes`, `memoryview`s):

# In[66]:


cannedA.buffers


# A serialized object is always a **list** of buffers. This allows objects to have an arbitrary number of discontiguous buffers as part of their serialization, which can be used without concatenating them during serialization\*.
#
# Serialization involves:
#
# 1. detach buffers, if any, from CannedObjects
# 2. add buffers to message frames
# 3. pickle CannedObject without buffers
#
# \*note: we don't do this for discontiguous numpy arrays, but we could.

# In[67]:


from ipyparallel.serialize import serialize_object

bufs = serialize_object(A)
bufs


# In[68]:


bufs[1].nbytes


# See how the second buffer is the same as cannedA.buffers[0], and is in fact just a *view* on the original array's memory:

# In[70]:


memoryview(A)


# Serializing an object that doesn't get canned is just pickling,
# but always to a *list* of length 1 holding the pickled bytes:

# In[60]:


serialize_object(5)


# The pickled object has had its buffers removed:

# In[71]:


import pickle

# bufs[0] was produced by serialize_object() a few cells above, so it is
# trusted here; never call pickle.loads on data from an untrusted source.
cannedA2 = pickle.loads(bufs[0])
cannedA2


# In[72]:


cannedA2.buffers


# Deserialization involves restoring the buffers, and then calling the CannedObject's `get_object` method:

# In[63]:


cannedA2.buffers = bufs[1:]


# In[64]:


cannedA2.get_object()


# Since we can have multiple objects in one message,
# deserialize returns the unpacked object and the remaining buffers:

# In[74]:


from ipyparallel.serialize import deserialize_object

obj, remaining = deserialize_object(bufs)
obj


# A message can contain multiple objects, which we can chain since every serialized object is a list of blobs:

# In[78]:


from itertools import chain

objects = [5, 'hi', A]
# chain.from_iterable flattens the per-object buffer lists into one message
# without unpacking the generator into positional arguments first.
message = list(chain.from_iterable(serialize_object(item) for item in objects))
message


# In[79]:


# Peel one object at a time off the front of the message until no buffers
# are left; deserialize_object hands back the unconsumed tail each time.
remaining = message
received = []
while remaining:
    obj, remaining = deserialize_object(remaining)
    received.append(obj)
received