Custom JSON serialization in IPython
This is an illustration of what would be needed for objects to define their own custom JSON serialization (to strings) in IPython.
IPython currently lets objects define their own serialization to JSONable types, but not directly to JSON strings, at least not without potentially expensive double-encoding.
First, we define a simple JSON serialization function that allows this functionality via `_raw_json_()` methods:
import json
from jupyter_client.jsonutil import date_default
def json_strings(obj, default=date_default):
    """Yield string fragments that should result in JSON after joining.

    Yielding fragments allows objects that define `_raw_json_()`
    to return their own JSON representation.

    Parameters
    ----------
    obj : object
        The value to serialize. Lists, tuples and dicts are walked
        recursively; anything else is handed to `json.dumps`.
    default : callable
        Passed to `json.dumps` as its `default=` hook for leaf values that
        are not natively serializable. It is forwarded to every nested call,
        so it also applies to values inside containers.

    Yields
    ------
    str
        Fragments whose concatenation is a JSON document.

    Raises
    ------
    TypeError
        If a dict key is not a str, int, float, bool or None (the key types
        `json.dumps` accepts), or if a leaf value cannot be serialized.
    """
    if hasattr(obj, '_raw_json_'):
        # If an object has a `_raw_json_` method, call it instead
        # _raw_json_ should return a JSON-serialized string
        yield obj._raw_json_()
    elif isinstance(obj, (list, tuple)):
        yield '['
        for index, item in enumerate(obj):
            if index:
                yield ','
            # Forward `default` so nested leaves use the caller's hook.
            yield from json_strings(item, default)
        yield ']'
    elif isinstance(obj, dict):
        yield '{'
        for index, (key, value) in enumerate(obj.items()):
            if index:
                yield ','
            if isinstance(key, str):
                yield json.dumps(key)
            elif key is None or isinstance(key, (bool, int, float)):
                # JSON object keys must be strings. Encode the scalar first
                # (1 -> '1', True -> 'true', None -> 'null') and then quote
                # that text, which is the coercion json.dumps applies to keys.
                yield json.dumps(json.dumps(key))
            else:
                raise TypeError(
                    'keys must be str, int, float, bool or None, not %s'
                    % type(key).__name__
                )
            yield ': '
            yield from json_strings(value, default)
        yield '}'
    else:
        yield json.dumps(obj, default=default)
def my_dumps(obj):
    """Return `obj` as UTF-8 encoded JSON bytes.

    The text is assembled from the fragments produced by `json_strings`,
    so objects can define their own JSON serialization.
    """
    fragments = json_strings(obj)
    text = ''.join(fragments)
    return text.encode('utf8')
# Serialize a nested structure: a dict holding a list that contains a tuple.
my_dumps({'a': [5, 'b', (1, 'x')]})
b'{"a": [5,"b",[1,"x"]]}'
Next, we create a toy object that defines `_raw_json_`, which returns a JSON string, and an `_ipython_display_` method that displays the object using it.
from IPython.display import display
class MyObject:
    """Toy object that serializes itself to a JSON string.

    Parameters
    ----------
    value : object
        The payload stored on the instance; it must be serializable by
        `json.dumps` for `_raw_json_` to succeed.
    """

    def __init__(self, value):
        self.value = value

    def _raw_json_(self):
        """I know how to JSON-serialize myself"""
        return json.dumps({
            'classname': self.__class__.__name__,
            'value': self.value,
        })

    def _ipython_display_(self):
        """Publish this instance under a custom MIME type as raw display data."""
        # Pass `self`, not a module-level variable, so that every instance
        # displays itself regardless of the name it is bound to.
        display({'application/myobject+json': self}, raw=True)
# Create a toy instance and serialize it with the custom serializer.
obj = MyObject(10)
my_dumps(obj)
b'{"value": 10, "classname": "MyObject"}'
But this doesn't get through IPython, because our JSON serialization isn't in use:
# Displaying the object fails here: json_clean raises ValueError (see the traceback that follows).
obj
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /Users/minrk/dev/ip/ipython/IPython/core/formatters.py in __call__(self, obj) 874 method = get_real_method(obj, self.print_method) 875 if method is not None: --> 876 method() 877 return True 878 <ipython-input-3-f02056052654> in _ipython_display_(self) 16 17 def _ipython_display_(self): ---> 18 display({'application/myobject+json': obj}, raw=True) /Users/minrk/dev/ip/ipython/IPython/core/display.py in display(include, exclude, metadata, transient, display_id, *objs, **kwargs) 201 for obj in objs: 202 if raw: --> 203 publish_display_data(data=obj, metadata=metadata, **kwargs) 204 else: 205 format_dict, md_dict = format(obj, include=include, exclude=exclude) /Users/minrk/dev/ip/ipython/IPython/core/display.py in publish_display_data(data, metadata, source, transient, **kwargs) 131 data=data, 132 metadata=metadata, --> 133 **kwargs 134 ) 135 /Users/minrk/dev/ip/kernel/ipykernel/zmqshell.py in publish(self, data, metadata, source, transient, update) 127 # hooks before potentially sending. 128 msg = self.session.msg( --> 129 msg_type, json_clean(content), 130 parent=self.parent_header 131 ) /Users/minrk/dev/ip/kernel/ipykernel/jsonutil.py in json_clean(obj) 165 out = {} 166 for k,v in iteritems(obj): --> 167 out[unicode_type(k)] = json_clean(v) 168 return out 169 if isinstance(obj, datetime): /Users/minrk/dev/ip/kernel/ipykernel/jsonutil.py in json_clean(obj) 165 out = {} 166 for k,v in iteritems(obj): --> 167 out[unicode_type(k)] = json_clean(v) 168 return out 169 if isinstance(obj, datetime): /Users/minrk/dev/ip/kernel/ipykernel/jsonutil.py in json_clean(obj) 171 172 # we don't understand it, it's probably an unserializable object --> 173 raise ValueError("Can't clean for JSON: %r" % obj) ValueError: Can't clean for JSON: <__main__.MyObject object at 0x10b770e80>
<__main__.MyObject at 0x10b770e80>
To get something like this to work, we will need a public API that allows objects to register their own JSON serializers and ensures that custom objects arrive at the custom serializer.
The first step is to register `my_dumps` as the serializer for all messages. This will allow objects that have a `_raw_json_` method to be present in the message object and get serialized by their own definition:
# Swap the kernel session's pack function for my_dumps so messages are serialized by it.
session = get_ipython().kernel.session
session.pack = my_dumps
But we run into a problem: IPython checks types before passing things to JSON. For now, we can disable the type checking in `json_clean`, which will need to be modified to make this work as an official API:
# disable json_clean: replace it with an identity function that returns
# its argument unchanged, so no type checking happens before serialization
from ipykernel import jsonutil
jsonutil.json_clean = lambda obj: obj
Now we can display the object, which will show up in the document:
# Display the object again now that my_dumps is the packer and json_clean is a no-op.
obj
# Load the saved notebook file and inspect the outputs stored on its
# second-to-last cell.
import nbformat
nb = nbformat.read('custom-json.ipynb', as_version=4)
nb.cells[-2].outputs
[{'data': {'application/myobject+json': {'classname': 'MyObject', 'value': 10}}, 'metadata': {}, 'output_type': 'display_data'}]