To have a simple, compact and reversible solution, the interface uses the JSON-NTV format (Named and Typed Value) - which integrates the notion of type - and its JSON-TAB variation for tabular data.
This solution makes it possible to include a large number of types (not necessarily pandas dtypes).
JSON-TAB is also applicable for multidimensional data (e.g. Xarray).
This notebook uses examples to present some key points:
(active links in Jupyter Notebook or nbviewer)
This notebook can also be viewed on nbviewer.
import math
import json
from pprint import pprint
import pandas as pd
import ntv_pandas as npd
from shapely.geometry import Point, Polygon
from json_ntv import Ntv
from datetime import date, datetime, time
# Sample table mixing plain pandas columns with NTV-typed columns
# (the NTV type is the part of the column name after '::').
tab_data = {
    'index': [100, 200, 300, 400, 500, 600],
    # the same three dates appear twice, stored as a categorical column
    'dates::date': pd.Series(
        [date(1964, 1, 1), date(1985, 2, 5), date(2022, 1, 21)] * 2,
        dtype='category',
    ),
    'valid': [True, False, True, True, False, True],
    'value32': pd.Series([12, 12, 22, 22, 32, 32], dtype='int32'),
    '::month': [1, 2, 1, 1, 2, 1],
    'coord::point': pd.Series([
        Point(1, 2), Point(3, 4), Point(5, 6),
        Point(7, 8), Point(3, 4), Point(5, 6),
    ]),
    'names': pd.Series(
        ['john', 'eric', 'judith', 'mila', 'hector', 'maria'],
        dtype='string',
    ),
    # scalar value: it shows up on every row of the resulting table
    'unique::year': 2021,
}
df = pd.DataFrame(tab_data)
df = df.set_index('index')
df
dates::date | valid | value32 | ::month | coord::point | names | unique::year | |
---|---|---|---|---|---|---|---|
index | |||||||
100 | 1964-01-01 | True | 12 | 1 | POINT (1 2) | john | 2021 |
200 | 1985-02-05 | False | 12 | 2 | POINT (3 4) | eric | 2021 |
300 | 2022-01-21 | True | 22 | 1 | POINT (5 6) | judith | 2021 |
400 | 1964-01-01 | True | 22 | 1 | POINT (7 8) | mila | 2021 |
500 | 1985-02-05 | False | 32 | 2 | POINT (3 4) | hector | 2021 |
600 | 2022-01-21 | True | 32 | 1 | POINT (5 6) | maria | 2021 |
# Convert the DataFrame to its JSON-NTV value through the `npd` accessor
# and pretty-print the result.
df_to_json = df.npd.to_json()
pprint(df_to_json, width=120)
{':tab': {'::month': [1, 2, 1, 1, 2, 1], 'coord::point': [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [3.0, 4.0], [5.0, 6.0]], 'dates': [{'::date': ['1964-01-01', '1985-02-05', '2022-01-21']}, [1]], 'index': [100, 200, 300, 400, 500, 600], 'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'], 'unique:year': 2021, 'valid': [True, False, True, True, False, True], 'value32::int32': [12, 12, 22, 22, 32, 32]}}
# Rebuild a DataFrame from the JSON-NTV value and compare it with the
# DataFrame it was generated from.
df_from_json = npd.read_json(df_to_json)
print('df created from JSON-NTV is equal to initial df ? ', df_from_json.equals(df))
df_from_json
df created from JSON-NTV is equal to initial df ? True
dates::date | valid | value32 | ::month | coord::point | names | unique::year | |
---|---|---|---|---|---|---|---|
100 | 1964-01-01 | True | 12 | 1 | POINT (1 2) | john | 2021 |
200 | 1985-02-05 | False | 12 | 2 | POINT (3 4) | eric | 2021 |
300 | 2022-01-21 | True | 22 | 1 | POINT (5 6) | judith | 2021 |
400 | 1964-01-01 | True | 22 | 1 | POINT (7 8) | mila | 2021 |
500 | 1985-02-05 | False | 32 | 2 | POINT (3 4) | hector | 2021 |
600 | 2022-01-21 | True | 32 | 1 | POINT (5 6) | maria | 2021 |
# Field without an explicit NTV type: a plain list of integers named 'value'.
field_data = {'value': [1, 2, 3]}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
# JSON -> Series -> JSON: checks that the serialized JSON text matches the input.
print('\nis pandas translation reversible ? ', json.dumps(sr.npd.to_json()) == json.dumps({':field': field_data}))
pandas object : 0 1 1 2 2 3 Name: value, dtype: int64 Json representation : {':field': {'value': [1, 2, 3]}} is Json translation reversible ? True is pandas translation reversible ? True
# Field typed 'datetime': the values are given as date-only strings.
# (The recorded output shows a datetime64[ns] Series named 'dates'.)
field_data = {'dates::datetime': ['1964-01-01', '1985-02-05', '2022-01-21']}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
pandas object : 0 1964-01-01 1 1985-02-05 2 2022-01-21 Name: dates, dtype: datetime64[ns] Json representation : {':field': {'dates::datetime': ['1964-01-01T00:00:00.000', '1985-02-05T00:00:00.000', '2022-01-21T00:00:00.000']}} is Json translation reversible ? True
# Field typed 'date'.
# (The recorded output shows an object-dtype Series named 'dates::date'.)
field_data = {'dates::date': ['1964-01-01', '1985-02-05', '2022-01-21']}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
# JSON -> Series -> JSON: checks that the serialized JSON text matches the input.
print('\nis pandas translation reversible ? ', json.dumps(sr.npd.to_json()) == json.dumps({':field': field_data}))
pandas object : 0 1964-01-01 1 1985-02-05 2 2022-01-21 Name: dates::date, dtype: object Json representation : {':field': {'dates::date': ['1964-01-01', '1985-02-05', '2022-01-21']}} is Json translation reversible ? True is pandas translation reversible ? True
# Field typed 'point': each value is a coordinate pair.
# (The recorded output shows POINT objects in an object-dtype Series.)
field_data = {'coord::point': [[1,2], [3,4], [5,6]]}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
pandas object : 0 POINT (1 2) 1 POINT (3 4) 2 POINT (5 6) Name: coord::point, dtype: object Json representation : {':field': {'coord::point': [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]}} is Json translation reversible ? True
# Categorical field: the value is a pair [distinct values, positions].
# (The recorded output shows the category Series 1, 2, 2, 1.)
field_data = {"integer": [[1, 2], [0, 1, 1, 0]]}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
# JSON -> Series -> JSON: checks that the serialized JSON text matches the input.
print('\nis pandas translation reversible ? ', json.dumps(sr.npd.to_json()) == json.dumps({':field': field_data}))
pandas object : 0 1 1 2 2 2 3 1 Name: integer, dtype: category Categories (2, Int64): [1, 2] Json representation : {':field': {'integer': [[1, 2], [0, 1, 1, 0]]}} is Json translation reversible ? True is pandas translation reversible ? True
# Categorical field whose distinct values carry the 'date' NTV type,
# followed by the list of positions.
# (The recorded output shows a category Series named 'dates::date'.)
field_data = {'dates': [{'::date': ['1964-01-01', '1985-02-05', '2022-01-21']}, [0, 1, 0, 2]]}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
pandas object : 0 1964-01-01 1 1985-02-05 2 1964-01-01 3 2022-01-21 Name: dates::date, dtype: category Categories (3, object): [1964-01-01, 1985-02-05, 2022-01-21] Json representation : {':field': {'dates': [{'::date': ['1964-01-01', '1985-02-05', '2022-01-21']}, [0, 1, 0, 2]]}} is Json translation reversible ? True
# Categorical field whose distinct values are typed 'array' (lists).
# (The recorded output shows the lists held as tuples in the category Series.)
field_data = {'test_array': [{'::array': [[1,2], [3,4], [5,6]]}, [0, 1, 0, 2]]}
sr = npd.read_json({':field': field_data})
# the pandas dtype conforms to the NTV type
print('pandas object :\n' + str(sr))
print('\nJson representation : \n ', sr.npd.to_json())
# Series -> JSON -> Series: checks that an equal Series comes back.
print('\nis Json translation reversible ? ', sr.equals(npd.read_json(sr.npd.to_json())))
# JSON -> Series -> JSON: checks that the serialized JSON text matches the input.
print('\nis pandas translation reversible ? ', json.dumps(sr.npd.to_json()) == json.dumps({':field': field_data}))
pandas object : 0 (1, 2) 1 (3, 4) 2 (1, 2) 3 (5, 6) Name: test_array::array, dtype: category Categories (3, object): [(1, 2), (3, 4), (5, 6)] Json representation : {':field': {'test_array': [{'::array': [(1, 2), (3, 4), (5, 6)]}, [0, 1, 0, 2]]}} is Json translation reversible ? True is pandas translation reversible ? True
# DataFrame with two string columns and no explicit dtype
# (the recorded output shows object dtype for both).
df = pd.DataFrame({"A": list("abca"), "B": list("bccd")})
print('pandas dtype :\n' + str(df.dtypes))
print('\npandas object :\n' + str(df))
print('\nJson representation : \n ', df.npd.to_json())
# DataFrame -> JSON -> DataFrame: checks that an equal DataFrame comes back.
print('\nis Json translation reversible ? ', df.equals(npd.read_json(df.npd.to_json())))
pandas dtype : A object B object dtype: object pandas object : A B 0 a b 1 b c 2 c c 3 a d Json representation : {':tab': {'index': [0, 1, 2, 3], 'A': ['a', 'b', 'c', 'a'], 'B': ['b', 'c', 'c', 'd']}} is Json translation reversible ? True
# JSON-TAB value written by hand: typed columns use 'name::type' keys,
# and 'unique' holds a single scalar instead of a list.
tab_data = {'index': [100, 200, 300, 400, 500, 600],
'dates::date': ['1964-01-01', '1985-02-05', '2022-01-21', '1964-01-01', '1985-02-05', '2022-01-21'],
'value': [10, 10, 20, 20, 30, 30],
'value32::int32': [12, 12, 22, 22, 32, 32],
'res': [10, 20, 30, 10, 20, 30],
'coord::point': [[1,2], [3,4], [5,6], [7,8], [3,4], [5,6]],
'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'],
'unique': True }
# Build the DataFrame from the JSON-TAB value.
df = npd.read_json({':tab': tab_data})
print('pandas dtype :\n' + str(df.dtypes))
print('\npandas object :\n' + str(df))
print('\nJson representation :')
pprint(df.npd.to_json(), width=140)
# DataFrame -> JSON -> DataFrame: checks that an equal DataFrame comes back.
print('\nis Json translation reversible ? ', df.equals(npd.read_json(df.npd.to_json())))
pandas dtype : dates::date object value int64 value32 int32 res int64 coord::point object names string[python] unique bool dtype: object pandas object : dates::date value value32 res coord::point names unique 100 1964-01-01 10 12 10 POINT (1 2) john True 200 1985-02-05 10 12 20 POINT (3 4) eric True 300 2022-01-21 20 22 30 POINT (5 6) judith True 400 1964-01-01 20 22 10 POINT (7 8) mila True 500 1985-02-05 30 32 20 POINT (3 4) hector True 600 2022-01-21 30 32 30 POINT (5 6) maria True Json representation : {':tab': {'coord::point': [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [3.0, 4.0], [5.0, 6.0]], 'dates::date': ['1964-01-01', '1985-02-05', '2022-01-21', '1964-01-01', '1985-02-05', '2022-01-21'], 'index': [100, 200, 300, 400, 500, 600], 'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'], 'res': [10, 20, 30, 10, 20, 30], 'unique': True, 'value': [10, 10, 20, 20, 30, 30], 'value32::int32': [12, 12, 22, 22, 32, 32]}} is Json translation reversible ? True
# Same table as the previous cell, but some values are wrapped in a
# single-key dict ({'valid?': 30}, {'same as 2nd point': [3,4]}) and 'res'
# is given as a dict of named values.
tab_data = {'index': [100, 200, 300, 400, 500, 600],
'dates::date': ['1964-01-01', '1985-02-05', '2022-01-21', '1964-01-01', '1985-02-05', '2022-01-21'],
'value': [10, 10, 20, 20, {'valid?': 30}, 30],
'value32::int32': [12, 12, 22, 22, 32, 32],
'res': {'res1': 10, 'res2': 20, 'res3': 30, 'res4': 10, 'res5': 20, 'res6': 30},
'coord::point': [[1,2], [3,4], [5,6], [7,8], {'same as 2nd point': [3,4]}, [5,6]],
'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'],
'unique': True }
# Read with annotated=True, then compare with `df` from the previous cell
# (the recorded output reports the two DataFrames as identical).
df2 = npd.read_json({':tab': tab_data}, annotated=True)
print('is DataFrame identical ? ', df.equals(df2))
is DataFrame identical ? True
# Same two string columns as before, this time created with the
# 'category' dtype.
df = pd.DataFrame({"A": list("abca"), "B": list("bccd")}, dtype="category")
print('pandas dtype :\n' + str(df.dtypes))
print('\npandas object :\n' + str(df))
print('\nJson representation : \n ', df.npd.to_json())
# DataFrame -> JSON -> DataFrame: checks that an equal DataFrame comes back.
print('\nis Json translation reversible ? ', df.equals(npd.read_json(df.npd.to_json())))
pandas dtype : A category B category dtype: object pandas object : A B 0 a b 1 b c 2 c c 3 a d Json representation : {':tab': {'index': [0, 1, 2, 3], 'A': [['a', 'b', 'c'], [0, 1, 2, 0]], 'B': [['b', 'c', 'd'], [0, 1, 1, 2]]}} is Json translation reversible ? True
# JSON-TAB value combining the previous features: 'dates' and 'value' are
# given as [distinct values, positions] pairs, some values are wrapped in a
# single-key dict, and 'unique:boolean' is a single typed scalar.
tab_data = {'index': [100, 200, 300, 400, 500, 600],
'dates': [{'::date': ['1964-01-01', '1985-02-05', '2022-01-21']}, [0, 1, 2, 0, 1, 2]],
'value': [[10, 20, {'valid?': 30}], [0, 0, 1, 1, 2, 2]],
'value32::int32': [12, 12, 22, 22, 32, 32],
'res': {'res1': 10, 'res2': 20, 'res3': 30, 'res4': 10, 'res5': 20, 'res6': 30},
'coord::point': [[1,2], [3,4], [5,6], [7,8], {'same as 2nd point': [3,4]}, [5,6]],
'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'],
'unique:boolean': True }
# Read with annotated=True.
df = npd.read_json({':tab': tab_data}, annotated=True)
print('pandas dtype :\n' + str(df.dtypes))
print('\npandas object :\n' + str(df))
print('\nJson representation :')
pprint(df.npd.to_json(), width=140)
# DataFrame -> JSON -> DataFrame: checks that an equal DataFrame comes back.
print('\nis Json translation reversible ? ', df.equals(npd.read_json(df.npd.to_json())))
pandas dtype : dates::date category value category value32 int32 res int64 coord::point object names string[python] unique::boolean bool dtype: object pandas object : dates::date value value32 res coord::point names unique::boolean 100 1964-01-01 10 12 10 POINT (1 2) john True 200 1985-02-05 10 12 20 POINT (3 4) eric True 300 2022-01-21 20 22 30 POINT (5 6) judith True 400 1964-01-01 20 22 10 POINT (7 8) mila True 500 1985-02-05 30 32 20 POINT (3 4) hector True 600 2022-01-21 30 32 30 POINT (5 6) maria True Json representation : {':tab': {'coord::point': [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [3.0, 4.0], [5.0, 6.0]], 'dates': [{'::date': ['1964-01-01', '1985-02-05', '2022-01-21']}, [1]], 'index': [100, 200, 300, 400, 500, 600], 'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'], 'res': [10, 20, 30, 10, 20, 30], 'unique:boolean': True, 'value': [[10, 20, 30], [2]], 'value32::int32': [12, 12, 22, 22, 32, 32]}} is Json translation reversible ? True
# Build the table column by column from pandas Series.
# `index` is created without a name, so its `name` attribute is None.
index = pd.Series([100, 200, 300, 400, 500, 600])
dates = pd.Series(name='dates::date', data=[date(1964, 1, 1), date(1985, 2, 5), date(2022, 1, 21), date(1964, 1, 1),
date(1985, 2, 5), date(2022, 1, 21)], dtype='object').astype('category')
value = pd.Series(name='value', data=[10,10,20,20,30,30], dtype='Int64').astype('category') #alias mandatory
value32 = pd.Series(name='value32', data=[12, 12, 22, 22, 32, 32], dtype='int32')
coord = pd.Series(name='coord::point', data=[Point(1,2), Point(3,4), Point(5,6), Point(7,8), Point(3,4), Point(5,6)])
names = pd.Series(name='names', data=['john', 'eric', 'judith', 'mila', 'hector', 'maria'], dtype='string')
unique = pd.Series(name='unique', data=[True, True, True, True, True, True])
# Columns are keyed by Series name; the unnamed `index` Series is keyed by
# None, and set_index(None) turns that column into the index.
df = pd.DataFrame({ser.name: ser for ser in [index, dates, value, value32, coord, names, unique]}).set_index(None)
print('pandas dtype :\n' + str(df.dtypes))
print('\npandas object :\n' + str(df))
print('\nJson representation :')
pprint(df.npd.to_json(), width=140)
# DataFrame -> JSON -> DataFrame: checks that an equal DataFrame comes back.
print('\nis Json translation reversible ? ', df.equals(npd.read_json(df.npd.to_json())))
pandas dtype : dates::date category value category value32 int32 coord::point object names string[python] unique bool dtype: object pandas object : dates::date value value32 coord::point names unique 100 1964-01-01 10 12 POINT (1 2) john True 200 1985-02-05 10 12 POINT (3 4) eric True 300 2022-01-21 20 22 POINT (5 6) judith True 400 1964-01-01 20 22 POINT (7 8) mila True 500 1985-02-05 30 32 POINT (3 4) hector True 600 2022-01-21 30 32 POINT (5 6) maria True Json representation : {':tab': {'coord::point': [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [3.0, 4.0], [5.0, 6.0]], 'dates': [{'::date': ['1964-01-01', '1985-02-05', '2022-01-21']}, [1]], 'index': [100, 200, 300, 400, 500, 600], 'names::string': ['john', 'eric', 'judith', 'mila', 'hector', 'maria'], 'unique': True, 'value': [[10, 20, 30], [2]], 'value32::int32': [12, 12, 22, 22, 32, 32]}} is Json translation reversible ? True
# Price list: each of the four products is listed for both pack sizes.
data = {
    "quantity": ["1 kg"] * 4 + ["10 kg"] * 4,
    "product": ["banana", "orange", "apple", "peppers"] * 2,
    "plants": ["fruit", "fruit", "fruit", "vegetable"] * 2,
    "price": [0.5, 2, 1, 1.5, 5, 20, 10, 15],
}
df = pd.DataFrame(data)
# Same data with every column created as 'category', ordered by pack size
# and then by product.
df2 = pd.DataFrame(data, dtype='category')
df2 = df2.sort_values(by=['quantity', 'product'])
df2
quantity | product | plants | price | |
---|---|---|---|---|
2 | 1 kg | apple | fruit | 1.0 |
0 | 1 kg | banana | fruit | 0.5 |
1 | 1 kg | orange | fruit | 2.0 |
3 | 1 kg | peppers | vegetable | 1.5 |
6 | 10 kg | apple | fruit | 10.0 |
4 | 10 kg | banana | fruit | 5.0 |
5 | 10 kg | orange | fruit | 20.0 |
7 | 10 kg | peppers | vegetable | 15.0 |
# JSON-TAB value of `df` (plain columns); the message below calls it "full" mode.
json_df = Ntv.obj(df).to_obj()[':tab']
print('json_df is the JSON-TAB format with "full" mode\n')
pprint(json_df, width=200)
# JSON-TAB value of `df2` (categorical, sorted columns); the message below
# calls it "optimize" mode.
json_xar = Ntv.obj(df2).to_obj()[':tab']
print('\njson_xa is the JSON-TAB format with "optimize" mode\n')
pprint(json_xar, width=200)
# Rebuild a DataFrame from the "optimize" value and restore the original
# row order before comparing with `df`.
df_from_xar = Ntv.obj({':tab': json_xar}).to_obj(format='obj').sort_index()
# Both sides are cast to object so the categorical dtype does not affect the comparison.
print('\nDataFrame from the two JSON-TAB format are identical ? ', df.astype('object').equals(df_from_xar.astype('object')))
print('\nThe "optimize" JSON-TAB format is the image of the DataArray Xarray')
from tab_dataset import Sdataset
# Convert the "full" JSON-TAB value to an xarray object with 'price' as the variable.
Sdataset.ntv(json_df).setcanonorder().to_xarray(varname='price')
json_df is the JSON-TAB format with "full" mode {'index': [0, 1, 2, 3, 4, 5, 6, 7], 'plants': ['fruit', 'fruit', 'fruit', 'vegetable', 'fruit', 'fruit', 'fruit', 'vegetable'], 'price': [0.5, 2.0, 1.0, 1.5, 5.0, 20.0, 10.0, 15.0], 'product': ['banana', 'orange', 'apple', 'peppers', 'banana', 'orange', 'apple', 'peppers'], 'quantity': ['1 kg', '1 kg', '1 kg', '1 kg', '10 kg', '10 kg', '10 kg', '10 kg']} json_xa is the JSON-TAB format with "optimize" mode {'index': [2, 0, 1, 3, 6, 4, 5, 7], 'plants': [['fruit', 'vegetable'], [0, 0, 0, 1, 0, 0, 0, 1]], 'price': [[0.5, 1.0, 1.5, 2.0, 5.0, 10.0, 15.0, 20.0], [1, 0, 3, 2, 5, 4, 7, 6]], 'product': [['apple', 'banana', 'orange', 'peppers'], [1]], 'quantity': [['1 kg', '10 kg'], [4]]} DataFrame from the two JSON-TAB format are identical ? True The "optimize" JSON-TAB format is the image of the DataArray Xarray
<xarray.DataArray 'price' (quantity: 2, product: 4)> Size: 64B array([[0.5, 2.0, 1.0, 1.5], [5.0, 20.0, 10.0, 15.0]], dtype=object) Coordinates: * quantity (quantity) object 16B '1 kg' '10 kg' * product (product) object 32B 'banana' 'orange' 'apple' 'peppers' plants (product) object 32B 'fruit' 'fruit' 'fruit' 'vegetable'
# json interface ok
# Series for which Series -> JSON -> Series is expected to round-trip.
srs = [
    # without ntv_type, without dtype
    pd.Series([{'a': 2, 'e': 4}, {'a': 3, 'e': 5}, {'a': 4, 'e': 6}]),
    pd.Series([[1, 2], [3, 4], [5, 6]]),
    pd.Series([[1, 2], [3, 4], {'a': 3, 'e': 5}]),
    pd.Series([True, False, True]),
    pd.Series(['az', 'er', 'cd']),
    pd.Series(['az', 'az', 'az']),
    pd.Series([1, 2, 3]),
    pd.Series([1.1, 2, 3]),

    # without ntv_type, with dtype
    pd.Series([10, 20, 30], dtype='Int64'),
    pd.Series([True, False, True], dtype='boolean'),
    pd.Series([1.1, 2, 3], dtype='float64'),

    # with ntv_type only in json data (not numbers)
    pd.Series([pd.NaT, pd.NaT, pd.NaT]),
    pd.Series([datetime(2022, 1, 1), datetime(2022, 1, 2)], dtype='datetime64[ns]'),
    pd.Series(pd.to_timedelta(['1D', '2D'])),
    pd.Series(['az', 'er', 'cd'], dtype='string'),

    # with ntv_type only in json data (numbers)
    pd.Series([1, 2, 3], dtype='Int32'),
    pd.Series([1, 2, 3], dtype='UInt64'),
    pd.Series([1, 2, 3], dtype='float32'),

    # with ntv_type in Series name and in json data (numbers)
    pd.Series([1, 2, 3], name='::int64'),
    # the dtype is forced during the json conversion
    pd.Series([1, 2, 3], dtype='Float64', name='::float64'),

    # with ntv_type in Series name and in json data (not numbers)
    pd.Series([[1, 2], [3, 4], [5, 6]], name='::array'),
    pd.Series([{'a': 2, 'e': 4}, {'a': 3, 'e': 5}, {'a': 4, 'e': 6}], name='::object'),
    pd.Series([None, None, None], name='::null'),
    pd.Series(["geo:13.412 ,103.866", "mailto:John.Doe@example.com"], name='::uri', dtype='string'),
    pd.Series(["///path/to/file", "//host.example.com/path/to/file"], name='::file', dtype='string'),

    # with ntv_type converted in object dtype (not in datetime)
    pd.Series([date(2022, 1, 1), date(2022, 1, 2)], name='::date'),
    pd.Series([time(10, 21, 1), time(8, 1, 2)], name='::time'),

    # with ntv_type unknown in pandas and with pandas conversion
    pd.Series([1, 2, 3], dtype='int64', name='::day'),
    pd.Series([2001, 2002, 2003], dtype='int64', name='::year'),
    pd.Series([21, 10, 55], name='::minute'),

    # with ntv_type unknown in pandas and NTV conversion
    pd.Series([Point(1, 0), Point(1, 1), Point(1, 2)], name='::point'),
]

# For each Series, print the round-trip result followed by its JSON value.
for sr in srs:
    is_equal = sr.npd.equals(npd.read_json(sr.npd.to_json()))
    print(is_equal, sr.npd.to_json())
True {':field': [{'a': 2, 'e': 4}, {'a': 3, 'e': 5}, {'a': 4, 'e': 6}]} True {':field': [[1, 2], [3, 4], [5, 6]]} True {':field': [[1, 2], [3, 4], {'a': 3, 'e': 5}]} True {':field': [True, False, True]} True {':field': ['az', 'er', 'cd']} True {':field': ['az', 'az', 'az']} True {':field': [1, 2, 3]} True {':field': [1.1, 2.0, 3.0]} True {':field': [10, 20, 30]} True {':field': [True, False, True]} True {':field': [1.1, 2.0, 3.0]} True {':field': {'::datetime': [None, None, None]}} True {':field': {'::datetime': ['2022-01-01T00:00:00.000', '2022-01-02T00:00:00.000']}} True {':field': {'::duration': ['P1DT0H0M0S', 'P2DT0H0M0S']}} True {':field': {'::string': ['az', 'er', 'cd']}} True {':field': {'::int32': [1, 2, 3]}} True {':field': {'::uint64': [1, 2, 3]}} True {':field': {'::float32': [1.0, 2.0, 3.0]}} True {':field': {'::int64': [1, 2, 3]}} True {':field': {'::float64': [1.0, 2.0, 3.0]}} True {':field': {'::array': [[1, 2], [3, 4], [5, 6]]}} True {':field': {'::object': [{'a': 2, 'e': 4}, {'a': 3, 'e': 5}, {'a': 4, 'e': 6}]}} True {':field': {'::null': [None, None, None]}} True {':field': {'::uri': ['geo:13.412 ,103.866', 'mailto:John.Doe@example.com']}} True {':field': {'::file': ['///path/to/file', '//host.example.com/path/to/file']}} True {':field': {'::date': ['2022-01-01', '2022-01-02']}} True {':field': {'::time': ['10:21:01', '08:01:02']}} True {':field': {'::day': [1, 2, 3]}} True {':field': {'::year': [2001, 2002, 2003]}} True {':field': {'::minute': [21, 10, 55]}} True {':field': {'::point': [[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]]}}
# json interface ok
# Field values for which JSON -> Series -> JSON is expected to round-trip.
for a in [
    {'test::int32': [1, 2, 3]},
    {'test': [1, 2, 3]},
    [1.0, 2.1, 3.0],
    ['er', 'et', 'ez'],
    [True, False, True],
    {'::boolean': [True, False, True]},
    {'::string': ['er', 'et', 'ez']},
    {'test::float32': [1.0, 2.5, 3.0]},
    {'::int64': [1, 2, 3]},
    {'::datetime': ["2021-12-31T23:00:00.000", "2022-01-01T23:00:00.000"]},
    {'::date': ["2021-12-31", "2022-01-01"]},
    {'::time': ["23:00:00", "23:01:00"]},
    {'::object': [{'a': 3, 'e': 5}, {'a': 4, 'e': 6}]},
    {'::array': [[1, 2], [3, 4], [5, 6]]},
    True,
    {':boolean': True},
]:
    field = {':field': a}
    # Print whether the regenerated JSON equals the input, then the input.
    round_trip = npd.read_json(field).npd.to_json()
    print(round_trip == field, field)
True {':field': {'test::int32': [1, 2, 3]}} True {':field': {'test': [1, 2, 3]}} True {':field': [1.0, 2.1, 3.0]} True {':field': ['er', 'et', 'ez']} True {':field': [True, False, True]} True {':field': {'::boolean': [True, False, True]}} True {':field': {'::string': ['er', 'et', 'ez']}} True {':field': {'test::float32': [1.0, 2.5, 3.0]}} True {':field': {'::int64': [1, 2, 3]}} True {':field': {'::datetime': ['2021-12-31T23:00:00.000', '2022-01-01T23:00:00.000']}} True {':field': {'::date': ['2021-12-31', '2022-01-01']}} True {':field': {'::time': ['23:00:00', '23:01:00']}} True {':field': {'::object': [{'a': 3, 'e': 5}, {'a': 4, 'e': 6}]}} True {':field': {'::array': [[1, 2], [3, 4], [5, 6]]}} True {':field': True} True {':field': {':boolean': True}}
# json interface ok (categorical data)
# Each value pairs a list of distinct values with a list of positions.
for a in [
    {'test': [{'::int32': [1, 2, 3]}, [0, 1, 2, 0, 1]]},
    {'test': [[1, 2, 3], [0, 1, 2, 0, 1]]},
    [[1.0, 2.1, 3.0], [0, 1, 2, 0, 1]],
    [['er', 'et', 'ez'], [0, 1, 2, 0, 1]],
    [[True, False], [0, 1, 0, 1, 0]],
    [{'::string': ['er', 'et', 'ez']}, [0, 1, 2, 0, 1]],
    {'test': [{'::float32': [1.0, 2.5, 3.0]}, [0, 1, 2, 0, 1]]},
    [{'::int64': [1, 2, 3]}, [0, 1, 2, 0, 1]],
    [{'::datetime': ["2021-12-31T23:00:00.000", "2022-01-01T23:00:00.000"]}, [0, 1, 0, 1, 0]],
    [{'::date': ["2021-12-31", "2022-01-01"]}, [0, 1, 0, 1, 0]],
    [{'::time': ["23:00:00", "23:01:00"]}, [0, 1, 0, 1, 0]],
    {'test_date': [{'::datetime': ["2021-12-31T23:00:00.000", "2022-01-01T23:00:00.000"]}, [0, 1, 0, 1, 0]]},
    [{'::boolean': [True, False]}, [0, 1, 0, 1, 0]],
    [[True], [2]],  # periodic Series
    {'quantity': [['1 kg', '10 kg'], [4]]},  # periodic Series
]:
    field = {':field': a}
    # Print whether the regenerated JSON equals the input, then the input.
    round_trip = npd.read_json(field).npd.to_json()
    print(round_trip == field, field)
True {':field': {'test': [{'::int32': [1, 2, 3]}, [0, 1, 2, 0, 1]]}} True {':field': {'test': [[1, 2, 3], [0, 1, 2, 0, 1]]}} True {':field': [[1.0, 2.1, 3.0], [0, 1, 2, 0, 1]]} True {':field': [['er', 'et', 'ez'], [0, 1, 2, 0, 1]]} True {':field': [[True, False], [0, 1, 0, 1, 0]]} True {':field': [{'::string': ['er', 'et', 'ez']}, [0, 1, 2, 0, 1]]} True {':field': {'test': [{'::float32': [1.0, 2.5, 3.0]}, [0, 1, 2, 0, 1]]}} True {':field': [{'::int64': [1, 2, 3]}, [0, 1, 2, 0, 1]]} True {':field': [{'::datetime': ['2021-12-31T23:00:00.000', '2022-01-01T23:00:00.000']}, [0, 1, 0, 1, 0]]} True {':field': [{'::date': ['2021-12-31', '2022-01-01']}, [0, 1, 0, 1, 0]]} True {':field': [{'::time': ['23:00:00', '23:01:00']}, [0, 1, 0, 1, 0]]} True {':field': {'test_date': [{'::datetime': ['2021-12-31T23:00:00.000', '2022-01-01T23:00:00.000']}, [0, 1, 0, 1, 0]]}} True {':field': [{'::boolean': [True, False]}, [0, 1, 0, 1, 0]]} True {':field': [[True], [2]]} True {':field': {'quantity': [['1 kg', '10 kg'], [4]]}}
# json interface ko
# Series for which the round trip does not give back an equal Series
# (see the recorded output).
srs = [
    # without ntv_type
    pd.Series([math.nan, math.nan]),  # bug pandas conversion json : datetime NaT

    # without ntv_type, with dtype
    pd.Series([math.nan, math.nan], dtype='float64'),  # bug pandas conversion json : datetime NaT

    # with ntv_type in Series name and in json data
    pd.Series([1, 2, 3], dtype='UInt64', name='::uint64'),  # the name is unnecessary

    # with ntv_type unknown in pandas
    pd.Series(
        [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],
        dtype='datetime64[ns, UTC]',
    ),  # still to be handled
]

# For each Series, print: round-trip equality, name preservation, JSON text.
for sr in srs:
    is_equal = sr.npd.equals(npd.read_json(sr.npd.to_json()))
    same_name = npd.read_json(sr.npd.to_json()).name == sr.name
    print(is_equal, same_name, sr.npd.to_json(text=True))
False True {':field': [None, None]} False True {':field': [None, None]} False False {':field': {'::uint64': [1, 2, 3]}} False True {':field': ['2022-01-01T00:00:00.000', '2022-01-02T00:00:00.000', '2022-01-03T00:00:00.000']}
# json interface ko (categorical data)
# Categorical field of lists for which the regenerated JSON differs from the
# input (see the recorded output).
for a in [
    # list -> tuple to be hashable
    {'test_array': [{'::array': [[1, 2], [3, 4], [5, 6], [7, 8]]}, [0, 1, 0, 2, 3]]},
]:
    field = {':field': a}
    round_trip = npd.read_json(field).npd.to_json()
    print(round_trip == field, field)
False {':field': {'test_array': [{'::array': [[1, 2], [3, 4], [5, 6], [7, 8]]}, [0, 1, 0, 2, 3]]}}