import pandas
from pandas.io.json import json_normalize
import json
# Load the Paris "arbres remarquables" JSON export and flatten its records
# into a tabular DataFrame.
local_file_path = '../files/arbres_remarquables_paris.json'

# JSON text is UTF-8; the records contain accented French labels, so the
# encoding is passed explicitly instead of relying on the platform default.
with open(local_file_path, 'r', encoding='utf-8') as json_file:
    data_json = json.load(json_file)

# json_normalize already returns a DataFrame whose nested keys become
# dot-separated column names (e.g. "fields.genre", "geometry.coordinates").
df = json_normalize(data_json['records'])
df.head()
datasetid | recordid | record_timestamp | fields.geom_x_y | fields.libellefrancais | fields.objectid | fields.idemplacement | fields.arrondissement | fields.circonferenceencm | fields.hauteurenm | ... | fields.stadedeveloppement | fields.remarquable | fields.idbase | fields.genre | fields.complementadresse | fields.typeemplacement | fields.dateplantation | geometry.type | geometry.coordinates | fields.varieteoucultivar | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arbresremarquablesparis | a236960f295288698ca57fdccfd9825547ffb4d0 | 2019-10-04T10:00:41.063000+00:00 | [48.8630096006, 2.2417737512] | Pin | 49884 | 000901001 | BOIS DE BOULOGNE | 241.0 | 30.0 | ... | M | 1 | 2002349.0 | Pinus | 16-08 | Arbre | 1882-01-01T00:09:21+00:00 | Point | [2.2417737512, 48.8630096006] | NaN |
1 | arbresremarquablesparis | d7635937c3636051c99963676d3a97dd4d9dd8c9 | 2019-10-04T10:00:41.063000+00:00 | [48.8731110898, 2.24886478886] | Hêtre | 52761 | 000701004 | BOIS DE BOULOGNE | 231.0 | 16.0 | ... | M | 1 | 2002354.0 | Fagus | 16-17 | Arbre | 1868-01-01T00:09:21+00:00 | Point | [2.24886478886, 48.8731110898] | ''Pendula'' |
2 | arbresremarquablesparis | 460e8eccd3ebc816f16317702223bf406e7d75e6 | 2019-10-04T10:00:41.063000+00:00 | [48.8448875264, 2.35366916319] | Hêtre | 62456 | 00000075 | PARIS 5E ARRDT | 85.0 | 3.0 | ... | M | 1 | 132669.0 | Fagus | NaN | Arbre | 1905-01-01T00:09:21+00:00 | Point | [2.35366916319, 48.8448875264] | ''Tortuosa'' |
3 | arbresremarquablesparis | b60b80e0f8327afba6d0c513b0b6b85639e516b6 | 2019-10-04T10:00:41.063000+00:00 | [48.8787029872, 2.30647417371] | Erable | 68801 | 00NO0144 | PARIS 8E ARRDT | 450.0 | 28.0 | ... | M | 1 | 305507.0 | Acer | 08-01 | Arbre | 1700-01-01T00:09:21+00:00 | Point | [2.30647417371, 48.8787029872] | NaN |
4 | arbresremarquablesparis | 47e163d6f0536bc9e26ba2993447c6f50b14a6f6 | 2019-10-04T10:00:41.063000+00:00 | [48.8671184424, 2.25294211329] | Platane | 82341 | 001201001 | BOIS DE BOULOGNE | 534.0 | 45.0 | ... | M | 1 | 2002389.0 | Platanus | 16-10 | Arbre | 1872-01-01T00:09:21+00:00 | Point | [2.25294211329, 48.8671184424] | NaN |
5 rows × 24 columns
# Replace the dots in the flattened column names with underscores.
# regex=False forces "." to be treated as a literal dot; as a regular
# expression it would match every character, and the default value of
# `regex` differs between pandas versions.
df.columns = df.columns.str.replace(".", "_", regex=False)

# Split each two-element geometry_coordinates list into separate columns.
# The first element is stored as 'lon' and the second as 'lat'; index=df.index
# keeps the new values aligned with their source rows.
coordinate_pairs = pandas.DataFrame(df['geometry_coordinates'].tolist(), index=df.index)
df[['lon', 'lat']] = coordinate_pairs

# Show the original pairs next to the extracted columns as a sanity check.
df.loc[:, ['geometry_coordinates', 'lon', 'lat']]
geometry_coordinates | lon | lat | |
---|---|---|---|
0 | [2.2417737512, 48.8630096006] | 2.241774 | 48.863010 |
1 | [2.24886478886, 48.8731110898] | 2.248865 | 48.873111 |
2 | [2.35366916319, 48.8448875264] | 2.353669 | 48.844888 |
3 | [2.30647417371, 48.8787029872] | 2.306474 | 48.878703 |
4 | [2.25294211329, 48.8671184424] | 2.252942 | 48.867118 |
... | ... | ... | ... |
177 | [2.38368012582, 48.8814687366] | 2.383680 | 48.881469 |
178 | [2.35069308175, 48.8330296265] | 2.350693 | 48.833030 |
179 | [2.32633191622, 48.8940926568] | 2.326332 | 48.894093 |
180 | [2.43334109188, 48.8395251386] | 2.433341 | 48.839525 |
181 | [2.33940394029, 48.857749011] | 2.339404 | 48.857749 |
182 rows × 3 columns
from geopandas import GeoDataFrame, points_from_xy
# Turn the lon/lat columns into point geometries (x = lon, y = lat) and
# attach them to the table as the GeoDataFrame's geometry column.
tree_points = points_from_xy(df['lon'], df['lat'])
gdf = GeoDataFrame(df, geometry=tree_points)
gdf.head()
datasetid | recordid | record_timestamp | fields_geom_x_y | fields_libellefrancais | fields_objectid | fields_idemplacement | fields_arrondissement | fields_circonferenceencm | fields_hauteurenm | ... | fields_genre | fields_complementadresse | fields_typeemplacement | fields_dateplantation | geometry_type | geometry_coordinates | fields_varieteoucultivar | lon | lat | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | arbresremarquablesparis | a236960f295288698ca57fdccfd9825547ffb4d0 | 2019-10-04T10:00:41.063000+00:00 | [48.8630096006, 2.2417737512] | Pin | 49884 | 000901001 | BOIS DE BOULOGNE | 241.0 | 30.0 | ... | Pinus | 16-08 | Arbre | 1882-01-01T00:09:21+00:00 | Point | [2.2417737512, 48.8630096006] | NaN | 2.241774 | 48.863010 | POINT (2.24177 48.86301) |
1 | arbresremarquablesparis | d7635937c3636051c99963676d3a97dd4d9dd8c9 | 2019-10-04T10:00:41.063000+00:00 | [48.8731110898, 2.24886478886] | Hêtre | 52761 | 000701004 | BOIS DE BOULOGNE | 231.0 | 16.0 | ... | Fagus | 16-17 | Arbre | 1868-01-01T00:09:21+00:00 | Point | [2.24886478886, 48.8731110898] | ''Pendula'' | 2.248865 | 48.873111 | POINT (2.24886 48.87311) |
2 | arbresremarquablesparis | 460e8eccd3ebc816f16317702223bf406e7d75e6 | 2019-10-04T10:00:41.063000+00:00 | [48.8448875264, 2.35366916319] | Hêtre | 62456 | 00000075 | PARIS 5E ARRDT | 85.0 | 3.0 | ... | Fagus | NaN | Arbre | 1905-01-01T00:09:21+00:00 | Point | [2.35366916319, 48.8448875264] | ''Tortuosa'' | 2.353669 | 48.844888 | POINT (2.35367 48.84489) |
3 | arbresremarquablesparis | b60b80e0f8327afba6d0c513b0b6b85639e516b6 | 2019-10-04T10:00:41.063000+00:00 | [48.8787029872, 2.30647417371] | Erable | 68801 | 00NO0144 | PARIS 8E ARRDT | 450.0 | 28.0 | ... | Acer | 08-01 | Arbre | 1700-01-01T00:09:21+00:00 | Point | [2.30647417371, 48.8787029872] | NaN | 2.306474 | 48.878703 | POINT (2.30647 48.87870) |
4 | arbresremarquablesparis | 47e163d6f0536bc9e26ba2993447c6f50b14a6f6 | 2019-10-04T10:00:41.063000+00:00 | [48.8671184424, 2.25294211329] | Platane | 82341 | 001201001 | BOIS DE BOULOGNE | 534.0 | 45.0 | ... | Platanus | 16-10 | Arbre | 1872-01-01T00:09:21+00:00 | Point | [2.25294211329, 48.8671184424] | NaN | 2.252942 | 48.867118 | POINT (2.25294 48.86712) |
5 rows × 27 columns
from cartoframes.viz import Layer
# Display the GeoDataFrame as a cartoframes map layer; no style argument is
# passed here.
Layer(gdf)
# Parse the planting timestamps; errors='coerce' turns any value that cannot
# be parsed into NaT instead of raising.
gdf['plantation_date'] = pandas.to_datetime(gdf['fields_dateplantation'], errors='coerce')

# Keep only rows with a usable date. The explicit .copy() makes the filtered
# frame independent of the original, so adding a column below does not
# trigger pandas' SettingWithCopyWarning.
gdf = gdf[gdf['plantation_date'].notnull()].copy()

# Read the year straight from the datetime accessor rather than formatting
# it to a string and parsing it back into a number.
gdf['plantation_year'] = gdf['plantation_date'].dt.year
gdf.loc[:, ['plantation_date', 'plantation_year']]
plantation_date | plantation_year | |
---|---|---|
0 | 1882-01-01 00:09:21+00:00 | 1882 |
1 | 1868-01-01 00:09:21+00:00 | 1868 |
2 | 1905-01-01 00:09:21+00:00 | 1905 |
3 | 1700-01-01 00:09:21+00:00 | 1700 |
4 | 1872-01-01 00:09:21+00:00 | 1872 |
... | ... | ... |
177 | 1700-01-01 00:09:21+00:00 | 1700 |
178 | 1894-01-01 00:09:21+00:00 | 1894 |
179 | 1700-01-01 00:09:21+00:00 | 1700 |
180 | 1918-01-01 00:00:00+00:00 | 1918 |
181 | 1700-01-01 00:09:21+00:00 | 1700 |
181 rows × 2 columns
from cartoframes.viz import Layer, color_bins_style
# Colour the trees by planting year, binned at the given break values and
# drawn with the 'Mint' palette.
year_bins_style = color_bins_style(
    'plantation_year',
    breaks=[1700, 1800, 1900, 2000],
    palette='Mint',
)
Layer(gdf, year_bins_style)