You can compare the result with the Regionalatlas of the Statistisches Bundesamt.
Steps:
Load package datenguidepy
Get the region codes on NUTS-3 level with "get_regions().query('level == "nuts3"')"
Search for statistics codes with "get_statistics().query('XXX')"
Define a year filter with "add_args({'year' : [2015]})"
Define Query using statistics codes, region code and add a filter
Collect data
Plot the data on a map, e.g. with geoviews. (Note that you need to add data on the shape of the regions first.)
import os
# Step up one directory unless the current directory is already named
# "datenguide-python" (presumably the repository root -- confirm).
current_dir_name = os.path.basename(os.getcwd())
if current_dir_name != "datenguide-python":
    os.chdir("..")
from datenguidepy import Query
from datenguidepy.query_helper import get_regions, get_statistics
# Load the region table, keep only the rows whose level is "nuts3", and
# remember their index values; these are used as region codes further down.
reg = get_regions()
nuts3_regions = reg.query('level == "nuts3"')
relevant_region_subset = nuts3_regions.index
def get_data(regcode, stat, year=2015):
    """Query one statistic for one region, filtered to a single year.

    Args:
        regcode: Region code handed to ``Query.region``.
        stat: Statistic code added to the query as a field, e.g. ``'AI0211'``.
        year: Year used for the field's ``year`` filter. Defaults to 2015,
            the value that used to be hard-coded, so existing two-argument
            calls behave exactly as before.

    Returns:
        The object returned by ``query.results()``; callers in this notebook
        treat it as a pandas DataFrame (they check ``.empty``).
    """
    query = Query.region(regcode)
    field = query.add_field(stat)
    # The filter expects a list of years, even for a single year.
    field.add_args({'year': [year]})
    return query.results()
# Look up the statistic codes by (part of) their short description and print
# the matches, separated by a divider line.
# NOTE(review): str.contains interprets the pattern as a regular expression
# by default, so "." in "10.000" matches any character -- confirm this is
# acceptable for these searches.
description_filters = (
    'short_description.str.contains("Geburten-/Gestorbenenüberschuss je 10.000")',
    'short_description.str.contains("Lebendgeborene je 10.000 Einwohner")',
)
for position, description_filter in enumerate(description_filters):
    if position:
        print(' ------------- ')
    print(get_statistics().query(description_filter, engine='python'))
short_description \ statistic AI0211 Geburten-/Gestorbenenüberschuss je 10.000 Einw. long_description statistic AI0211 wiki\n==Geburtenüberschuss (+) bzw. Gestorbene... ------------- short_description \ statistic AI0209 Lebendgeborene je 10.000 Einwohner long_description statistic AI0209 wiki\n==Lebendgeborene je 10.000 Einwohner==\n...
import pandas as pd
# Build one wide table: for every statistic, stack the per-region results and
# join them to the table built so far on (id, name, year).
dataout = pd.DataFrame()
statlist = ['AI0211', 'AI0209']
for stat in statlist:
    # Collect the per-region frames and concatenate once. DataFrame.append
    # was removed in pandas 2.0, and appending + copying inside the loop
    # re-copies the growing frame for every region.
    frames = []
    for regcode in relevant_region_subset:
        df = get_data(str(regcode), stat)
        if df.empty:
            # f-string: regcode comes straight from the index and is not
            # guaranteed to be a str, so "+" concatenation could raise.
            print(f"no data for region code {regcode} and statistic {stat}")
        else:
            frames.append(df)
    # pd.concat rejects an empty list, so fall back to an empty frame.
    data = pd.concat(frames) if frames else pd.DataFrame()
    if data.empty:
        # No region returned data for this statistic (each one was reported
        # above); merging an empty, column-less frame would fail on the keys.
        continue
    if dataout.empty:
        dataout = data.copy()
    else:
        # Default inner join: only (id, name, year) rows present for every
        # statistic so far are kept.
        dataout = dataout.merge(data, on=['id', 'name', 'year'])
# Trailing expression so the notebook shows a preview of the result.
dataout.head()
id | name | year | AI0211 | AI0211_source_title_de | AI0211_source_valid_from | AI0211_source_periodicity | AI0211_source_name | AI0211_source_url | AI0209 | AI0209_source_title_de | AI0209_source_valid_from | AI0209_source_periodicity | AI0209_source_name | AI0209_source_url | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10041 | Saarbrücken, Regionalverband | 2015 | -56.1 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None | 81.1 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None |
1 | 10042 | Merzig-Wadern, Landkreis | 2015 | -64.6 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None | 69.2 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None |
2 | 10043 | Neunkirchen, Landkreis | 2015 | -73.3 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None | 70.3 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None |
3 | 10044 | Saarlouis, Landkreis | 2015 | -54.2 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None | 77.8 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None |
4 | 10045 | Saarpfalz-Kreis | 2015 | -55.5 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None | 76.8 | Regionalatlas Deutschland | 1995-01-01T00:00:00 | JAEHRLICH | 99910 | None |
Add information on the shape of the regions (the shapefile has to be downloaded beforehand; set YOUR_PATH to the folder containing it):
import geopandas as gpd
# Read the VG250_KRS shapefile; YOUR_PATH is a placeholder that must be set to
# the folder containing the downloaded archive.
de = gpd.read_file(YOUR_PATH + '/vg250_2019-01-01.gk3.shape.ebenen/vg250_ebenen/VG250_KRS.shp')
# Reproject to EPSG:4326. The EPSG code is passed directly because the
# {'init': 'epsg:4326'} form is deprecated by pyproj and emits a FutureWarning.
# (The former `de['AGS'].sort_values()` line was dropped: its result was
# discarded, so it had no effect.)
de = de.to_crs(epsg=4326)
# The shapes are keyed by 'AGS'; expose the statistics' 'id' under the same
# name so both tables share a join column.
dataout['AGS'] = dataout['id']
# Join shapes and statistics, keep one row per (id, year), and reduce the
# result to the columns needed for plotting.
df = gpd.GeoDataFrame(de.merge(dataout, on='AGS')).drop_duplicates(
    subset=['id', 'year']
)[['geometry', 'name', 'year', 'AI0211', 'AI0209']]
/home/konrad/programming/python/correlaid/datenguide-python/venv/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 return _prepare_from_string(" ".join(pjargs))
import geoviews as gv
gv.extension('bokeh', 'matplotlib')

# Keep the 2015 rows whose AI0211 value is non-zero, reduce them to the
# plotted columns, and drop rows with missing values.
keep_row = (df['year'] == 2015) & (df['AI0211'] != 0)
plot_columns = ['geometry', 'name', 'AI0211']
df_plt = df[keep_row][plot_columns].dropna()

# Display options for the map, coloured by the AI0211 value.
plot_options = dict(
    tools=['hover'], width=550, height=700, color_index='AI0211',
    colorbar=True, cmap='coolwarm', toolbar='left',
)
birth_surplus_map = gv.Polygons(
    df_plt,
    vdims=['name', 'AI0211'],
    label="Geburten-/Gestorbenenüberschuss je 10.000 (2015)",
)
# Trailing expression so the notebook renders the map.
birth_surplus_map.opts(**plot_options)