The aim of this use case is to analyse whether there is a correlation between economic welfare and migration. The hypothesis is that people move to places where welfare is higher.
# Import necessary libraries
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
# Import datenguidepy package
import os
# Step up one directory unless the notebook already runs from the repository
# root (a directory named "datenguide-python") -- presumably so that the
# `datenguidepy` imports that follow resolve against the checkout.
if os.path.basename(os.getcwd()) != "datenguide-python":
    os.chdir("..")
from datenguidepy.query_builder import Query
from datenguidepy.query_helper import get_regions
/home/konrad/programming/python/correlaid/datenguide-python/venv/lib/python3.7/site-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead. import pandas.util.testing as tm
# Fetch the region table and keep only the NUTS-1 rows (the Bundesländer).
# The resulting Series maps each region id (index) to the region name.
all_regions = get_regions()
nuts1_regions = all_regions.query('level == "nuts1"')
region_codes = nuts1_regions["name"]
region_codes
region_id 10 Saarland 11 Berlin 12 Brandenburg 13 Mecklenburg-Vorpommern 14 Sachsen 15 Sachsen-Anhalt 16 Thüringen 01 Schleswig-Holstein 02 Hamburg 03 Niedersachsen 04 Bremen 05 Nordrhein-Westfalen 06 Hessen 07 Rheinland-Pfalz 08 Baden-Württemberg, Land 09 Bayern Name: name, dtype: object
# Build one query covering all selected regions
region_ids = list(region_codes.index)
region_query = Query.region(region_ids)

# Statistics of interest, added to the query in this order
statistics_of_interest = (
    "BEVSTD",  # population
    "BEV085",  # moving in administrative districts
    "BEV086",  # moving out administrative districts
    "BIP804",  # GDP per capita
)
for statistic in statistics_of_interest:
    region_query.add_field(statistic)

# Execute the query and collect the results
results = region_query.results()

# Show the first rows of the result
results.head()
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-3-4d7dcbd27610> in <module> 7 8 # Get the results for the query ----> 9 results = region_query.results() 10 11 # Preview the first data rows ~/programming/python/correlaid/datenguide-python/datenguidepy/query_builder.py in results(self, verbose_statistics, verbose_enums, add_units) 631 result = QueryExecutioner( 632 statistics_meta_data_provider=self._stat_meta_data_provider --> 633 ).run_query(self) 634 if result: 635 # It is currently assumed that all graphql queries ~/programming/python/correlaid/datenguide-python/datenguidepy/query_execution.py in run_query(self, query) 491 all_results = [ 492 self._run_single_query_json(query_json, query._get_fields_with_types()) --> 493 for query_json in self._generate_post_json(query) 494 ] 495 if not any(map(lambda r: r is None, all_results)): ~/programming/python/correlaid/datenguide-python/datenguidepy/query_execution.py in <listcomp>(.0) 491 all_results = [ 492 self._run_single_query_json(query_json, query._get_fields_with_types()) --> 493 for query_json in self._generate_post_json(query) 494 ] 495 if not any(map(lambda r: r is None, all_results)): ~/programming/python/correlaid/datenguide-python/datenguidepy/query_execution.py in _run_single_query_json(self, query_json, query_fields_with_types) 519 page += 1 520 else: --> 521 single_result = self._send_request(query_json) 522 if single_result is None: 523 return None ~/programming/python/correlaid/datenguide-python/datenguidepy/query_execution.py in _send_request(self, query_json) 558 if resp.status_code == 200: 559 body_json = resp.json() --> 560 check_http200_body_error(body_json) 561 return body_json 562 else: ~/programming/python/correlaid/datenguide-python/datenguidepy/query_execution.py in check_http200_body_error(body_json) 79 if "errors" in body_json: 80 raise RuntimeError( ---> 81 "Body contains the following error content\n" 
+ str(body_json) 82 ) 83 RuntimeError: Body contains the following error content {'errors': [{'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'locations': [{'line': 1, 'column': 28}], 'path': ['region', 'BEVSTD'], 'extensions': {'code': 'INTERNAL_SERVER_ERROR', 'exception': {'type': 'FeathersError', 'name': 'GeneralError', 'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'code': 500, 'className': 'general-error', 'data': {'msg': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'path': '/genesapi-regionalstatistik-3/_search', 'query': {'size': 10, 'scroll': '10s'}, 'body': '{"query":{"constant_score":{"filter":{"bool":{"must":[{"term":{"region_id":"10"}},{"exists":{"field":"BEVSTD"}}],"must_not":[{"exists":{"field":"ALTX75"}},{"exists":{"field":"GES"}},{"exists":{"field":"ALTX21"}},{"exists":{"field":"NAT"}},{"exists":{"field":"ALTX76"}},{"exists":{"field":"ALTX20"}}]}}}}}', 'statusCode': 500, 'response': '{"error":{"root_cause":[{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting."}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":"genesapi-regionalstatistik-3","node":"oVtg5PqCQhGPfOpV0eyhKg","reason":{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. 
This limit can be set by changing the [search.max_open_scroll_context] setting."}}]},"status":500}'}, 'errors': {}, 'hook': {'type': 'before', 'method': 'find', 'path': 'genesapiRawQuery', 'params': {'query': {'index': 'genesapi-regionalstatistik-3', 'size': 10, 'scroll': '10s', 'body': {'query': {'constant_score': {'filter': {'bool': {'must': [{'term': {'region_id': '10'}}, {'exists': {'field': 'BEVSTD'}}], 'must_not': [{'exists': {'field': 'ALTX75'}}, {'exists': {'field': 'GES'}}, {'exists': {'field': 'ALTX21'}}, {'exists': {'field': 'NAT'}}, {'exists': {'field': 'ALTX76'}}, {'exists': {'field': 'ALTX20'}}]}}}}}}}}}}}, {'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'locations': [{'line': 1, 'column': 238}], 'path': ['region', 'BIP804'], 'extensions': {'code': 'INTERNAL_SERVER_ERROR', 'exception': {'type': 'FeathersError', 'name': 'GeneralError', 'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'code': 500, 'className': 'general-error', 'data': {'msg': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'path': '/genesapi-regionalstatistik-3/_search', 'query': {'size': 10, 'scroll': '10s'}, 'body': '{"query":{"constant_score":{"filter":{"bool":{"must":[{"term":{"region_id":"10"}},{"exists":{"field":"BIP804"}}],"must_not":[]}}}}}', 'statusCode': 500, 'response': '{"error":{"root_cause":[{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. 
This limit can be set by changing the [search.max_open_scroll_context] setting."}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":"genesapi-regionalstatistik-3","node":"oVtg5PqCQhGPfOpV0eyhKg","reason":{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting."}}]},"status":500}'}, 'errors': {}, 'hook': {'type': 'before', 'method': 'find', 'path': 'genesapiRawQuery', 'params': {'query': {'index': 'genesapi-regionalstatistik-3', 'size': 10, 'scroll': '10s', 'body': {'query': {'constant_score': {'filter': {'bool': {'must': [{'term': {'region_id': '10'}}, {'exists': {'field': 'BIP804'}}], 'must_not': []}}}}}}}}}}}, {'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'locations': [{'line': 1, 'column': 98}], 'path': ['region', 'BEV085'], 'extensions': {'code': 'INTERNAL_SERVER_ERROR', 'exception': {'type': 'FeathersError', 'name': 'GeneralError', 'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'code': 500, 'className': 'general-error', 'data': {'msg': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. 
This limit can be set by changing the [search.max_open_scroll_context] setting.', 'path': '/genesapi-regionalstatistik-3/_search', 'query': {'size': 10, 'scroll': '10s'}, 'body': '{"query":{"constant_score":{"filter":{"bool":{"must":[{"term":{"region_id":"10"}},{"exists":{"field":"BEV085"}}],"must_not":[{"exists":{"field":"GES"}},{"exists":{"field":"NAT"}},{"exists":{"field":"ALTX01"}}]}}}}}', 'statusCode': 500, 'response': '{"error":{"root_cause":[{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting."}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":"genesapi-regionalstatistik-3","node":"oVtg5PqCQhGPfOpV0eyhKg","reason":{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting."}}]},"status":500}'}, 'errors': {}, 'hook': {'type': 'before', 'method': 'find', 'path': 'genesapiRawQuery', 'params': {'query': {'index': 'genesapi-regionalstatistik-3', 'size': 10, 'scroll': '10s', 'body': {'query': {'constant_score': {'filter': {'bool': {'must': [{'term': {'region_id': '10'}}, {'exists': {'field': 'BEV085'}}], 'must_not': [{'exists': {'field': 'GES'}}, {'exists': {'field': 'NAT'}}, {'exists': {'field': 'ALTX01'}}]}}}}}}}}}}}, {'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'locations': [{'line': 1, 'column': 168}], 'path': ['region', 'BEV086'], 'extensions': {'code': 'INTERNAL_SERVER_ERROR', 'exception': {'type': 'FeathersError', 'name': 'GeneralError', 'message': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. 
This limit can be set by changing the [search.max_open_scroll_context] setting.', 'code': 500, 'className': 'general-error', 'data': {'msg': '[exception] Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting.', 'path': '/genesapi-regionalstatistik-3/_search', 'query': {'size': 10, 'scroll': '10s'}, 'body': '{"query":{"constant_score":{"filter":{"bool":{"must":[{"term":{"region_id":"10"}},{"exists":{"field":"BEV086"}}],"must_not":[{"exists":{"field":"GES"}},{"exists":{"field":"NAT"}},{"exists":{"field":"ALTX01"}}]}}}}}', 'statusCode': 500, 'response': '{"error":{"root_cause":[{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting."}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":"genesapi-regionalstatistik-3","node":"oVtg5PqCQhGPfOpV0eyhKg","reason":{"type":"exception","reason":"Trying to create too many scroll contexts. Must be less than or equal to: [500]. This limit can be set by changing the [search.max_open_scroll_context] setting."}}]},"status":500}'}, 'errors': {}, 'hook': {'type': 'before', 'method': 'find', 'path': 'genesapiRawQuery', 'params': {'query': {'index': 'genesapi-regionalstatistik-3', 'size': 10, 'scroll': '10s', 'body': {'query': {'constant_score': {'filter': {'bool': {'must': [{'term': {'region_id': '10'}}, {'exists': {'field': 'BEV086'}}], 'must_not': [{'exists': {'field': 'GES'}}, {'exists': {'field': 'NAT'}}, {'exists': {'field': 'ALTX01'}}]}}}}}}}}}}}], 'data': {'region': {'id': '10', 'name': 'Saarland', 'BEVSTD': None, 'BEV085': None, 'BEV086': None, 'BIP804': None}}}
# Net migration per 1000 inhabitants:
# (people moving in - people moving out) / population * 1000
net_moves = results["BEV085"] - results["BEV086"]
results["migration"] = net_moves / results["BEVSTD"] * 1000

# Summary statistics of the dataset
results.describe()
# Create the figure. plt.subplots returns a (figure, axes) pair, so unpack
# both instead of binding the whole tuple to a single name.
fig, ax = plt.subplots(figsize=(25, 15))

# Columns that are used several times below
years = results["year"]
population = results["BEVSTD"]

# Scatter plot of GDP per capita against net migration: the colour encodes the
# year and the marker size is scaled with the population
plt.scatter(
    results["BIP804"],
    results["migration"],
    c=years,
    s=population / 10000,
    cmap="GnBu",
    alpha=0.5,
)
plt.colorbar(label="Year", ticks=range(years.min(), years.max() + 1, 2))
plt.clim(years.min(), years.max())

# Draw the regression line (no second scatter layer)
sns.regplot(x="BIP804", y="migration", data=results, scatter=False, color="red")

# Create a legend for the population size: empty scatter series act as legend
# handles, using the same size scaling as the data points above
for pop in np.linspace(population.min(), population.max() + 1, num=4):
    plt.scatter(
        [],
        [],
        c="k",
        alpha=0.3,
        s=pop / 10000,
        label=f" {int(pop / 1000000)} Million",
    )
plt.legend(
    scatterpoints=1,
    frameon=False,
    labelspacing=3,
    title="Population",
    title_fontsize="larger",
    loc="best",
    bbox_to_anchor=(0.5, -0.25, 0.5, 0.5),
)

# Add title and labels (the trailing semicolon suppresses the notebook echo)
plt.title("Migration and GDP", fontweight="bold")
plt.xlabel("Gross Domestic Product per Capita", fontweight="bold")
plt.ylabel("Net Migration per 1000 Inhabitants", fontweight="bold");
The analysis is based on data for all 16 Bundesländer in Germany from 1995 to 2017. Over the years, both GDP and migration increased. In general, the plot shows a positive correlation between (net) migration and GDP.