#!/usr/bin/env python
# coding: utf-8

# # GPS velocities for the Alpine region
#
# This is a compilation of 3D GPS velocities for the Alps by
# [Sánchez et al. (2018)](https://doi.org/10.1594/PANGAEA.886889).
# The horizontal velocities are referenced to the Eurasian frame.
# All velocity components and even the position have error estimates,
# which is very useful and rare to find in a lot of datasets.
#
# License: CC-BY
#
# Here, we download the data from 3 separate files (coordinates, vertical
# velocity, horizontal velocities) and make sure they are aligned and
# represent the same stations. There are some mistakes in the station names
# of the horizontal velocity file that we fix manually (verified by the
# coordinates).

# In[1]:

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pooch
import pygmt
import pyproj
import verde as vd

# ## Download the data
#
# Use Pooch to download and cache the data files.

# In[2]:

# Station coordinates (latitude, longitude, height and their errors).
fname_position = pooch.retrieve(
    url="https://store.pangaea.de/Publications/Sanchez-etal_2018/ALPS2017_NEH.CRD",
    known_hash="sha256:24b88a0e5ab6ea93c67424ef52542d8b8a8254a150284e1a54afddbfd93e4399",
)
# Velocities in the original (non-Eurasia-fixed) frame; we only use the
# vertical component from this file.
fname_velocity = pooch.retrieve(
    url="https://store.pangaea.de/Publications/Sanchez-etal_2018/ALPS2017_NEH.VEL",
    known_hash="sha256:0f2eff87a39260e2b3218897763dbfecdf0f464bf877bef460eff34a70e00aa7",
)
# Horizontal velocities referenced to the Eurasian plate.
fname_velocity_eurasia = pooch.retrieve(
    url="https://store.pangaea.de/Publications/Sanchez-etal_2018/ALPS2017_REP.VEL",
    known_hash="sha256:578677246230e893c828205391d262da4af39bb24a8ca66ff5a95a88c71fe509",
)

# ## Load the data
#
# These data are in a strange format and getting pandas to read it would be
# more work than parsing it by hand. So that's what we're going to do.
#
# First, the horizontal velocities since there are less points and we will
# only want the vertical and positions of these stations.
# In[3]:

# Parse the Eurasia-fixed horizontal velocity file by hand.
# NOTE: the original cell also declared `velocity_up_mm_yr` and
# `velocity_up_error_mm_yr` lists here, but never filled or used them in
# this section (they are rebuilt later for the vertical file) — removed.
station = []
velocity_north_mm_yr = []
velocity_north_error_mm_yr = []
velocity_east_mm_yr = []
velocity_east_error_mm_yr = []
with open(fname_velocity_eurasia, encoding="latin-1") as input_file:
    for i, line in enumerate(input_file):
        # Skip the 19-line header and any blank lines.
        if i < 19 or not line.strip():
            continue
        columns = line.split()
        station_id = columns[0]
        # Fix these names manually.
        # They were confirmed by comparing the coordinates.
        if station_id == "CH1Z":
            station_id = "CHIZ"
        if station_id == "IE1G":
            station_id = "IENG"
        values = columns[3:]
        station.append(station_id)
        # File stores velocities in m/yr; convert to mm/yr.
        velocity_east_mm_yr.append(1e3 * float(values[0]))
        velocity_north_mm_yr.append(1e3 * float(values[1]))
        velocity_east_error_mm_yr.append(1e3 * float(values[2]))
        velocity_north_error_mm_yr.append(1e3 * float(values[3]))
# Merge everything into a DataFrame.
# Use the station ID as the index to help us merge the data later.
data_horizontal = pd.DataFrame(
    data={
        "station_id": station,
        "velocity_east_mmyr": velocity_east_mm_yr,
        "velocity_north_mmyr": velocity_north_mm_yr,
        "velocity_east_error_mmyr": velocity_east_error_mm_yr,
        "velocity_north_error_mmyr": velocity_north_error_mm_yr,
    },
    index=station,
)
data_horizontal

# Now load the position and vertical velocity, keeping only the points that
# have the horizontal components as well.
# In[4]:

# Stations that have horizontal velocities. The original tested
# `station_id not in data_horizontal.station_id`, but `in` on a pandas
# Series checks the *index*, not the values — it only worked because the
# index was set to the station IDs. An explicit set is unambiguous and
# O(1) per lookup.
stations_with_horizontal = set(data_horizontal["station_id"])

station = []
latitude = []
latitude_error_m = []
longitude = []
longitude_error_m = []
height_m = []
height_error_m = []
seen = set()  # O(1) duplicate detection instead of scanning the list
with open(fname_position, encoding="latin-1") as input_file:
    for i, line in enumerate(input_file):
        # Skip the 15-line header, the trailing section, and blank lines.
        if i < 15 or i > 304 or not line.strip():
            continue
        columns = line.split()
        station_id = columns[0]
        # Rows with a 12th column carry an extra name field, shifting the
        # numeric values one column to the right.
        if len(columns) == 12:
            values = columns[2:8]
        else:
            values = columns[1:7]
        # Only interested in the stations that have horizontal
        if station_id not in stations_with_horizontal:
            continue
        # Skip repeated stations because it's easier this way
        if station_id in seen:
            continue
        seen.add(station_id)
        values = [float(x) for x in values]
        # Make longitude be in [-180, 180] for easier plotting.
        # Use the standard 180° cutoff instead of the original ad-hoc 300°
        # (equivalent for this dataset, which has no stations between 180°E
        # and 300°E, but correct in general).
        if values[2] > 180:
            values[2] -= 360
        station.append(station_id)
        latitude.append(values[0])
        latitude_error_m.append(values[1])
        longitude.append(values[2])
        longitude_error_m.append(values[3])
        height_m.append(values[4])
        height_error_m.append(values[5])
# Merge everything into a DataFrame.
data_position = pd.DataFrame(
    data={
        "station_id": station,
        "latitude": latitude,
        "longitude": longitude,
        "height_m": height_m,
        "latitude_error_m": latitude_error_m,
        "longitude_error_m": longitude_error_m,
        "height_error_m": height_error_m,
    },
    index=station,
)
data_position

# In[5]:

# Parse the vertical velocities, keeping the same stations as above.
station = []
velocity_up_mm_yr = []
velocity_up_error_mm_yr = []
seen = set()
with open(fname_velocity, encoding="latin-1") as input_file:
    for i, line in enumerate(input_file):
        # Skip the 15-line header, the trailing section, and blank lines.
        if i < 15 or i > 303 or not line.strip():
            continue
        columns = line.split()
        station_id = columns[0]
        # Same variable-column layout as the coordinates file.
        if len(columns) == 12:
            values = columns[6:8]
        else:
            values = columns[5:7]
        # Only interested in the stations that have horizontal
        if station_id not in stations_with_horizontal:
            continue
        # Skip repeated stations because it's easier this way
        if station_id in seen:
            continue
        seen.add(station_id)
        station.append(station_id)
        # File stores velocities in m/yr; convert to mm/yr.
        velocity_up_mm_yr.append(1e3 * float(values[0]))
        velocity_up_error_mm_yr.append(1e3 * float(values[1]))
# Merge everything into a DataFrame.
data_vertical = pd.DataFrame(
    data={
        "station_id": station,
        "velocity_up_mmyr": velocity_up_mm_yr,
        "velocity_up_error_mmyr": velocity_up_error_mm_yr,
    },
    index=station,
)
data_vertical

# Merge all of the DataFrames into a single one.

# In[6]:

data = pd.merge(pd.merge(data_horizontal, data_vertical), data_position)
data

# ## Plot the data
#
# Make a quick plot of the data to see if it looks the same as the paper
# figures.
# In[7]:

# Angle (degrees counterclockwise from east) and magnitude of the
# horizontal velocity vectors, for GMT-style vector plotting.
angle = np.degrees(np.arctan2(data.velocity_north_mmyr, data.velocity_east_mmyr))
length = np.hypot(data.velocity_north_mmyr, data.velocity_east_mmyr)

# In[8]:

# Bounding region of the data padded by 1 degree on every side.
region = vd.pad_region(vd.get_region((data.longitude, data.latitude)), pad=1)

# In[9]:

fig = pygmt.Figure()
with fig.subplot(
    nrows=1,
    ncols=2,
    figsize=("35c", "15c"),
    sharey="l",  # shared y-axis on the left side
    frame="WSrt",
):
    with fig.set_panel(0):
        fig.basemap(region=region, projection="M?", frame="af")
        fig.coast(area_thresh=1e4, land="#eeeeee")
        # Scale so the longest vector is 2 cm on the map.
        scale_factor = 2 / length.max()
        fig.plot(
            x=data.longitude,
            y=data.latitude,
            direction=[angle, length * scale_factor],
            style="v0.15c+e",
            color="blue",
            pen="1p,blue",
        )
        # Plot a quiver caption
        fig.plot(
            x=-4,
            y=42,
            direction=[[0], [1 * scale_factor]],
            style="v0.15c+e",
            color="blue",
            pen="1p,blue",
        )
        fig.text(
            x=-4,
            y=42.2,
            # Fixed: this was an f-string with no placeholders (ruff F541).
            text="1 mm/yr",
            justify="BL",
            font="10p,Helvetica,blue",
        )
    with fig.set_panel(1):
        fig.basemap(region=region, projection="M?", frame="af")
        fig.coast(area_thresh=1e4, land="#eeeeee")
        pygmt.makecpt(
            cmap="polar",
            series=[data.velocity_up_mmyr.min(), data.velocity_up_mmyr.max()],
        )
        fig.plot(
            x=data.longitude,
            y=data.latitude,
            color=data.velocity_up_mmyr,
            style="c0.2c",
            cmap=True,
            pen="0.5p,black",
        )
        fig.colorbar(
            frame='af+l"vertical velocity [mm/yr]"',
            position="jTL+w7c/0.3c+h+o1/1",
        )
fig.savefig("preview.jpg", dpi=200)
fig.show(width=1000)

# This looks very similar to the plots in
# [Sánchez et al. (2018)](https://doi.org/10.5194/essd-10-1503-2018).

# ## Export
#
# Save the data to a compressed CSV.
# In[10]:


def _format_column(series, template):
    """Render every value of *series* as a string using *template*."""
    return series.map(template.format)


# Fixed-precision string columns keep the CSV compact and reproducible.
export = pd.DataFrame(
    {
        "station_id": data.station_id,
        "longitude": _format_column(data.longitude, "{:.7f}"),
        "latitude": _format_column(data.latitude, "{:.7f}"),
        "height_m": _format_column(data.height_m, "{:.3f}"),
        "velocity_east_mmyr": _format_column(data.velocity_east_mmyr, "{:.1f}"),
        "velocity_north_mmyr": _format_column(data.velocity_north_mmyr, "{:.1f}"),
        "velocity_up_mmyr": _format_column(data.velocity_up_mmyr, "{:.1f}"),
        "longitude_error_m": _format_column(data.longitude_error_m, "{:.4f}"),
        "latitude_error_m": _format_column(data.latitude_error_m, "{:.4f}"),
        "height_error_m": _format_column(data.height_error_m, "{:.3f}"),
        "velocity_east_error_mmyr": _format_column(
            data.velocity_east_error_mmyr, "{:.1f}"
        ),
        "velocity_north_error_mmyr": _format_column(
            data.velocity_north_error_mmyr, "{:.1f}"
        ),
        "velocity_up_error_mmyr": _format_column(
            data.velocity_up_error_mmyr, "{:.1f}"
        ),
    }
)
export

# In[11]:

# The .xz extension makes pandas apply LZMA compression automatically.
fname = "alps-gps-velocity.csv.xz"
export.to_csv(fname, index=False)
print(fname)
print(f"size: {os.path.getsize(fname) / 1e6} Mb")
for alg in ["md5", "sha256"]:
    print(f"{alg}:{pooch.file_hash(fname, alg=alg)}")

# ## Read back the data and plot it
#
# Verify that the output didn't corrupt anything.
# In[12]:

gps = pd.read_csv(fname)
gps

# In[13]:

# Mercator projection (true scale at the mean latitude) to plot in meters.
projection = pyproj.Proj(proj="merc", lat_ts=gps.latitude.mean())
easting, northing = projection(gps.longitude, gps.latitude)

# In[14]:

fig, axes = plt.subplots(1, 2, figsize=(18, 6))
ax_horizontal, ax_vertical = axes

# Plot the horizontal velocity vectors
ax_horizontal.set_title("Horizontal velocities")
ax_horizontal.quiver(
    easting,
    northing,
    gps.velocity_east_mmyr.values,
    gps.velocity_north_mmyr.values,
    scale=30,
    width=0.002,
)
ax_horizontal.set_aspect("equal")
ax_horizontal.set_xlabel("easting (m)")
ax_horizontal.set_ylabel("northing (m)")

# Plot the vertical velocity with a symmetric diverging colormap.
ax_vertical.set_title("Vertical velocity")
largest_abs = vd.maxabs(gps.velocity_up_mmyr)
scatter = ax_vertical.scatter(
    easting,
    northing,
    c=gps.velocity_up_mmyr,
    s=30,
    vmin=-largest_abs / 3,
    vmax=largest_abs / 3,
    cmap="seismic",
)
plt.colorbar(scatter, ax=ax_vertical, label="mm/year", pad=0, aspect=50)
ax_vertical.set_aspect("equal")
ax_vertical.set_xlabel("easting (m)")
ax_vertical.set_ylabel("northing (m)")

plt.tight_layout(pad=0)
plt.show()