#!/usr/bin/env python
# coding: utf-8

# # GPS velocities for the Alpine region
#
# This is a compilation of 3D GPS velocities for the Alps by
# [Sánchez et al. (2018)](https://doi.org/10.1594/PANGAEA.886889).
# The horizontal velocities are referenced to the Eurasian frame.
# All velocity components and even the position have error estimates,
# which is very useful and rare to find in a lot of datasets.
#
# License: CC-BY
#
# Here, we download the data from 3 separate files (coordinates, vertical
# velocity, horizontal velocities) and make sure they are aligned and
# represent the same stations. There are some mistakes in the station names
# of the horizontal velocity file that we fix manually (verified by the
# coordinates).

# In[1]:

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pooch
import pygmt
import pyproj
import verde as vd

# ## Download the data
#
# Use Pooch to download and cache the data files.

# In[2]:

# Station coordinates (latitude, longitude, height and their errors).
fname_position = pooch.retrieve(
    url="https://store.pangaea.de/Publications/Sanchez-etal_2018/ALPS2017_NEH.CRD",
    known_hash="sha256:24b88a0e5ab6ea93c67424ef52542d8b8a8254a150284e1a54afddbfd93e4399",
)
# Velocities in the original (non-Eurasia-fixed) frame; we only use the
# vertical component from this file.
fname_velocity = pooch.retrieve(
    url="https://store.pangaea.de/Publications/Sanchez-etal_2018/ALPS2017_NEH.VEL",
    known_hash="sha256:0f2eff87a39260e2b3218897763dbfecdf0f464bf877bef460eff34a70e00aa7",
)
# Horizontal velocities referenced to the Eurasian plate.
fname_velocity_eurasia = pooch.retrieve(
    url="https://store.pangaea.de/Publications/Sanchez-etal_2018/ALPS2017_REP.VEL",
    known_hash="sha256:578677246230e893c828205391d262da4af39bb24a8ca66ff5a95a88c71fe509",
)

# ## Load the data
#
# These data are in a strange format and getting pandas to read it would be
# more work than parsing it by hand. So that's what we're going to do.
#
# First, the horizontal velocities since there are less points and we will
# only want the vertical and positions of these stations.
# In[3]:

# Parse the Eurasia-fixed horizontal velocity file by hand.
# NOTE: the original cell also declared `velocity_up_mm_yr` and
# `velocity_up_error_mm_yr` lists here, but never filled or used them in
# this section (they are rebuilt later for the vertical file) — removed.
station = []
velocity_north_mm_yr = []
velocity_north_error_mm_yr = []
velocity_east_mm_yr = []
velocity_east_error_mm_yr = []
with open(fname_velocity_eurasia, encoding="latin-1") as input_file:
    for i, line in enumerate(input_file):
        # Skip the 19-line header and any blank lines.
        if i < 19 or not line.strip():
            continue
        columns = line.split()
        station_id = columns[0]
        # Fix these names manually.
        # They were confirmed by comparing the coordinates.
        if station_id == "CH1Z":
            station_id = "CHIZ"
        if station_id == "IE1G":
            station_id = "IENG"
        values = columns[3:]
        station.append(station_id)
        # File stores velocities in m/yr; convert to mm/yr.
        velocity_east_mm_yr.append(1e3 * float(values[0]))
        velocity_north_mm_yr.append(1e3 * float(values[1]))
        velocity_east_error_mm_yr.append(1e3 * float(values[2]))
        velocity_north_error_mm_yr.append(1e3 * float(values[3]))
# Merge everything into a DataFrame.
# Use the station ID as the index to help us merge the data later.
data_horizontal = pd.DataFrame(
    data={
        "station_id": station,
        "velocity_east_mmyr": velocity_east_mm_yr,
        "velocity_north_mmyr": velocity_north_mm_yr,
        "velocity_east_error_mmyr": velocity_east_error_mm_yr,
        "velocity_north_error_mmyr": velocity_north_error_mm_yr,
    },
    index=station,
)
data_horizontal

# Now load the position and vertical velocity, keeping only the points that
# have the horizontal components as well.
# In[4]:

# Stations that have horizontal velocities. The original tested
# `station_id not in data_horizontal.station_id`, but `in` on a pandas
# Series checks the *index*, not the values — it only worked because the
# index was set to the station IDs. An explicit set is unambiguous and
# O(1) per lookup.
stations_with_horizontal = set(data_horizontal["station_id"])

station = []
latitude = []
latitude_error_m = []
longitude = []
longitude_error_m = []
height_m = []
height_error_m = []
seen = set()  # O(1) duplicate detection instead of scanning the list
with open(fname_position, encoding="latin-1") as input_file:
    for i, line in enumerate(input_file):
        # Skip the 15-line header, the trailing section, and blank lines.
        if i < 15 or i > 304 or not line.strip():
            continue
        columns = line.split()
        station_id = columns[0]
        # Rows with a 12th column carry an extra name field, shifting the
        # numeric values one column to the right.
        if len(columns) == 12:
            values = columns[2:8]
        else:
            values = columns[1:7]
        # Only interested in the stations that have horizontal
        if station_id not in stations_with_horizontal:
            continue
        # Skip repeated stations because it's easier this way
        if station_id in seen:
            continue
        seen.add(station_id)
        values = [float(x) for x in values]
        # Make longitude be in [-180, 180] for easier plotting.
        # Use the standard 180° cutoff instead of the original ad-hoc 300°
        # (equivalent for this dataset, which has no stations between 180°E
        # and 300°E, but correct in general).
        if values[2] > 180:
            values[2] -= 360
        station.append(station_id)
        latitude.append(values[0])
        latitude_error_m.append(values[1])
        longitude.append(values[2])
        longitude_error_m.append(values[3])
        height_m.append(values[4])
        height_error_m.append(values[5])
# Merge everything into a DataFrame.
data_position = pd.DataFrame(
    data={
        "station_id": station,
        "latitude": latitude,
        "longitude": longitude,
        "height_m": height_m,
        "latitude_error_m": latitude_error_m,
        "longitude_error_m": longitude_error_m,
        "height_error_m": height_error_m,
    },
    index=station,
)
data_position

# In[5]:

# Parse the vertical velocities, keeping the same stations as above.
station = []
velocity_up_mm_yr = []
velocity_up_error_mm_yr = []
seen = set()
with open(fname_velocity, encoding="latin-1") as input_file:
    for i, line in enumerate(input_file):
        # Skip the 15-line header, the trailing section, and blank lines.
        if i < 15 or i > 303 or not line.strip():
            continue
        columns = line.split()
        station_id = columns[0]
        # Same variable-column layout as the coordinates file.
        if len(columns) == 12:
            values = columns[6:8]
        else:
            values = columns[5:7]
        # Only interested in the stations that have horizontal
        if station_id not in stations_with_horizontal:
            continue
        # Skip repeated stations because it's easier this way
        if station_id in seen:
            continue
        seen.add(station_id)
        station.append(station_id)
        # File stores velocities in m/yr; convert to mm/yr.
        velocity_up_mm_yr.append(1e3 * float(values[0]))
        velocity_up_error_mm_yr.append(1e3 * float(values[1]))
# Merge everything into a DataFrame.
data_vertical = pd.DataFrame(
    data={
        "station_id": station,
        "velocity_up_mmyr": velocity_up_mm_yr,
        "velocity_up_error_mmyr": velocity_up_error_mm_yr,
    },
    index=station,
)
data_vertical

# Merge all of the DataFrames into a single one.

# In[6]:

data = pd.merge(pd.merge(data_horizontal, data_vertical), data_position)
data

# ## Plot the data
#
# Make a quick plot of the data to see if it looks the same as the paper
# figures.
# In[7]:

# Angle (degrees counterclockwise from east) and magnitude of the
# horizontal velocity vectors, for GMT-style vector plotting.
angle = np.degrees(np.arctan2(data.velocity_north_mmyr, data.velocity_east_mmyr))
length = np.hypot(data.velocity_north_mmyr, data.velocity_east_mmyr)

# In[8]:

# Bounding region of the data padded by 1 degree on every side.
region = vd.pad_region(vd.get_region((data.longitude, data.latitude)), pad=1)

# In[9]:

fig = pygmt.Figure()
with fig.subplot(
    nrows=1,
    ncols=2,
    figsize=("35c", "15c"),
    sharey="l",  # shared y-axis on the left side
    frame="WSrt",
):
    with fig.set_panel(0):
        fig.basemap(region=region, projection="M?", frame="af")
        fig.coast(area_thresh=1e4, land="#eeeeee")
        # Scale so the longest vector is 2 cm on the map.
        scale_factor = 2 / length.max()
        fig.plot(
            x=data.longitude,
            y=data.latitude,
            direction=[angle, length * scale_factor],
            style="v0.15c+e",
            color="blue",
            pen="1p,blue",
        )
        # Plot a quiver caption
        fig.plot(
            x=-4,
            y=42,
            direction=[[0], [1 * scale_factor]],
            style="v0.15c+e",
            color="blue",
            pen="1p,blue",
        )
        fig.text(
            x=-4,
            y=42.2,
            # Fixed: this was an f-string with no placeholders (ruff F541).
            text="1 mm/yr",
            justify="BL",
            font="10p,Helvetica,blue",
        )
    with fig.set_panel(1):
        fig.basemap(region=region, projection="M?", frame="af")
        fig.coast(area_thresh=1e4, land="#eeeeee")
        pygmt.makecpt(
            cmap="polar",
            series=[data.velocity_up_mmyr.min(), data.velocity_up_mmyr.max()],
        )
        fig.plot(
            x=data.longitude,
            y=data.latitude,
            color=data.velocity_up_mmyr,
            style="c0.2c",
            cmap=True,
            pen="0.5p,black",
        )
        fig.colorbar(
            frame='af+l"vertical velocity [mm/yr]"',
            position="jTL+w7c/0.3c+h+o1/1",
        )
fig.savefig("preview.jpg", dpi=200)
fig.show(width=1000)

# This looks very similar to the plots in
# [Sánchez et al. (2018)](https://doi.org/10.5194/essd-10-1503-2018).

# ## Export
#
# Save the data to a compressed CSV.
# In[10]:


def _format_column(series, template):
    """Render every value of *series* as a string using *template*."""
    return series.map(template.format)


# Fixed-precision string columns keep the CSV compact and reproducible.
export = pd.DataFrame(
    {
        "station_id": data.station_id,
        "longitude": _format_column(data.longitude, "{:.7f}"),
        "latitude": _format_column(data.latitude, "{:.7f}"),
        "height_m": _format_column(data.height_m, "{:.3f}"),
        "velocity_east_mmyr": _format_column(data.velocity_east_mmyr, "{:.1f}"),
        "velocity_north_mmyr": _format_column(data.velocity_north_mmyr, "{:.1f}"),
        "velocity_up_mmyr": _format_column(data.velocity_up_mmyr, "{:.1f}"),
        "longitude_error_m": _format_column(data.longitude_error_m, "{:.4f}"),
        "latitude_error_m": _format_column(data.latitude_error_m, "{:.4f}"),
        "height_error_m": _format_column(data.height_error_m, "{:.3f}"),
        "velocity_east_error_mmyr": _format_column(
            data.velocity_east_error_mmyr, "{:.1f}"
        ),
        "velocity_north_error_mmyr": _format_column(
            data.velocity_north_error_mmyr, "{:.1f}"
        ),
        "velocity_up_error_mmyr": _format_column(
            data.velocity_up_error_mmyr, "{:.1f}"
        ),
    }
)
export

# In[11]:

# The .xz extension makes pandas apply LZMA compression automatically.
fname = "alps-gps-velocity.csv.xz"
export.to_csv(fname, index=False)
print(fname)
print(f"size: {os.path.getsize(fname) / 1e6} Mb")
for alg in ["md5", "sha256"]:
    print(f"{alg}:{pooch.file_hash(fname, alg=alg)}")

# ## Read back the data and plot it
#
# Verify that the output didn't corrupt anything.
# In[12]:

gps = pd.read_csv(fname)
gps

# In[13]:

# Mercator projection (true scale at the mean latitude) to plot in meters.
projection = pyproj.Proj(proj="merc", lat_ts=gps.latitude.mean())
easting, northing = projection(gps.longitude, gps.latitude)

# In[14]:

fig, axes = plt.subplots(1, 2, figsize=(18, 6))
ax_horizontal, ax_vertical = axes

# Plot the horizontal velocity vectors
ax_horizontal.set_title("Horizontal velocities")
ax_horizontal.quiver(
    easting,
    northing,
    gps.velocity_east_mmyr.values,
    gps.velocity_north_mmyr.values,
    scale=30,
    width=0.002,
)
ax_horizontal.set_aspect("equal")
ax_horizontal.set_xlabel("easting (m)")
ax_horizontal.set_ylabel("northing (m)")

# Plot the vertical velocity with a symmetric diverging colormap.
ax_vertical.set_title("Vertical velocity")
largest_abs = vd.maxabs(gps.velocity_up_mmyr)
scatter = ax_vertical.scatter(
    easting,
    northing,
    c=gps.velocity_up_mmyr,
    s=30,
    vmin=-largest_abs / 3,
    vmax=largest_abs / 3,
    cmap="seismic",
)
plt.colorbar(scatter, ax=ax_vertical, label="mm/year", pad=0, aspect=50)
ax_vertical.set_aspect("equal")
ax_vertical.set_xlabel("easting (m)")
ax_vertical.set_ylabel("northing (m)")

plt.tight_layout(pad=0)
plt.show()