#!/usr/bin/env python
# coding: utf-8

# # Osborne Mine and Lightning Creek Sill Complex aeromagnetic data
# 
# This is a section of a survey acquired in 1990 by the Queensland Government, Australia. The data are good quality with approximately 80 m terrain clearance and 200 m line spacing. The section contains the total field magnetic anomalies associated with the Osborne Mine, Lightning Creek sill complex, and the Brumby prospect. 
# 
# > **WARNING**: This notebook required \~9 Gb of RAM to run. The dataset is a bit large (\~3 Gb on disk and ~90 million points) and the GA provided netCDF file is not very useful for out-of-memory indexing since the latitude and longitude aren't properly set as coordinates. 
# 
# License: [CC-BY](http://pid.geoscience.gov.au/dataset/ga/142419)
# 
# Original source: Geophysical Acquisition & Processing Section 2019. MIM Data from Mt Isa Inlier, QLD (P1029), magnetic line data, AWAGS levelled. Geoscience Australia, Canberra. http://pid.geoscience.gov.au/dataset/ga/142419
# 
# Useful references for prior interpretations and geological context:
# 
# * [Austin, et al. (2013)](https://doi.org/10.1190/INT-2013-0005.1)
# * [Gazley et al. (2016)](https://publications.csiro.au/rpr/download?pid=csiro:EP165511&dsid=DS2)
# * [Rezaie (2021)](https://doi.org/10.1007/s00024-021-02747-6)
# * [Elllis et al. (2019)](https://doi.org/10.1071/ASEG2012ab117)
# * [Perring et al. (2000)](https://doi.org/10.2113/gsecongeo.95.5.1067)

# In[1]:


import os
import numpy as np
import xarray as xr
import pandas as pd
import pygmt
import pyproj
import verde as vd
import pooch


# ## Download the data archive
# 
# Use Pooch to download the data archive provided by Geoscience Australia in netCDF format.

# In[2]:


fname = pooch.retrieve(
    url="http://dapds00.nci.org.au/thredds/fileServer/iv65/Geoscience_Australia_Geophysics_Reference_Data_Collection/airborne_geophysics/QLD/line/P1029/P1029-line-magnetic-AWAGS_MAG_2010.nc",
    known_hash="sha256:119b472da05365f0df4e9dc0b2d4b0e5c213705acb4f79efcaa07e1aeeb7242c",
)


# Check the file size in Mb just so we know what we're dealing with.

# In[3]:


print(f"size: {os.path.getsize(fname) / 1e6 : 7.2f} Mb")


# ## Load the data
# 
# That's around 3 Gb of data (likely compressed) so it's best to open the `xarray.Dataset` without reading everything into memory at once (as `xarray.load_dataset` would do).

# In[4]:


data_nc = xr.open_dataset(fname)
data_nc


# The fields we're interested in are:
# 
# * Coordinates which are in the GDA94 datum
# * AWAGS leveled magnetic anomaly (since the observed field values aren't present)
# * The terrain clearance (which is what I'm assuming `altitude` is since it can't be observation height)
# * The line number in case we need to group points by line
# 
# We'll read these fields only into a `pandas.DataFrame` for easier slicing.

# In[5]:


data_full = pd.DataFrame({
    "longitude": data_nc.longitude.data,
    "latitude": data_nc.latitude.data,
    "terrain_clearance_m": data_nc.altitude.data.astype(np.float32),
    "total_field_anomaly_nt": data_nc.mag_awagsLevelled.data.astype(np.float32),
    "flight_line": data_nc.line_index.data.astype(np.uint16),
})


# Now we can cut the data to a smaller region containing only the anomalies we want. This will help make the file size smaller and operations faster for demonstrations and tutorials.

# In[6]:


# West, East, South, North
region = (140 + 30 / 60, 140 + 50 / 60, -22 - 10 / 60, -21 - 45 / 60)
selection = vd.inside((data_full.longitude, data_full.latitude), region)
data = data_full[selection].reset_index(drop=True).copy()
data


# ## Convert the horizontal coordinates
# 
# Now that we have less data, we can convert the coordinates into WGS84 for uniformity with our other datasets. 

# In[7]:


gda_to_wgs = pyproj.Transformer.from_crs("epsg:4283", "epsg:4326", always_xy=True)
longitude, latitude = gda_to_wgs.transform(
    data.longitude.values, 
    data.latitude.values,
)
data = data.assign(longitude=longitude, latitude=latitude)
data


# ## Calculate the observation height
# 
# The data only contain the terrain clearance information, so we won't know the height of observations unless we know the height of topography. 
# 
# We can use PyGMT to get a high-resolution grid of SRTM topography (as orthometric height) and then interpolate the values onto our data locations. We'll get a slightly larger grid than our data region to avoid edge effects during interpolation.

# In[8]:


srtm = vd.grid_to_table(pygmt.grdcut("@earth_relief_01s_g", region=vd.pad_region(region, 2 / 60)))
srtm


# Now that we have the topography, we'll do the interpolation using a fast nearest-neighbor method. Since we only have access to a Cartesian version of the interpolation, we need to first project the input coordinates (using a Mercator projection for simplicity).

# In[9]:


projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

nearest = vd.ScipyGridder(method="nearest")
nearest.fit(projection(srtm.lon.values, srtm.lat.values), srtm.z)
topography = nearest.predict(projection(data.longitude.values, data.latitude.values))

data = data.assign(
    height_orthometric_m=topography + data.terrain_clearance_m,
)
data


# ## Inspect the data
# 
# Make a quick plot of the data to see what it looks like. We'll use a blocked median filter to reduce the data size a bit for plotting.

# In[10]:


spacing = 3 / 3600
reducer = vd.BlockReduce(np.median, spacing=spacing)
coordinates, (total_field_anomaly, height) = reducer.filter(
    (data.longitude, data.latitude), 
    (data.total_field_anomaly_nt, data.height_orthometric_m),
)


# In[11]:


fig = pygmt.Figure()
with fig.subplot(
    nrows=1,
    ncols=2,
    figsize=("30c", "20c"),
    sharey="l",  # shared y-axis on the left side
    frame="WSrt",
):
    with fig.set_panel(0):
        fig.basemap(projection="M?", region=region, frame="af")
        scale = 1500
        pygmt.makecpt(cmap="polar+h", series=[-scale, scale], background=True)
        fig.plot(
            x=coordinates[0],
            y=coordinates[1],
            color=total_field_anomaly,
            style="c0.075c",
            cmap=True,
        )
        fig.colorbar(
            frame='af+l"total field magnetic anomaly [nT]"', 
            position="JBC+h+o0/1c+e",
        )
    with fig.set_panel(1):
        fig.basemap(projection="M?", region=region, frame="af")
        pygmt.makecpt(cmap="viridis", series=[height.min(), height.max()])
        fig.plot(
            x=coordinates[0],
            y=coordinates[1],
            color=height,
            style="c0.075c",
            cmap=True,
        )
        fig.colorbar(
            frame='af+l"observation height [m]"', 
            position="JBC+h+o0/1c",
        )
fig.savefig("preview.jpg", dpi=200)
fig.show(width=800)


# The Osborne Mine is the small dipolar anomaly in the Southwest of the map. The Lightning Creek complex is the large anomaly in the Northeast.

# ## Export
# 
# Save the data to a compressed CSV file for easier loading with pandas directly. We'll first make a string encoded version of the `DataFrame` so we can control the precision of the output and save some storage space. For our purposes, 1 m precision should be more than enough for coordinates and topography and 1 nT for the magnetic anomaly (~0.1% of the anomaly amplitudes).

# In[12]:


export = pd.DataFrame({  
    "flight_line": data.flight_line, 
    "longitude": data.longitude.map(lambda x: "{:.5f}".format(x)),
    "latitude": data.latitude.map(lambda x: "{:.5f}".format(x)),    
    "height_orthometric_m": data.height_orthometric_m.map(lambda x: "{:.0f}".format(x)),   
    "total_field_anomaly_nt": data.total_field_anomaly_nt.map(lambda x: "{:.0f}".format(x)), 
})
export


# Save the export data and calculate the file size and hashes.

# In[13]:


fname = "osborne-magnetic.csv.xz"
export.to_csv(fname, index=False)
print(fname)
print(f"size: {os.path.getsize(fname) / 1e6} Mb")
for alg in ["md5", "sha256"]:
    print(f"{alg}:{pooch.file_hash(fname, alg=alg)}")


# ## Read the data back
# 
# To check that everything worked well, read the data back and make a plot.

# In[14]:


data_back = pd.read_csv(fname)
data_back


# In[15]:


coordinates, (total_field_anomaly, height) = reducer.filter(
    (data_back.longitude, data_back.latitude), 
    (data_back.total_field_anomaly_nt, data_back.height_orthometric_m),
)


# In[16]:


fig = pygmt.Figure()
with fig.subplot(
    nrows=1,
    ncols=2,
    figsize=("30c", "20c"),
    sharey="l",  # shared y-axis on the left side
    frame="WSrt",
):
    with fig.set_panel(0):
        fig.basemap(projection="M?", region=region, frame="af")
        scale = 1500
        pygmt.makecpt(cmap="polar+h", series=[-scale, scale], background=True)
        fig.plot(
            x=coordinates[0],
            y=coordinates[1],
            color=total_field_anomaly,
            style="c0.075c",
            cmap=True,
        )
        fig.colorbar(
            frame='af+l"total field magnetic anomaly [nT]"', 
            position="JBC+h+o0/1c+e",
        )
    with fig.set_panel(1):
        fig.basemap(projection="M?", region=region, frame="af")
        pygmt.makecpt(cmap="viridis", series=[height.min(), height.max()])
        fig.plot(
            x=coordinates[0],
            y=coordinates[1],
            color=height,
            style="c0.075c",
            cmap=True,
        )
        fig.colorbar(
            frame='af+l"observation height [m]"', 
            position="JBC+h+o0/1c",
        )
fig.show(width=800)