#!/usr/bin/env python
# coding: utf-8

# # Exercise 11.1
# ## Air-shower reconstruction
# Follow the description of a cosmic-ray observatory in Example 11.2 and Fig. 11.2(b).
# The simulated data contain 9 × 9 detector stations which record traversing particles from the cosmic-ray induced air shower.  
# Each station measures two quantities, which are stored in the form of a map (2D array) corresponding to the station positions in offset coordinates:
# - arrival time `T`: time point of detection in seconds
# - signal `S`: signal strength in arbitrary units
# 
# The following shower properties need to be reconstructed:
# - `axis`: x, y, z unit vector of the shower arrival direction
# - `core`: position of the shower core in meters
# - `logE`: $\log_{10} (E / \mathrm{eV})$, energy of the cosmic ray
# 
# Reconstruct the properties of the arriving cosmic rays by analyzing their
# air showers:
# 
# ### Tasks 
# 1. Set up a multi-task regression network for simultaneously predicting shower direction, shower core position, and energy. The network should consist of a common part to the three properties, followed by an individual subnetwork for each property. Combine the mean squared errors of the different properties using weights $w_j$.  
# 
# 
# 2. Train the model to the following precisions:
# - Better than $1.5^\circ$ angular resolution
# - Better than $25$ m core position resolution
# - Better than $10\%$ relative energy uncertainty $\left(\frac{E_\mathrm{pred} - E_\mathrm{true}}{E_\mathrm{true}}\right)$
# 
#   Estimate what these requirements mean in terms of the mean squared error loss and adjust the relative weights in the objective function accordingly.  
# 
# 3. Plot and interpret the resulting training curves, both with and without the weights $w_j$ in the objective function.
# 
# 
# ##### Hint: using a GPU for this task may be advantageous!

# In[3]:


from tensorflow import keras
import numpy as np
from matplotlib import pyplot as plt

layers = keras.layers


# ### Download data

# In[4]:


import os
import gdown
url = "https://drive.google.com/u/0/uc?export=download&confirm=HgGH&id=1nQDddS36y4AcJ87ocoMjyx46HGueiU6k"
output = 'airshowers.npz'

if os.path.exists(output) == False:
    gdown.download(url, output, quiet=True)

f = np.load(output)


# ### Input 1: arrival times

# In[5]:


# time map
T = f['time']
T -= np.nanmean(T)
T /= np.nanstd(T)
T[np.isnan(T)] = 0

print(T.shape)


# #### Plot four example arrival time maps

# In[6]:


nsamples=len(T)
random_samples = np.random.choice(nsamples, 4)

def rectangular_array(n=9):
    """ Return x,y coordinates for rectangular array with n^2 stations. """
    n0 = (n - 1) / 2
    return (np.mgrid[0:n, 0:n].astype(float) - n0)


for i,j in enumerate(random_samples):
    plt.subplot(2,2,i+1)
    footprint=T[j,...]
    xd, yd = rectangular_array()
    mask = footprint != 0
    mask[5, 5] = True
    marker_size = 50 * footprint[mask]
    plot = plt.scatter(xd, yd, c='grey', s=10, alpha=0.3, label="silent")
    circles = plt.scatter(xd[mask], yd[mask], c=footprint[mask], 
                          cmap="viridis", alpha=1, label="loud")
    cbar = plt.colorbar(circles)
    cbar.set_label('normalized time [a.u.]')
    plt.grid(True)

plt.tight_layout()
plt.show()


# ### Input 2: signals

# In[7]:


# signal map
S = f['signal']
S = np.log10(1 + S)
S -= np.nanmin(S)
S /= np.nanmax(S)
S[np.isnan(S)] = 0

print(S.shape)


# In[8]:


for i,j in enumerate(random_samples):
    plt.subplot(2,2,i+1)
    footprint=S[j,...]
    xd, yd = rectangular_array()
    mask = footprint != 0
    mask[5, 5] = True
    marker_size = 200 * footprint[mask] + 20
    plot = plt.scatter(xd, yd, c='grey', s=10, alpha=0.3, label="silent")
    circles = plt.scatter(xd[mask], yd[mask], c=footprint[mask], s=marker_size,
                          cmap="autumn_r", alpha=1, label="loud")
    cbar = plt.colorbar(circles)
    cbar.set_label('normalized signals [a.u.]')
    plt.grid(True)

plt.tight_layout()
plt.show()


# Combine inputs

# In[9]:


X = np.stack([T, S], axis=-1)


# ### Labels

# In[10]:


axis = f['showeraxis']


# In[11]:


core = f['showercore'][:, 0:2]
core /= 750


# In[12]:


# energy - log10(E/eV) in range [18.5, 20]
logE = f['logE']
logE -= 19.25


# In[13]:


X_train, X_test = np.split(X, [-20000])
axis_train, axis_test = np.split(axis, [-20000])
core_train, core_test = np.split(core, [-20000])
logE_train, logE_test = np.split(logE, [-20000])


# ## Define Model
# ## Task 
# Set up a multi-task regression network for simultaneously predicting shower direction, shower core position, and energy. The network should consist of a common part to the three properties, followed by an individual subnetwork for each property.

# In[ ]:


input1 = layers.Input(shape=(9, 9, 2))

# TODO: define a suitable network consisting of 2 parts:
# 1) a common network part (you can try a convolutional stack with ResNet- or
#    DenseNet-like shortcuts)
#   z = ...
# 2) separate network parts for the individual objectives
#   z1 = ...
#   z2 = ...
#   z3 = ...

output1 = layers.Dense(3, name='direction')(z1)
output2 = layers.Dense(2, name='core')(z2)
output3 = layers.Dense(1, name='energy')(z3)


# ### Task
# Train the model to the following precisions:
# - Better than $1.5^\circ$ angular resolution
# - Better than $25$ m core position resolution
# - Better than $10\%$ relative energy uncertainty $\left(\frac{E_\mathrm{pred} - E_\mathrm{true}}{E_\mathrm{true}}\right)$  
#   Estimate what these requirements mean in terms of the mean squared error loss and adjust the relative weights in the objective function accordingly.

# In[ ]:


loss_weights=[1, 1, 1]  # you can give more weight to individual objectives

model.compile(
    loss=['mse', 'mse', 'mse'],
    loss_weights=loss_weights,
    optimizer=keras.optimizers.Adam(lr=1e-3))


# In[ ]:


from keras.callbacks import ReduceLROnPlateau, EarlyStopping

fit = model.fit(
    X_train,
    [axis_train, core_train, logE_train],
    batch_size=128,
    epochs=40,
    verbose=2,
    validation_split=0.1,
    callbacks=[ReduceLROnPlateau(factor=0.67, patience=10, verbose=1),
               EarlyStopping(patience=20, verbose=1)])


# ### Plot training curves

# In[ ]:


def plot_history(history, weighted=False):
    fig, ax = plt.subplots(1)
    n = np.arange(len(history['loss']))
    for i, s in enumerate(['axis', 'core', 'energy']):
        w = loss_weights[i] if weighted else 1
        l1 = w * np.array(history['%s_loss' % s])
        l2 = w * np.array(history['val_%s_loss' % s])
        color = 'C%i' % i
        ax.plot(n, l1, c=color, ls='--')
        ax.plot(n, l2, c=color, label=s)

    ax.plot(n, history['loss'], ls='--', c='k')
    ax.plot(n, history['val_loss'], label='sum', c='k')

    ax.set_xlabel('Epoch')
    ax.set_ylabel('Weighted Loss' if weighted else 'Loss')
    ax.legend()
    ax.semilogy()
    ax.grid()
    plt.show()


# #### Unweighted losses

# In[ ]:


plot_history(fit.history, weighted=False)


# #### Weighted losses

# In[ ]:


plot_history(fit.history, weighted=True)


# ## Study performance of the DNN

# In[ ]:


axis_pred, core_pred, logE_pred = model.predict(X_test, batch_size=128, verbose=1)
logE_pred = logE_pred[:,0]  # remove keras dummy axis


# ### Reconstruction performance of the shower axis

# In[ ]:


d = np.sum(axis_pred * axis_test, axis=1) / np.sum(axis_pred**2, axis=1)**.5
d = np.arccos(np.clip(d, 0, 1)) * 180 / np.pi
reso = np.percentile(d, 68)
plt.figure()
plt.hist(d, bins=np.linspace(0, 3, 41))
plt.axvline(reso, color='C1')
plt.text(0.95, 0.95, '$\sigma_{68} = %.2f^\circ$' % reso, ha='right', va='top', transform=plt.gca().transAxes)
plt.xlabel(r'$\Delta \alpha$ [deg]')
plt.ylabel('#')
plt.grid()
plt.tight_layout()


# ### Reconstruction performance of the shower core

# In[ ]:


d = np.sum((750 * (core_test - core_pred))**2, axis=1)**.5
reso = np.percentile(d, 68)
plt.figure()
plt.hist(d, bins=np.linspace(0, 40, 41))
plt.axvline(reso, color='C1')
plt.text(0.95, 0.95, '$\sigma_{68} = %.2f m$' % reso, ha='right', va='top', transform=plt.gca().transAxes)
plt.xlabel('$\Delta r$ [m]')
plt.ylabel('#')
plt.grid()
plt.tight_layout()


# ### Reconstruction performance of the shower energy

# In[ ]:


d = 10**(logE_pred - logE_test) - 1
reso = np.std(d)
plt.figure()
plt.hist(d, bins=np.linspace(-0.3, 0.3, 41))
plt.xlabel('($E_\mathrm{rec} - E_\mathrm{true}) / E_\mathrm{true}$')
plt.ylabel('#')
plt.text(0.95, 0.95, '$\sigma_\mathrm{rel} = %.3f$' % reso, ha='right', va='top', transform=plt.gca().transAxes)
plt.grid()
plt.tight_layout()


# In[ ]:


plt.figure()
plt.scatter(19.25 + logE_test, 19.25 + logE_pred, s=2, alpha=0.3)
plt.plot([18.5, 20], [18.5, 20], color='black')
plt.xlabel('$\log_{10}(E_\mathrm{true}/\mathrm{eV})$')
plt.ylabel('$\log_{10}(E_\mathrm{rec}/\mathrm{eV})$')
plt.grid()
plt.tight_layout()