#!/usr/bin/env python
# coding: utf-8

# ## Assess the impact of atmosphere on offset time-seris
# 
# Run the following to prepare the datasets in HDF5/MintPy format for this notebook:
# 
# ```bash
# prep_gim_tec.ipynb                                        # calc SUB/JPH TEC
# iono_tec.py timeseriesRg.h5 -g inputs/geometryRadar.h5    # calc JPL/COD TEC
# offset_timeseries.py -t ChileSenAT149.txt --plot          # calc SAR, SET and ERA5
# ```

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import os
import datetime as dt
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt, ticker
from mintpy.objects import timeseries
from mintpy.utils import ptime, readfile, utils as ut, plot as pp
from mintpy import tsview
from ipynb.fs.full import utils
figsize_ts  = [9, 2.5]
figsize_dot = [2.5, 2.5]

# config
n = 3                                 # number of med abs dev
save = True

# location of interest
#lalo = [-21.29, -69.58]; box_suffix = 'boxCL'   # box in the center left with low  altitude; box = (10,  55,  20, 70) for SenAT149
lalo = [-21.30, -67.39]; box_suffix = 'boxLR'   # box in the lower right with high altitude; box = (200, 15, 215, 30) for SenAT149

# dir
proj_dir = os.path.expanduser('~/data/geolocation/ChileSenAT149')
#proj_dir = os.path.expanduser('~/data/geolocation/ChileSenDT156')
#proj_dir = os.path.expanduser('~/data/geolocation/KyushuAlos2DT23');  lalo = None; box_suffix = None

proj_name = os.path.basename(proj_dir)
work_dir = os.path.join(proj_dir, 'offset_comp')
if box_suffix is not None:
    work_dir = os.path.join(work_dir, box_suffix)
os.chdir(work_dir)
print('Go to directory', work_dir)

# matplotlib setup
plt.rcParams.update({'font.size': 12})
if proj_name == 'ChileSenDT156':
    plt.rcParams.update({'lines.linewidth': 1.5})
else:
    plt.rcParams.update({'lines.linewidth': 3.0})


# ### 0. Read all time-series

# In[17]:


# data files
geom_file = os.path.join(proj_dir, 'mintpy_offset/inputs/geometryRadar.h5')
mask_file = os.path.join(proj_dir, 'mintpy_offset/maskResInv.h5')
sar_file  = os.path.join(proj_dir, 'mintpy_offset/timeseriesRg.h5')  # timeseriesRg_S1Bias.h5 for subTEC
era_file  = os.path.join(proj_dir, 'mintpy_offset/inputs/ERA5.h5')
set_file  = os.path.join(proj_dir, 'mintpy_offset/inputs/SET.h5')
tec_files = [
    # opt 1 - used TEC [choose ONE ONLY]
    os.path.join(proj_dir, 'mintpy_offset/inputs/TECjhr.h5'),
    #os.path.join(proj_dir, 'mintpy_offset/inputs/TECsub.h5'),
    # opt 2 - as background comparison
    os.path.join(proj_dir, 'mintpy_offset/inputs/TECjlr.h5'),
    os.path.join(proj_dir, 'mintpy_offset/inputs/TECclr.h5'),
]

fnames = [sar_file] + tec_files + [set_file, era_file]
dnames = ['SAR'] + [os.path.splitext(os.path.basename(i))[0] for i in fnames[1:]]

# date info
date_list = timeseries(sar_file).get_date_list()
num_date = len(date_list)

# local solar time
atr = readfile.read_attribute(sar_file)
utc_time = dt.datetime.fromisoformat(atr['startUTC'])
longitude = (float(atr['LON_REF1']) + float(atr['LON_REF2'])) / 2
solar_time = ptime.utc2solar_time(utc_time, longitude)
print('local solar time: {}'.format(solar_time))

# lalo --> box
if lalo is None:
    box = None
else:
    coord = ut.coordinate(atr, lookup_file=geom_file)
    y, x = coord.geo2radar(lalo[0], lalo[1])[:2]
    win = 10
    box = (x-win, y-win, x+win+1, y+win+1)

# read data files into iDict
iDict = {}
for fname, dname in zip(fnames, dnames):
    if not os.path.isfile(fname):
        next
    else:
        print('read data from file: {}'.format(fname))
        ts_data = readfile.read(fname, box=box)[0].reshape(num_date, -1)
        # mask
        if dname == 'SAR':
            ## reference in time
            #ts_data -= np.tile(ts_data[ref_ind, :].reshape(1, -1), (ts_data.shape[0], 1))
            # mask out unreliable SAR offset, including 
            # 1. pixels not inverted, thus with default zero value
            # 2. pixels inverted, but unreliable due to large inversion residual
            mask = readfile.read(mask_file, box=box)[0].flatten()
            ts_data[:, mask==0] = np.nan
        else:
            # mask out invalid pixels
            ts_data[ts_data == 0] = np.nan
        iDict[dname] = {}
        iDict[dname]['data'] = ts_data
        iDict[dname]['med'] = np.nanmedian(ts_data, axis=-1)
        iDict[dname]['mad'] = ut.median_abs_deviation(ts_data)

# maskout anomaly date of 2015-04-02 for Sentinel-1 desc track 156
mask_anomaly = True
if mask_anomaly and proj_name == 'ChileSenDT156':
    ex_dates = ['20150402']
    print('Ignore anomaly dates: {}'.format(ex_dates))
    flag = [i in ex_dates for i in date_list]
    for dname in iDict.keys():
        iDict[dname]['med'][flag] = np.nan


# ### 1. SAR vs. TEC

# In[18]:


### Read Data
x1 = ptime.date_list2vector(date_list)[0]
# dset 1: SAR
y1  = np.array(iDict[dnames[0]]['med'])
y1e = np.array(iDict[dnames[0]]['mad']) * n
# dset 2: JHR/SUB TEC
y2  = np.array(iDict[dnames[1]]['med']) * -1
y2e = np.array(iDict[dnames[1]]['mad']) * n
# dset 3: JLR TEC
y3  = np.array(iDict[dnames[2]]['med']) * -1
y3e = np.array(iDict[dnames[2]]['mad']) * n
# dset 4: CLR TEC
y4  = np.array(iDict[dnames[3]]['med']) * -1
y4e = np.array(iDict[dnames[3]]['mad']) * n
# align other data to SAR with a constant
y1 -= np.nanmedian(y1)
y2 -= np.nanmedian(y2 - y1)
y3 -= np.nanmedian(y3 - y1)
y4 -= np.nanmedian(y4 - y1)
print('Min/max:')
print(f'  {dnames[0]:<6}: {np.min(y1*100):.1f} / {np.max(y1*100):.1f} cm')
print(f'  {dnames[3]:<6}: {np.min(y4*100):.1f} / {np.max(y4*100):.1f} cm')
print(f'  {dnames[2]:<6}: {np.min(y3*100):.1f} / {np.max(y3*100):.1f} cm')
print(f'  {dnames[1]:<6}: {np.min(y2*100):.1f} / {np.max(y2*100):.1f} cm')

# RMSE
print('RMSE:')
rmse = ut.root_mean_sq_error(y1);     print('  SAR         : {:.1f} cm'.format(rmse*100))
rmse = ut.root_mean_sq_error(y1, y4); print('  SAR - {}: {:.1f} cm'.format(dnames[3], rmse*100))
rmse = ut.root_mean_sq_error(y1, y3); print('  SAR - {}: {:.1f} cm'.format(dnames[2], rmse*100))
rmse = ut.root_mean_sq_error(y1, y2); print('  SAR - {}: {:.1f} cm'.format(dnames[1], rmse*100))

### Figure 1 - TS
if proj_name == 'ChileSenAT149':
    vstep = 0.4; ylim = [-0.8, 0.4]; loc = 'lower right'; xlim = [dt.datetime(2015, 5, 1), dt.datetime(2020, 5, 1)]
elif proj_name == 'ChileSenDT156':
    vstep = 0.4; ylim = [-0.4, 0.8]; loc = 'best';        xlim = None
elif proj_name == 'KyushuAlos2DT23':
    vstep = 6;   ylim = [-12, 3];    loc = 'best';        xlim = None
mks = ['-.',':', '-', '--']
cs = ['0.2', '0.6', 'C0', 'C1']

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize_ts)
ps = []
for i, (y, ye, c, m) in enumerate(zip([y4, y3, y1, y2], [y4e, y3e, y1e, y2e], cs, mks)):
    flag = ~np.isnan(y)
    y  = np.array(y)[flag]
    ye = np.array(ye)[flag]
    x  = np.array(x1)[flag]
    p0  = ax.fill_between(x, y-ye, y+ye, fc=c, ec='none', alpha=0.2)
    kwargs = dict(linewidth=1.5) if i == 0 else dict(linewidth=4.0) if i == 1 else dict()
    p1, = ax.plot(x, y, m, color=c, ms=4, **kwargs)
    ps.append((p0, p1))
# axis format
utils.adjust_ts_axis_format(ax, x=x1, vstep=vstep, xlim=xlim, ylim=ylim, ylabel=None)
labels = ['GIM$_\mathrm{{{}}}$'.format(dnames[3].replace('TEC','').upper()),
          'GIM$_\mathrm{{{}}}$'.format(dnames[2].replace('TEC','').upper()),
          'GIM$_\mathrm{{{}}}$'.format(dnames[1].replace('TEC','').upper())]
xname = labels[-1]
ax.legend(handles=ps, labels=[labels[-3], labels[-2], 'SAR', labels[-1]], loc=loc, ncol=len(ps), handlelength=2.5)
# output
out_fig = os.path.join(work_dir, 'TS_SAR_vs_{}.pdf'.format(dnames[1]))
print('save figure to file', out_fig)
plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

### Figure 2 - DOT
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize_dot)
utils.plot_dot_figure(ax, x=y1, y=y2, vlim=ylim, vstep=vstep)
#fig.tight_layout()
ax.set_xlabel('{} [m]'.format('SAR'), color='k')
ax.set_ylabel('{} [m]'.format(xname), color='k', labelpad=None)
# output
out_fig = os.path.join(work_dir, 'DOT_SAR_vs_{}.pdf'.format(dnames[1]))
print('save figure to file', out_fig) 
plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

plt.show()


# ### 2. SAR - TEC vs. SET

# In[19]:


### Read Data
x1 = ptime.date_list2vector(date_list)[0]
# dset 1: SAR - TEC
y1  = np.array(iDict['SAR']['med'])
y1 -= np.array(iDict[dnames[1]]['med']) * -1
y1e = (iDict['SAR']['mad']**2 + iDict[dnames[1]]['mad']**2) ** 0.5 * n
# dset 2: SET
y2  = np.array(iDict['SET']['med'])
y2e = np.array(iDict['SET']['mad']) * n
# align all data with a constant
y1 -= np.nanmedian(y1)
y2 -= np.nanmedian(y2 - y1)

### Figure 1 - TS
if proj_name == 'ChileSenAT149':
    vstep = 0.3; ylim = [-0.3, 0.3]; loc = 'lower right'; xlim = [dt.datetime(2015, 5, 1), dt.datetime(2020, 5, 1)]
elif proj_name == 'ChileSenDT156':
    vstep = 0.4; ylim = [-0.4, 0.4]; loc = 'best';        xlim = None
elif proj_name == 'KyushuAlos2DT23':
    vstep = 2;   ylim = [-2, 2];     loc = 'best';        xlim = None

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize_ts)
ps = []
for i, (y, ye, c, m) in enumerate(zip([y1, y2], [y1e, y2e], ['C0', 'C1'], ['-', '--'])):
    flag = ~np.isnan(y)
    y  = np.array(y)[flag]
    ye = np.array(ye)[flag]
    x  = np.array(x1)[flag]
    p0  = ax.fill_between(x, y-ye, y+ye, fc=c, ec='none', alpha=0.2)
    p1, = ax.plot(x, y, m, color=c, ms=4)
    ps.append((p0, p1))
# axis format
utils.adjust_ts_axis_format(ax, x=x1, vstep=vstep, xlim=xlim, ylim=ylim, ylabel=None)
xname = 'SAR - GIM$_\mathrm{{{}}}$'.format(dnames[1][-3:].upper())
ax.legend(labels=[xname, 'SET'], handles=ps, loc=loc, ncol=len(ps))
#fig.tight_layout()
# output
out_fig = os.path.join(work_dir, 'TS_SAR_{}_vs_SET.pdf'.format(dnames[1]))
print('save figure to file', out_fig)
plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

### Figure 2 - DOT
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize_dot)
utils.plot_dot_figure(ax, x=y1, y=y2, vlim=ylim, vstep=vstep)
#fig.tight_layout()
ax.set_xlabel('{} [m]'.format(xname), color='k')
ax.set_ylabel('{} [m]'.format('SET'), color='k', labelpad=-4)
# output
out_fig = os.path.join(work_dir, 'DOT_SAR_{}_vs_SET.pdf'.format(dnames[1]))
print('save figure to file', out_fig)
plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

plt.show()


# ### 3. SAR - TEC - SET vs. ERA5

# In[21]:


### Read Data
# dset 1: SAR - TEC
x1 = ptime.date_list2vector(date_list)[0]
y1  = np.array(iDict['SAR']['med'])
y1 -= np.array(iDict[dnames[1]]['med']) * -1
y1 -= np.array(iDict['SET']['med'])
y1e = (iDict['SAR']['mad']**2 + iDict[dnames[1]]['mad']**2 + iDict['SET']['mad']**2) ** 0.5 * n
# dset 2: ERA5
y2  = np.array(iDict['ERA5']['med'])
y2e = np.array(iDict['ERA5']['mad']) * n
# align all data with a constant
y1 -= np.nanmedian(y1)
y2 -= np.nanmedian(y2 - y1)
print('peak to valley variation: {:.1f} m'.format(np.nanmax(y1 - y2) - np.nanmin(y1 - y2)))

### Figure 1 - TS
if proj_name == 'ChileSenAT149':
    vstep = 0.3; ylim = [-0.3, 0.3]; loc = 'best'; xlim = [dt.datetime(2015, 5, 1), dt.datetime(2020, 5, 1)]
elif proj_name == 'ChileSenDT156':
    vstep = 0.4; ylim = [-0.4, 0.4]; loc = 'best'; xlim = None
elif proj_name == 'KyushuAlos2DT23':
    vstep = 2;   ylim = [-2, 2];     loc = 'best'; xlim = None

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize_ts)
ps = []
for i, (y, ye, c, m) in enumerate(zip([y1, y2], [y1e, y2e], ['C0', 'C1'], ['-', '--'])):
    flag = ~np.isnan(y)
    y  = np.array(y)[flag]
    ye = np.array(ye)[flag]
    x  = np.array(x1)[flag]
    p0  = ax.fill_between(x, y-ye, y+ye, fc=c, ec='none', alpha=0.2)
    p1, = ax.plot(x, y, m, color=c, ms=4)
    ps.append((p0, p1))
# axis format
utils.adjust_ts_axis_format(ax, x=x1, vstep=vstep, xlim=xlim, ylim=ylim, ylabel=None)
xname = 'SAR - GIM$_\mathrm{{{}}}$ - SET'.format(dnames[1][-3:].upper())
ax.legend(labels=[xname, 'ERA5'], handles=ps, loc=loc, ncol=len(ps))
# plot S1A/B divider
if proj_name == 'ChileSenAT149' and not tec_files[0].endswith('TECsub.h5'):
    ax.axvline(x=dt.datetime(2016,10,7), linestyle='--', color='k', lw=1.5)   # S1A / S1B
    ax.annotate('S1A      S1B', xy=(dt.datetime(2016,6,1), 0.18))
#fig.tight_layout()
# output
out_fig = os.path.join(work_dir, 'TS_SAR_{}_SET_vs_ERA5.pdf'.format(dnames[1]))
print('save figure to file', out_fig)
plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

### Figure 2 - DOT
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize_dot)
utils.plot_dot_figure(ax, x=y1, y=y2, vlim=ylim, vstep=vstep)
#fig.tight_layout()
ax.set_xlabel('{} [m]'.format(xname),  color='k')
ax.set_ylabel('{} [m]'.format('ERA5'), color='k', labelpad=-4)
# output
out_fig = os.path.join(work_dir, 'DOT_SAR_{}_SET_vs_ERA5.pdf'.format(dnames[1]))
print('save figure to file', out_fig)
plt.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=300)

plt.show()


# ### 4. Plot AOI

# In[4]:


from cartopy import crs as ccrs, feature as cfeature

loc_dict = {
    'japan' : [32, 131],
    'chile' : [-21.3, -68]
}

for name, lalo in loc_dict.items():
    lat, lon = lalo
    proj = ccrs.NearsidePerspective(central_longitude=lon, central_latitude=lat, satellite_height=600e3)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[2, 2], subplot_kw=dict(projection=proj))
    ax.add_feature(cfeature.LAND, edgecolor='none', facecolor='0.9')
    ax.add_feature(cfeature.BORDERS, lw=0.5)
    ax.coastlines(resolution='50m', lw=0.5, alpha=1)
    ax.scatter(lon, lat, marker='s', s=6**2, c='C1', zorder=3)
    ax.set_global()

    # output
    out_dir = os.path.expanduser('~/Papers/2021_Geolocation/figs_src/TS_comp')
    out_fig = os.path.join(out_dir, f'loc_{name}.pdf')
    print('save figure to file:', out_fig)
    #fig.savefig(out_fig, bbox_inches='tight', transparent=True, dpi=600)
plt.show()


# ### Backup: Apply noise reductions: TEC, SET and ERA5
# 
# By run `offset_timeseries.py -t template_file` for each of the dataset.
# 
# #### Number of pixels masked out:
# 
# + `ChileSenAT149`
# 
# ```bash
# number of pixels                                : 28413
# number of pixels after removing water body      : 28165
# number of pixels after removing SNR = NaN       : 24783
# number of pixels after removing edge effect     : 24783
# number of pixels after removing residual > 0.125: 21854
# number of pixels after adding Vel-to-Res > 0.5  : 22100
# ```
# 
# + `ChileSenDT156`
# 
# ```bash
# generating mask of reliable pixels ...
# number of pixels                                : 36424
# number of pixels after removing water body      : 36059
# number of pixels after removing SNR = NaN       : 28870
# number of pixels after removing edge effect     : 28870
# number of pixels after removing residual > 0.125: 23573
# number of pixels after adding Vel-to-Res > 0.5  : 23573
# ```
# 
# + `KyushuAlos2DT23`
# 
# ```bash
# number of pixels                                : 92287
# number of pixels after removing water body      : 92192
# number of pixels after removing SNR = NaN       : 92192
# number of pixels after removing edge effect     : 92192
# number of pixels after removing residual > 1.000: 77073
# number of pixels after adding Vel-to-Res > 0.5  : 77073
# ```

# In[1]:


m_dict = {'ChileSenAT149'  : [24783, 21854],
          'ChileSenDT156'  : [28870, 23573],
          'KyushuAlos2DT23': [92192, 77073],}
for key, [np0, np1] in m_dict.items():
    print(f'{key:20s}: number of pixels after thresholding residual: {np0} -> {np1} ({(np0-np1)/np0*100:.0f}%)')


# ### Backup: Plot residual time-series for POI

# In[47]:


proj_dir = os.path.expanduser('~/data/geolocation/ChileSenAT149'); ylim = ['-120', '40']
proj_dir = os.path.expanduser('~/data/geolocation/ChileSenDT156'); ylim = ['-40', '120']
proj_name = os.path.basename(proj_dir)
os.chdir(os.path.join(proj_dir, 'mintpy_offset'));  print('Go to directory:', os.path.join(proj_dir, 'mintpy_offset'))

ts_file = 'timeseriesRg_subTEC_SET_ERA5.h5'
ts_file = 'timeseriesRg_gimTEC_SET_ERA5.h5'
iargs = [ts_file, '--save', '--figsize', '8', '3', '--lalo', str(lalo[0]), str(lalo[1])]
iargs += ['--ylim'] + ylim
#iargs += ['--ex', 'S1B_date.txt']
#iargs += ['--noverbose']
tsview.main(iargs)

os.chdir(work_dir);  print('Go to directory:', work_dir)


# ### RMSE of the pixel above for Sentinel-1 desc T156

# In[34]:


proj_dir = os.path.expanduser('~/data/geolocation/ChileSenDT156')
geom_file = os.path.join(proj_dir, 'mintpy_offset/inputs/geometryRadar.h5')
ts_file = os.path.join(proj_dir, 'mintpy_offset/timeseriesRg_gimTEC_SET_ERA5.h5')
date_list = timeseries(ts_file).get_date_list()
dis_ts = ut.read_timeseries_lalo(lalo[0], lalo[1], ts_file, lookup_file=geom_file)[1]

# RMSE - original
print('RMSE (original): {:.1f} cm'.format(ut.root_mean_sq_error(dis_ts - np.nanmedian(dis_ts))*100.))

# RMSE - remove the two anomaly dates
ex_dates = ['20150402'] #, '20150613']
flag = [i not in ex_dates for i in date_list]
date_list = np.array(date_list)[flag].tolist()
dis_ts = dis_ts[flag]
print('RMSE (w/o 2015-04-02): {:.1f} cm'.format(ut.root_mean_sq_error(dis_ts - np.nanmedian(dis_ts))*100.))

# RMSE - remove a linear velocity
from scipy import linalg
model = dict(polynomial=1)
G = timeseries.get_design_matrix4time_func(date_list, model)
m, e2 = np.linalg.lstsq(G, dis_ts, rcond=None)[:2]
dis_ts_res = dis_ts - np.dot(G, m)
print('RMSE (deramp): {:.1f} cm'.format(ut.root_mean_sq_error(dis_ts_res - np.nanmedian(dis_ts_res))*100.))