# # This code installs the netCDF4 module
# # Run this code once per session, then comment it out
# !pip install netcdf4

# # This code allows Cartopy to work with Google Colab
# # Run this code once per session, then comment it out
# !grep '^deb ' /etc/apt/sources.list | \
#   sed 's/^deb /deb-src /g' | \
#   tee /etc/apt/sources.list.d/deb-src.list
# !apt-get -qq update
# !apt-get -qq build-dep python3-cartopy
# !pip uninstall -y shapely
# !pip install shapely --no-binary shapely
# !pip install cartopy

# Import NumPy, xarray, Matplotlib, Cartopy (and related imports)
import numpy as np
import pandas as pd
from scipy import interpolate, stats
import xarray as xr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

# Give Colab access to Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Folder for tide gauge records
folder = '/content/drive/MyDrive/OCEAN 215 - Autumn \'20/OCEAN 215 - Autumn \'20 - Course documents/Zoom class slides and notebooks/2020-11-24 - class #15 - data/'

# Load netCDF files using xarray.
# NOTE: Dataset.drop() was deprecated (and later removed) in xarray;
# drop_vars() is the supported call for removing a variable.
key_west_xr = xr.open_dataset(folder + 'tide_gauge_key_west_fl.nc')
key_west_xr = key_west_xr.drop_vars('record_id').squeeze()
pensacola_xr = xr.open_dataset(folder + 'tide_gauge_pensacola_fl.nc')
pensacola_xr = pensacola_xr.drop_vars('record_id').squeeze()

# Display file
display(key_west_xr)

# Convert each sea level record to a Pandas Series
key_west = key_west_xr['sea_level'].to_pandas()
key_west.name = 'sea_level'
pensacola = pensacola_xr['sea_level'].to_pandas()
pensacola.name = 'sea_level'

# Display new Pandas series
display(pensacola)

# Three stacked panels showing the same two records at decreasing time spans;
# the top panel's full-record lines are drawn here (formatting continues below).
fig, (ax0, ax1, ax2) = plt.subplots(ncols=1, nrows=3, figsize=(16, 14))
ax0.plot(key_west.index.values, key_west.values,
         lw=0.25, c='turquoise', label='Key West (original)')
ax0.plot(pensacola.index.values, pensacola.values,
         lw=0.25, c='navy', label='Pensacola (original)')
# Finish formatting the top panel (its lines were plotted above).
ax0.grid()
ax0.legend()
ax0.set_ylabel('Relative sea level (mm)')
ax0.set_title('Full hourly tide gauge records from FL')

# Middle panel: zoom in on eight months of 2020.
ax1.plot(key_west.index.values, key_west.values,
         lw=0.75, c='turquoise', label='Key West (original)')
ax1.plot(pensacola.index.values, pensacola.values,
         lw=0.75, c='navy', label='Pensacola (original)')
ax1.grid()
ax1.legend()
ax1.set_ylabel('Relative sea level (mm)')
ax1.set_title('Eight months of tide gauge records from FL')
ax1.set_xlim([datetime(2020, 1, 1), datetime(2020, 9, 1)])
ax1.set_ylim([1200, 4000])

# Bottom panel: zoom in on a single week.
ax2.plot(key_west.index.values, key_west.values,
         lw=2, c='turquoise', label='Key West (original)')
ax2.plot(pensacola.index.values, pensacola.values,
         lw=2, c='navy', label='Pensacola (original)')
ax2.grid()
ax2.legend()
ax2.set_ylabel('Relative sea level (mm)')
ax2.set_title('One week of tide gauge records from FL')
ax2.set_xlim([datetime(2020, 8, 1), datetime(2020, 8, 7)])
ax2.set_ylim([1200, 3500]);

# 1a. Correlate the two Pandas Series, "key_west" and "pensacola", with .corr().
# 1b. From that correlation, compute and print r^2.
# 1c. How much hourly variance at Key West is explained by Pensacola?
r = key_west.corr(pensacola)
print('The r^2 value is:', r**2)
print('This means that ~22% of the variance in one location can be explained by the other location\'s record.\n')

# 2. Daily averages of both time series via .resample().
#    Frequency offset codes:
#    https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases
#    Saved as two new variables; one is displayed as a sanity check.
key_west_daily = key_west.resample('D').mean()
pensacola_daily = pensacola.resample('D').mean()
display(key_west_daily)

# 3. Repeat Question 1 using the daily average variables.
r = key_west_daily.corr(pensacola_daily)
print('\nThe r^2 value is:', r**2)
print('This means that ~53% of the daily-average variance in one location can be explained by the other location\'s record.')

# 4. Plot eight months of the original records.
plt.figure(figsize=(16, 5))
plt.plot(key_west.index.values, key_west.values,
         lw=0.75, alpha=0.4, c='turquoise', label='Key West (original)')
plt.plot(pensacola.index.values, pensacola.values,
         lw=0.75, alpha=0.4, c='navy', label='Pensacola (original)')
plt.grid()
plt.ylabel('Relative sea level (mm)')
plt.title('Eight months of tide gauge records from FL')
plt.xlim([datetime(2020, 1, 1), datetime(2020, 9, 1)])
plt.ylim([1200, 4000])

# 5. Overlay dashed, thicker (width 3) lines for each daily average series,
#    with labels, then add a legend.
plt.plot(key_west_daily.index.values, key_west_daily.values,
         lw=3, ls='--', c='turquoise', label='Key West (daily average)')
plt.plot(pensacola_daily.index.values, pensacola_daily.values,
         lw=3, ls='--', c='navy', label='Pensacola (daily average)')
plt.legend(loc='upper left');

# 1a-1c. Hourly correlation between "key_west" and "pensacola" again, and r^2.
r = key_west.corr(pensacola)
print('The r^2 value is:', r**2)
print('This means that ~22% of the variance in one location can be explained by the other location\'s record.\n')

# 2. Weekly averages of both time series via .resample().
#    Frequency offset codes:
#    https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases
#    Saved as two new variables; one is displayed as a sanity check.
key_west_weekly = key_west.resample('W').mean()
pensacola_weekly = pensacola.resample('W').mean()
display(key_west_weekly)

# 3. Repeat Question 1 using the weekly average variables.
# Weekly-average correlation and r^2.
r = key_west_weekly.corr(pensacola_weekly)
print('\nThe r^2 value is:', r**2)
print('This means that ~61% of the weekly-average variance in one location can be explained by the other location\'s record.')

# 4. Plot eight months of the original records.
plt.figure(figsize=(16, 5))
plt.plot(key_west.index.values, key_west.values,
         lw=0.75, alpha=0.4, c='turquoise', label='Key West (original)')
plt.plot(pensacola.index.values, pensacola.values,
         lw=0.75, alpha=0.4, c='navy', label='Pensacola (original)')
plt.grid()
plt.ylabel('Relative sea level (mm)')
plt.title('Eight months of tide gauge records from FL')
plt.xlim([datetime(2020, 1, 1), datetime(2020, 9, 1)])
plt.ylim([1200, 4000])

# 5. Overlay dashed, thicker (width 3) lines for each weekly average series,
#    with labels, then add a legend.
plt.plot(key_west_weekly.index.values, key_west_weekly.values,
         lw=3, ls='--', c='turquoise', label='Key West (weekly average)')
plt.plot(pensacola_weekly.index.values, pensacola_weekly.values,
         lw=3, ls='--', c='navy', label='Pensacola (weekly average)')
plt.legend(loc='upper left');

# 1a-1c. Hourly correlation between "key_west" and "pensacola" again, and r^2.
r = key_west.corr(pensacola)
print('The r^2 value is:', r**2)
print('This means that ~22% of the variance in one location can be explained by the other location\'s record.\n')

# 2. Monthly (~30-day) running means of both hourly series via .rolling():
#    a window of 24*30 hours, requiring at least 24 hourly values per window.
#    Saved as two new variables; one is displayed as a sanity check.
key_west_monthly = key_west.rolling(window=24*30, min_periods=24).mean()
pensacola_monthly = pensacola.rolling(window=24*30, min_periods=24).mean()
display(key_west_monthly)

# 3. Repeat Question 1 using the monthly running mean variables.
# Monthly-running-mean correlation and r^2.
r = key_west_monthly.corr(pensacola_monthly)
print('\nThe r^2 value is:', r**2)
print('This means that ~71% of the monthly running mean variance in one location can be explained by the other location\'s record.')

# 4. Plot the original records from 2000-2020.
plt.figure(figsize=(16, 5))
plt.plot(key_west.index.values, key_west.values,
         lw=0.25, alpha=0.4, c='turquoise', label='Key West (original)')
plt.plot(pensacola.index.values, pensacola.values,
         lw=0.25, alpha=0.4, c='navy', label='Pensacola (original)')
plt.grid()
plt.ylabel('Relative sea level (mm)')
plt.title('Hourly tide gauge records from FL')
plt.xlim([datetime(2000, 1, 1), datetime(2020, 12, 31)])
plt.ylim([1000, 4000])

# 5. Overlay dashed (width 2) lines for each monthly running mean series,
#    with labels, then add a legend.
plt.plot(key_west_monthly.index.values, key_west_monthly.values,
         lw=2, ls='--', c='turquoise', label='Key West (monthly running mean)')
plt.plot(pensacola_monthly.index.values, pensacola_monthly.values,
         lw=2, ls='--', c='navy', label='Pensacola (monthly running mean)')
plt.legend(loc='upper left');

# 1a-1c. Hourly correlation between "key_west" and "pensacola" again, and r^2.
r = key_west.corr(pensacola)
print('The r^2 value is:', r**2)
print('This means that ~22% of the variance in one location can be explained by the other location\'s record.\n')

# 2. Annual running means of both hourly series via .rolling():
#    a window of 24*365 hours, requiring at least 24*30 hourly values per window.
#    Saved as two new variables; one is displayed as a sanity check.
key_west_annual = key_west.rolling(window=24*365, min_periods=24*30).mean()
pensacola_annual = pensacola.rolling(window=24*365, min_periods=24*30).mean()
display(key_west_annual)

# 3. Repeat Question 1 using the annual running mean variables.
# Annual-running-mean correlation and r^2.
r = key_west_annual.corr(pensacola_annual)
print('\nThe r^2 value is:', r**2)
print('This means that ~87% of the annual running mean variance in one location can be explained by the other location\'s record.')

# 4. Set up two blank subplots spanning the full 1915-2020 record.
fig, (ax0, ax1) = plt.subplots(ncols=1, nrows=2, figsize=(16, 9))
ax0.grid()
ax0.set_ylabel('Relative sea level (mm)')
ax0.set_title('Annual running mean tide gauge records from Pensacola, FL')
ax0.set_xlim([datetime(1915, 1, 1), datetime(2020, 12, 31)])
ax1.grid()
ax1.set_ylabel('Relative sea level (mm)')
ax1.set_title('Annual running mean tide gauge records from Key West, FL')
ax1.set_xlim([datetime(1915, 1, 1), datetime(2020, 12, 31)])
plt.tight_layout()

# 5. Add a line for each annual running mean time series to its subplot.
ax0.plot(pensacola_annual.index.values, pensacola_annual.values,
         c='navy', label='Pensacola (annual running mean)')
ax1.plot(key_west_annual.index.values, key_west_annual.values,
         c='turquoise', label='Key West (annual running mean)')
# plt.legend(loc='upper left');

# # This code installs the netCDF4 module
# # Run this code once per session, then comment it out
# !pip install netcdf4

# # This code allows Cartopy to work with Google Colab
# # Run this code once per session, then comment it out
# !grep '^deb ' /etc/apt/sources.list | \
#   sed 's/^deb /deb-src /g' | \
#   tee /etc/apt/sources.list.d/deb-src.list
# !apt-get -qq update
# !apt-get -qq build-dep python3-cartopy
# !pip uninstall -y shapely
# !pip install shapely --no-binary shapely
# !pip install cartopy

# Import NumPy, xarray, Matplotlib, Cartopy (and related imports)
import numpy as np
import pandas as pd
from scipy import interpolate, stats
import xarray as xr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

# Give Colab access to Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Filepath for SLR data set
filepath = '/content/drive/MyDrive/OCEAN 215 - Autumn \'20/OCEAN 215 - Autumn \'20 - Course documents/Zoom class slides and notebooks/2020-11-24 - class #15 - data/GlobalLinearSeaLevelTrends.csv'

# Load CSV file using Pandas
data = pd.read_csv(filepath)

# Display CSV file
display(data)

# Describe CSV file
display(data.describe())

# We learn that:
# - There are 365 tide gauge stations in the data set
# - On average, the trends have been calculated based on 69.8 years of data
# - On average, the tide gauge records are about 87% complete
# - On average, the sea level rise trend is 0.46 feet per century
#   (note that average rate in past 2 decades is twice that)
# - Longitudes span about -180 to 180°E; latitudes span 65°S to 82°N

# Convert Pandas columns to 1-D NumPy arrays with data
lon_orig = data['Longitude'].values
lat_orig = data['Latitude'].values
msl_orig = data['MSL Trend (ft/century)'].values

# Set up world map
fig = plt.figure(figsize=(16, 12))
ax = plt.axes(projection=ccrs.Robinson())
ax.coastlines(resolution='110m', linewidth=1)
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0.5)
ax.add_feature(cfeature.LAND, color='0.8')
ax.set_title('Global tide gauge sea level trends')

# Plot individual tide gauge trends
scat = plt.scatter(lon_orig, lat_orig, c=msl_orig, s=50, cmap='RdBu_r', vmin=-2, vmax=2,
                   transform=ccrs.PlateCarree(), zorder=2)
cbar = plt.colorbar(scat, ax=ax, extend='both', shrink=0.5)
cbar.set_label('Sea level trend (feet/century)')

# Set up regularly-spaced x- and y- coordinates
lon_coord = np.linspace(-180, 180, 361)
lat_coord = np.linspace(-90, 90, 181)
# print(lon_coord)
# print(lat_coord)

# Mesh 1-D coordinates into a 2-D grid
lon_grid, lat_grid = np.meshgrid(lon_coord, lat_coord)
# print(lon_grid)
# print(lat_grid)

# Bilinearly interpolate 1-D data arrays to 2-D grid points
msl_gridded = interpolate.griddata((lon_orig, lat_orig), msl_orig,
                                   (lon_grid, lat_grid), method='linear')  # also try 'nearest'

# Set up world map
fig = plt.figure(figsize=(16, 12))
ax = plt.axes(projection=ccrs.Robinson())
ax.coastlines(resolution='110m', linewidth=1, zorder=3)
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0.5)
ax.add_feature(cfeature.LAND, color='0.8', zorder=2)
ax.set_title('Global tide gauge sea level trends')

# Plot interpolated tide gauge trends.
# FIX: X/Y here are cell centers with the same shape as C; Matplotlib >= 3.5
# raises an error for that combination under the default shading='flat',
# so request shading='auto'.
pcm = plt.pcolormesh(lon_grid, lat_grid, msl_gridded, cmap='RdBu_r', vmin=-2, vmax=2,
                     shading='auto', transform=ccrs.PlateCarree(), zorder=1)
cbar = plt.colorbar(pcm, ax=ax, extend='both', shrink=0.5)
cbar.set_label('Sea level trend (feet/century)')

# # This code installs the netCDF4 module
# # Run this code once per session, then comment it out
# !pip install netcdf4

# # This code allows Cartopy to work with Google Colab
# # Run this code once per session, then comment it out
# !grep '^deb ' /etc/apt/sources.list | \
#   sed 's/^deb /deb-src /g' | \
#   tee /etc/apt/sources.list.d/deb-src.list
# !apt-get -qq update
# !apt-get -qq build-dep python3-cartopy
# !pip uninstall -y shapely
# !pip install shapely --no-binary shapely
# !pip install cartopy

# Import NumPy, xarray, Matplotlib, Cartopy (and related imports)
import numpy as np
import pandas as pd
from scipy import interpolate, stats
import xarray as xr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

# Give Colab access to Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Filepath for Arctic sea ice concentration netCDF file
filepath = '/content/drive/MyDrive/OCEAN 215 - Autumn \'20/OCEAN 215 - Autumn \'20 - Course documents/Zoom class slides and notebooks/2020-11-24 - class #15 - data/nsidc_sea_ice_cdr_north_original_grid.nc'

# Load netCDF file using xarray
data = xr.open_dataset(filepath)

# Display netCDF file
display(data)

# Select SIC on 2020-10-01 on original grid
original = data['sea_ice_conc'].sel(time=datetime(2020, 10, 1)).squeeze()

# Mask out land values (coded as 2.54).
# FIX: the np.NaN alias was removed in NumPy 2.0; np.nan is the canonical spelling.
original.values[original.values > 2] = np.nan

# Convert irregularly gridded lons, lats, and SIC data to flattened (1-D) NumPy arrays
lon_flat = original['longitude'].values.flatten()
lat_flat = original['latitude'].values.flatten()
sic_flat = original.values.flatten()
# print(lon_flat)
# print(lat_flat)
# print(sic_flat)

# Set up regularly-spaced x- and y- coordinates
lon_coord = np.linspace(-180, 180, 720)
lat_coord = np.linspace(50, 90, 160)
# print(lon_coord)
# print(lat_coord)

# Mesh 1-D coordinates into a 2-D grid
lon_grid, lat_grid = np.meshgrid(lon_coord, lat_coord)
# print(lon_grid)
# print(lat_grid)

# Bilinearly interpolate flattened data to 2-D grid points
sic_gridded = interpolate.griddata((lon_flat, lat_flat), sic_flat,
                                   (lon_grid, lat_grid), method='linear')

# Make plot
fig = plt.figure(figsize=(9, 9))
ax = plt.axes(projection=ccrs.NorthPolarStereo(central_longitude=0))
ax.set_extent([-180, 180, 65, 90], crs=ccrs.PlateCarree())

# Option 1: pcolormesh() works for regridded data
# (shading='auto' needed because X/Y share C's shape — see FIX above)
pcm = ax.pcolormesh(lon_grid, lat_grid, 100*sic_gridded, shading='auto',
                    transform=ccrs.PlateCarree(), cmap='Blues')

# # Option 2: contourf() works for regridded data
# pcm = ax.contourf(lon_grid,lat_grid,100*sic_gridded,
#                   transform=ccrs.PlateCarree(),cmap='Blues')

# # Option 3: pcolormesh() works for the original irregular data (CORRECTION TO VIDEO LESSON #14: sometimes this actually works)
# pcm = ax.pcolormesh(original['longitude'].values,original['latitude'].values,100*original.values,
#                     transform=ccrs.PlateCarree(),cmap='Blues')

# # Option 4: contourf() DOES NOT WORK for the original irregular data
# pcm = ax.contourf(original['longitude'].values,original['latitude'].values,100*original.values,
#                   transform=ccrs.PlateCarree(),cmap='Blues')

cbar = plt.colorbar(pcm, ax=ax, shrink=0.8)
cbar.set_label('Sea ice concentration (%)')
cs = ax.coastlines(resolution='110m', linewidth=0.5)
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0.5)
ax.add_feature(cfeature.LAND, color='papayawhip')

# Small demos of pcolormesh/contourf with regular vs. irregular coordinate grids.
# (shading='auto' added to the pcolormesh demos for Matplotlib >= 3.5 — see FIX above.)
plt.figure()
plt.pcolormesh(np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]),
               np.array([[3, 3, 3], [2, 2, 2], [1, 1, 1]]),
               np.array([[4, 5, 6], [7, 8, 9], [10, 11, 12]]),
               shading='auto')
plt.colorbar()

plt.figure()
plt.pcolormesh(np.array([[1, 2, 3], [2, 3, 4], [4, 5, 6]]),
               np.array([[3, 3, 3], [2, 2, 2], [1, 1, 1]]),
               np.array([[4, 5, 6], [7, 8, 9], [10, 11, 12]]),
               shading='auto')
plt.colorbar()

plt.figure()
plt.contourf(np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]]),
             np.array([[3, 3, 3], [2, 2, 2], [1, 1, 1]]),
             np.array([[4, 5, 6], [7, 8, 9], [10, 11, 12]]))
plt.colorbar()

plt.figure()
plt.contourf(np.array([[1, 2, 3], [2, 3, 4], [4, 5, 6]]),
             np.array([[3, 3, 3], [2, 2, 2], [1, 1, 1]]),
             np.array([[4, 5, 6], [7, 8, 9], [10, 11, 12]]))
plt.colorbar()