#!/usr/bin/env python
# coding: utf-8

# Attempt at concatenating netcdf subdomain files.

# In[1]:


import netCDF4 as nc
import numpy as np

import matplotlib.pyplot as plt

# Ask IPython to render matplotlib figures inline. get_ipython is not imported
# anywhere in this file, so this line only works when run under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')


# Strategy: For each variable on the horizontal grid, insert the data from each subdomain into the correct position of a larger array. Also, copy all of the other variables (those not on the horizontal grid) unchanged from the first file.
# 
# First, I need to determine how the subdomains are organized. The subdomains are divided into rows and columns. An example decomposition is:
# 
# |  |  |  |  |
# |--|--|--|--|
# |08|09|10|11|
# |04|05|06|07|
# |00|01|02|03|
# 
# 
# For my case, I have figured this layout out "by eye" and stored it in an array called filenames. Ideally, we would write code to determine the decomposition automatically. This is complicated by the fact that we do not always wish to recombine over the full domain.
# 
# Next, I can determine the x/y start and end indices for each of my subdomains by knowing the dimensions of each subdomain. I will store these in a dictionary with a filename key.
# 
# Next, I will initialize the dimensions and variables of my new file by copying the dimensions and variables of the file in filenames[0,0]. 
# 
# Finally, I will loop through each file and each variable and insert the data into the correct position of my new array. I have written some functions to help with this.

# In[4]:


def define_shapes(filenames):
    """Map every subdomain file to its index range in the combined grid.

    filenames should be organized in a way that corresponds to the shape of
    the region you are compiling. The first axis (rows) is along y, the
    second axis (columns) is along x:

    filenames[0,0] is the bottom left subdomain
    filenames[-1,0] is the top left subdomain
    filenames[0,-1] is the bottom right subdomain
    filenames[-1,-1] is the top right subdomain

    A 1-D sequence of names is treated as a single row of subdomains.

    The offsets are accumulated from the lengths of the 'x' and 'y'
    dimensions of each file. The height of a row is taken from the last file
    read in that row, so all files in a row are assumed to share the same
    'y' length.

    Beginning/ending i = iss/iee, beginning/ending j = jss/jee. The ending
    values are exclusive, i.e. suitable as the stop of a slice.

    returns: a new dictionary of dictionaries. First level keys are the
    filenames, second level keys are 'iss', 'iee', 'jss', 'jee'.
    """
    # Build the result locally rather than writing into a module-level
    # `shapes`, so the function works without any pre-existing global.
    shapes = {}
    jss = 0
    for row in np.atleast_2d(filenames):
        iss = 0
        y = 0  # keeps jss well defined when a row holds no files
        for name in row:
            # Only the dimension lengths are needed; close the file at once.
            with nc.Dataset(name) as f:
                x = len(f.dimensions['x'])
                y = len(f.dimensions['y'])
            shapes[name] = {
                'iss': iss,
                'iee': iss + x,
                'jss': jss,
                'jee': jss + y,
            }
            iss += x
        jss += y
    return shapes


# Check that it works for my case.

# In[5]:


# Start from an empty mapping; it is rebound below to whatever
# define_shapes returns.
shapes = {}
# Subdomain files laid out as described in the define_shapes docstring:
# rows run along y and columns along x, with filenames[0,0] at the bottom left.
filenames = np.array([['CODAR_0139.nc', 'CODAR_0140.nc', 'CODAR_0141.nc'],
         ['CODAR_0151.nc', 'CODAR_0152.nc', 'CODAR_0153.nc'],
         ['CODAR_0163.nc', 'CODAR_0164.nc', 'CODAR_0165.nc']])


# Start/end indices of every subdomain, keyed by file name.
shapes = define_shapes(filenames)


# In[6]:


# Bare expression: displayed as the cell output in a notebook.
shapes


# Create a new netcdf file. 

# In[7]:


# Open the combined output file in write mode; it is closed by new.close()
# further down, after all data has been inserted.
new = nc.Dataset('CODAR_all.nc', 'w')


# Initialize dimensions

# In[8]:


def initialize_dimensions(newfile, oldfile):
    """Create in newfile every dimension that oldfile defines.

    The horizontal dimensions 'x' and 'y' are created without a size (which
    netCDF4 treats as unlimited), so they can take whatever extent the
    combined data needs. Every other dimension is created with the same
    length it has in oldfile.
    """
    horizontal = ('x', 'y')
    for dimname, dim in oldfile.dimensions.items():
        # size=None is createDimension's default, i.e. "no size given".
        size = None if dimname in horizontal else len(dim)
        newfile.createDimension(dimname, size=size)


# In[9]:


# Use the bottom-left subdomain file as the template for the dimensions and
# close it as soon as they have been created (it used to be left open).
with nc.Dataset(filenames[0,0]) as template:
    initialize_dimensions(new, template)


# Check

# In[10]:


print(new.dimensions)


# Initialize variables

# In[11]:


def initialize_variables(newfile, oldfile):
    """Create in newfile a copy of every variable found in oldfile.

    Each variable keeps the name, data type, dimensions and attributes it has
    in oldfile, and is filled with oldfile's data. The dimensions the
    variables refer to must already exist in newfile.

    returns: a dictionary mapping each variable name to the newly created
    variable in newfile.
    """
    newvars = {}
    for varname, var in oldfile.variables.items():
        attrs = {key: var.getncattr(key) for key in var.ncattrs()}
        # _FillValue can only be given when the variable is created, so it is
        # passed to createVariable instead of being set afterwards.
        fill_value = attrs.pop('_FillValue', None)
        # Keep the source data type: forcing everything to 'float32' loses
        # precision for double-precision variables and changes the type of
        # integer ones.
        newvar = newfile.createVariable(varname, var.datatype, var.dimensions,
                                        fill_value=fill_value)
        # Attributes go in before the data so that any packing attributes
        # (e.g. scale_factor/add_offset) are in effect when values are written.
        newvar.setncatts(attrs)
        newvar[:] = var[:]
        newvars[varname] = newvar
    return newvars


# In[13]:


# Create the variables from the bottom-left subdomain file. Its data is copied
# into `new`, so the template can be closed right away (it used to be left open).
with nc.Dataset(filenames[0,0]) as template:
    newvars = initialize_variables(new, template)


# In[14]:


# Bare expression: displayed as the cell output in a notebook.
new.dimensions


# In[15]:


# Bare expression: displayed as the cell output in a notebook.
new.variables


# Add new data from all files

# In[16]:


def concatentate_variables(filenames, shapes, variables):
    """Insert the data of every subdomain file into the combined variables.

    filenames: array of subdomain file names; it is flattened, so the order
        of the files does not matter.
    shapes: dictionary keyed by file name holding the start/end indices
        ('iss', 'iee', 'jss', 'jee') of that subdomain in the combined grid.
    variables: dictionary mapping variable names to the destination netCDF
        variables. Only variables that have an 'x' dimension are filled, and
        their last two axes are taken to be (y, x).
    """
    for name in np.ravel(filenames):
        bounds = shapes[name]
        x1, x2 = bounds['iss'], bounds['iee']
        y1, y2 = bounds['jss'], bounds['jee']
        # Open each subdomain file once (not once per variable) and make sure
        # it is closed again when its data has been copied.
        with nc.Dataset(name) as f:
            # Use the `variables` argument; the global `newvars` was read
            # here before, which ignored what the caller passed in.
            for varname, newvar in variables.items():
                if 'x' not in newvar.dimensions:
                    continue
                newvar[..., y1:y2, x1:x2] = f.variables[varname][..., :, :]


# In[17]:


# Fill the combined variables with the data from every subdomain file.
concatentate_variables(filenames, shapes, newvars)


# Check that it makes sense

# In[18]:


# Bare expressions below are displayed as cell output in a notebook.
newvars['nav_lat'].shape


# In[19]:


newvars['vosaline'].shape


# In[20]:


newvars


# In[21]:


# Plot one horizontal slice of the combined field.
# NOTE(review): indices 10 and 0 are presumably time and depth - confirm
# against the file's dimension order.
plt.pcolormesh(newvars['vosaline'][10,0,:,:])


# In[22]:


plt.pcolormesh(newvars['nav_lat'][:])


# In[23]:


plt.pcolormesh(newvars['nav_lon'][:])


# In[24]:


# Close the output dataset; newvars belongs to it and is not used after this.
new.close()


# In[25]:


# Plot every subdomain on its own axes, arranged like the decomposition.
# The subplot grid follows filenames.shape instead of a fixed 3x3, so zip
# cannot silently drop files; squeeze=False keeps axs two-dimensional.
nrows, ncols = filenames.shape
fig, axs = plt.subplots(nrows, ncols, figsize=(10,10), squeeze=False)
# filenames[0] is the bottom row of subdomains, so reverse the rows to draw
# the top row of the domain in the top row of the figure.
for ax, name in zip(axs.flatten(), filenames[::-1,:].flatten()):
    # Read the slice into memory and close the file before plotting.
    with nc.Dataset(name) as f:
        var = f.variables['vosaline'][10,0,:,:]
    ax.pcolormesh(var,vmin=0,vmax=30)


# Improvements
# 
# Could attempt to copy over some of the metadata...

# In[ ]: