#!/usr/bin/env python
# coding: utf-8

# # Toy weather data
# 
# Here is an example of how to easily manipulate a toy weather dataset using
# xarray and other recommended Python libraries:

# In[ ]:


import numpy as np
import pandas as pd
import seaborn as sns

import xarray as xr
# ``get_ipython`` only exists inside an IPython/Jupyter session.  When this
# exported notebook is executed with plain ``python`` the name is undefined,
# so skip the inline-plotting magic instead of aborting with a NameError.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None
if _ipython_shell is not None:
    _ipython_shell.run_line_magic('matplotlib', 'inline')


# In[ ]:


# Seed the legacy global RNG so the synthetic data is reproducible.
np.random.seed(123)

xr.set_options(display_style="html")

# Daily timestamps covering 2000-01-01 through 2001-12-31 inclusive.
times = pd.date_range(start="2000-01-01", end="2001-12-31", name="time")

# Sinusoidal seasonal signal in [-1, 1]; the 0.28-year phase shift puts the
# trough in early January and the peak in mid-July.
year_fraction = times.dayofyear.values / 365.25
annual_cycle = np.sin(2 * np.pi * (year_fraction - 0.28))

# Column vector so it broadcasts against the (n_days, 3) noise arrays below.
base = 10 + 15 * annual_cycle[:, np.newaxis]

# One noise column per location.  tmin is drawn before tmax so the random
# stream is consumed in the same order on every run.
tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3)
tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3)

dims = ("time", "location")
ds = xr.Dataset(
    data_vars={
        "tmin": (dims, tmin_values),
        "tmax": (dims, tmax_values),
    },
    coords={"time": times, "location": ["IA", "IN", "IL"]},
)

ds


# ## Examine a dataset with pandas and seaborn

# ### Convert to a pandas DataFrame

# In[ ]:


# Flatten the Dataset into a tabular pandas DataFrame (one column per data
# variable) and preview the first rows.
df = ds.to_dataframe()
df.head()


# In[ ]:


# Per-column summary statistics for the flattened data.
df.describe()


# ### Visualize using pandas

# In[ ]:


# Average every variable over the "location" dimension, then let pandas draw
# the resulting time-indexed table.
location_mean = ds.mean(dim="location")
location_mean.to_dataframe().plot()


# ### Visualize using seaborn

# In[ ]:


# Pairwise scatter/histogram grid over the dataset's data variables.
# reset_index() turns the index levels into ordinary columns first.
sns.pairplot(data=df.reset_index(), vars=list(ds.data_vars))


# ## Probability of freeze by calendar month

# In[ ]:


# Boolean mask of freezing days; the mean of a boolean series within each
# calendar month is the fraction of days at or below zero.
is_freezing = ds["tmin"] <= 0
freeze = is_freezing.groupby("time.month").mean("time")
freeze


# In[ ]:


# Convert the per-month freeze fractions to a pandas object and plot them.
freeze.to_pandas().plot()


# ## Monthly averaging

# In[ ]:


# Resample the daily data into calendar-month bins labelled by month start.
monthly_avg = ds.resample(time="1MS").mean()

# Plot the monthly means for a single location ("IA") with square markers.
ia_monthly = monthly_avg.sel(location="IA").to_dataframe()
ia_monthly.plot(style="s-")


# Note that ``MS`` here refers to Month-Start; ``M`` (spelled ``ME`` in pandas 2.2 and later, where ``M`` is deprecated) labels Month-End (the last day of the month).

# ## Calculate monthly anomalies

# In climatology, "anomalies" refer to the difference between observations and
# typical weather for a particular season. Unlike observations, anomalies should
# not show any seasonal cycle.

# In[ ]:


# Group the daily data by calendar month once and reuse the grouping.
monthly_groups = ds.groupby("time.month")

# Climatology: the mean over time for each calendar month.
climatology = monthly_groups.mean("time")

# Subtracting from the grouped object removes each day's own monthly mean.
anomalies = monthly_groups - climatology

location_mean_anomalies = anomalies.mean("location").to_dataframe()
location_mean_anomalies[["tmin", "tmax"]].plot()


# ## Calculate standardized monthly anomalies

# You can create standardized anomalies where the difference between the
# observations and the climatological monthly mean is
# divided by the climatological standard deviation.

# In[ ]:


def _standardize(values, mean, std):
    """Return ``(values - mean) / std`` for one calendar-month group."""
    return (values - mean) / std


monthly_groups = ds.groupby("time.month")
climatology_mean = monthly_groups.mean("time")
climatology_std = monthly_groups.std("time")

# apply_ufunc pairs each monthly group with that month's mean and std.
stand_anomalies = xr.apply_ufunc(
    _standardize,
    monthly_groups,
    climatology_mean,
    climatology_std,
)

location_mean_stand = stand_anomalies.mean("location").to_dataframe()
location_mean_stand[["tmin", "tmax"]].plot()


# ## Fill missing values with climatology

# The ``fillna`` method on grouped objects lets you easily fill missing values by group:

# In[ ]:


# Keep only days after the 15th of each month, then reindex back onto the
# full time axis so the discarded first halves become missing values.
second_half = ds["time.day"] > 15
some_missing = ds["tmin"].sel(time=second_half).reindex_like(ds)

# Fill each gap with the climatological tmin of its calendar month.
filled = some_missing.groupby("time.month").fillna(climatology["tmin"])

both = xr.Dataset({"some_missing": some_missing, "filled": filled})
both


# In[ ]:


# Restrict to the year 2000 and average over locations; dropping the
# remaining non-index coordinates keeps only the data columns in the frame.
year_2000 = both.sel(time="2000")
df = year_2000.mean("location").reset_coords(drop=True).to_dataframe()
df.head()


# In[ ]:


# Plot "filled" first and "some_missing" second, in that column order.
df[["filled", "some_missing"]].plot()