#!/usr/bin/env python
# coding: utf-8

# # wetterdienst - A simple example
# 
# pip install wetterdienst

# ## Import modules necessary for general functioning

# In[1]:


import warnings
warnings.filterwarnings("ignore")

from wetterdienst.dwd.observations import DWDObservationMetadata, DWDObservationSites, \
    DWDObservationData, DWDObservationPeriod, DWDObservationResolution, \
    DWDObservationParameter, DWDObservationParameterSet

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm


# Which parameters are available?

# In[2]:


# all
print("All available combinations")
print(
    DWDObservationMetadata().discover_parameters()
)
# selection
print("Selection of daily historical data")
print(
    DWDObservationMetadata(
        resolution=DWDObservationResolution.DAILY,
        period=DWDObservationPeriod.HISTORICAL
    ).discover_parameters()
)


# ## 1. First check the metadata to inform yourself of available stations
# (here we pick historical daily precipitation - hdp)

# In[3]:


sites_hdp = DWDObservationSites(
    parameter_set=DWDObservationParameterSet.PRECIPITATION_MORE,
    resolution=DWDObservationResolution.DAILY,
    period=DWDObservationPeriod.HISTORICAL
)
print("Number of stations with available data: ", sites_hdp.all().sum())
print("Some of the stations:")
sites_hdp.all().head()


# The metadata includes an id, the range of the measurements, the position
# (including height) as well as place and state of it and if it has a file. With the
# following plot we want to show a map of those stations:

# In[4]:


cmap = cm.get_cmap('viridis')
bounds = sites_hdp.all().STATION_HEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

fig, ax = plt.subplots(figsize=(10, 10))
plot = sites_hdp.all().plot.scatter(
    x="LON", y="LAT", c="STATION_HEIGHT", cmap=cmap, norm=norm, ax=ax)
plot.set_title("Map of daily precipitation stations in Germany\n"
               "Color refers to height of station")
plt.show()


# ## 2. The usual way of retrieving data

# Usually there are three steps to follow:
# - select indexed files based on
#     - its station_id
#         - "1048" for Dresden, Germany
#     - its parameter
#         - "kl" for climate
#     - its time_resolution
#         - "daily" for daily data
#     - its period_type
#         - "historical" for data up to the end of the last year
# - download the resulting list of files
# - parse it into pandas.DataFrames
# 
# We have summarized those steps into one:
# - collect_dwd_data
# 
# Let's try it out for the above selection:

# In[5]:


print("Receiving historical daily climate data for Dresden-Klotzsche (1048)")
station_data = DWDObservationData(
    station_ids=[1048],
    parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
    resolution=DWDObservationResolution.DAILY,
    periods=[DWDObservationPeriod.HISTORICAL],
    tidy_data=False
).collect_safe()

station_data.dropna(axis=0).head()


# See that DATE is already parsed, so we can easily get some nice graphs with matplotlib,
# which we will do in the next part. We may as well also use the "tidy_data" option to
# receive a nicer dataframe and "humanize_column_names" to get meaningful parameters:

# In[6]:


print("Receiving historical daily climate data for Dresden-Klotzsche (1048), this time tidied.")
station_data = DWDObservationData(
    station_ids=[1048],
    parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY],
    resolution=DWDObservationResolution.DAILY,
    periods=[DWDObservationPeriod.HISTORICAL],
    tidy_data=True,
    humanize_column_names=True
).collect_safe()

station_data.dropna(axis=0).groupby("ELEMENT").first()


# With the latest update we can now also request direct parameters from the given sets.
# We know that climate_summary contains TMK, TNK, TXK and RSK, so let's request those.
# This option will automatically tidy the data.

# In[7]:


print("Receiving historical daily temperature and precipitation for Dresden-Klotzsche "
      "(1048).")
station_data = DWDObservationData(
    station_ids=[1048],
    parameters=[DWDObservationParameter.DAILY.TEMPERATURE_AIR_200,
                DWDObservationParameter.DAILY.TEMPERATURE_AIR_MAX_200,
                DWDObservationParameter.DAILY.TEMPERATURE_AIR_MIN_200,
                DWDObservationParameter.DAILY.PRECIPITATION_HEIGHT],
    resolution=DWDObservationResolution.DAILY,
    periods=[DWDObservationPeriod.HISTORICAL],
    humanize_column_names=False
).collect_safe()

station_data.dropna(axis=0).head()


# ## 3. Let's create some plots
# 
# Now that we have data, let's create some plots! We can create a time series/histogram of
# the temperatures and precipitation

# In[8]:


cmap = plt.get_cmap('viridis', 4)
colors = cmap.colors

PARAMETERS = ["TNK", "TMK", "TXK", "RSK"]

station_data_grouped = station_data.groupby(station_data["ELEMENT"], observed=True)

fig, axes = plt.subplots(nrows=len(PARAMETERS), tight_layout=True, figsize=(10, 40))

for (parameter, group), ax, color in zip(station_data_grouped, axes, colors):
    group.plot(x="DATE", y="VALUE", label=parameter, alpha=.75, ax=ax, c=color)

plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.suptitle("Temperature and precipitation time series of Dresden, Germany")

plt.show()


# ## 4. Create yearly values

# In[9]:


import numpy as np
import pandas as pd
station_data_yearly = []

for (year, parameter), group in station_data.groupby(
        [station_data["DATE"].dt.year, "ELEMENT"], as_index=False, observed=True):
    if parameter == "RSK":
        station_data_yearly.append(group.dropna().agg({"VALUE": np.sum}))
    else:
        station_data_yearly.append(group.dropna().agg({"VALUE": np.mean}))

station_data_yearly = pd.concat(station_data_yearly)

station_data_yearly


# ## 5. Find a station
# 
# We may want to find a station near to a certain area. Therefor simply call get_nearest_station

# In[10]:


DWDObservationSites(
    parameter_set=DWDObservationParameterSet.CLIMATE_SUMMARY,
    resolution=DWDObservationResolution.DAILY,
    period=DWDObservationPeriod.HISTORICAL,
    start_date="2000-01-01",
    end_date="2010-01-01"
).nearby_number(
    51.05089,
    13.73832,
    5
)