pip install wetterdienst
import warnings
warnings.filterwarnings("ignore")
from wetterdienst import metadata_for_climate_observations, \
collect_climate_observations_data, get_nearby_stations_by_number, \
discover_climate_observations
from wetterdienst import PeriodType, TimeResolution, Parameter
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
Which parameters are available?
# Full catalogue: every time resolution / parameter / period type combination.
print("All available combinations")
all_combinations = discover_climate_observations()
print(all_combinations)

# Narrowed catalogue: only daily resolution with the historical period type.
print("Selection of daily historical data")
daily_historical_combinations = discover_climate_observations(
    time_resolution=TimeResolution.DAILY,
    period_type=PeriodType.HISTORICAL,
)
print(daily_historical_combinations)
All available combinations { "TimeResolution.MINUTE_1": { "Parameter.PRECIPITATION": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ] }, "TimeResolution.MINUTES_10": { "Parameter.PRECIPITATION": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ], "Parameter.TEMPERATURE_AIR": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ], "Parameter.TEMPERATURE_EXTREME": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ], "Parameter.WIND_EXTREME": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ], "Parameter.SOLAR": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ], "Parameter.WIND": [ "PeriodType.HISTORICAL", "PeriodType.RECENT", "PeriodType.NOW" ] }, "TimeResolution.HOURLY": { "Parameter.TEMPERATURE_AIR": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.CLOUD_TYPE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.CLOUDINESS": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.DEW_POINT": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.PRECIPITATION": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.PRESSURE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.TEMPERATURE_SOIL": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.SOLAR": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.SUNSHINE_DURATION": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.VISIBILITY": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.WIND": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.WIND_SYNOPTIC": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ] }, "TimeResolution.SUBDAILY": { "Parameter.TEMPERATURE_AIR": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.CLOUDINESS": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.MOISTURE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.PRESSURE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" 
], "Parameter.SOIL": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.VISIBILITY": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.WIND": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ] }, "TimeResolution.DAILY": { "Parameter.CLIMATE_SUMMARY": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.PRECIPITATION_MORE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.TEMPERATURE_SOIL": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.SOLAR": [ "PeriodType.RECENT" ], "Parameter.WATER_EQUIVALENT": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.WEATHER_PHENOMENA": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ] }, "TimeResolution.MONTHLY": { "Parameter.CLIMATE_SUMMARY": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.PRECIPITATION_MORE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.WEATHER_PHENOMENA": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ] }, "TimeResolution.ANNUAL": { "Parameter.CLIMATE_SUMMARY": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.PRECIPITATION_MORE": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ], "Parameter.WEATHER_PHENOMENA": [ "PeriodType.HISTORICAL", "PeriodType.RECENT" ] } } Selection of daily historical data { "TimeResolution.DAILY": { "Parameter.CLIMATE_SUMMARY": [ "PeriodType.HISTORICAL" ], "Parameter.PRECIPITATION_MORE": [ "PeriodType.HISTORICAL" ], "Parameter.TEMPERATURE_SOIL": [ "PeriodType.HISTORICAL" ], "Parameter.WATER_EQUIVALENT": [ "PeriodType.HISTORICAL" ], "Parameter.WEATHER_PHENOMENA": [ "PeriodType.HISTORICAL" ] } }
(here we pick historical daily precipitation - hdp)
# Station metadata for historical daily precipitation ("hdp").
metadata_hdp = metadata_for_climate_observations(
    Parameter.PRECIPITATION_MORE,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)

# HAS_FILE is a boolean column, so its sum is the number of stations with data.
stations_with_file = metadata_hdp["HAS_FILE"].sum()
print("Number of stations with available data: ", stations_with_file)

print("Some of the stations:")
# Final bare expression so the notebook renders the first rows as a table.
metadata_hdp.head()
Number of stations with available data: 5620 Some of the stations:
STATION_ID | FROM_DATE | TO_DATE | STATION_HEIGHT | LAT | LON | STATION_NAME | STATE | HAS_FILE | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1912-01-01 | 1986-06-30 | 478.0 | 47.8413 | 8.8493 | Aach | Baden-Württemberg | True |
1 | 2 | 1951-01-01 | 2006-12-31 | 138.0 | 50.8066 | 6.0996 | Aachen (Kläranlage) | Nordrhein-Westfalen | True |
2 | 3 | 1891-01-01 | 2011-03-31 | 202.0 | 50.7827 | 6.0941 | Aachen | Nordrhein-Westfalen | True |
3 | 4 | 1951-01-01 | 1979-10-31 | 243.0 | 50.7683 | 6.1207 | Aachen-Brand | Nordrhein-Westfalen | True |
4 | 6 | 1982-11-01 | 2020-09-22 | 455.0 | 48.8361 | 10.0598 | Aalen-Unterrombach | Baden-Württemberg | True |
The metadata contains, for each station, an id, the date range of its measurements, its position (latitude, longitude and height), its name and federal state, and a flag telling whether a data file is available. The following plot shows a map of those stations:
# Scatter map of all stations, coloured by station height.
# plt.get_cmap is used instead of cm.get_cmap, which was deprecated in
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('viridis')
# Colour bins are the height quartiles (min, 25 %, 50 %, 75 %, max), so each
# colour band covers roughly the same number of stations.
bounds = metadata_hdp.STATION_HEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
fig, ax = plt.subplots(figsize=(10, 10))
plot = metadata_hdp.plot.scatter(
    x="LON", y="LAT", c="STATION_HEIGHT", cmap=cmap, norm=norm, ax=ax)
plot.set_title("Map of daily precipitation stations in Germany\n"
               "Color refers to height of station")
plt.show()
Usually there are three steps to follow:
We have summarized those steps into one:
Let's try it out for the above selection:
# Daily historical climate summary for a single station, in the wide layout
# (tidy_data=False): one row per date, one column per measured element.
klotzsche_station_ids = [1048]
print("Receiving historical daily climate data for Dresden-Klotzsche (1048)")
station_data = collect_climate_observations_data(
    klotzsche_station_ids,
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
    tidy_data=False,
)
# Final bare expression so the notebook renders the frame.
station_data
Receiving historical daily climate data for Dresden-Klotzsche (1048)
STATION_ID | DATE | QN_3 | FX | FM | QN_4 | RSK | RSKF | SDK | SHK_TAG | NM | VPM | PM | TMK | UPM | TXK | TNK | TGK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1048 | 1934-01-01 | NaN | NaN | NaN | 1 | 0.2 | 8.0 | NaN | 0.0 | 8.0 | 6.4 | 1008.60 | 0.5 | 97.00 | 0.7 | 0.2 | NaN |
1 | 1048 | 1934-01-02 | NaN | NaN | NaN | 1 | 0.1 | 7.0 | NaN | 0.0 | 8.0 | 5.9 | 1006.20 | -0.1 | 90.00 | 0.9 | -0.8 | NaN |
2 | 1048 | 1934-01-03 | NaN | NaN | NaN | 1 | 0.0 | 0.0 | NaN | 0.0 | 8.0 | 5.2 | 1001.60 | -0.7 | 85.00 | -0.1 | -1.0 | NaN |
3 | 1048 | 1934-01-04 | NaN | NaN | NaN | 1 | 0.0 | 1.0 | NaN | 0.0 | 8.0 | 4.8 | 1001.40 | -1.6 | 82.00 | -0.3 | -3.5 | NaN |
4 | 1048 | 1934-01-05 | NaN | NaN | NaN | 1 | 10.1 | 8.0 | NaN | 0.0 | 5.3 | 6.3 | 996.00 | 0.9 | 92.00 | 2.8 | -3.8 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
25987 | 1048 | 2019-12-27 | 3 | 8.2 | 2.9 | 3 | 0.1 | 8.0 | 0.000 | 0.0 | 7.7 | 6.2 | 999.98 | 2.5 | 84.46 | 3.6 | 1.3 | 0.9 |
25988 | 1048 | 2019-12-28 | 3 | 7.7 | 3.1 | 3 | 0.4 | 7.0 | 0.000 | 0.0 | 7.4 | 5.2 | 1011.49 | 0.4 | 82.54 | 1.3 | -0.7 | -1.0 |
25989 | 1048 | 2019-12-29 | 3 | 7.7 | 2.2 | 3 | 0.0 | 0.0 | 6.233 | 0.0 | 4.6 | 4.2 | 1010.52 | -0.5 | 71.75 | 2.2 | -3.1 | -5.4 |
25990 | 1048 | 2019-12-30 | 3 | 8.7 | 4.6 | 3 | 0.0 | 0.0 | 1.483 | 0.0 | 5.5 | 4.4 | 1002.62 | 2.5 | 61.08 | 7.6 | -1.0 | -2.0 |
25991 | 1048 | 2019-12-31 | 3 | 12.3 | 5.4 | 3 | 0.0 | 6.0 | 1.133 | 0.0 | 5.6 | 5.6 | 1000.98 | 5.4 | 63.88 | 8.2 | 3.5 | 2.1 |
25992 rows × 18 columns
Note that the DATE column is already parsed as a date, so we can easily create some nice graphs with matplotlib, which we will do in the next part.
First, to make the data easier to handle, we transform it from the wide (tabular) layout into a long, "tidy" layout: instead of one column per measured element next to a date column, every row holds a single observation, identified by its date and element, together with the measured value. We will therefore use the tidy_data option.
# Same station and dataset as the wide request, but with tidy_data=True the
# result is long-format: one row per (element, date) with its value.
print("Receiving historical daily climate data for Dresden-Klotzsche (1048), this time tidied.")
tidy_request = (
    [1048],
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)
station_data = collect_climate_observations_data(*tidy_request, tidy_data=True)
# Final bare expression so the notebook renders the first rows.
station_data.head()
Receiving historical daily climate data for Dresden-Klotzsche (1048), this time tidied.
STATION_ID | PARAMETER | ELEMENT | DATE | VALUE | QUALITY | |
---|---|---|---|---|---|---|
0 | 1048 | CLIMATE_SUMMARY | FX | 1934-01-01 | NaN | NaN |
1 | 1048 | CLIMATE_SUMMARY | FX | 1934-01-02 | NaN | NaN |
2 | 1048 | CLIMATE_SUMMARY | FX | 1934-01-03 | NaN | NaN |
3 | 1048 | CLIMATE_SUMMARY | FX | 1934-01-04 | NaN | NaN |
4 | 1048 | CLIMATE_SUMMARY | FX | 1934-01-05 | NaN | NaN |
We can create a time series/histogram of some elements to compare the distribution of the values, here for example precipitation and mean temperature:
# Plot a time series (top row) and a histogram (bottom row) for each selected
# element of the tidy station data; one column of axes per element.
elements_to_plot = ["RSK", "TMK"]
# Keep only the selected elements; sorting by DATE within each element makes
# the line plots run chronologically.
station_data_filtered = station_data[
    station_data["ELEMENT"].isin(elements_to_plot)
].sort_values(["STATION_ID", "ELEMENT", "DATE"])
station_data_grouped = station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]
# Colours are paired with the groups in groupby (sorted key) order.
colors = ("blue", "orange")
# The grid always has exactly two rows (time series + histogram); only the
# number of columns depends on how many elements are plotted. squeeze=False
# keeps `axes` two-dimensional so `axes.T` yields one (top, bottom) pair per
# element regardless of the column count.
fig, axes = plt.subplots(
    2, len(elements_to_plot), figsize=(10, 10), squeeze=False)
for (k, v), (ax1, ax2), color in zip(station_data_grouped, axes.T, colors):
    v.plot(x="DATE", y="VALUE", label=k, alpha=.75, ax=ax1, color=color)
    v.plot(y="VALUE", kind="hist", label=k, alpha=.75, ax=ax2, color=color)
plt.tight_layout()
# Leave room above the axes for the figure-level title.
plt.subplots_adjust(top=0.9)
plt.suptitle("Precipitation/Mean temperature time series of Dresden, Germany")
plt.show()
We can see here that the precipitation is strongly right-skewed (most days have little or no rain, with a long tail of wet days) and not normally distributed, while the temperature is almost normally distributed! The time series also gives a glimpse of how much data is available. A sad detail here is the gap around WW2.
# Print one value per calendar year for each plotted element: the yearly total
# for "RSK" and the yearly mean for every other element.
for element, observations in station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]:
    values_by_year = observations.groupby(observations["DATE"].dt.year)["VALUE"]
    if element == "RSK":
        yearly_summary = values_by_year.sum()
    else:
        yearly_summary = values_by_year.mean()
    print(yearly_summary)
DATE 1934 675.6 1935 643.1 1936 555.2 1937 793.7 1938 621.2 ... 2015 629.8 2016 721.9 2017 612.3 2018 421.1 2019 503.3 Name: VALUE, Length: 72, dtype: float64 DATE 1934 10.645479 1935 9.053425 1936 8.801366 1937 8.957260 1938 9.021096 ... 2015 10.790959 2016 10.081148 2017 10.109315 2018 11.222466 2019 11.229041 Name: VALUE, Length: 72, dtype: float64
We may want to find the stations closest to a certain location. To do so, simply call get_nearby_stations_by_number with the coordinates and the number of stations to return:
# Look up the stations closest to a point in Dresden that offer the daily
# historical climate summary.
latitude = 51.05089
longitude = 13.73832
number_of_stations = 5
# NOTE(review): the two date strings presumably bound the period the stations
# must cover -- confirm against the wetterdienst documentation.
date_range = ("2000-01-01", "2010-01-01")

# Final bare expression so the notebook renders the resulting table.
get_nearby_stations_by_number(
    latitude,
    longitude,
    number_of_stations,
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
    *date_range
)
STATION_ID | FROM_DATE | TO_DATE | STATION_HEIGHT | LAT | LON | STATION_NAME | STATE | HAS_FILE | DISTANCE_TO_LOCATION | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1051 | 1936-01-01 | 2020-09-22 | 120.0 | 51.0248 | 13.7750 | Dresden-Strehlen | Sachsen | True | 5.005144 |
1 | 1048 | 1934-01-01 | 2020-09-22 | 227.0 | 51.1280 | 13.7543 | Dresden-Klotzsche | Sachsen | True | 8.756424 |
2 | 1050 | 1949-01-01 | 2020-09-22 | 114.0 | 51.0221 | 13.8470 | Dresden-Hosterwitz | Sachsen | True | 12.501498 |
3 | 991 | 1954-09-01 | 2020-09-22 | 359.0 | 50.9116 | 13.7087 | Dippoldiswalde-Reinberg | Sachsen | True | 15.834661 |
4 | 3234 | 1956-06-01 | 2020-09-22 | 158.0 | 51.1294 | 13.4328 | Garsebach bei Meißen | Sachsen | True | 35.076014 |