#!/usr/bin/env python
# coding: utf-8

# # Dfs0
#
# See [Dfs0 in MIKE IO Documentation](https://dhi.github.io/mikeio/user-guide/dfs0.html)

# In[ ]:


import pandas as pd
import mikeio


# ## Reading data

# In[ ]:


# The same dfs0 file is read several times below with different item selections.
oresund_path = "data/Oresund_ts.dfs0"

ds = mikeio.read(oresund_path)
ds


# In[ ]:


type(ds)


# The mikeio read function returns a `Dataset` which is a container of `DataArray`s.
#
# A `DataArray` can be selected by name or by index.

# In[ ]:


# Alternatives: ds.Drogden_Surface_elevation or ds[2]
da = ds["Drogden: Surface elevation"]
da


# Upon `read`, specific items can be selected with the `items` argument using name or index.

# In[ ]:


ds = mikeio.read(oresund_path, items=[0, 2, 3])
ds


# Wildcards can be used to select multiple items:

# In[ ]:


ds = mikeio.read(oresund_path, items="*Surf*")
ds


# A specific time subset can be selected using .sel:

# In[ ]:


time_window = slice("2018-03-04", "2018-03-04 12:00")
ds.sel(time=time_window)


# Or with positional indexing using .isel:

# In[ ]:


step_window = slice(10, 20)
ds.isel(time=step_window)


# The Dataset and DataArray have a number of useful attributes like `time`, `items`, `ndims`, `shape`, `values` (only DataArray) etc

# In[ ]:


ds.time


# In[ ]:


ds.items


# In[ ]:


da.item


# In[ ]:


da.shape


# In[ ]:


da.values


# The time series can be plotted with the plot method.

# In[ ]:


ds.plot()


# A simple timeseries Dataset can easily be converted to a Pandas DataFrame.

# In[ ]:


df = ds.to_pandas()
df


# ## Writing data
#
# Often, time series data will come from a csv or an Excel file. Here is an example of how to read a csv file with pandas and then write the pandas DataFrame to a dfs0 file.

# In[ ]:


df = pd.read_csv(
    "data/naples_fl.csv",
    skiprows=1,
    parse_dates=True,
    index_col=0,
)
df


# You will probably need to parse a specific data format many times; in that case it is a good idea to create a function.

# In[ ]:


def read_ncei_obs(filename):
    """Read an NCEI observation csv file and return its renamed columns.

    The file is read with pandas, skipping the first row and using the
    first column as the index (with date parsing requested). The columns
    named in ``mapping`` are renamed and returned in the order of the
    mapping.

    Parameters
    ----------
    filename
        Path of the csv file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Only the renamed columns: ``temperature_avg_f``,
        ``temperature_max_f``, ``temperature_min_f`` and ``prec_in``.
    """
    # old name : new name
    mapping = {
        'TAVG (Degrees Fahrenheit)': 'temperature_avg_f',
        'TMAX (Degrees Fahrenheit)': 'temperature_max_f',
        'TMIN (Degrees Fahrenheit)': 'temperature_min_f',
        'PRCP (Inches)': 'prec_in',
    }

    observations = pd.read_csv(filename, skiprows=1, parse_dates=True, index_col=0)
    renamed = observations.rename(columns=mapping)

    # Take the new names from the mapping so the column list is not repeated.
    return renamed[list(mapping.values())]


# In[ ]:


df = read_ncei_obs("data/naples_fl.csv")
df.head()


# In[ ]:


df.tail()


# In[ ]:


df.shape


# Convert temperature to Celsius and precipitation to mm.

# In[ ]:


max_celsius = (df['temperature_max_f'] - 32) / 1.8
prec_millimetres = df['prec_in'] * 25.4
df_final = df.assign(temperature_max_c=max_celsius, prec_mm=prec_millimetres)
df_final.head()


# In[ ]:


df_final.loc['2021'].plot()


# Creating a dfs0 file from a dataframe is pretty straightforward.
#
# 1. Convert the dataframe to a `Dataset`

# In[ ]:


ds = mikeio.from_pandas(df_final)
ds


# 2. Write the `Dataset` to a dfs0 file.

# In[ ]:


ds.to_dfs("output/naples_fl.dfs0")


# Let's read it back in again...

# In[ ]:


saved_ds = mikeio.read("output/naples_fl.dfs0")
saved_ds


# By default, EUM types are undefined, but they can be specified. Let's select a few columns.

# In[ ]:


df2 = df_final[['temperature_max_c', 'prec_in']]
df2.head()


# In[ ]:


from mikeio import ItemInfo, EUMType, EUMUnit

# One ItemInfo per selected column, in the same order as the columns of df2.
temperature_info = ItemInfo(EUMType.Temperature)
precipitation_info = ItemInfo(EUMType.Precipitation_Rate, EUMUnit.inch_per_day)

ds2 = mikeio.from_pandas(df2, items=[temperature_info, precipitation_info])
ds2


# ## EUM

# In[ ]:


from mikeio.eum import ItemInfo, EUMType, EUMUnit

EUMType.search("wind")


# In[ ]:


EUMType.Wind_speed.units


# ### Inline Exercise
#
# What is the best EUM Type for "peak wave direction"? What is the default unit?

# In[ ]:


# insert your code here


# ## Precipitation data

# In[ ]:


df = pd.read_csv("data/precipitation.csv", parse_dates=True, index_col=0)
df.head()


# In[ ]:


from mikecore.DfsFile import DataValueType

# A single ItemInfo is passed as `items` for the whole dataframe.
station_info = ItemInfo(
    EUMType.Precipitation_Rate,
    EUMUnit.mm_per_hour,
    data_value_type=DataValueType.MeanStepBackward,
)
precipitation_ds = mikeio.from_pandas(df, items=station_info)
precipitation_ds.to_dfs("output/precipitation.dfs0")


# ## Selecting

# In[ ]:


# select item by item number (starting from zero)
ds = mikeio.read("output/precipitation.dfs0", items=[1, 4])
ds


# In[ ]:


# or by name (in the order you like it)
station_names = ["Precipitation station 5", "Precipitation station 1"]
ds = mikeio.read("output/precipitation.dfs0", items=station_names)
ds


# ### Inline Exercise
#
# Read all items to a variable ds. Select "Precipitation station 3" - which different ways can you select this item?

# In[ ]:


# insert your code here


# In[ ]:


import utils

utils.sysinfo()


# In[ ]: