Probability map of phytoplankton in the North Sea using DIVAnd and a neural network¶

The first step is to load the required modules

In [ ]:

using DIVAnd
using DIVAndNN
using LinearAlgebra
using Statistics
using Random
using Dates

The domain and the directory path datadir is defined in the file emodnet_bio_grid.jl

In [ ]:

include("../scripts/emodnet_bio_grid.jl");
include("../scripts/validate_probability.jl");
include("../scripts/PhytoInterp.jl");

Create working directories

In [ ]:

mkpath(datadir)
mkpath(joinpath(datadir,"tmp"))

Helper function to download file from an URL is necessary

In [ ]:

function maybedownload(url,fname)
    if !isfile(fname)
        mv(download(url),fname)
    else
        @info("$url is already downloaded")
    end
end

Download the GEBCO Bathymetry

In [ ]:

bathname = joinpath(datadir,"gebco_30sec_4.nc");
bathisglobal = true
maybedownload("https://dox.ulg.ac.be/index.php/s/RSwm4HPHImdZoQP/download",
              joinpath(datadir,"gebco_30sec_4.nc"))

Download a sample data file. Here we use the Biddulphia sinensis prepared by Deltares, NL

In [ ]:

datafile = joinpath(datadir, "Biddulphia sinensis-1995-2020.csv")
maybedownload("https://dox.ulg.ac.be/index.php/s/VgLglubaTLetHzc/download", datafile)

Mask and bathymetry¶

Interpolate land-sea mask

In [ ]:

maskname = joinpath(datadir,"mask.nc")
DIVAndNN.prep_mask(bathname,bathisglobal,gridlon,gridlat,years,maskname)

Load the mask (true: sea, false: land)

In [ ]:

ds = Dataset(maskname,"r")
mask = nomissing(ds["mask"][:,:]) .== 1
close(ds)

Interpolate the bathymetry

In [ ]:

DIVAndNN.prep_bath(bathname,bathisglobal,gridlon,gridlat,datadir)

Environmental covariables¶

These files are quite large and processing them takes some time. We therefore download the prepared data files for the North Sea.

These files can be generated by:

maybedownload("https://ec.oceanbrowser.net/data/emodnet-projects/Phase-3/Combined/Water_body_phosphate_combined_V1.nc",
              joinpath(datadir,"tmp","Water_body_phosphate_combined_V1.nc"))

maybedownload("https://ec.oceanbrowser.net/data/emodnet-projects/Phase-3/Combined/Water_body_nitrogen_combined_V1.nc",
              joinpath(datadir,"tmp","Water_body_nitrogen_combined_V1.nc"))

maybedownload("https://ec.oceanbrowser.net/data/emodnet-projects/Phase-3/Combined/Water_body_silicate_combined_V1.nc",
              joinpath(datadir,"tmp","Water_body_silicate_combined_V1.nc"))

DIVAndNN.prep_tempsalt(gridlon,gridlat,data_TS,datadir)

In [ ]:

maybedownload("https://dox.ulg.ac.be/index.php/s/y9Z0c1wb5YshVDW/download",
              joinpath(datadir,"silicate.nc"))

maybedownload("https://dox.ulg.ac.be/index.php/s/A1NPSWwQYkx6Wy6/download",
              joinpath(datadir,"phosphate.nc"))

maybedownload("https://dox.ulg.ac.be/index.php/s/LDPbPWBvW6wPmCw/download",
              joinpath(datadir,"nitrogen.nc"))


BLAS.set_num_threads(1)

Compute local resolution

In [ ]:

mask_unused,pmn,xyi = DIVAnd.domain(bathname,bathisglobal,gridlon,gridlat);

Next we load the covariables. The entries below correspond to the file name, the variable name and transformation function

In [ ]:

covars_fname = [
    ("bathymetry.nc" , "batymetry" , identity),
    ("nitrogen.nc"   , "nitrogen"  , identity),
    ("phosphate.nc"  , "phosphate" , identity),
    ("silicate.nc"   , "silicate"  , identity),
]

Add datadir to the file file names

In [ ]:

covars_fname = map(entry -> (joinpath(datadir,entry[1]),entry[2:end]...),covars_fname)

field = DIVAndNN.loadcovar((gridlon,gridlat),covars_fname;
                           covars_const = true);

Normalize covariables

In [ ]:

DIVAndNN.normalize!(mask,field)

Inventory of all data files For this example we have just one file

In [ ]:

data_analysis = DIVAndNN.Format2020(datadir,"")
scientificname_accepted = listnames(data_analysis);

Parameters for the analysis Except len, all parameters are adimensional.

In [ ]:

niter = 500                         # number of iterations
trainfrac = 0.01                    # fraction of data using during training
epsilon2ap = 10                     # data constraint parameter
epsilon2_background = 10            # error variance of obs. relative to background
NLayers = [size(field)[end],4,1]    # number of layers of the neural network
learning_rate = 0.001               # learning rate for the optimizer
L2reg = 0.0001                      # L2 regularization for the weights
dropoutprob = 0.6                   # drop-out probability
len = 75e3                          # correlation length-scale (meters)

output directory

In [ ]:

outdir = joinpath(datadir,"Results","test")
mkpath(outdir)

sname = String(scientificname_accepted[1])

@info sname

load data

In [ ]:

lon_a,lat_a,obstime_a,value_a,ids_a = loadbyname(data_analysis,years,sname)

Random.seed!(1234)

xobs_a = (lon_a,lat_a)
lenxy = (len,len)

Start the analysis¶

In [ ]:

value_analysis,fw0 = DIVAndNN.analysisprob(
    mask,pmn,xyi,xobs_a,
    value_a,
    lenxy,epsilon2ap,
    field,
    NLayers,
    costfun = DIVAndNN.nll,
    niter = niter,
    dropoutprob = dropoutprob,
    L2reg = L2reg,
    learning_rate = learning_rate,
    rmaverage = true,
    trainfrac = trainfrac,
    epsilon2_background = epsilon2_background,
);

Save the results¶

In [ ]:

outname = joinpath(outdir,"DIVAndNN_$(sname)_interp.nc")
create_nc_results(outname, gridlon, gridlat, value_analysis, sname;
                  varname = "probability", long_name="occurrence probability");

Plots¶

In [ ]:

include("../scripts/emodnet_bio_plot2.jl")

This notebook was generated using Literate.jl.