Notebook

In [ ]:

import glob
import os
import shutil

import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from config import SETTINGS

import seabeepy as sb

plt.style.use("ggplot")

In [ ]:

# Login to MinIO
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID, password=SETTINGS.MINIO_SECRET_KEY
)

SeaBee annotation workflow¶

This notebook includes useful functions to streamline the SeaBee annotation workflow for habitat classification. See the documentation here for details.

1. User input¶

In [ ]:

# Version of the Excel class definitions to use
class_version = "1-0"

# Campaign folder. Should be a folder within '/niva/tidy/annotation'
camp_fold = r"/home/notebook/shared-seabee-ns9879k/niva-tidy/annotation/olberg_2023"

# Field/column identifying unique subareas in 'subarea_boundaries' shapefile
subarea_id_field = "id"

# Output CRS for geopackage. Should match the CRS of the drone images
crs = "epsg:25832"

# Folder for saving intermediates
temp_fold = r"/home/notebook/annotation_temp"

2. Create class definition file¶

Create a class definition file compatible with ArcGIS Pro from an Excel table. The Excel file must be structured as illustrated below:

A	B	C	D	E	F	G
lev1_name	lev2_name	lev3_name	lev3_desc	lev1_hex	lev2_hex	lev3_hex
ALGAE	RED	VERLA	Vertebrata lanosa (grisetangdokke/trøffeltang)	#996633	#CF4D50	#47223E
ALGAE	TURF	TURF	Unspecified turf (lurv)	#996633	#779A7F	#779A7F
ANGIO	ANGIO	ZOSMA	Zostera marina (ålegras)	#A7FD67	#A7FD67	#A7FD67
MAERL	MAERL	MAERL	Maerl (løstliggende rugl)	#CAA2A9	#CAA2A9	#CAA2A9
URCHIN	URCHIN	ECHES	Echinus esculentus (rød kråkebolle)	#868509	#868509	#ED7D31
URCHIN	URCHIN	GRAAC	Gracilechinus acutus (langpiggsjøpiggsvin)	#868509	#868509	#C98C58
STARFISH	STARFISH	OPHNI	Ophiocomina nigra (svart slangestjerne)	#80258F	#80258F	#80258F
STARFISH	STARFISH	ASTRU	Asterias rubens (vanlig korstroll)	#80258F	#80258F	#BE4BD1

The output is an .ecs file (JSON) that can be loaded into the Training Samples Manager in ArcGIS Pro's Image Analyst extension. Both this file and the original Excel file should be pushed to the annotation repository on GitHub.

In [ ]:

# Read template data from Excel
class_version = class_version.replace(".", "-")
major_ver_no = int(class_version.split("-")[0])
xl_path = f"../class_definitions/seabee_habitat_classes_v{class_version}.xlsx"
if major_ver_no < 1:
    # Use old template structure
    df = pd.read_excel(xl_path, usecols="A:D")
else:
    # Use updated template
    df = pd.read_excel(xl_path, usecols="A:G")
df.head()

In [ ]:

# Convert to .ecs
name = f"seabee_class_definitions_v{class_version}"
desc = f"v{class_version} of the SeaBee class definition file."
out_fold = r"../class_definitions"

if major_ver_no < 1:
    sb.anno.class_definition_from_df_deprecated(
        df,
        name,
        out_fold=out_fold,
        version=1,
        org="NIVA",
        desc=desc,
    )
else:
    sb.anno.class_definition_from_df(
        df,
        name,
        out_fold=out_fold,
        version=1,
        org="NIVA",
        desc=desc,
    )

3. Process annotation¶

3.1. Check data structure¶

In [ ]:

# Check data structure within 'camp_fold'
def check_single_shapefile(folder_path):
    search_path = os.path.join(folder_path, "*.shp")
    flist = list(glob.glob(search_path))
    assert (
        len(flist) == 1
    ), f"Folder '{os.path.basename(folder_path)}' should contain a single shapefile."
    return flist[0]


subfolds = ["annotation_by_subarea", "region_of_interest", "subarea_boundaries"]
for subfold in subfolds:
    assert os.path.isdir(
        os.path.join(camp_fold, subfold)
    ), f"Subfolder '{subfold}' does not exist in 'camp_fold'."

# Check 'subarea_boundaries' and 'region_of_interest' contain a single shapefile
subarea_shp_path = check_single_shapefile(os.path.join(camp_fold, "subarea_boundaries"))
roi_shp_path = check_single_shapefile(os.path.join(camp_fold, "region_of_interest"))

3.2. Merge shapefiles¶

During annotation, all users should work with the same class definition file (i.e. the .ecs created above). Annotations from all users for the same area can then be exported as shapefiles and added to a single folder, named annotation_by_subarea. As long as the same class definition file has been used by everyone, the shapefiles will have a consistent structure with the same classes. These can therefore be merged and "dissolved" to create a single annotation dataset for the whole area.

In [ ]:

# Merge and dissolve annotation shapefiles for each subarea
# Make sure 'shp_fold' ONLY contains annotation shapefiles!
shp_fold = os.path.join(camp_fold, "annotation_by_subarea")
gdf = sb.anno.merge_shapefiles(shp_fold)
gdf.head()

3.4. Assign subareas¶

The merged shapefile above is intersected with the subarea polygons to create a set of training samples split by subarea.

In [ ]:

# Assign subarea IDs
subarea_gdf = gpd.read_file(subarea_shp_path)
subarea_gdf.rename({subarea_id_field: "subarea_id"}, axis="columns", inplace=True)
subarea_gdf = subarea_gdf[["subarea_id", "geometry"]]
assert subarea_gdf.crs == gdf.crs
gdf = gdf.overlay(subarea_gdf, how="intersection")
gdf.head()

3.5. Rebuild the class hierarchy¶

ArcGIS Pro stores all annotation in a single field (Classname), regardless of the level in the .ecs hierarchy. For machine learning, it is better to have one column of class labels per level, as this makes it easier to generate raster training datasets using labels for any level. To get around this limitation, class_definition_from_df embeds the hierarchy in the Classcode, so that it can be reconstructed afterwards. This is done by the function below.

In [ ]:

# Extract annotation levels to separate columns
gdf = sb.anno.rebuild_class_hierarchy(gdf, class_version)

# Save
merged_anno_dir = os.path.join(temp_fold, "merged_annotation")
os.makedirs(merged_anno_dir, exist_ok=True)
anno_shp_path = os.path.join(merged_anno_dir, f"merged_annotation_v{class_version}.shp")
gdf.to_file(anno_shp_path, index=False)
gdf.head()

In [ ]:

# Plot
ax = gdf.plot(
    column="lev2_name", cmap="Set3", categorical=True, legend=True, figsize=(10, 10)
)
cx.add_basemap(
    ax,
    crs=gdf.crs.to_string(),
    attribution=False,
    source="https://opencache.statkart.no/gatekeeper/gk/gk.open_gmaps?layers=topo4&zoom={z}&x={x}&y={y}",
)
leg = ax.get_legend()
leg.set_bbox_to_anchor((1, 0.0, 0.3, 1))
ax.axis("off");

3.6. Create geopackage¶

In [ ]:

# Save annotation, ROI and subareas to a geopackage
camp_name = os.path.basename(camp_fold)
shp_path_dict = {
    f"{camp_name}_merged_annotation_v{class_version}": anno_shp_path,
    f"{camp_name}_region_of_interest": roi_shp_path,
    f"{camp_name}_subareas": subarea_shp_path,
}
gpkg_name = f"{camp_name}_annotation.gpkg"
gpkg_path = os.path.join(temp_fold, gpkg_name)
for layer, shp_path in shp_path_dict.items():
    gdf = gpd.read_file(shp_path).to_crs(crs)
    gdf.to_file(gpkg_path, layer=layer, driver="GPKG", index=False)

In [ ]:

# Copy to MinIO and tidy
sb.storage.copy_folder(
    merged_anno_dir,
    camp_fold,
    minio_client,
    overwrite=False,
    clean_dst=False,
    containing_folder=True,
)
sb.storage.copy_file(
    gpkg_path,
    os.path.join(camp_fold, gpkg_name),
    minio_client,
    overwrite=False,
)
shutil.rmtree(merged_anno_dir)
os.remove(gpkg_path)

4. Publish annotation¶

In [ ]:

# Define layer styling using SeaBee standard SLDs.
# 'style_level' is the level in the hierarchy used for colouring polygons
style_level = 2
style_dict = {
    f"{camp_name}_merged_annotation_v{class_version}": f"annotation_classes_v{class_version}_level{style_level}.sld",
    f"{camp_name}_region_of_interest": "black_outline.sld",
    f"{camp_name}_subareas": "red_outline.sld",
}

# Identify geopackage
camp_name = os.path.basename(camp_fold)
gpkg_name = f"{camp_name}_annotation.gpkg"
gpkg_path = os.path.join(camp_fold, gpkg_name)
layers = list(style_dict.keys())

# Upload layers to GeoServer
store_name = sb.geo.upload_geopackage_layers_to_geoserver(
    gpkg_path,
    layers,
    SETTINGS.GEOSERVER_USER,
    SETTINGS.GEOSERVER_PASSWORD,
    workspace="geonode",
    style_dict=style_dict,
)

# Publish to GeoNode
for layer in layers:
    sb.geo.publish_to_geonode(
        layer,
        SETTINGS.GEONODE_USER,
        SETTINGS.GEONODE_PASSWORD,
        store_name=store_name,
        workspace="geonode",
    )