import glob
import os
import shutil
import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from config import SETTINGS
import seabeepy as sb
plt.style.use("ggplot")
# Login to MinIO
minio_client = sb.storage.minio_login(
user=SETTINGS.MINIO_ACCESS_ID, password=SETTINGS.MINIO_SECRET_KEY
)
# Version of the Excel class definitions to use
class_version = "1-0"
# Campaign folder. Should be a folder within '/niva/tidy/annotation'
camp_fold = r"/home/notebook/shared-seabee-ns9879k/niva-tidy/annotation/olberg_2023"
# Field/column identifying unique subareas in 'subarea_boundaries' shapefile
subarea_id_field = "id"
# Output CRS for geopackage. Should match the CRS of the drone images
crs = "epsg:25832"
# Folder for saving intermediates
temp_fold = r"/home/notebook/annotation_temp"
Create a class definition file compatible with ArcGIS Pro from an Excel table. The Excel file must be structured as illustrated below:
A | B | C | D | E | F | G |
---|---|---|---|---|---|---|
lev1_name | lev2_name | lev3_name | lev3_desc | lev1_hex | lev2_hex | lev3_hex |
ALGAE | RED | VERLA | Vertebrata lanosa (grisetangdokke/trøffeltang) | #996633 | #CF4D50 | #47223E |
ALGAE | TURF | TURF | Unspecified turf (lurv) | #996633 | #779A7F | #779A7F |
ANGIO | ANGIO | ZOSMA | Zostera marina (ålegras) | #A7FD67 | #A7FD67 | #A7FD67 |
MAERL | MAERL | MAERL | Maerl (løstliggende rugl) | #CAA2A9 | #CAA2A9 | #CAA2A9 |
URCHIN | URCHIN | ECHES | Echinus esculentus (rød kråkebolle) | #868509 | #868509 | #ED7D31 |
URCHIN | URCHIN | GRAAC | Gracilechinus acutus (langpiggsjøpiggsvin) | #868509 | #868509 | #C98C58 |
STARFISH | STARFISH | OPHNI | Ophiocomina nigra (svart slangestjerne) | #80258F | #80258F | #80258F |
STARFISH | STARFISH | ASTRU | Asterias rubens (vanlig korstroll) | #80258F | #80258F | #BE4BD1 |
The output is an .ecs
file (JSON) that can be loaded into the Training Samples Manager in ArcGIS Pro's Image Analyst extension. Both this file and the original Excel file should be pushed to the annotation
repository on GitHub.
# Read template data from Excel
class_version = class_version.replace(".", "-")
major_ver_no = int(class_version.split("-")[0])
xl_path = f"../class_definitions/seabee_habitat_classes_v{class_version}.xlsx"
if major_ver_no < 1:
# Use old template structure
df = pd.read_excel(xl_path, usecols="A:D")
else:
# Use updated template
df = pd.read_excel(xl_path, usecols="A:G")
df.head()
# Convert to .ecs
name = f"seabee_class_definitions_v{class_version}"
desc = f"v{class_version} of the SeaBee class definition file."
out_fold = r"../class_definitions"
if major_ver_no < 1:
sb.anno.class_definition_from_df_deprecated(
df,
name,
out_fold=out_fold,
version=1,
org="NIVA",
desc=desc,
)
else:
sb.anno.class_definition_from_df(
df,
name,
out_fold=out_fold,
version=1,
org="NIVA",
desc=desc,
)
# Check data structure within 'camp_fold'
def check_single_shapefile(folder_path):
search_path = os.path.join(folder_path, "*.shp")
flist = list(glob.glob(search_path))
assert (
len(flist) == 1
), f"Folder '{os.path.basename(folder_path)}' should contain a single shapefile."
return flist[0]
subfolds = ["annotation_by_subarea", "region_of_interest", "subarea_boundaries"]
for subfold in subfolds:
assert os.path.isdir(
os.path.join(camp_fold, subfold)
), f"Subfolder '{subfold}' does not exist in 'camp_fold'."
# Check 'subarea_boundaries' and 'region_of_interest' contain a single shapefile
subarea_shp_path = check_single_shapefile(os.path.join(camp_fold, "subarea_boundaries"))
roi_shp_path = check_single_shapefile(os.path.join(camp_fold, "region_of_interest"))
During annotation, all users should work with the same class definition file (i.e. the .ecs
created above). Annotations from all users for the same area can then be exported as shapefiles and added to a single folder, named annotation_by_subarea
. As long as the same class definition file has been used by everyone, the shapefiles will have a consistent structure with the same classes. These can therefore be merged and "dissolved" to create a single annotation dataset for the whole area.
# Merge and dissolve annotation shapefiles for each subarea
# Make sure 'shp_fold' ONLY contains annotation shapefiles!
shp_fold = os.path.join(camp_fold, "annotation_by_subarea")
gdf = sb.anno.merge_shapefiles(shp_fold)
gdf.head()
The merged shapefile above is intersected with the subarea polygons to create a set of training samples split by subarea.
# Assign subarea IDs
subarea_gdf = gpd.read_file(subarea_shp_path)
subarea_gdf.rename({subarea_id_field: "subarea_id"}, axis="columns", inplace=True)
subarea_gdf = subarea_gdf[["subarea_id", "geometry"]]
assert subarea_gdf.crs == gdf.crs
gdf = gdf.overlay(subarea_gdf, how="intersection")
gdf.head()
ArcGIS Pro stores all annotation in a single field (Classname
), regardless of the level in the .ecs
hierarchy. For machine learning, it is better to have one column of class labels per level, as this makes it easier to generate raster training datasets using labels for any level. To get around this limitation, class_definition_from_df
embeds the hierarchy in the Classcode
, so that it can be reconstructed afterwards. This is done by the function below.
# Extract annotation levels to separate columns
gdf = sb.anno.rebuild_class_hierarchy(gdf, class_version)
# Save
merged_anno_dir = os.path.join(temp_fold, "merged_annotation")
os.makedirs(merged_anno_dir, exist_ok=True)
anno_shp_path = os.path.join(merged_anno_dir, f"merged_annotation_v{class_version}.shp")
gdf.to_file(anno_shp_path, index=False)
gdf.head()
# Plot
ax = gdf.plot(
column="lev2_name", cmap="Set3", categorical=True, legend=True, figsize=(10, 10)
)
cx.add_basemap(
ax,
crs=gdf.crs.to_string(),
attribution=False,
source="https://opencache.statkart.no/gatekeeper/gk/gk.open_gmaps?layers=topo4&zoom={z}&x={x}&y={y}",
)
leg = ax.get_legend()
leg.set_bbox_to_anchor((1, 0.0, 0.3, 1))
ax.axis("off");
# Save annotation, ROI and subareas to a geopackage
camp_name = os.path.basename(camp_fold)
shp_path_dict = {
f"{camp_name}_merged_annotation_v{class_version}": anno_shp_path,
f"{camp_name}_region_of_interest": roi_shp_path,
f"{camp_name}_subareas": subarea_shp_path,
}
gpkg_name = f"{camp_name}_annotation.gpkg"
gpkg_path = os.path.join(temp_fold, gpkg_name)
for layer, shp_path in shp_path_dict.items():
gdf = gpd.read_file(shp_path).to_crs(crs)
gdf.to_file(gpkg_path, layer=layer, driver="GPKG", index=False)
# Copy to MinIO and tidy
sb.storage.copy_folder(
merged_anno_dir,
camp_fold,
minio_client,
overwrite=False,
clean_dst=False,
containing_folder=True,
)
sb.storage.copy_file(
gpkg_path,
os.path.join(camp_fold, gpkg_name),
minio_client,
overwrite=False,
)
shutil.rmtree(merged_anno_dir)
os.remove(gpkg_path)
# Define layer styling using SeaBee standard SLDs.
# 'style_level' is the level in the hierarchy used for colouring polygons
style_level = 2
style_dict = {
f"{camp_name}_merged_annotation_v{class_version}": f"annotation_classes_v{class_version}_level{style_level}.sld",
f"{camp_name}_region_of_interest": "black_outline.sld",
f"{camp_name}_subareas": "red_outline.sld",
}
# Identify geopackage
camp_name = os.path.basename(camp_fold)
gpkg_name = f"{camp_name}_annotation.gpkg"
gpkg_path = os.path.join(camp_fold, gpkg_name)
layers = list(style_dict.keys())
# Upload layers to GeoServer
store_name = sb.geo.upload_geopackage_layers_to_geoserver(
gpkg_path,
layers,
SETTINGS.GEOSERVER_USER,
SETTINGS.GEOSERVER_PASSWORD,
workspace="geonode",
style_dict=style_dict,
)
# Publish to GeoNode
for layer in layers:
sb.geo.publish_to_geonode(
layer,
SETTINGS.GEONODE_USER,
SETTINGS.GEONODE_PASSWORD,
store_name=store_name,
workspace="geonode",
)