#!/usr/bin/env python # coding: utf-8 # # Parse map coordinates from metadata # # The harvest of digitised maps metadata includes a `coordinates` column that provides a string representation of either a point or a bounding box. This notebook attempts to parse the coordinate string and convert the values to decimals. It then uses the decimal values to explore the geographical context of Trove's digitised map collection. # # The coordinate strings are either: # # * Points in the format `(Longitude/Latitude)`, for example: '(E 145°33ʹ/S 37°42ʹ)'. # * Bounding boxes in the format `(W--E/N--S)`, for example: '(E 114°00ʹ00ʺ--E 130°00ʹ00ʺ/S 14°00ʹ00ʺ--S 34°00ʹ00ʺ)'. # # I'm using [lat_lon_parser](https://github.com/NOAA-ORR-ERD/lat_lon_parser) to convert degrees/minutes/seconds to decimal values. # In[12]: import logging from operator import itemgetter import altair as alt import folium import ipywidgets as widgets import pandas as pd from folium.plugins import FastMarkerCluster from ipyleaflet import ImageOverlay, Map, WidgetControl from lat_lon_parser import parse from vega_datasets import data as vega_data # In[13]: # Save the parsing errors in a log file logging.basicConfig( filename="parse_errors.log", level=logging.DEBUG, format="%(message)s" ) def check_coord(value, lat_lon): """ Make sure that lat/longs are within expected range. Drop values if outside range. """ if lat_lon == "lat" and abs(value) <= 90: return value elif lat_lon == "lon" and abs(value) <= 180: return value else: raise ValueError return None def get_center(parsed): """ Get the centre of a bounding box. Returns point coords. See: https://gis.stackexchange.com/a/394860 """ e, w, n, s = itemgetter("east", "west", "north", "south")(parsed) width = max(w, e) - min(w, e) # get the box height height = max(s, n) - min(s, n) # compute the center center = check_coord(round(min(s, n) + height / 2, 4), "lat"), check_coord( round(min(w, e) + width / 2, 4), "lon" ) return center def parse_value(value): """ Parse latitude or longitude values. """ values = value.split("--") # Sometimes single hyphens are used if len(values) == 1: values = value.split("-") coords = [parse(v) for v in values] return sorted(coords) def parse_coords(coords): """ Parses a coordinate string, converting values to decimal. For points -- returns latitude and longitude. For boxes -- returns centre of box as latitude, longitude, and bounds as east, west, north, and south. """ parsed = {} # Default values for c in ["east", "west", "north", "south", "latitude", "longitude"]: parsed[c] = None try: # Split string into lat and long using / long, lat = coords.split("/") if long.startswith("N"): long, lat = lat, long longs = parse_value(long) lats = parse_value(lat) except (ValueError, TypeError): logging.error(coords) else: try: # Bounding box if len(longs) == 2 and len(lats) == 2: parsed["east"] = check_coord(longs[-1], "lon") parsed["west"] = check_coord(longs[0], "lon") parsed["north"] = check_coord(lats[-1], "lat") parsed["south"] = check_coord(lats[0], "lat") # Get centre of bounding box latitude, longitude = get_center(parsed) parsed["latitude"] = latitude parsed["longitude"] = longitude # Point elif len(longs) == 1 and len(lats) == 1: parsed["latitude"] = check_coord(lats[0], "lat") parsed["longitude"] = check_coord(longs[0], "lon") except ValueError: logging.error(coords) return parsed def get_coords(row): """ Process a row of the dataset, converting coordinate string into decimal values. """ coords = ( str(row["coordinates"]).strip(".").strip("(").strip(")").strip("[").strip("]") ) return parse_coords(coords) # Load the harvested data. # In[14]: df = pd.read_csv( "https://raw.githubusercontent.com/GLAM-Workbench/trove-maps-data/main/single_maps_20230131.csv" ) # How many digitised maps have coordinate values? # In[15]: df.loc[df["coordinates"].notnull()].shape[0] # Parse the coordinate strings. # In[16]: # Extract a subset of the harvested data df_coords = df.loc[df["coordinates"].notnull()][["title", "url", "coordinates"]].copy() # Parse the coordinate values and save the results to new columns df_coords[["east", "west", "north", "south", "latitude", "longitude"]] = df_coords.loc[ df_coords["coordinates"].notnull() ].apply(get_coords, axis=1, result_type="expand") # Let's have a peek at the parsed data. # In[17]: df_coords.head() # How many of the coordinate strings could be successfuly parsed as decimal values? # In[18]: df_coords.loc[ (df_coords["latitude"].notnull()) & (df_coords["longitude"].notnull()) ].shape[0] # Save the parsed coordinates. # In[8]: df_coords.to_csv("single_maps_20230131_coordinates.csv", index=False) # ## Display the geographical distribution of the digitised maps # # To visualise the locations of the maps we can plot the centre points using Altair. Mouseover the points to view the map titles. # In[19]: # This loads the country boundaries data countries = alt.topo_feature(vega_data.world_110m.url, feature="countries") # First we'll create the world map using the boundaries background = ( alt.Chart(countries) .mark_geoshape(fill="lightgray", stroke="white") .project("equirectangular") .properties(width=900, height=500) ) # Then we'll plot the positions of places using circles points = ( # By loading the data from url we stop the notebook from getting bloated alt.Chart( "https://raw.githubusercontent.com/GLAM-Workbench/trove-maps-data/main/single_maps_20230131_coordinates.csv" ) .mark_circle( # Style the circles size=10, color="steelblue", opacity=0.4, ) .encode( # Provide the coordinates longitude="longitude:Q", latitude="latitude:Q", # More info on hover tooltip=["title:N", "url:N"], ) .properties(width=900, height=500) ) # Finally we layer the plotted points on top of the backgroup map alt.layer(background, points) # The obvious 'crosshairs' at the 0,0 point suggest that some of the coordinates might be missing data. If you explore the points you'll probably find some that seem to be in the wrong position, perhaps because the latitudes or longitudes have been flipped. More checking would be needed if you were using this dataset for detailed analysis. # We can also use Folium to plot the map locations, creating a marker for each point and grouping them into clusters. Folium's FastMarkerCluster plugin lets us add thousands of markers without slowing things down. # In[ ]: m = folium.Map(location=(0, 0), zoom_start=2) callback = ( "function (row) {" 'var marker = L.marker(new L.LatLng(row[0], row[1]), {color: "blue", tooltip: row[2]});' "var popup = L.popup({maxWidth: '300'});" "var icon = L.AwesomeMarkers.icon({" "icon: 'globe'," "iconColor: 'white'," "markerColor: 'blue'," "prefix: 'glyphicon'," "extraClasses: 'fa-rotate-0'" "});" "marker.setIcon(icon);" "const title = row[2];" "const url = row[3];" "var mytext = $(`

${title}

${url}
`)[0];" "popup.setContent(mytext);" "marker.bindPopup(popup);" "return marker};" ) m.add_child( FastMarkerCluster( df_coords.loc[ (df_coords["latitude"].notnull()) & (df_coords["longitude"].notnull()) ][["latitude", "longitude", "title", "url"]], callback=callback, ) ) m # I've also saved the Folium map as [an HTML file](https://glam-workbench.net/trove-maps/trove-map-clusters.html) for easy browsing. # ## Display map image on a modern basemap # # To view a single map in context, we can use IPyLeaflet to layer the map image on top of a modern basemap, using the bounding box data to position the image. We'll also add a slider to change the opacity of the map image, so we can compare it to the basemap underneath. # # # # The results will, of course, depend on both the accuracy of the coordinates and the nature of the map. For more accurate results we'd want to use something like [MapWarper](https://mapwarper.net/) to georectify the map image. Note too that if a map image has a lot of white space or text around the map itself, the proportions of the image might not match the bounds of the map. This will result in the image being distorted. # In[ ]: # Select a random record with a bounding box random = df_coords.loc[df_coords["east"].notnull()].sample(n=1).iloc[0] default_layout = widgets.Layout(width="900px", height="540px") # Create the basemap and centre it on the map location m = Map( center=(random["latitude"], random["longitude"]), zoom=8, scroll_wheel_zoom=True, layout=default_layout, ) # Add the map image as an overlay image = ImageOverlay( # Image url url=f"{random['url']}/image", # Postion the image using the bounding box data bounds=((random["south"], random["west"]), (random["north"], random["east"])), ) # Add a slider to control the opacity of the image slider = widgets.FloatSlider( min=0, max=1, value=1, # Opacity is valid in [0,1] range orientation="vertical", # Vertical slider is what we want readout=False, # No need to show exact value layout=widgets.Layout(width="2em"), ) # Connect slider value to opacity property of the Image Layer widgets.jslink((slider, "value"), (image, "opacity")) m.add_control(WidgetControl(widget=slider)) m.add_layer(image) print(random["title"]) print(random["url"]) # Set the map zoom so you can see all of the image. m.fit_bounds([[random["south"], random["west"]], [random["north"], random["east"]]]) m # ---- # # Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.net/).