#!/usr/bin/env python
# coding: utf-8

# # Classification of table rows with custom measurements
# This notebook demonstrates using the apoc `TableRowClassifier`

# In[1]:

import apoc
import numpy as np
from numpy.random import default_rng
from skimage import draw
from skimage.measure import regionprops_table
import pyclesperanto_prototype as cle
import pandas as pd

# In[2]:

cle.select_device()

# ## Example data
# We start by setting up an image with circles and squares

# In[3]:


def get_square_indices(center_row, center_column, half_width=5):
    """Return the pixel indices of a square placed around a center point.

    The square starts `half_width` pixels before the center along both axes
    and extends over `2 * half_width` pixels in each direction.
    """
    side_length = 2 * half_width
    top_left = np.array([center_row, center_column]) - half_width
    return draw.rectangle(top_left, extent=(side_length, side_length))


def get_circle_indices(center_row, center_column, half_width=5):
    """Return the pixel indices of a disk of radius `half_width` around a center point."""
    return draw.disk((center_row, center_column), half_width)


# In[4]:

# create a label image with randomly placed squares and circles
rng = default_rng(42)
label_image = np.zeros((200, 200), dtype=np.uint16)

# for every shape name: the function producing its pixel indices and the
# class id recorded as ground truth (1 = square, 2 = circle)
shape_catalog = {
    "square": (get_square_indices, 1),
    "circle": (get_circle_indices, 2),
}

# the shapes sit on a regular grid; the same positions serve rows and columns
grid_centers = np.arange(10, 200, 20)

ground_truth = []
label_index = 1
for center_row in grid_centers:
    for center_column in grid_centers:
        # one random draw per grid cell decides which shape is placed there
        shape_type = rng.choice(["square", "circle"])
        shape_function, shape_class = shape_catalog[shape_type]
        ground_truth.append(shape_class)

        shape_rows, shape_columns = shape_function(center_row, center_column, half_width=5)
        label_image[shape_rows, shape_columns] = label_index
        label_index += 1

ground_truth = np.asarray(ground_truth)

# In[5]:

# view the label image
cle.imshow(label_image, labels=True)

# ## Feature extraction
# We now set up a table of measurements. Here we use [scikit-image's regionprops_table](https://scikit-image.org/docs/dev/api/skimage.measure.html#skimage.measure.regionprops_table).
# In[6]:

# measure the objects using skimage's regionprops_table function
measured_properties = ("label", "area", "perimeter", "extent", "eccentricity")
measurement_table = regionprops_table(label_image, properties=measured_properties)

# the label column identifies objects and is not a feature, so it is taken
# out of the table before training / prediction
labels = measurement_table.pop("label")

# In[7]:

# extract the first 50 measurements to use as training
num_training_rows = 50
measurements_training = {
    feature: column[:num_training_rows]
    for feature, column in measurement_table.items()
}
ground_truth_training = ground_truth[:num_training_rows]

# ## Classifier training
# We next train a `TableRowClassifier` and save it to a `.cl` file.

# In[8]:

# create the classifier and train it
cl_filename = "shape_classifier.model.cl"
num_trees = 1000
max_depth = 1

# remove a classifier file left over from an earlier run before training anew
apoc.erase_classifier(cl_filename)
classifier = apoc.TableRowClassifier(
    cl_filename,
    max_depth=max_depth,
    num_ensembles=num_trees,
)
classifier.train(measurements_training, ground_truth_training)

# ## Prediction
# Next, we use the classifier to generate a prediction and visualize it as parametric / classification label image.

# In[9]:

# predict on the full table
prediction = classifier.predict(measurement_table)

# prefix class==0 for the background (first entry, label=0)
annotation = [0, *prediction.tolist()]

# create an image where the objects are colored by the predicted shape
shape_image = cle.replace_intensities(label_image, annotation)
cle.imshow(shape_image, labels=True, min_display_intensity=0)

# ## Classifier statistics
# We can also visualize the share of the decision trees that take a given parameter into account when making their decision. This allows us to differentiate parameters that are useful for making the classification from those which are not.
#
# Note: Several of these parameters may be correlated. If you enter 11 parameters, which all allow to make the classification similarly, but 10 of those are correlated, these 10 may appear with a share of about 0.05 while the 11th parameter has a share of 0.5. Thus, study these values with care.
# In[10]:


def colorize(styler):
    """Apply a rainbow background gradient to a pandas Styler and return it.

    `axis=None` is passed to `background_gradient`, so the gradient is not
    computed per row or per column.
    """
    gradient_options = {"axis": None, "cmap": "rainbow"}
    styler.background_gradient(**gradient_options)
    return styler


# statistics of the classifier trained on all measured features
shares, counts = classifier.statistics()
df = pd.DataFrame(shares).transpose()
colorize(df.style)

# In[11]:

# retrain the same classifier on two of the features only and look at the
# statistics again
reduced_training_table = pd.DataFrame(measurements_training)[["area", "perimeter"]]
classifier.train(reduced_training_table, ground_truth_training)

shares, counts = classifier.statistics()
df = pd.DataFrame(shares).transpose()
colorize(df.style)

# In[12]:

classifier.feature_importances()

# In[ ]: