#!/usr/bin/env python
# coding: utf-8

# # Cluster plots
# Using `stackview.clusterplot` we can visualize the contents of pandas DataFrames and the corresponding segmented objects side-by-side. In such a plot you can select objects and visualize the selection. This might be useful for exploring feature extraction parameter spaces.

# In[1]:


import pandas as pd
import numpy as np
import stackview
import pandas as pd
from skimage.measure import regionprops_table
from skimage.io import imread
from skimage.filters import threshold_otsu
from skimage.measure import label
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 
from umap import UMAP

# Bare expression: as the last statement of a notebook cell it displays the
# installed stackview version.
stackview.__version__


# To demonstrate this, we need an image, a segmentation and a table of extracted features.

# In[2]:


# Load the example image from a path relative to the working directory.
image = imread('data/blobs.tif')

# Segment the image: compute an Otsu threshold, keep the pixels above it as
# foreground, and label the resulting binary mask so that every object
# receives its own integer id.
thresh = threshold_otsu(image)
binary_image = image > thresh
labeled_image = label(binary_image)


# In[4]:


# Measurements to extract for every labeled object.
feature_names = [
    'mean_intensity', 'std_intensity',
    'centroid', 'area', 'feret_diameter_max',
    'minor_axis_length', 'major_axis_length',
]
properties = regionprops_table(
    labeled_image,
    intensity_image=image,
    properties=feature_names,
)

# One row per object, one column per measurement.
df = pd.DataFrame(properties)

# Only numeric columns are passed on to scaling and embedding.
numeric_cols = df.select_dtypes(include=[np.number]).columns

# Standardize the features before computing the embedding.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[numeric_cols])

# Reduce the scaled features to two dimensions; the random state is fixed to 42.
umap = UMAP(n_components=2, random_state=42)
umap_coords = umap.fit_transform(scaled_data)

# Store both embedding axes as additional columns of the table.
df['UMAP1'], df['UMAP2'] = umap_coords[:, 0], umap_coords[:, 1]

df.head()


# In[5]:


# Build the initial "selection" column: the first half of the rows is set to 1,
# the remaining rows stay 0.
num_objects = len(df)
pre_selection = np.zeros(num_objects)
pre_selection[:num_objects // 2] = 1

df["selection"] = pre_selection


# ## Interaction
# Using some more involved code we can also draw the image and the scatter plot side-by-side and make them interact. You can select data points in the plot on the right and the visualization on the left will be updated accordingly.

# In[9]:


# Show the image with its label image next to a scatter plot of the table,
# plotting the two centroid columns against each other.
stackview.clusterplot(
    image=image,
    labels=labeled_image,
    df=df,
    column_x="centroid-0",
    column_y="centroid-1",
    zoom_factor=1.5,
    markersize=15,
)


# Every time the user selects different data points, the selection in our dataframe is updated.

# In[10]:


# Display the current content of the "selection" column.
df["selection"]


# In[ ]: