#!/usr/bin/env python
# coding: utf-8

# ## Using whylogs to Profile Images
# ---
# This notebook provides an example of how you can use whylogs to profile
# unstructured data like images.

# In[1]:


from PIL import Image
import numpy as np
import os
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
import seaborn as sns


# In[2]:


# Display the sample image and report its dimensions. The pixel data is read
# while the file handle is still open.
with open("flower2.jpg", "rb") as img_f:
    img = Image.open(img_f)
    imshow(np.asarray(img))
    w, h = img.size
    total_num_pixels = w * h
    print(f"width :\t{w}\nheight:\t{h}\nnumber of pixels:{total_num_pixels}")


# We can create a logger and build a profile sketch of the image data.

# In[3]:


from whylogs import get_or_create_session

# NOTE(review): this only binds a notebook-level name; presumably it was meant
# to reset a cached whylogs session -- confirm whether it is still needed.
_session = None
session = get_or_create_session()
logger = session.logger("image_dataset2")
logger.log_image("flower2.jpg")
profile = logger.profile


# You can obtain the histogram sketch of image data features, e.g. Saturation below.

# In[4]:


imageProfiles = profile.flat_summary()["hist"]
print(imageProfiles["Saturation"])


# Along with all the metadata collected from the image:

# In[5]:


print(profile.flat_summary()["summary"]["column"].values)


# ## Custom Functions
# ---
#
# One can also create custom functions to profile image-specific features.
# The two examples below demonstrate this: the first computes the average of
# the image pixel values, while the second simply lets you create a
# distribution sketch of the blue values. ComposeTransforms also allows you to
# mix and match functions to create new features to monitor.

# In[6]:


class AvgValue:
    """Feature transform that reduces its input to the mean of all its values."""

    def __call__(self, x):
        """Return the mean of ``x`` as an array reshaped to ``(-1, 1)``.

        ``x`` is anything ``np.array`` accepts (e.g. an image or an array
        produced by a previous transform).
        """
        return np.mean(np.array(x)).reshape(-1, 1)

    def __repr__(self):
        # The class name is used as the textual representation of the transform.
        return self.__class__.__name__


# In[7]:


class MyBlue:
    """Feature transform that extracts the third band of an image as a column."""

    def __call__(self, x):
        """Return the third band of ``x`` as an array reshaped to ``(-1, 1)``.

        ``x.split()`` must yield exactly three bands; presumably ``x`` is an
        RGB PIL image, in which case the third band is the blue channel.
        """
        _, _, b = x.split()
        return np.array(b).reshape(-1, 1)

    def __repr__(self):
        # The class name is used as the textual representation of the transform.
        return self.__class__.__name__


# In[ ]:


from whylogs.features.transforms import ComposeTransforms, Brightness, Saturation

_session = None
session = None
session = get_or_create_session()
logger2 = session.logger("image_dataset_custom_functions")
logger2.log_image(
    "flower2.jpg",
    feature_transforms=[
        AvgValue(),
        MyBlue(),
        ComposeTransforms([MyBlue(), AvgValue()]),
    ],
)
profile2 = logger2.profile
print(profile2.flat_summary()["summary"]["column"].values)


# ## Check histograms
# We can obtain the individual histograms for the features.

# In[9]:


# Element-wise min/max ufuncs built from the Python builtins. They are kept as
# module-level names in case other cells use them.
minnpf = np.frompyfunc(lambda x, y: min(x, y), 2, 1)
maxnpf = np.frompyfunc(lambda x, y: max(x, y), 2, 1)


def get_custom_histogram_info(profiles, variable, n_bins):
    """Compute shared bin edges and per-profile PMFs for one feature.

    Args:
        profiles: Non-empty sequence of profiles exposing ``flat_summary()``
            and ``columns``.
        variable: Name of the column/feature to inspect.
        n_bins: Divisor applied to the value range to pick the number of
            edges. NOTE(review): the number of edges is
            ``(max - min) / n_bins``, so this acts as an approximate edge
            spacing rather than a bin count -- confirm this is intended.

    Returns:
        A ``(bins, counts)`` tuple: ``bins`` is the array of edges spanning
        the overall min/max across all profiles, and ``counts`` holds one
        ``get_pmf(bins[:-1])`` result per profile.
    """
    summaries = [p.flat_summary()["summary"] for p in profiles]
    column_mins = [s[s["column"] == variable]["min"].values[0] for s in summaries]
    column_maxs = [s[s["column"] == variable]["max"].values[0] for s in summaries]

    # Overall extremes across every profile, truncated to integers. The
    # previous ``ufunc.accumulate(..., dtype=np.object).astype(np.int)`` form
    # used NumPy aliases that no longer exist and produced one running value
    # per profile, which could not be converted with ``int()`` when more than
    # one profile was supplied.
    min_range = int(min(column_mins))
    max_range = int(max(column_maxs))

    # Avoid an empty edge array when the value range is smaller than n_bins.
    num_edges = max(1, int((max_range - min_range) / n_bins))
    bins = np.linspace(min_range, max_range, num_edges)

    counts = [
        p.columns[variable].number_tracker.histogram.get_pmf(bins[:-1])
        for p in profiles
    ]
    return bins, counts


def plot_distribution_shift(profiles, variable, n_bins):
    """Visualization for distribution shift.

    Draws one weighted histogram per profile for ``variable`` on a shared
    axis, labelled with each profile's ``name``, and shows the figure.
    """
    bins, counts = get_custom_histogram_info(profiles, variable, n_bins)
    fig, ax = plt.subplots(figsize=(10, 3))

    for idx, profile in enumerate(profiles):
        sns.histplot(
            x=bins,
            weights=counts[idx],
            bins=n_bins,
            label=profile.name,
            alpha=0.7,
            ax=ax,
        )
    ax.legend()
    plt.show()


# In[10]:


plot_distribution_shift([profile2], "MyBlue", 10)


# In[11]:


# Plot the "Saturation" feature histogram for `profile`.
plot_distribution_shift(profiles=[profile], variable="Saturation", n_bins=10)


# In[ ]: