#!/usr/bin/env python
# coding: utf-8

# [![image](https://raw.githubusercontent.com/visual-layer/visuallayer/main/imgs/vl_horizontal_logo.png)](https://www.visual-layer.com)
#
# # Tutorial for working directly with feature vectors
#
# [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/visual-layer/fastdup/blob/main/examples/feature_vectors.ipynb)
# [![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/visual-layer/fastdup/blob/main/examples/feature_vectors.ipynb)

# ## Use case 1: compute feature vectors with fastdup and load them with numpy for further processing

# In[4]:


# Notebook-only: `get_ipython` exists when this runs under IPython/Jupyter.
get_ipython().run_line_magic('pip', 'install -U fastdup')


# In[1]:


import fastdup
import numpy as np

# Change to your image folder
input_dir = '/Users/dannybickson/visual_database/cxx/unittests/two_images/'

# Run fastdup on an input image folder to create embeddings
fd = fastdup.create(input_dir=input_dir, work_dir='out')
fd.run(overwrite=True, print_summary=False)

# Read the embeddings to use them in python
# There are two images in the input_dir, so the embedding matrix is 2x 576.
# Each row in the embedding matrix is an image.
flist, embedding_matrix = fastdup.load_binary_feature(filename='./out/atrain_features.dat')
print('Read embedding matrix of shape', embedding_matrix.shape)
print('Image filenames are')
print(flist)


# # Use case 2: Save your own binary features to work with fastdup

# ## Version 0.2

# In[2]:


import fastdup
import numpy as np
import os

input_dir = '/Users/dannybickson/visual_database/cxx/unittests/two_images/'
embedding_dir = 'embedding_input'

flist = os.listdir(input_dir)
flist = [os.path.join(input_dir, f) for f in flist]

# Replace the below code with computation of your own features.
# One row per listed file, so the matrix always lines up with `flist`
# (the original two-image folder still yields a 2 x 576 matrix).
matrix = np.random.rand(len(flist), 576).astype('float32')

# Save the embedding along the filenames into a working folder.
# os.makedirs(..., exist_ok=True) is the portable equivalent of `mkdir -p`.
os.makedirs(embedding_dir, exist_ok=True)
fastdup.save_binary_feature(embedding_dir, flist, matrix)

# Pass the same `input_dir` used to build `flist`, so that changing the
# folder above is enough to retarget the whole cell.
# NOTE(review): run_mode=2 presumably reuses the saved features instead of
# extracting new ones - confirm against the fastdup documentation.
fastdup.run(input_dir, run_mode=2, work_dir=embedding_dir)


# ## Version 1.0

# In[7]:


# Note: files should contain absolute path and not relative path
import fastdup
import numpy as np
import os

input_dir = '/Users/dannybickson/visual_database/cxx/unittests/two_images/'
flist = os.listdir(input_dir)
flist = [os.path.join(input_dir, f) for f in flist]

# Replace the below code with computation of your own features.
# One row per listed file, so the matrix always lines up with `flist`.
matrix = np.random.rand(len(flist), 576).astype('float32')

fd2 = fastdup.create(input_dir=input_dir, work_dir='output2')
fd2.run(annotations=flist, embeddings=matrix, print_summary=False, overwrite=True)


# ## Wrap Up
#
# Next, feel free to check out other tutorials -
#
# + ⚡ [**Quickstart**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/quick-dataset-analysis.ipynb): Learn how to install fastdup, load a dataset and analyze it for potential issues such as duplicates/near-duplicates, broken images, outliers, dark/bright/blurry images, and view visually similar image clusters. If you're new, start here!
# + 🧹 [**Clean Image Folder**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/cleaning-image-dataset.ipynb): Learn how to analyze and clean a folder of images from potential issues and export a list of problematic files for further action. If you have an unorganized folder of images, this is a good place to start.
# + 🖼 [**Analyze Image Classification Dataset**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/analyzing-image-classification-dataset.ipynb): Learn how to load a labeled image classification dataset and analyze it for potential issues. If you have a labeled ImageNet-style folder structure, have a go!
# + 🎁 [**Analyze Object Detection Dataset**](https://nbviewer.org/github/visual-layer/fastdup/blob/main/examples/analyzing-object-detection-dataset.ipynb): Learn how to load bounding box annotations for object detection and analyze them for potential issues. If you have a COCO-style labeled object detection dataset, give this example a try.
#
# ## VL Profiler
# If you prefer a no-code platform to inspect and visualize your dataset, [**try our free cloud product VL Profiler**](https://app.visual-layer.com) - VL Profiler is our first no-code commercial product that lets you visualize and inspect your dataset in your browser.
#
# [Sign up](https://app.visual-layer.com) now, it's free.
#
# [![image](https://raw.githubusercontent.com/visual-layer/fastdup/main/gallery/vl_profiler_promo.svg)](https://app.visual-layer.com)
#
# As usual, feedback is welcome!
#
# Questions? Drop by our [Slack channel](https://visualdatabase.slack.com/join/shared_invite/zt-19jaydbjn-lNDEDkgvSI1QwbTXSY6dlA#/shared-invite/email) or open an issue on [GitHub](https://github.com/visual-layer/fastdup/issues).
#