from fastai.vision import *
from fastai.widgets import DatasetFormatter, ImageCleaner, ImageDownloader, download_google_images
from fastai.gen_doc.nbdoc import *

%reload_ext autoreload
%autoreload 2

# Train a quick model on MNIST_SAMPLE so the widgets have a learner to work with
path = untar_data(URLs.MNIST_SAMPLE)
data = ImageDataBunch.from_folder(path)
learn = cnn_learner(data, models.resnet18, metrics=error_rate)
learn.fit_one_cycle(2)
learn.save('stage-1')

# Rebuild the data without a validation split: the cleaning widgets operate on the training set only
db = (ImageList.from_folder(path)
                .split_none()
                .label_from_folder()
                .databunch())

learn = cnn_learner(db, models.resnet18, metrics=[accuracy])
learn.load('stage-1');

show_doc(DatasetFormatter)

show_doc(DatasetFormatter.from_similars)

show_doc(DatasetFormatter.from_toplosses)

show_doc(ImageCleaner)

# Flag the highest-loss images for relabelling or deletion
ds, idxs = DatasetFormatter().from_toplosses(learn)
ImageCleaner(ds, idxs, path)

# ImageCleaner doesn't change anything on disk; it writes its result to path/'cleaned.csv'
df = pd.read_csv(path/'cleaned.csv', header='infer')

# Create a databunch from the cleaned csv. All images go into the training set with no
# validation split, since DatasetFormatter only uses the training set.
np.random.seed(42)
db = (ImageList.from_df(df, path)
                .split_none()
                .label_from_df()
                .databunch(bs=64))

learn = cnn_learner(db, models.resnet18, metrics=error_rate)
learn = learn.load('stage-1')

# Find near-duplicate images from the activations of the chosen layer
ds, idxs = DatasetFormatter().from_similars(learn, layer_ls=[0,7,1], pool=None)
ImageCleaner(ds, idxs, path, duplicates=True)

show_doc(ImageDownloader)

path = Config.data_path()/'image_downloader'
os.makedirs(path, exist_ok=True)
ImageDownloader(path)

path = Config.data_path()/'image_downloader'
files = download_google_images(path, 'aussie shepherd', size='>1024*768', n_images=30)
len(files)

show_doc(download_google_images)

# Set up the path and the labels to search for
path = Config.data_path()/'image_downloader'
labels = ['boston terrier', 'french bulldog']

# Download the images
for label in labels:
    download_google_images(path, label, size='>400*300', n_images=50)

# Build a databunch and train!
src = (ImageList.from_folder(path)
       .split_by_rand_pct()
       .label_from_folder()
       .transform(get_transforms(), size=224))

db = src.databunch(bs=16, num_workers=0)

learn = cnn_learner(db, models.resnet34, metrics=[accuracy])
learn.fit_one_cycle(3)

show_doc(ImageCleaner.make_dropdown_widget)
show_doc(ImageCleaner.next_batch)
show_doc(DatasetFormatter.sort_idxs)
show_doc(ImageCleaner.make_vertical_box)
show_doc(ImageCleaner.relabel)
show_doc(DatasetFormatter.largest_indices)
show_doc(ImageCleaner.delete_image)
show_doc(ImageCleaner.empty)
show_doc(ImageCleaner.empty_batch)
show_doc(DatasetFormatter.comb_similarity)
show_doc(ImageCleaner.get_widgets)
show_doc(ImageCleaner.write_csv)
show_doc(ImageCleaner.create_image_list)
show_doc(ImageCleaner.render)
show_doc(DatasetFormatter.get_similars_idxs)
show_doc(ImageCleaner.on_delete)
show_doc(ImageCleaner.make_button_widget)
show_doc(ImageCleaner.make_img_widget)
show_doc(DatasetFormatter.get_actns)
show_doc(ImageCleaner.batch_contains_deleted)
show_doc(ImageCleaner.make_horizontal_box)
show_doc(DatasetFormatter.get_toplosses_idxs)
show_doc(DatasetFormatter.padded_ds)
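
Once both cleaning passes are done, a typical follow-up (not shown in the docs above, just a minimal sketch) is to rebuild the data from `cleaned.csv` with a real validation split and retrain on the corrected labels. The names `mnist_path`, `df_clean`, `db_clean` and `learn_clean` below are illustrative; `mnist_path` stands for the MNIST_SAMPLE folder used in the cleaning example, since `path` was reassigned for the downloader examples.

# Minimal retraining sketch on the cleaned labels (assumes ImageCleaner wrote cleaned.csv above)
mnist_path = untar_data(URLs.MNIST_SAMPLE)          # same folder the cleaning example used
df_clean = pd.read_csv(mnist_path/'cleaned.csv')    # filenames + labels kept/relabelled in the widget
np.random.seed(42)
db_clean = (ImageList.from_df(df_clean, mnist_path)
            .split_by_rand_pct(0.2)                 # this time keep a validation set
            .label_from_df()
            .databunch(bs=64))
learn_clean = cnn_learner(db_clean, models.resnet18, metrics=error_rate)
learn_clean.fit_one_cycle(1)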
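
On the downloading side, download_google_images already verifies the files it fetches, so no extra step is needed for the examples above. If you add images to those label folders from other sources, a manual verify_images pass is a common precaution; a sketch, assuming the labels list from the downloader example:

# Sketch only: delete files that can't be opened so they don't break the databunch later
for label in labels:
    verify_images(path/label, delete=True)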