# Documentation notebook for the fastai data block API, flattened to a script.
# Bare expressions (e.g. `path.ls()`) are notebook cell outputs: they display a
# value in Jupyter and are no-ops when run as a plain script.
from fastai.gen_doc.nbdoc import *
from fastai.basics import *

# Fixed seed so the random splits used below are reproducible.
np.random.seed(42)

# --- Image classification: MNIST_TINY, factory method vs. data block API ---
from fastai.vision import *

path = untar_data(URLs.MNIST_TINY)
tfms = get_transforms(do_flip=False)
path.ls()
(path/'train').ls()

# One-call factory method ...
data = ImageDataBunch.from_folder(path, ds_tfms=tfms, size=64)

# ... and the same pipeline spelled out step by step.
data = (
    # Where is the data? In `path` and its subfolders.
    ImageList.from_folder(path)
    # How to split train/valid? By folder.
    .split_by_folder()
    # How to label? From the folder each file sits in.
    .label_from_folder()
    # Optional test set (default folder name is `test`).
    .add_test_folder()
    # Data augmentation: `tfms`, images resized to 64.
    .transform(tfms, size=64)
    # Finally, convert to an ImageDataBunch with the defaults.
    .databunch()
)
data.show_batch(3, figsize=(6,6), hide_axis=False)

# --- Multi-label classification: PLANET_TINY ---
planet = untar_data(URLs.PLANET_TINY)
planet_tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
pd.read_csv(planet/"labels.csv").head()

data = ImageDataBunch.from_csv(
    planet,
    folder='train',
    size=128,
    suffix='.jpg',
    label_delim=' ',
    ds_tfms=planet_tfms,
)

planet.ls()
pd.read_csv(planet/"labels.csv").head()

data = (
    # Where is the data? In the 'train' folder of `planet`, listed in labels.csv.
    ImageList.from_csv(planet, 'labels.csv', folder='train', suffix='.jpg')
    # How to split train/valid? Randomly, default 20% in valid.
    .split_by_rand_pct()
    # How to label? Second csv column, tags separated by ' '.
    .label_from_df(label_delim=' ')
    # Data augmentation: `planet_tfms`, images resized to 128.
    .transform(planet_tfms, size=128)
    # Finally, convert to a databunch with the defaults.
    .databunch()
)
data.show_batch(rows=2, figsize=(9,7))

# --- Segmentation: CAMVID_TINY ---
camvid = untar_data(URLs.CAMVID_TINY)
path_lbl = camvid/'labels'
path_img = camvid/'images'

codes = np.loadtxt(camvid/'codes.txt', dtype=str)
codes

# Maps an image path to its mask path: same stem plus a `_P` suffix, in `path_lbl`.
get_y_fn = lambda x: path_lbl/f'{x.stem}_P{x.suffix}'

data = (
    # Where is the data? In `path_img` and its subfolders.
    SegmentationItemList.from_folder(path_img)
    # How to split train/valid? Randomly, default 20% in valid.
    .split_by_rand_pct()
    # How to label? Apply the label function to each data file name.
    .label_from_func(get_y_fn, classes=codes)
    # Data augmentation: size 128, and transform the label images as well.
    .transform(get_transforms(), tfm_y=True, size=128)
    # Finally, convert to a databunch with the defaults.
    .databunch()
)
data.show_batch(rows=2, figsize=(7,5))

# --- Object detection: COCO_TINY ---
coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
img2bbox = dict(zip(images, lbl_bbox))

# Looks up the annotation of an image by its file name.
get_y_func = lambda o:img2bbox[o.name]

data = (
    # Where are the images? In `coco` and its subfolders.
    ObjectItemList.from_folder(coco)
    # How to split train/valid? Randomly, default 20% in valid.
    .split_by_rand_pct()
    # How to find the labels? Apply `get_y_func` to each data file name.
    .label_from_func(get_y_func)
    # Data augmentation: standard transforms, also applied to the labels.
    .transform(get_transforms(), tfm_y=True)
    # Finally, convert to a DataBunch with a batch size of 16, using
    # bb_pad_collate to collate the data into a mini-batch.
    .databunch(bs=16, collate_fn=bb_pad_collate)
)
data.show_batch(rows=2, ds_type=DatasetType.Valid, figsize=(6,6))

# --- Text: IMDB_SAMPLE (language model, then classifier) ---
from fastai.text import *

imdb = untar_data(URLs.IMDB_SAMPLE)

data_lm = (
    # Where are the texts? Column 'text' of texts.csv.
    TextList.from_csv(imdb, 'texts.csv', cols='text')
    # How to split? Randomly, default 20% in valid.
    .split_by_rand_pct()
    # Label for a language model.
    .label_for_lm()
    # Finally, convert to a DataBunch.
    .databunch()
)
data_lm.show_batch()

data_clas = (
    TextList.from_csv(imdb, 'texts.csv', cols='text')
    .split_from_df(col='is_valid')
    .label_from_df(cols='label')
    .databunch()
)
data_clas.show_batch()

# --- Tabular: ADULT_SAMPLE ---
from fastai.tabular import *

adult = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(adult/'adult.csv')
dep_var = 'salary'
cat_names = [
    'workclass', 'education', 'marital-status', 'occupation',
    'relationship', 'race', 'sex', 'native-country',
]
cont_names = [
    'education-num', 'hours-per-week', 'age',
    'capital-loss', 'fnlwgt', 'capital-gain',
]
procs = [FillMissing, Categorify, Normalize]

data = (
    TabularList.from_df(df, path=adult, cat_names=cat_names, cont_names=cont_names, procs=procs)
    .split_by_idx(valid_idx=range(800,1000))
    .label_from_df(cols=dep_var)
    .databunch()
)
data.show_batch()

# --- ItemList basics ---
show_doc(ItemList, title_level=3)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
il_data = ItemList.from_folder(path_data, extensions=['.csv'])
il_data
il_data.path
il_data.items
len(il_data)
il_data[1]
il_data[:1]
il_data.add(il_data)
il_data

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
itemlist = ItemList.from_folder(path_data/'test')
itemlist
itemlist = ItemList.from_folder(path_data/'test', presort=True)
itemlist
itemlist[0]

# --- ItemList factory methods ---
show_doc(ItemList.from_folder)

path = untar_data(URLs.MNIST_TINY)
path.ls()
ImageList.from_folder(path)

show_doc(ItemList.from_df)

path = untar_data(URLs.MNIST_SAMPLE)
path.ls()
df = pd.read_csv(path/'labels.csv')
df.head()
ImageList.from_df(df, path)

show_doc(ItemList.from_csv)

path = untar_data(URLs.MNIST_SAMPLE)
path.ls()
ImageList.from_csv(path, 'labels.csv')

# --- Filtering ---
show_doc(ItemList.filter_by_func)

path = untar_data(URLs.MNIST_SAMPLE)
df = pd.read_csv(path/'labels.csv')
df.head()
Path(df.name[0]).suffix
ImageList.from_df(df, path).filter_by_func(lambda fname: Path(fname).suffix == '.png')
# ItemList documentation examples: filtering, export to text, `new`, and the
# `split_*` / `label_*` families. Bare expressions are notebook cell outputs.

show_doc(ItemList.filter_by_folder)

show_doc(ItemList.filter_by_rand)

path = untar_data(URLs.MNIST_SAMPLE)
ImageList.from_folder(path).filter_by_rand(0.5)
# Unfiltered list, shown for comparison with the filtered one above.
ImageList.from_folder(path)

show_doc(ItemList.to_text)

path = untar_data(URLs.MNIST_SAMPLE)
pd.read_csv(path/'labels.csv').head()
file_name = "item_list.txt"
ImageList.from_folder(path).to_text(file_name)
# Show the first 10 lines of the generated file. Plain Python replaces the
# IPython shell escape `! cat {path/file_name} | head`, which is not valid in a
# .py file and relies on Unix tools.
with open(path/file_name) as f:
    for _, line in zip(range(10), f):
        print(line, end='')

show_doc(ItemList.use_partial_data)

path = untar_data(URLs.MNIST_SAMPLE)
ImageList.from_folder(path).use_partial_data(0.5)
# Full list, shown for comparison with the partial one above.
ImageList.from_folder(path)

show_doc(ItemList.analyze_pred)

show_doc(ItemList.get)

# Same files listed through the base ItemList and through ImageList, to compare
# what indexing returns for each class.
path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
il_data_base = ItemList.from_folder(path=path_data, extensions=['.png'], include=['test'])
il_data_base
il_data_base[15]
il_data_image = ImageList.from_folder(path=path_data, extensions=['.png'], include=['test'])
il_data_image
il_data_image[15]

show_doc(ItemList.new)

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
itemlist1 = ItemList.from_folder(path=path_data/'valid', extensions=['.png'])
itemlist1
itemlist1.copy_new == ['x', 'label_cls', 'path']
# Check the attribute values of a freshly created list. The expected path is
# derived from `path_data` rather than a hard-coded, user-specific absolute
# path, and `None` is tested by identity.
(
    itemlist1.x is None
    and itemlist1.label_cls is None
    and itemlist1.inner_df is None
    and itemlist1.path == path_data/'valid'
)
# Extend `copy_new`, overwrite the listed attributes, then check what the
# object returned by `new` carries.
itemlist1.copy_new = ['x', 'label_cls', 'path', 'inner_df']
itemlist1.x = itemlist1.label_cls = itemlist1.path = itemlist1.inner_df = 'test'
itemlist2 = itemlist1.new(items=itemlist1.items)
(
    itemlist2.inner_df == itemlist2.x == itemlist2.label_cls == 'test'
    and itemlist2.path == Path('test')
)

show_doc(ItemList.reconstruct)

# --- Splitting ---
show_doc(ItemList.split_none)

show_doc(ItemList.split_by_rand_pct)

show_doc(ItemList.split_subsets)

show_doc(ItemList.split_by_files)

show_doc(ItemList.split_by_fname_file)

show_doc(ItemList.split_by_folder)

jekyll_note("This method looks at the folder immediately after `self.path` for `valid` and `train`.")

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
il = ItemList.from_folder(path=path_data)
il
sd = il.split_by_folder(train='train', valid='valid')
sd

# `train_idx` / `valid_idx` are reused by the split_by_idxs and split_by_list
# examples further down.
train_idx = il._get_by_folder(name='train')
train_idx[:5], train_idx[-5:], len(train_idx)
valid_idx = il._get_by_folder(name='valid')
valid_idx[:5], valid_idx[-5:], len(valid_idx)

show_doc(ItemList.split_by_idx)

path = untar_data(URLs.MNIST_SAMPLE)
df = pd.read_csv(path/'labels.csv')
df.head()
data = (
    ImageList.from_df(df, path)
    .split_by_idx(list(range(1000)))
)
data

show_doc(ItemList.split_by_idxs)

sd = il.split_by_idxs(train_idx=train_idx, valid_idx=valid_idx)
sd

show_doc(ItemList.split_by_list)

sd = il.split_by_list(train=il[train_idx], valid=il[valid_idx])
sd

show_doc(ItemList.split_by_valid_func)

show_doc(ItemList.split_from_df)

path = untar_data(URLs.MNIST_SAMPLE)
df = pd.read_csv(path/'labels.csv')

# Create a new column for is_valid: first half True, remainder False.
# Sizing the second half as `n_rows - n_valid` keeps the column length equal
# to the row count when the number of rows is odd.
n_rows = df.shape[0]
n_valid = n_rows // 2
df['is_valid'] = [True]*n_valid + [False]*(n_rows - n_valid)

# Randomly shuffle dataframe
df = df.reindex(np.random.permutation(df.index))

print(df.shape)
df.head()

data = (
    ImageList.from_df(df, path)
    .split_from_df()
)
data

jekyll_warn("This method assumes the data has been created from a csv file or a dataframe.")

# --- Labelling ---
show_doc(ItemList.get_label_cls)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
sd = ImageList.from_folder(path_data).split_by_folder('train', 'valid')
sd

# Show which label class `get_label_cls` picks for different label types.
labels = ['7', '3']
label_cls = sd.train.get_label_cls(labels)
label_cls

labels = [7, 3]
label_cls = sd.train.get_label_cls(labels)
label_cls

labels = [7.0, 3.0]
label_cls = sd.train.get_label_cls(labels)
label_cls

labels = [[7, 3],]
label_cls = sd.train.get_label_cls(labels)
label_cls

labels = [['7', '3'],]
label_cls = sd.train.get_label_cls(labels)
label_cls

path = untar_data(URLs.MNIST_TINY)
ll = ImageList.from_folder(path).split_by_folder().label_from_folder().train
ll.to_df().head()

show_doc(ItemList.label_empty)

show_doc(ItemList.label_from_df)

jekyll_warn("This method only works with data objects created with either `from_csv` or `from_df` methods.")
# Documentation examples for the label_* methods, CategoryList, processors,
# LabelList / ItemLists / LabelLists and get_files. Bare expressions are
# notebook cell outputs.

show_doc(ItemList.label_const)

show_doc(ItemList.label_from_folder)

jekyll_note("This method looks at the last subfolder in the path to determine the classes.")

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
sd_train = ImageList.from_folder(path_data/'train').split_none()
ll_train = sd_train.label_from_folder()
ll_train

show_doc(ItemList.label_from_func)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
sd = ImageList.from_folder(path_data).split_by_folder('train', 'valid')
sd
# Label = second-to-last path component; accepts either a Path or a string.
func = lambda o: (o.parts if isinstance(o, Path) else o.split(os.path.sep))[-2]
ll = sd.label_from_func(func)
ll

show_doc(ItemList.label_from_re)

# --- CategoryList ---
show_doc(CategoryList, title_level=3)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
ll = ImageList.from_folder(path_data).split_by_folder('train', 'valid').label_from_folder()
ll.train.y.items, ll.train.y.classes, ll.train.y[0]
cl = CategoryList(ll.train.y.items, ll.train.y.classes)
cl

from fastai.vision import *

# Items given as integers, with a list of class names.
items = np.array([0, 1, 2, 1, 0])
cl = CategoryList(items, classes=['3', '7', '9'])
cl

# Items given as strings, with `classes` passed as a dict.
items = np.array(['3', '7', '9', '7', '3'])
classes = {'3':3, '7':7, '9':9}
cl = CategoryList(items, classes)
cl

show_doc(MultiCategoryList, title_level=3)

show_doc(FloatList, title_level=3)

show_doc(EmptyLabelList, title_level=3)

# --- Processors ---
show_doc(PreProcessor, title_level=3)

show_doc(PreProcessor.process_one)

show_doc(PreProcessor.process)

show_doc(CategoryProcessor, title_level=3)

show_doc(CategoryProcessor.generate_classes)

show_doc(CategoryProcessor.process)

show_doc(MultiCategoryProcessor, title_level=3)

show_doc(MultiCategoryProcessor.generate_classes)

show_doc(LabelLists.transform)

show_doc(LabelLists.add_test)

jekyll_note("Here `items` can be an `ItemList` or a collection.")

show_doc(LabelLists.add_test_folder)

jekyll_warn("In fastai the test set is unlabeled! No labels will be collected even if they are available.")

show_doc(LabelLists.databunch)

# --- LabelList ---
show_doc(LabelList, title_level=3)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
ll = ImageList.from_folder(path_data).split_by_folder('train', 'valid').label_from_folder()
ll.train.x, ll.train.y
LabelList(x=ll.train.x, y=ll.train.y)

show_doc(LabelList.export)

show_doc(LabelList.transform_y)

show_doc(LabelList.get_state)

show_doc(LabelList.load_empty)

show_doc(LabelList.load_state)

show_doc(LabelList.process)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
sd = ImageList.from_folder(path_data).split_by_folder('train', 'valid')
# Label each split separately, then switch the container's class by hand so
# that `get_processors` can be called on it.
sd.train = sd.train.label_from_folder(from_item_lists=True)
sd.valid = sd.valid.label_from_folder(from_item_lists=True)
sd.__class__ = LabelLists
xp,yp = sd.get_processors()
xp,yp
sd.train.process(xp, yp)

show_doc(LabelList.set_item)

show_doc(LabelList.to_df)

show_doc(LabelList.to_csv)

show_doc(LabelList.transform)

# --- ItemLists ---
show_doc(ItemLists, title_level=3)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
il_train = ImageList.from_folder(path_data/'train')
il_valid = ImageList.from_folder(path_data/'valid')
il_test = ImageList.from_folder(path_data/'test')
ils = ItemLists(path=path_data, train=il_train, valid=il_valid)
ils
ils.test = il_test
ils

il = ImageList.from_folder(path_data)
il
sd = il.split_by_folder(train='train', valid='valid')
sd
ll = sd.label_from_folder()
ll

il_train = ImageList.from_folder(path_data/'train')
sd_train = il_train.split_none()
sd_train
ll_valid_empty = sd_train.label_from_folder()
ll_valid_empty

# Calls `__getattr__` explicitly to fetch `label_from_folder` from the
# ItemLists object, then invokes the returned callable.
ld_inner = sd.__getattr__('label_from_folder')
ld_inner()

show_doc(ItemLists.label_from_lists)

show_doc(ItemLists.transform)

show_doc(ItemLists.transform_y)

# --- LabelLists ---
show_doc(LabelLists, title_level=3)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()
il_train = ImageList.from_folder(path_data/'train')
il_valid = ImageList.from_folder(path_data/'valid')
# The test list gets its own name. The original chained assignment
# (`ll_test.test = il_valid = ...`) rebound `il_valid` to the test-set list.
il_test = ImageList.from_folder(path_data/'test')
ll_test = LabelLists(path_data, il_train, il_valid)
ll_test.test = il_test
ll_test

show_doc(LabelLists.get_processors)

from fastai.vision import *

path_data = untar_data(URLs.MNIST_TINY)
sd = ImageList.from_folder(path_data).split_by_folder('train', 'valid')
sd.train = sd.train.label_from_folder(from_item_lists=True)
sd.valid = sd.valid.label_from_folder(from_item_lists=True)
sd.__class__ = LabelLists
xp,yp = sd.get_processors()
xp,yp

show_doc(LabelLists.load_empty)

show_doc(LabelLists.load_state)

show_doc(LabelLists.process)

show_doc(ItemList.process)

# --- get_files ---
show_doc(get_files)

path_data = untar_data(URLs.MNIST_TINY)
path_data.ls()

# Default call, no recursion requested.
list_FilePath_noRecurse = get_files(path_data)
list_FilePath_noRecurse

list_FilePath_recurse = get_files(path_data, recurse=True)
list_FilePath_recurse[:3]
list_FilePath_recurse[-2:]

list_FilePath_recurse_csv = get_files(path_data, recurse=True, extensions=['.csv'])
list_FilePath_recurse_csv

list_FilePath_include = get_files(path_data, recurse=True, extensions=['.png','.jpg','.jpeg'], include=['test'])
list_FilePath_include[:3]
list_FilePath_include[-3:]

# --- Remaining methods, documented by signature only ---
show_doc(CategoryList.new)

show_doc(LabelList.new)

show_doc(CategoryList.get)

show_doc(LabelList.predict)

show_doc(ItemList.new)

show_doc(ItemList.process_one)

show_doc(MultiCategoryProcessor.process_one)

show_doc(FloatList.get)

show_doc(CategoryProcessor.process_one)

show_doc(CategoryProcessor.create_classes)

show_doc(MultiCategoryList.get)

show_doc(FloatList.new)

show_doc(FloatList.reconstruct)

show_doc(MultiCategoryList.analyze_pred)

show_doc(MultiCategoryList.reconstruct)

show_doc(CategoryList.reconstruct)

show_doc(CategoryList.analyze_pred)

show_doc(EmptyLabelList.reconstruct)

show_doc(EmptyLabelList.get)

show_doc(LabelList.databunch)

show_doc(ItemList.add)