# Walkthrough of the fastai data block API: build a DataBunch for several tiny
# sample datasets, then render the API reference entries with `show_doc`.
from fastai.gen_doc.nbdoc import *
from fastai.basics import *

# Seed NumPy's global RNG (presumably so the random splits below are repeatable).
np.random.seed(42)

from fastai.vision import *

# ---------------------------------------------------------------------------
# MNIST (tiny): image classification, labels taken from folder names
# ---------------------------------------------------------------------------
path = untar_data(URLs.MNIST_TINY)
tfms = get_transforms(do_flip=False)
path.ls()
(path/'train').ls()

# Factory-method version.
data = ImageDataBunch.from_folder(path, ds_tfms=tfms, size=64)

# The same data assembled step by step with the data block API.
data = (
    ImageList.from_folder(path)    # inputs: images in `path` and its subfolders
    .split_by_folder()             # train/valid split: follow the folder layout
    .label_from_folder()           # labels: the folder each file sits in
    .add_test_folder()             # optional test set (default folder name: test)
    .transform(tfms, size=64)      # augmentation: `tfms`, images sized to 64
    .databunch()                   # default conversion to an ImageDataBunch
)
data.show_batch(3, figsize=(6,6), hide_axis=False)

# ---------------------------------------------------------------------------
# Planet (tiny): multi-label classification, labels read from a csv file
# ---------------------------------------------------------------------------
planet = untar_data(URLs.PLANET_TINY)
planet_tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)

# Factory-method version.
data = ImageDataBunch.from_csv(
    planet,
    folder='train',
    size=128,
    suffix='.jpg',
    label_delim = ' ',
    ds_tfms=planet_tfms,
)

# Data block version.
data = (
    ImageList.from_csv(planet, 'labels.csv', folder='train', suffix='.jpg')  # inputs: the planet 'train' folder
    .split_by_rand_pct()               # random split, default 20% held out for validation
    .label_from_df(label_delim=' ')    # labels: taken from the csv file
    .transform(planet_tfms, size=128)  # augmentation: `planet_tfms`, images sized to 128
    .databunch()                       # default conversion to a databunch
)
data.show_batch(rows=2, figsize=(9,7))

# ---------------------------------------------------------------------------
# CamVid (tiny): segmentation, one mask file per image
# ---------------------------------------------------------------------------
camvid = untar_data(URLs.CAMVID_TINY)
path_lbl = camvid/'labels'
path_img = camvid/'images'
codes = np.loadtxt(camvid/'codes.txt', dtype=str)
codes


def get_y_fn(x):
    "Return the mask path for image path `x`: same stem plus `_P`, same suffix, under `path_lbl`."
    return path_lbl/f'{x.stem}_P{x.suffix}'


data = (
    SegmentationItemList.from_folder(path_img)
    .split_by_rand_pct()
    .label_from_func(get_y_fn, classes=codes)
    .transform(get_transforms(), tfm_y=True, size=128)
    .databunch()
)
data.show_batch(rows=2, figsize=(7,5))

# ---------------------------------------------------------------------------
# COCO (tiny): object detection, bounding boxes read from a json annotation file
# ---------------------------------------------------------------------------
coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
# Lookup table from image file name to its bounding-box annotation.
img2bbox = {img: bbox for img, bbox in zip(images, lbl_bbox)}


def get_y_func(o):
    "Return the annotation stored in `img2bbox` for the file name of `o`."
    return img2bbox[o.name]


data = (
    ObjectItemList.from_folder(coco)                # inputs: images found in `coco`
    .split_by_rand_pct()                            # random split, default 20% held out for validation
    .label_from_func(get_y_func)                    # labels: looked up through `get_y_func`
    .transform(get_transforms(), tfm_y=True)        # standard transforms, applied to the targets too
    .databunch(bs=16, collate_fn=bb_pad_collate)    # DataBunch conversion, collating with bb_pad_collate
)
data.show_batch(rows=2, ds_type=DatasetType.Valid, figsize=(6,6))

# ---------------------------------------------------------------------------
# IMDB (sample): text, once for a language model and once for a classifier
# ---------------------------------------------------------------------------
from fastai.text import *

imdb = untar_data(URLs.IMDB_SAMPLE)

data_lm = (
    TextList.from_csv(imdb, 'texts.csv', cols='text')  # inputs: column 'text' of the csv
    .split_by_rand_pct()                               # random split with the default 20%
    .label_for_lm()                                    # label for a language model
    .databunch()
)
data_lm.show_batch()

data_clas = (
    TextList.from_csv(imdb, 'texts.csv', cols='text')
    .split_from_df(col='is_valid')
    .label_from_df(cols='label')
    .databunch()
)
data_clas.show_batch()

# ---------------------------------------------------------------------------
# Adult (sample): tabular data loaded from a dataframe
# ---------------------------------------------------------------------------
from fastai.tabular import *

adult = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(adult/'adult.csv')
dep_var = 'salary'
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]
cont_names = [
    'education-num',
    'hours-per-week',
    'age',
    'capital-loss',
    'fnlwgt',
    'capital-gain',
]
procs = [FillMissing, Categorify, Normalize]

data = (
    TabularList.from_df(df, path=adult, cat_names=cat_names, cont_names=cont_names, procs=procs)
    .split_by_idx(valid_idx=range(800,1000))
    .label_from_df(cols=dep_var)
    .databunch()
)
data.show_batch()


# ---------------------------------------------------------------------------
# API reference entries
# ---------------------------------------------------------------------------
def _show_docs(*elts):
    "Call `show_doc` on each of `elts`, one after another, in the order given."
    for elt in elts:
        show_doc(elt)


# ItemList: construction, filtering and miscellaneous methods.
show_doc(ItemList, title_level=3)
_show_docs(
    ItemList.from_folder,
    ItemList.from_df,
    ItemList.from_csv,
    ItemList.filter_by_func,
    ItemList.filter_by_folder,
    ItemList.filter_by_rand,
    ItemList.to_text,
    ItemList.use_partial_data,
    ItemList.analyze_pred,
    ItemList.get,
    ItemList.new,
    ItemList.reconstruct,
)

# ItemList: splitting methods.
_show_docs(
    ItemList.split_none,
    ItemList.split_by_rand_pct,
    ItemList.split_subsets,
    ItemList.split_by_files,
    ItemList.split_by_fname_file,
    ItemList.split_by_folder,
)
jekyll_note("This method looks at the folder immediately after `self.path` for `valid` and `train`.")
_show_docs(
    ItemList.split_by_idx,
    ItemList.split_by_idxs,
    ItemList.split_by_list,
    ItemList.split_by_valid_func,
    ItemList.split_from_df,
)
jekyll_warn("This method assumes the data has been created from a csv file or a dataframe.")

# ItemList: labelling methods, with a small labelled MNIST example in between.
show_doc(ItemList.get_label_cls)
path = untar_data(URLs.MNIST_TINY)
ll = ImageList.from_folder(path).split_by_folder().label_from_folder().train
ll.to_df().head()
_show_docs(
    ItemList.label_empty,
    ItemList.label_from_df,
)
jekyll_warn("This method only works with data objects created with either `from_csv` or `from_df` methods.")
_show_docs(
    ItemList.label_const,
    ItemList.label_from_folder,
)
jekyll_note("This method looks at the last subfolder in the path to determine the classes.")
_show_docs(
    ItemList.label_from_func,
    ItemList.label_from_re,
)

# Label list classes.
show_doc(CategoryList, title_level=3)
show_doc(MultiCategoryList, title_level=3)
show_doc(FloatList, title_level=3)
show_doc(EmptyLabelList, title_level=3)

# Pre-processors.
show_doc(PreProcessor, title_level=3)
_show_docs(
    PreProcessor.process_one,
    PreProcessor.process,
)
show_doc(CategoryProcessor, title_level=3)
show_doc(CategoryProcessor.generate_classes)
show_doc(MultiCategoryProcessor, title_level=3)
show_doc(MultiCategoryProcessor.generate_classes)

# LabelLists: transforms, test set and conversion to a DataBunch.
_show_docs(
    LabelLists.transform,
    LabelLists.add_test,
)
jekyll_note("Here `items` can be an `ItemList` or a collection.")
show_doc(LabelLists.add_test_folder)
jekyll_warn("In fastai the test set is unlabeled! No labels will be collected even if they are available.")
show_doc(LabelLists.databunch)

# LabelList.
show_doc(LabelList, title_level=3)
_show_docs(
    LabelList.export,
    LabelList.transform_y,
    LabelList.get_state,
    LabelList.load_empty,
    LabelList.load_state,
    LabelList.process,
    LabelList.set_item,
    LabelList.to_df,
    LabelList.to_csv,
    LabelList.transform,
)

# ItemLists.
show_doc(ItemLists, title_level=3)
_show_docs(
    ItemLists.label_from_lists,
    ItemLists.transform,
    ItemLists.transform_y,
)

# LabelLists.
show_doc(LabelLists, title_level=3)
_show_docs(
    LabelLists.get_processors,
    LabelLists.load_empty,
    LabelLists.load_state,
    LabelLists.process,
)

# Helper function.
show_doc(get_files)

# Remaining entries, kept in their original order.
_show_docs(
    CategoryList.new,
    LabelList.new,
    CategoryList.get,
    LabelList.predict,
    ItemList.new,
    ItemList.process_one,
    ItemList.process,
    MultiCategoryProcessor.process_one,
    FloatList.get,
    CategoryProcessor.process_one,
    CategoryProcessor.create_classes,
    CategoryProcessor.process,
    MultiCategoryList.get,
    FloatList.new,
    FloatList.reconstruct,
    MultiCategoryList.analyze_pred,
    MultiCategoryList.reconstruct,
    CategoryList.reconstruct,
    CategoryList.analyze_pred,
    EmptyLabelList.reconstruct,
    EmptyLabelList.get,
    LabelList.databunch,
    ItemList.add,
)