from fastai.gen_doc.nbdoc import *
from fastai.vision import * 
from fastai import *
# Fix the seed of NumPy's global random generator.
np.random.seed(42)

# MNIST_TINY sample: `path` is what untar_data returns for it; the transforms
# are built with flipping switched off.
path = untar_data(URLs.MNIST_TINY)
tfms = get_transforms(do_flip=False)

# Bare expressions: their values are only displayed when run interactively
# (for example as the last statement of a notebook cell).
path.ls()

(path / 'train').ls()

# Build an ImageDataBunch directly from the folder at `path`, applying `tfms`
# with size 24.
data = ImageDataBunch.from_folder(path, size=24, ds_tfms=tfms)

# Data block API on `path`: each chained call settles one question.
data = (
    ImageItemList
    # Inputs: look in `path` and its subfolders.
    .from_folder(path)
    # Train/valid split: taken from the folders.
    .split_by_folder()
    # Labels: derived from the folder of each filename.
    .label_from_folder()
    # Optional test set (the default folder name is "test").
    .add_test_folder()
    # Data augmentation: apply `tfms` with a size of 64.
    .transform(tfms, size=64)
    # Convert to an ImageDataBunch using the defaults.
    .databunch()
)

data.show_batch(3, hide_axis=False, figsize=(6, 6))

# Bare tuple expression: first training item and the classes of the test set.
(data.train_ds[0], data.test_ds.classes)

# PLANET_TINY sample with a customised set of transforms.
planet = untar_data(URLs.PLANET_TINY)
planet_tfms = get_transforms(
    flip_vert=True,
    max_lighting=0.1,
    max_zoom=1.05,
    max_warp=0.,
)

# Build the data object from the csv in one call: images come from the
# 'train' folder with a '.jpg' suffix, and ' ' is passed as the separator.
data = ImageDataBunch.from_csv(
    planet,
    folder='train',
    suffix='.jpg',
    sep=' ',
    size=128,
    ds_tfms=planet_tfms,
)

# Data block API on the planet sample, one question per chained call.
data = (
    ImageItemList
    # Inputs: listed in labels.csv, found in planet's 'train' folder.
    .from_csv(planet, 'labels.csv', folder='train', suffix='.jpg')
    # Train/valid split: random, with the default 20% in valid.
    .random_split_by_pct()
    # Labels: taken from the csv file, using ' ' as separator.
    .label_from_df(sep=' ')
    # Data augmentation: apply `planet_tfms` with a size of 128.
    .transform(planet_tfms, size=128)
    # Convert to a databunch using the defaults.
    .databunch()
)

data.show_batch(figsize=(9, 7), rows=2)

# CAMVID_TINY sample: inputs are under 'images', labels under 'labels'.
camvid = untar_data(URLs.CAMVID_TINY)
path_lbl = camvid/'labels'
path_img = camvid/'images'

# Contents of codes.txt loaded as strings; passed as `classes` below.
codes = np.loadtxt(camvid/'codes.txt', dtype=str)
codes


def get_y_fn(x):
    """Return the label path matching the image path `x`.

    The label lives in `path_lbl` and is named after the image: same stem
    with '_P' appended, same suffix.
    """
    return path_lbl/f'{x.stem}_P{x.suffix}'


data = (SegmentationItemList.from_folder(path_img)
        # Inputs: the files found under path_img
        .random_split_by_pct()
        # Train/valid split: random, default percentage
        .label_from_func(get_y_fn, classes=codes)
        # Labels: path computed by get_y_fn, classes given by codes
        .transform(get_transforms(), tfm_y=True, size=128)
        # Data augmentation: standard transforms with tfm_y=True and a size of 128
        .databunch())
        # Convert to a databunch using the defaults

data.show_batch(rows=2, figsize=(7,5))

coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
# Lookup table pairing each entry of `images` with the matching entry of `lbl_bbox`.
img2bbox = dict(zip(images, lbl_bbox))


def get_y_func(o):
    """Return the `img2bbox` entry keyed by the file name of path `o`.

    Raises KeyError when `o.name` is not a key of `img2bbox`.
    """
    return img2bbox[o.name]


data = (ObjectItemList.from_folder(coco)
        #Where are the images? -> in coco
        .random_split_by_pct()
        #How to split in train/valid? -> randomly with the default 20% in valid
        .label_from_func(get_y_func)
        #How to find the labels? -> use get_y_func
        .transform(get_transforms(), tfm_y=True)
        #Data augmentation? -> Standard transforms with tfm_y=True
        .databunch(bs=16, collate_fn=bb_pad_collate))
        #Finally we convert to a DataBunch and we use bb_pad_collate

data.show_batch(rows=2, ds_type=DatasetType.Valid, figsize=(6,6))

from fastai.text import *

imdb = untar_data(URLs.IMDB_SAMPLE)

# Text data labelled for a language model.
data_lm = (
    TextList
    # Inputs: column 'text' of texts.csv.
    .from_csv(imdb, 'texts.csv', cols='text')
    # Split: random, with the default 20%.
    .random_split_by_pct()
    # Labels: set up for a language model.
    .label_for_lm()
    .databunch()
)

data_lm.show_batch()

# Same csv and text column, this time with labels read from the dataframe.
data_clas = (
    TextList
    .from_csv(imdb, 'texts.csv', cols='text')
    # Split: driven by the 'is_valid' column.
    .split_from_df(col='is_valid')
    # Labels: taken from the 'label' column.
    .label_from_df(cols='label')
    .databunch()
)

data_clas.show_batch()

from fastai.tabular import *

adult = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(adult / 'adult.csv')

# Column used for the labels, followed by the categorical and continuous
# column names and the processors handed to TabularList.
dep_var = '>=50k'
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]
cont_names = [
    'education-num',
    'hours-per-week',
    'age',
    'capital-loss',
    'fnlwgt',
    'capital-gain',
]
procs = [FillMissing, Categorify, Normalize]

data = (
    TabularList
    # Inputs: the dataframe, with its column roles and processors.
    .from_df(df, path=adult, cat_names=cat_names, cont_names=cont_names, procs=procs)
    # Validation set: the indices 800 to 999.
    .split_by_idx(valid_idx=range(800, 1000))
    # Labels: the `dep_var` column.
    .label_from_df(cols=dep_var)
    .databunch()
)

data.show_batch()

# --- ItemList ---------------------------------------------------------------
# Each show_doc(...) call is handed one class or method; presumably it renders
# that object's documentation (confirm against fastai.gen_doc.nbdoc).
# jekyll_note / jekyll_warn are handed the text of a note or a warning.
show_doc(ItemList, title_level=3, doc_string=False)

# Constructors: from_folder / from_df / from_csv.
show_doc(ItemList.from_folder)

show_doc(ItemList.from_df)

show_doc(ItemList.from_csv)

# Filtering methods.
show_doc(ItemList.filter_by_func)

show_doc(ItemList.filter_by_folder)

show_doc(ItemList.predict)

# Splitting methods (random_split_by_pct and the split_* family).
show_doc(ItemList.random_split_by_pct)

show_doc(ItemList.split_by_files)

show_doc(ItemList.split_by_fname_file)

show_doc(ItemList.split_by_folder)

jekyll_note("This method looks at the folder immediately after `self.path` for `valid` and `train`.")

show_doc(ItemList.split_by_idx)

show_doc(ItemList.split_by_idxs)

show_doc(ItemList.split_by_list)

show_doc(ItemList.split_by_valid_func)

show_doc(ItemList.split_from_df)

jekyll_warn("This method assumes the data has been created from a csv file or a dataframe.")

# Labelling methods (the label_* family).
show_doc(ItemList.label_from_list)

show_doc(ItemList.label_from_df)

jekyll_warn("This method assumes the data has been created from a csv file or a dataframe.")

show_doc(ItemList.label_const)

show_doc(ItemList.label_from_folder)

jekyll_note("This method looks at the last subfolder in the path to determine the classes.")

show_doc(ItemList.label_from_func)

show_doc(ItemList.label_from_re)

# --- Label list classes -----------------------------------------------------
# show_doc is handed each class with title_level=3.
show_doc(CategoryList, title_level=3)

show_doc(MultiCategoryList, title_level=3)

show_doc(FloatList, title_level=3)

# --- Processors: the PreProcessor class, two of its methods, then the
# CategoryProcessor and MultiCategoryProcessor classes.
show_doc(PreProcessor, title_level=3)

show_doc(PreProcessor.process_one)

show_doc(PreProcessor.process)

show_doc(CategoryProcessor, title_level=3)

show_doc(MultiCategoryProcessor, title_level=3)

# --- LabelLists methods: transform, add_test, add_test_folder, databunch ----
show_doc(LabelLists.transform)

show_doc(LabelLists.add_test)

jekyll_note("Here `items` can be an `ItemList` or a collection.")

show_doc(LabelLists.add_test_folder)

show_doc(LabelLists.databunch)

# --- Container classes; doc_string=False is passed for each class-level entry.
show_doc(LabelList, title_level=3, doc_string=False)

show_doc(LabelList.from_lists)

show_doc(ItemLists, doc_string=False, title_level=3)

show_doc(ItemLists.label_from_lists)

show_doc(LabelLists, title_level=3, doc_string=False)

# --- Remaining entries -------------------------------------------------------
# A flat run of show_doc calls with no notes or extra arguments, covering a
# helper function and assorted methods of the classes handled earlier.
show_doc(get_files)

show_doc(ItemList.get)

show_doc(CategoryList.new)

show_doc(ItemList.label_cls)

show_doc(LabelLists.get_processors)

# NOTE(review): LabelList.from_lists is also passed to show_doc earlier in this
# file — possibly a duplicate entry; confirm whether both are intended.
show_doc(LabelList.from_lists)

show_doc(LabelList.set_item)

show_doc(LabelList.new)

show_doc(CategoryList.get)

show_doc(LabelList.predict)

show_doc(ItemList.new)

show_doc(LabelList.clear_item)

show_doc(ItemList.process_one)

show_doc(ItemList.process)

show_doc(LabelLists.process)

show_doc(CategoryList.predict)

show_doc(ItemLists.transform)

show_doc(LabelList.process)

show_doc(LabelList.transform)

show_doc(MultiCategoryProcessor.process_one)

show_doc(FloatList.get)

show_doc(CategoryProcessor.process_one)

show_doc(CategoryProcessor.create_classes)

show_doc(CategoryProcessor.process)

show_doc(MultiCategoryList.get)

show_doc(FloatList.new)

show_doc(MultiCategoryProcessor.generate_classes)

show_doc(CategoryProcessor.generate_classes)