# Worked example: train a small tabular model on the ADULT_SAMPLE dataset
# and predict on a single row. Wildcard imports are kept in their original
# order because later imports may shadow names from earlier ones.
from fastai.gen_doc.nbdoc import *
from fastai.tabular.models import *
from fastai.tabular import *

# Fetch the sample dataset; `path` is used below to locate the CSV.
path = untar_data(URLs.ADULT_SAMPLE)
path  # bare expression -- presumably shown as notebook cell output

df = pd.read_csv(path/'adult.csv')
df.head()  # bare expression -- presumably shown as notebook cell output

# Preprocessing steps handed to the data bunch via `procs`.
procs = [FillMissing, Categorify, Normalize]

# The last 2000 row positions of the frame are used as the validation set.
n_rows = len(df)
valid_idx = range(n_rows - 2000, n_rows)

# Target column and the columns to be treated as categorical.
dep_var = 'salary'
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]

data = TabularDataBunch.from_df(
    path,
    df,
    dep_var,
    valid_idx=valid_idx,
    procs=procs,
    cat_names=cat_names,
)
# `cont_names` defaults to: set(df)-set(cat_names)-{dep_var}
print(data.train_ds.cont_names)

# Pull one batch from the training loader and print the first five entries
# of each of its three parts.
first_batch = next(iter(data.train_dl))
(cat_x, cont_x), y = first_batch
for batch_part in (cat_x, cont_x, y):
    print(to_np(batch_part[:5]))

# Build the learner; `emb_szs` supplies an explicit value for
# 'native-country' (presumably its embedding size -- confirm in fastai docs).
learn = tabular_learner(
    data,
    layers=[200, 100],
    emb_szs={'native-country': 10},
    metrics=accuracy,
)
learn.fit_one_cycle(1, 1e-2)

# Run a prediction on the first row of the original frame.
first_row = df.iloc[0]
learn.predict(first_row)