Lesson 4 - Tabular models

In [1]:
from fastai.tabular import *

Tabular data should be in a pandas `DataFrame`.

In [3]:
# Resolve the Adult sample dataset location, then read its CSV into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path.joinpath('adult.csv'))
In [4]:
# Column roles used when building the tabular lists.
dep_var = '>=50k'  # column the labels are read from
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]
cont_names = ['age', 'fnlwgt', 'education-num']
# Preprocessing steps handed to the training TabularList via `procs=`.
procs = [FillMissing, Categorify, Normalize]
In [5]:
# Test items: an independent copy of rows 800-999 of df.
# NOTE: these are the same rows that split_by_idx(list(range(800, 1000))) holds out
# for validation when `data` is built.
test = TabularList.from_df(
    df.iloc[800:1000].copy(),
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)
In [6]:
# Build the DataBunch step by step: load -> split -> label -> attach test set -> batch.
data = (
    TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
    .split_by_idx(list(range(800, 1000)))  # rows 800-999 become the validation set
    .label_from_df(cols=dep_var)           # labels are read from the dep_var column
    .add_test(test, label=0)               # test items are attached with a constant label of 0
    .databunch()
)
In [7]:
# Display 10 rows from a batch to sanity-check the processed features and the target.
data.show_batch(rows=10)
workclass education marital-status occupation relationship race education-num_na age fnlwgt education-num target
Private 7th-8th Married-civ-spouse Machine-op-inspct Husband White False -0.2629 -0.9428 -2.3781 1
Self-emp-inc HS-grad Married-civ-spouse Transport-moving Husband White False 2.0093 -1.0762 -0.4224 1
Self-emp-not-inc Some-college Never-married Craft-repair Not-in-family White False -0.3362 -0.3120 -0.0312 0
Local-gov HS-grad Never-married Craft-repair Own-child White False 0.5434 -0.8287 -0.4224 0
Private Masters Never-married Tech-support Other-relative White False -0.9226 -1.5147 1.5334 0
Private 10th Widowed Transport-moving Not-in-family Black False 1.2030 -0.7890 -1.5958 0
State-gov Bachelors Never-married Prof-specialty Not-in-family White False -1.1425 2.9637 1.1422 0
Private Assoc-acdm Divorced Craft-repair Not-in-family White False 0.8365 0.1033 0.7511 0
Private Some-college Separated Sales Unmarried Black False -0.6294 0.2097 -0.0312 0
Private HS-grad Married-civ-spouse Machine-op-inspct Husband White False -0.7760 0.0061 -0.4224 0
In [8]:
# Create the learner on `data`, tracking accuracy as the metric.
# layers=[200, 100] presumably sets the hidden-layer widths — confirm against tabular_learner.
learn = tabular_learner(
    data,
    layers=[200, 100],
    metrics=accuracy,
)
In [9]:
# Train for 1 epoch; the second positional argument (1e-2) is presumably the
# learning rate — confirm against Learner.fit.
learn.fit(1, 1e-2)
Total time: 00:03

epoch train_loss valid_loss accuracy
1 0.361543 0.376106 0.815000

Inference

In [10]:
# Take the first row of the raw DataFrame as a single example for inference.
row = df.iloc[0]
In [12]:
# Predict for the single row. The displayed result is a 3-tuple: a Category, a
# tensor (presumably the class index) and a 2-element tensor (presumably the
# per-class probabilities).
learn.predict(row)
Out[12]:
(Category 1, tensor(1), tensor([0.2809, 0.7191]))