from fastai import * # Quick access to most common functionality
from fastai.tabular import * # Quick access to tabular functionality # Access to example data provided with fastai
Tabular data should be in a Pandas DataFrame.
# Fetch the ADULT_SAMPLE archive with untar_data and read adult.csv into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

# Column roles: the label column, the categorical inputs and the continuous inputs.
dep_var = '>=50k'
cat_names = [
    'workclass', 'education', 'marital-status',
    'occupation', 'relationship', 'race',
]
cont_names = ['age', 'fnlwgt', 'education-num']

# Preprocessing steps passed to the main TabularList below.
procs = [FillMissing, Categorify, Normalize]

# A copy of rows 800-999, wrapped as its own TabularList (built without procs)
# so it can be attached as the test set.
test = TabularList.from_df(
    df.iloc[800:1000].copy(),
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)

# Data block pipeline: build the item list from the full frame, split off
# indices 800-999, label from dep_var, attach the test items with a
# placeholder label of 0, then assemble the DataBunch.
data = (
    TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
    .split_by_idx(list(range(800, 1000)))
    .label_from_df(cols=dep_var)
    .add_test(test, label=0)
    .databunch()
)

# Display ten rows of one batch as a sanity check.
data.show_batch(rows=10)
workclass | education | marital-status | occupation | relationship | race | education-num_na | age | fnlwgt | education-num |
---|---|---|---|---|---|---|---|---|---|
Private | HS-grad | Married-civ-spouse | Craft-repair | Husband | Asian-Pac-Islander | False | 0.1769 | -0.1070 | -0.4224 |
Local-gov | Some-college | Never-married | Prof-specialty | Own-child | White | False | -0.0430 | 0.4453 | -0.0312 |
Private | Some-college | Married-civ-spouse | Transport-moving | Husband | White | False | 0.9098 | 1.1638 | -0.0312 |
Private | Some-college | Married-civ-spouse | Adm-clerical | Wife | White | False | -0.9959 | -1.0879 | -0.0312 |
? | HS-grad | Widowed | ? | Unmarried | White | False | 2.0093 | -0.9140 | -0.4224 |
Private | HS-grad | Divorced | Adm-clerical | Not-in-family | White | False | 0.9831 | -0.6828 | -0.4224 |
Private | HS-grad | Married-civ-spouse | Craft-repair | Husband | White | False | -0.4828 | -1.3949 | -0.4224 |
Private | Bachelors | Never-married | Prof-specialty | Not-in-family | White | False | 0.4701 | -0.2632 | 1.1422 |
Private | Assoc-acdm | Married-civ-spouse | Adm-clerical | Husband | Asian-Pac-Islander | False | 0.5434 | -0.1371 | 0.7511 |
Private | Bachelors | Married-civ-spouse | Exec-managerial | Husband | White | False | -0.0430 | 0.0111 | 1.1422 |
# Create a tabular learner on the DataBunch; layers=[200, 100] and the
# accuracy metric are passed straight through to tabular_learner.
learn = tabular_learner(
    data,
    layers=[200, 100],
    metrics=accuracy,
)

# Train with fit(1, 1e-2); the log printed after this cell shows a single epoch.
learn.fit(1, 1e-2)
Total time: 00:04 epoch train_loss valid_loss accuracy 1 0.373440 0.381403 0.825000 (00:04)
# Take the first row of the DataFrame as a single example.
row = df.iloc[0]
# Run the trained learner on that row. This is the cell's final expression,
# so its return value is what the notebook prints underneath.
learn.predict(row)
(1, tensor(0), tensor([0.5647, 0.4353]))