Tabular example

In [ ]:

from fastai.tabular import *  # Quick access to tabular functionality

Tabular data should be in a Pandas DataFrame.

In [ ]:

# Fetch the Adult sample dataset through fastai's `untar_data` and read its
# `adult.csv` file into a DataFrame. `path` is reused later when building the
# TabularList objects.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

In [ ]:

# List the distinct values of the `salary` column (used as the target below).
df['salary'].unique()

Out[ ]:

array(['>=50k', '<50k'], dtype=object)

In [ ]:

# Star-import fastai's memory utilities.
# NOTE(review): presumably the source of `gpu_with_max_free_mem` called in the
# next cell - confirm, and consider moving this import to the top import cell.
from fastai.utils.mem import *

In [ ]:

# Quick check of another helper function.
# NOTE(review): the recorded result is a 2-tuple; presumably (GPU id, free
# memory) - confirm against the fastai.utils.mem documentation.
gpu_with_max_free_mem()

Out[ ]:

(0, 7812)

In [ ]:

# TODO: test reduce_mem_usage(df) (not executed in this notebook)

In [ ]:

# Preview the first rows of the raw data. The recorded output shows missing
# values (NaN) in the `occupation` and `education-num` columns.
df.head()

Out[ ]:

	age	workclass	fnlwgt	education	education-num	marital-status	occupation	relationship	race	sex	capital-gain	capital-loss	hours-per-week	native-country	salary
0	49	Private	101320	Assoc-acdm	12.0	Married-civ-spouse	NaN	Wife	White	Female	0	1902	40	United-States	>=50k
1	44	Private	236746	Masters	14.0	Divorced	Exec-managerial	Not-in-family	White	Male	10520	0	45	United-States	>=50k
2	38	Private	96185	HS-grad	NaN	Divorced	NaN	Unmarried	Black	Female	0	0	32	United-States	<50k
3	38	Self-emp-inc	112847	Prof-school	15.0	Married-civ-spouse	Prof-specialty	Husband	Asian-Pac-Islander	Male	0	0	40	United-States	>=50k
4	42	Self-emp-not-inc	82297	7th-8th	NaN	Married-civ-spouse	Other-service	Wife	Black	Female	0	0	50	United-States	<50k

In [ ]:

# Target column to predict.
dep_var = 'salary'

# Columns passed to TabularList as categorical variables.
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]

# Columns passed to TabularList as continuous variables.
cont_names = [
    'age',
    'fnlwgt',
    'education-num',
]

# Preprocessing steps, in the order they are handed to TabularList.
procs = [
    FillMissing,
    Categorify,
    Normalize,
]

In [ ]:

# Test items: a copy of rows 800-999 of the raw frame. This is the same index
# range that the following cell holds out with `split_by_idx`. No `procs` are
# passed to this list.
test = TabularList.from_df(
    df.iloc[800:1000].copy(),
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)

In [ ]:

# Assemble the DataBunch from the full DataFrame, one chained step at a time.
data = (
    TabularList.from_df(
        df,
        path=path,
        cat_names=cat_names,
        cont_names=cont_names,
        procs=procs,
    )
    .split_by_idx(list(range(800, 1000)))  # split off rows 800-999 by index
    .label_from_df(cols=dep_var)           # labels come from the `salary` column
    .add_test(test)                        # attach the test items built earlier
    .databunch()
)

In [ ]:

# Display 10 rows from a batch. In the recorded output the continuous columns
# appear rescaled and an extra `education-num_na` column is present.
data.show_batch(rows=10)

workclass	education	marital-status	occupation	relationship	race	education-num_na	age	fnlwgt	education-num	target
Private	Bachelors	Never-married	Exec-managerial	Not-in-family	White	False	-1.1425	-0.9280	1.1422	<50k
Private	HS-grad	Never-married	Other-service	Not-in-family	White	False	-0.5561	0.7244	-0.4224	<50k
Private	Some-college	Never-married	Other-service	Own-child	White	False	-1.5090	-0.1673	-0.0312	<50k
Private	HS-grad	Divorced	Adm-clerical	Other-relative	Amer-Indian-Eskimo	False	1.2763	-0.8370	-0.4224	<50k
Local-gov	Bachelors	Divorced	Transport-moving	Not-in-family	White	False	0.2502	-1.3617	1.1422	<50k
Private	Some-college	Married-civ-spouse	Prof-specialty	Husband	White	False	-0.8493	-0.3286	-0.0312	>=50k
Private	11th	Never-married	Prof-specialty	Own-child	White	False	-1.5090	0.8521	-1.2046	<50k
Private	7th-8th	Married-civ-spouse	Tech-support	Husband	White	False	-0.2629	0.0550	-2.3781	<50k
Private	HS-grad	Never-married	Transport-moving	Not-in-family	White	False	-0.8493	-0.3286	-0.4224	<50k
Private	Bachelors	Never-married	Adm-clerical	Not-in-family	White	False	-0.7760	1.3159	1.1422	<50k

In [ ]:

# Create a tabular learner with layer sizes 200 and 100 that reports accuracy,
# then train it for a single epoch at a learning rate of 1e-2.
learn = tabular_learner(
    data,
    layers=[200, 100],
    metrics=accuracy,
)
learn.fit(epochs=1, lr=1e-2)

Total time: 00:02

epoch	train_loss	valid_loss	accuracy	time
0	0.357122	0.381649	0.790000	00:02

Inference

In [ ]:

# Take the first row of the raw DataFrame as a single example for inference.
row = df.iloc[0]

In [ ]:

# Run the trained learner on the single row.
# NOTE(review): the recorded result is a 3-tuple; presumably (predicted
# category, class index, per-class probabilities) - confirm against the
# fastai `Learner.predict` documentation.
learn.predict(row)

Out[ ]:

(Category >=50k, tensor(1), tensor([0.3581, 0.6419]))

In [ ]: