from fastai.tabular import *  # Quick access to tabular functionality
Tabular data should be in a Pandas DataFrame.
# Resolve the ADULT_SAMPLE dataset location via untar_data
# (presumably downloads/extracts on first use — confirm against fastai docs).
path = untar_data(URLs.ADULT_SAMPLE)
# Load adult.csv from that location into a DataFrame.
df = pd.read_csv(path/'adult.csv')
# Show the distinct values of the 'salary' column.
df['salary'].unique()
array(['>=50k', '<50k'], dtype=object)
# Import the memory utility functions
from fastai.utils.mem import *
# Test another utility function
# NOTE(review): the displayed result is a 2-tuple; presumably (GPU id, free memory in MB) — confirm.
gpu_with_max_free_mem()
(1, 8109)
# test
reduce_mem_usage(df)
Memory usage of dataframe is 3.73 MB
Memory usage after optimization is: 0.78 MB
Decreased by 79.0%
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | salary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 49 | Private | 101320 | Assoc-acdm | 12.0 | Married-civ-spouse | NaN | Wife | White | Female | 0 | 1902 | 40 | United-States | >=50k |
1 | 44 | Private | 236746 | Masters | 14.0 | Divorced | Exec-managerial | Not-in-family | White | Male | 10520 | 0 | 45 | United-States | >=50k |
2 | 38 | Private | 96185 | HS-grad | NaN | Divorced | NaN | Unmarried | Black | Female | 0 | 0 | 32 | United-States | <50k |
3 | 38 | Self-emp-inc | 112847 | Prof-school | 15.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0 | 0 | 40 | United-States | >=50k |
4 | 42 | Self-emp-not-inc | 82297 | 7th-8th | NaN | Married-civ-spouse | Other-service | Wife | Black | Female | 0 | 0 | 50 | United-States | <50k |
5 | 20 | Private | 63210 | HS-grad | 9.0 | Never-married | Handlers-cleaners | Own-child | White | Male | 0 | 0 | 15 | United-States | <50k |
6 | 49 | Private | 44434 | Some-college | 10.0 | Divorced | NaN | Other-relative | White | Male | 0 | 0 | 35 | United-States | <50k |
7 | 37 | Private | 138940 | 11th | 7.0 | Married-civ-spouse | NaN | Husband | White | Male | 0 | 0 | 40 | United-States | <50k |
8 | 46 | Private | 328216 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0 | 0 | 40 | United-States | >=50k |
9 | 36 | Self-emp-inc | 216711 | HS-grad | NaN | Married-civ-spouse | NaN | Husband | White | Male | 99999 | 0 | 50 | ? | >=50k |
10 | 23 | Private | 529223 | Bachelors | 13.0 | Never-married | NaN | Own-child | Black | Male | 0 | 0 | 10 | United-States | <50k |
11 | 18 | Private | 216284 | 11th | NaN | Never-married | Adm-clerical | Own-child | White | Female | 0 | 0 | 20 | United-States | <50k |
12 | 30 | Private | 151989 | Assoc-voc | NaN | Married-civ-spouse | NaN | Wife | White | Female | 0 | 0 | 40 | United-States | <50k |
13 | 30 | Private | 55291 | Bachelors | NaN | Married-civ-spouse | NaN | Husband | White | Male | 0 | 0 | 40 | United-States | >=50k |
14 | 43 | Private | 84661 | Assoc-voc | NaN | Married-civ-spouse | Sales | Husband | White | Male | 0 | 0 | 45 | United-States | <50k |
15 | 51 | Private | 284329 | HS-grad | 9.0 | Widowed | NaN | Unmarried | White | Male | 0 | 0 | 40 | United-States | <50k |
16 | 38 | Private | 170174 | 10th | NaN | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0 | 0 | 40 | United-States | >=50k |
17 | 35 | Private | 261293 | Masters | 14.0 | Never-married | NaN | Not-in-family | White | Male | 0 | 0 | 60 | United-States | <50k |
18 | 56 | State-gov | 274111 | Masters | 14.0 | Divorced | NaN | Not-in-family | White | Male | 0 | 1669 | 40 | United-States | <50k |
19 | 45 | Private | 267967 | Bachelors | NaN | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 45 | United-States | >=50k |
20 | 40 | Private | 188942 | Some-college | NaN | Married-civ-spouse | NaN | Wife | Black | Female | 0 | 0 | 40 | Puerto-Rico | <50k |
21 | 26 | Private | 746432 | HS-grad | 9.0 | Never-married | Handlers-cleaners | Own-child | Black | Male | 0 | 0 | 48 | United-States | <50k |
22 | 46 | Private | 117605 | 9th | NaN | Divorced | Sales | Not-in-family | White | Male | 0 | 0 | 35 | United-States | <50k |
23 | 29 | Private | 1268339 | HS-grad | NaN | Married-spouse-absent | NaN | Own-child | Black | Male | 0 | 0 | 40 | United-States | <50k |
24 | 49 | Private | 247294 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0 | 0 | 45 | United-States | >=50k |
25 | 55 | Self-emp-inc | 222615 | Masters | 14.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 60 | United-States | <50k |
26 | 47 | Self-emp-not-inc | 213745 | Some-college | NaN | Divorced | NaN | Unmarried | White | Female | 0 | 0 | 45 | United-States | <50k |
27 | 41 | Self-emp-inc | 151089 | Some-college | NaN | Married-civ-spouse | NaN | Husband | White | Male | 0 | 0 | 50 | United-States | <50k |
28 | 27 | Private | 153078 | Prof-school | NaN | Never-married | Prof-specialty | Own-child | Asian-Pac-Islander | Male | 0 | 0 | 40 | United-States | <50k |
29 | 42 | Private | 70055 | 11th | 7.0 | Married-civ-spouse | NaN | Husband | White | Male | 0 | 0 | 45 | United-States | <50k |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
32531 | 25 | Private | 203871 | Assoc-voc | 11.0 | Married-civ-spouse | Prof-specialty | Wife | White | Female | 0 | 1887 | 40 | United-States | >=50k |
32532 | 52 | State-gov | 71344 | Masters | 14.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 40 | United-States | <50k |
32533 | 19 | Private | 445728 | HS-grad | 9.0 | Never-married | Craft-repair | Not-in-family | White | Male | 0 | 0 | 40 | United-States | <50k |
32534 | 21 | Private | 222490 | Some-college | 10.0 | Never-married | Handlers-cleaners | Own-child | White | Female | 0 | 0 | 40 | United-States | <50k |
32535 | 49 | Private | 213431 | HS-grad | 9.0 | Separated | Prof-specialty | Unmarried | Black | Female | 0 | 0 | 40 | United-States | <50k |
32536 | 52 | Private | 163998 | HS-grad | 9.0 | Married-civ-spouse | Sales | Husband | White | Male | 99999 | 0 | 45 | United-States | >=50k |
32537 | 34 | Local-gov | 90934 | Assoc-voc | 11.0 | Divorced | Protective-serv | Own-child | Asian-Pac-Islander | Male | 0 | 0 | 40 | United-States | <50k |
32538 | 26 | Local-gov | 202286 | Bachelors | 13.0 | Never-married | Tech-support | Own-child | White | Male | 0 | 0 | 40 | United-States | <50k |
32539 | 44 | Private | 219441 | 10th | 6.0 | Never-married | Sales | Unmarried | Other | Female | 0 | 0 | 35 | Dominican-Republic | <50k |
32540 | 47 | Self-emp-not-inc | 162236 | Bachelors | 13.0 | Never-married | Craft-repair | Not-in-family | White | Female | 0 | 0 | 40 | United-States | <50k |
32541 | 24 | Private | 241857 | Some-college | 10.0 | Never-married | Adm-clerical | Not-in-family | Black | Female | 0 | 0 | 35 | United-States | <50k |
32542 | 34 | Private | 98283 | Prof-school | 15.0 | Never-married | Tech-support | Not-in-family | Asian-Pac-Islander | Male | 0 | 1564 | 40 | India | >=50k |
32543 | 38 | Private | 29874 | Assoc-voc | 11.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0 | 0 | 40 | United-States | <50k |
32544 | 33 | Private | 124052 | HS-grad | 9.0 | Married-civ-spouse | Craft-repair | Husband | White | Male | 0 | 0 | 40 | United-States | <50k |
32545 | 33 | Private | 206609 | Bachelors | 13.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 45 | United-States | <50k |
32546 | 31 | Private | 188246 | Masters | 14.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 50 | United-States | <50k |
32547 | 23 | Private | 267955 | Some-college | 10.0 | Never-married | Sales | Not-in-family | White | Female | 0 | 0 | 40 | United-States | <50k |
32548 | 28 | Private | 187479 | Some-college | 10.0 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 55 | United-States | <50k |
32549 | 27 | Private | 171655 | HS-grad | 9.0 | Never-married | Adm-clerical | Not-in-family | White | Female | 0 | 0 | 42 | United-States | <50k |
32550 | 27 | Private | 116358 | Some-college | 10.0 | Never-married | Craft-repair | Own-child | Asian-Pac-Islander | Male | 0 | 1980 | 40 | Philippines | <50k |
32551 | 60 | Private | 230545 | 7th-8th | 4.0 | Divorced | Adm-clerical | Not-in-family | White | Female | 0 | 0 | 35 | Cuba | <50k |
32552 | 39 | Private | 139743 | HS-grad | 9.0 | Separated | Adm-clerical | Not-in-family | White | Female | 0 | 0 | 20 | United-States | <50k |
32553 | 35 | Self-emp-inc | 135436 | Prof-school | 15.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 50 | United-States | >=50k |
32554 | 53 | Private | 35102 | Some-college | 10.0 | Divorced | Adm-clerical | Not-in-family | White | Female | 0 | 0 | 34 | United-States | <50k |
32555 | 48 | Private | 355320 | Bachelors | 13.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 45 | Canada | >=50k |
32556 | 36 | Private | 297449 | Bachelors | 13.0 | Divorced | Prof-specialty | Not-in-family | White | Male | 14084 | 0 | 40 | United-States | >=50k |
32557 | 23 | ? | 123983 | Bachelors | 13.0 | Never-married | ? | Own-child | Other | Male | 0 | 0 | 40 | United-States | <50k |
32558 | 53 | Private | 157069 | Assoc-acdm | 12.0 | Married-civ-spouse | Machine-op-inspct | Husband | White | Male | 0 | 0 | 40 | United-States | >=50k |
32559 | 32 | Local-gov | 217296 | HS-grad | 9.0 | Married-civ-spouse | Transport-moving | Wife | White | Female | 4064 | 0 | 22 | United-States | <50k |
32560 | 26 | Private | 182308 | Some-college | 10.0 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 40 | United-States | <50k |
32561 rows × 15 columns
# Preview the first rows of the DataFrame.
df.head()
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | salary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 49 | Private | 101320 | Assoc-acdm | 12.0 | Married-civ-spouse | NaN | Wife | White | Female | 0 | 1902 | 40 | United-States | >=50k |
1 | 44 | Private | 236746 | Masters | 14.0 | Divorced | Exec-managerial | Not-in-family | White | Male | 10520 | 0 | 45 | United-States | >=50k |
2 | 38 | Private | 96185 | HS-grad | NaN | Divorced | NaN | Unmarried | Black | Female | 0 | 0 | 32 | United-States | <50k |
3 | 38 | Self-emp-inc | 112847 | Prof-school | 15.0 | Married-civ-spouse | Prof-specialty | Husband | Asian-Pac-Islander | Male | 0 | 0 | 40 | United-States | >=50k |
4 | 42 | Self-emp-not-inc | 82297 | 7th-8th | NaN | Married-civ-spouse | Other-service | Wife | Black | Female | 0 | 0 | 50 | United-States | <50k |
# Column roles: the label column, the categorical inputs, and the continuous inputs.
dep_var = 'salary'
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
# Preprocessing steps passed to the training TabularList below.
procs = [FillMissing, Categorify, Normalize]
# Test items: a copy of rows 800-999 of df. No procs are passed here.
# NOTE(review): presumably add_test() applies the training procs to these items — confirm against fastai docs.
# NOTE(review): these are the same row positions given to split_by_idx below, so the test rows overlap that split.
test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)
# Data block pipeline: items from df -> split by index -> label -> attach test items -> DataBunch.
data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
# Indices 800-999 are split off (presumably as the validation set — confirm).
.split_by_idx(list(range(800,1000)))
# Labels are taken from the dep_var ('salary') column.
.label_from_df(cols=dep_var)
.add_test(test)
.databunch())
# Display 10 rows from a batch of the processed data.
data.show_batch(rows=10)
workclass | education | marital-status | occupation | relationship | race | education-num_na | age | fnlwgt | education-num | target |
---|---|---|---|---|---|---|---|---|---|---|
Private | Bachelors | Married-civ-spouse | Exec-managerial | Husband | White | False | 1.5695 | -0.9075 | 1.1422 | >=50k |
Private | Assoc-acdm | Never-married | Craft-repair | Not-in-family | White | False | 0.0303 | -0.7171 | 0.7511 | <50k |
Private | 7th-8th | Never-married | Farming-fishing | Unmarried | White | False | -1.2891 | 1.4882 | -2.3780 | <50k |
Private | HS-grad | Widowed | Sales | Unmarried | White | False | 3.4020 | -0.7050 | -0.4223 | <50k |
Private | Some-college | Never-married | Other-service | Unmarried | Black | False | 0.0303 | -0.2917 | -0.0312 | <50k |
Private | Bachelors | Divorced | Adm-clerical | Unmarried | White | False | 1.0564 | -0.4456 | 1.1422 | <50k |
Private | HS-grad | Widowed | Sales | Not-in-family | White | False | 2.0093 | 0.8941 | -0.4223 | <50k |
Private | Some-college | Widowed | Adm-clerical | Unmarried | White | False | -0.4095 | -0.6688 | -0.0312 | <50k |
Private | Some-college | Married-civ-spouse | Exec-managerial | Own-child | Black | False | -0.7027 | -0.4258 | -0.0312 | <50k |
Private | HS-grad | Never-married | Handlers-cleaners | Not-in-family | White | False | -0.9226 | -0.7288 | -0.4223 | <50k |
# Build a tabular learner on `data` with layers=[200,100], reporting accuracy as the metric.
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
# Train with arguments (1, 1e-2) — presumably 1 epoch at learning rate 1e-2; confirm against Learner.fit.
learn.fit(1, 1e-2)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 0.371782 | 0.411682 | 0.830000 | 00:03 |
# Take the first row of df and run a single-item prediction on it.
row = df.iloc[0]
# NOTE(review): the displayed result is a 3-tuple; presumably (predicted category, class index, class probabilities) — confirm.
learn.predict(row)
(Category <50k, tensor(0), tensor([0.5185, 0.4815]))