#!/usr/bin/env python
# coding: utf-8
"""Predict house sale prices with a ktrain tabular regression model.

Script export of a Jupyter notebook: it loads the Kaggle house-prices
training CSV, drops a handful of columns, builds an MLP regressor with
ktrain, runs the learning-rate finder, trains, and evaluates on the
validation split.  The ``get_ipython()`` calls require an IPython kernel.
"""

# In[1]:

get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

import os

# Restrict the process to a single GPU (device 0 in PCI bus order).
# NOTE(review): set before importing ktrain, presumably so the deep-learning
# backend sees these values when it initializes -- keep this ordering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import urllib.request

import pandas as pd
import numpy as np

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)


# In[2]:

import ktrain
from ktrain import tabular


# # Predicting House Prices
#
# In this notebook, we will predict the prices of houses from various house
# attributes. The dataset [can be downloaded from Kaggle here](https://www.kaggle.com/c/house-prices-advanced-regression-techniques).

# ## STEP 1: Load and Preprocess Data

# In[3]:

# The first CSV column is used as the DataFrame index.
train_df = pd.read_csv('data/housing_price/train.csv', index_col=0)


# In[4]:

train_df.head()


# In[5]:

# Remove columns that are not used as features.
# NOTE(review): presumably dropped because they are mostly missing or
# near-constant in this dataset -- confirm against the data.
# `columns=` is used instead of a positional axis argument: passing the axis
# positionally was deprecated in pandas 1.3 and raises TypeError in pandas 2.x.
train_df.drop(
    columns=['Alley', 'PoolQC', 'MiscFeature', 'Fence', 'FireplaceQu', 'Utilities'],
    inplace=True,
)


# In[6]:

train_df.head()


# In[7]:

# Build the training set, validation set and preprocessor from the DataFrame,
# with 'SalePrice' as the regression target and a fixed seed for the split.
trn, val, preproc = tabular.tabular_from_df(
    train_df,
    is_regression=True,
    label_columns='SalePrice',
    random_state=42,
)


# ## STEP 2: Create Model and Wrap in `Learner`

# In[8]:

model = tabular.tabular_regression_model('mlp', trn)
learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=128)


# ## STEP 3: Estimate LR

# In[9]:

learner.lr_find(show_plot=True, max_epochs=16)


# ## STEP 4: Train

# In[10]:

learner.autofit(1e-1)


# ## Evaluate Model

# In[11]:

learner.evaluate(test_data=val)


# In[ ]: