#!/usr/bin/env python
# coding: utf-8

# ![brainome logo](./images/brainome_logo.png)
# # 101 Quick Start
# Running brainome in five easy steps:
# 1. Install brainome via pip
# 2. Download data sets
# 3. Create your first predictor
# 4. Validate the model
# 5. Make predictions on new data

# ## 1. Install brainome via pip
# Pip will automatically include dependencies.

# In[ ]:

get_ipython().system('python3 -m pip install brainome')
print("\n\nChecking brainome version number:")
get_ipython().system('brainome --version')

# ### Troubleshooting installation
# Sometimes pip requires the `--user` parameter in order to install successfully:
#
# > `python3 -m pip install brainome --user`

# ## 2. Download this tutorial's data sets
# The Titanic data set is commonly used as an introduction to data science. It is a passenger manifest of the Titanic, including whether each passenger survived the disaster. For more information, refer to [kaggle.com/c/titanic](https://www.kaggle.com/c/titanic).

# In[ ]:

import urllib.request as request
response1 = request.urlretrieve('https://download.brainome.ai/data/public/titanic_train.csv', 'titanic_train.csv')
response2 = request.urlretrieve('https://download.brainome.ai/data/public/titanic_validate.csv', 'titanic_validate.csv')
response3 = request.urlretrieve('https://download.brainome.ai/data/public/titanic_predict.csv', 'titanic_predict.csv')
get_ipython().run_line_magic('ls', '-lh titanic_train.csv titanic_validate.csv titanic_predict.csv')

# ### Preview training data
# The goal of the training is to predict which passengers survived the disaster.
#
# The passenger roster contains 11 features (PassengerId, Cabin_Class, Name, etc.) for 800 passengers that can be used to create a model. Hence, the target column is 'Survived'.
#
# You can download the training data at [titanic_train.csv](https://download.brainome.ai/data/public/titanic_train.csv)

# In[1]:

# preview uses pandas to read and display csv data
get_ipython().run_line_magic('pip', 'install pandas --quiet')
import pandas as pd
pd.read_csv('titanic_train.csv')

# ## 3. Create your first predictor
# In its simplest invocation, brainome will automatically measure your data, identify the best model, build it, train it, and validate it.
#
# It will automatically split your data into training and validation sets.
#
# The output is Python source code in `predictor_101.py`.

# In[ ]:

get_ipython().system('brainome titanic_train.csv --yes -o predictor_101.py')

# Open `predictor_101.py` to browse the predictor's source code. Notice that it is on the order of 38 KB.

# In[ ]:

get_ipython().run_line_magic('ls', '-lh predictor_101.py')
get_ipython().run_line_magic('pycat', 'predictor_101.py')

# ## 4. Validate the model
# Running your predictor on an unseen data set demonstrates its effectiveness.
#
# You can download the validation data at [titanic_validate.csv](https://download.brainome.ai/data/public/titanic_validate.csv)

# In[ ]:

get_ipython().system('python3 predictor_101.py -validate titanic_validate.csv')
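# ### Optional: cross-check the validation accuracy by hand
# The cell below is a minimal sketch of a manual accuracy check. It assumes that running the predictor without `-validate` (the same pattern used in step 5 below) writes a CSV containing a prediction column named 'Prediction', and that `titanic_validate.csv` uses the same 'Survived' target column as the training data. Adjust the column names if your brainome version's output differs.

# In[ ]:

# Optional sketch: recompute validation accuracy with pandas.
# ASSUMPTION: the predictor's CSV output includes a 'Prediction' column;
# the exact column name may differ between brainome versions.
get_ipython().system('python3 predictor_101.py titanic_validate.csv > validation_output_101.csv')

import pandas as pd

labels = pd.read_csv('titanic_validate.csv')['Survived']
preds = pd.read_csv('validation_output_101.csv')['Prediction']

# Compare as strings so integer vs. string labels still match up.
accuracy = (labels.astype(str) == preds.astype(str)).mean()
print(f"Hand-computed validation accuracy: {accuracy:.2%}")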
# ## 5. Make predictions on new data
# Run your predictor on an unlabeled data set to generate predictions for other passengers.
#
# You can download the prediction data at [titanic_predict.csv](https://download.brainome.ai/data/public/titanic_predict.csv)

# In[ ]:

get_ipython().system('python3 predictor_101.py titanic_predict.csv > predictions_101.csv')
pd.read_csv('predictions_101.csv')

# ## Next steps
# - Check out [102 Using CLI](./brainome_102_Using_CLI.ipynb)
# - Check out [Using Measurement to Create Better Models](./brainome_200_Using_Measurement.ipynb)

# In[ ]:
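# ### Optional: summarize the predictions from step 5
# As a small follow-up sketch, the cell below counts the predicted outcomes in `predictions_101.csv`. It assumes the output contains a prediction column named 'Prediction'; rename it to match your brainome version's output if needed.

# In[ ]:

# Optional sketch: count predicted outcomes from step 5.
# ASSUMPTION: the prediction column is named 'Prediction'; rename if needed.
import pandas as pd

predictions = pd.read_csv('predictions_101.csv')
print(predictions['Prediction'].value_counts())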