#!/usr/bin/env python
# coding: utf-8

# Open In Colab

# # Binary Classification Tutorial (CLF101) - Level Beginner

# In[ ]:

get_ipython().system(' pip uninstall -y pycaret')
get_ipython().system('pip install git+https://github.com/amjadraza/pycaret.git@feature/gcp_zure_integration')

# In[2]:

from pycaret.classification import *

# In[3]:

from pycaret.datasets import get_data
dataset = get_data('credit')

# In[4]:

data = dataset.sample(frac=0.95, random_state=786).reset_index(drop=True)
data_unseen = dataset.drop(data.index).reset_index(drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

# In[ ]:

exp_clf101 = setup(data = data, target = 'default', session_id=123)

# In[6]:

rf = create_model('rf')

# In[7]:

tuned_rf = tune_model(rf)

# In[8]:

predict_model(tuned_rf);

# # 12.0 Finalize Model for Deployment

# In[9]:

final_rf = finalize_model(tuned_rf)

# In[10]:

# Final Random Forest model parameters for deployment
print(final_rf)

# In[11]:

predict_model(final_rf);

# # 13.0 Deploy Model on Microsoft Azure
# 
# This is the code to deploy the model on Microsoft Azure using `pycaret` functionalities.

# In[12]:

get_ipython().system(' pip install azure-storage-blob')

# In[ ]:

## Enter the Azure storage connection string when running in Google Colab
connect_str = '' #@param {type:"string"}
print(connect_str)

# In[14]:

# Alternatively, export the connection string from the shell:
#! export AZURE_STORAGE_CONNECTION_STRING=connect_str

# In[21]:

import os

os.environ['AZURE_STORAGE_CONNECTION_STRING'] = connect_str

# In[ ]:

get_ipython().system(' echo $AZURE_STORAGE_CONNECTION_STRING')

# In[ ]:

os.getenv('AZURE_STORAGE_CONNECTION_STRING')

# In[25]:

authentication = {'container': 'pycaret-cls-101'}
model_name = 'rf-clf-101'
deploy_model(final_rf, model_name, authentication, platform = 'azure')

# In[27]:

authentication = {'container': 'pycaret-cls-101'}
model_name = 'rf-clf-101'
model_azure = load_model(model_name, platform = 'azure', authentication = authentication, verbose=True)

# In[29]:

authentication = {'container': 'pycaret-cls-101'}
model_name = 'rf-clf-101'
unseen_predictions = predict_model(model_name, data=data_unseen, platform='azure', authentication=authentication, verbose=True)

# In[30]:

unseen_predictions
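# As an optional sanity check (not part of the original notebook), you can list the blobs in the
# container to confirm that the pickled pipeline was uploaded. This is a minimal sketch that assumes
# the `azure-storage-blob` v12 client installed above and the same `connect_str` and container name.

# In[ ]:

from azure.storage.blob import BlobServiceClient

# Connect with the same connection string used for deployment and list the container contents.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client('pycaret-cls-101')
for blob in container_client.list_blobs():
    print(blob.name)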
# # 14.0 Deploy Model on Google Cloud
# 
# After the model is finalized and you are happy with it, you can deploy it to your cloud of choice. In this section, we deploy the model on the Google Cloud Platform.

# In[ ]:

from google.colab import auth
auth.authenticate_user()

# In[ ]:

# google-cloud-storage is the storage client used by the GCP deployment path
get_ipython().system(' pip install google-cloud-storage')

# In[ ]:

# GCP project name; change this to your own GCP project.
CLOUD_PROJECT = 'gcpessentials-rz'  # GCP project name
bucket_name = 'pycaret-clf101-test1'  # bucket used to store your model
BUCKET = 'gs://' + CLOUD_PROJECT + '-{}'.format(bucket_name)

# Point the gcloud CLI at $CLOUD_PROJECT
get_ipython().system('gcloud config set project $CLOUD_PROJECT')

# In[ ]:

authentication = {'project': CLOUD_PROJECT, 'bucket' : bucket_name}
model_name = 'rf-clf'
deploy_model(final_rf, model_name, authentication, platform = 'gcp')

# In[ ]:

authentication = {'project': CLOUD_PROJECT, 'bucket' : bucket_name}
model_name = 'rf-clf'
model_gcp = load_model(model_name, platform = 'gcp', authentication = authentication, verbose=True)

# In[ ]:

estimator_ = load_model(model_name, platform='gcp', authentication=authentication, verbose=True)

# In[ ]:

authentication = {'project': CLOUD_PROJECT, 'bucket' : bucket_name}
model_name = 'rf-clf'
unseen_predictions = predict_model(model_name, data=data_unseen, platform='gcp', authentication=authentication, verbose=True)

# In[ ]:

unseen_predictions

# In[ ]:

authentication

# In[ ]:

# Print the source of predict_model for reference
import inspect as i
import sys
sys.stdout.write(i.getsource(predict_model))

# # 15.0 Predict on unseen data
# The `predict_model()` function is also used to predict on the unseen dataset. The only difference from section 11 above is that this time we will pass the `data_unseen` parameter. `data_unseen` is the variable created at the beginning of the tutorial and contains 5% (1200 samples) of the original dataset which was never exposed to PyCaret. (See section 5 for an explanation.)

# In[ ]:

unseen_predictions = predict_model(final_rf, data=data_unseen)
unseen_predictions.head()

# The `Label` and `Score` columns are added onto the `data_unseen` set. `Label` is the prediction and `Score` is the probability of the prediction. Notice that the predicted results are concatenated to the original dataset while all the transformations are automatically performed in the background.

# # 16.0 Saving the model
# We have now finished the experiment by finalizing the `tuned_rf` model, which is now stored in the `final_rf` variable. We have also used the model stored in `final_rf` to predict `data_unseen`. This brings us to the end of our experiment, but one question remains: what happens when you have more new data to predict? Do you have to go through the entire experiment again? The answer is no, PyCaret's built-in function `save_model()` allows you to save the model along with the entire transformation pipeline for later use.

# In[ ]:

save_model(final_rf, 'Final RF Model 08Feb2020')

# (TIP: It is always good to include the date in the filename when saving models; it helps with version control.)

# # 17.0 Loading the saved model
# To load a saved model at a future date in the same or an alternative environment, we would use PyCaret's `load_model()` function and then easily apply the saved model on new unseen data for prediction.

# In[ ]:

saved_final_rf = load_model('Final RF Model 08Feb2020')

# Once the model is loaded in the environment, you can simply use it to predict on any new data using the same `predict_model()` function. Below we have applied the loaded model to predict the same `data_unseen` that we used in section 15 above.

# In[ ]:

new_prediction = predict_model(saved_final_rf, data=data_unseen)

# In[ ]:

new_prediction.head()

# Notice that the results of `unseen_predictions` and `new_prediction` are identical.
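# As a quick illustration (a sketch added here, not part of the original experiment), the loaded
# pipeline can also score raw records that do not include the target column; `predict_model()`
# re-applies every preprocessing step before predicting. The target name `default` and the `Label`
# and `Score` output columns follow the `credit` dataset and the predictions shown above.

# In[ ]:

# Sketch: score a small batch of raw, unlabeled records with the loaded pipeline.
# The 'default' target column is dropped to mimic genuinely new data.
new_batch = data_unseen.drop('default', axis=1).head(10)
batch_scored = predict_model(saved_final_rf, data=new_batch)
batch_scored[['Label', 'Score']]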
# # 18.0 Wrap-up / Next Steps?
# This tutorial has covered the entire machine learning pipeline, from data ingestion, pre-processing, model training and hyperparameter tuning, to prediction and saving the model for later use. We have completed all of these steps in fewer than 10 commands which are naturally constructed and very intuitive to remember, such as `create_model()`, `tune_model()`, `compare_models()`. Re-creating the entire experiment without PyCaret would have taken well over 100 lines of code in most libraries.
# 
# We have only covered the basics of `pycaret.classification`. In the following tutorials we will go deeper into advanced pre-processing, ensembling, generalized stacking and other techniques that allow you to fully customize your machine learning pipeline and are must-know for any data scientist.
# 
# See you at the next tutorial. Follow the link to __[Binary Classification Tutorial (CLF102) - Intermediate Level](https://github.com/pycaret/pycaret/blob/master/Tutorials/Binary%20Classification%20Tutorial%20Level%20Intermediate%20-%20CLF102.ipynb)__
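# As a closing reference (a sketch added for recall, not an additional experiment), the core workflow
# from this tutorial condenses into a handful of commands. It repeats the steps above with the same
# `credit` dataset and `session_id`; the variable names and the recap save name are illustrative only.

# In[ ]:

# Minimal end-to-end recap (sketch): load data, set up, train, tune, finalize, save.
from pycaret.classification import *
from pycaret.datasets import get_data

df = get_data('credit')
clf101_recap = setup(data=df, target='default', session_id=123)
model = create_model('rf')
tuned = tune_model(model)
final = finalize_model(tuned)
save_model(final, 'Final RF Model Recap')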