#link python site packages folder
#makes packages installed into the user-level site-packages folder importable from this kernel
import sys
#NOTE(review): machine-specific absolute path -- adjust when running on another machine
source_directory = "/Users/pauljacob/Library/Python/3.8/lib/python/site-packages"
#only append when missing so that re-running this cell does not pile up duplicate sys.path entries
if source_directory not in sys.path:
    sys.path.append(source_directory)
#file structure
!pip3 install cookiecutter
#general
!pip3 install --upgrade pip
!pip3 install ipython-autotime --quiet
!pip3 install watermark
Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: cookiecutter in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (2.1.1) Requirement already satisfied: binaryornot>=0.4.4 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (0.4.4) Requirement already satisfied: Jinja2<4.0.0,>=2.7 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (3.1.2) Requirement already satisfied: click<9.0.0,>=7.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (8.1.3) Requirement already satisfied: pyyaml>=5.3.1 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (6.0) Requirement already satisfied: jinja2-time>=0.2.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (0.2.0) Requirement already satisfied: python-slugify>=4.0.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (6.1.2) Requirement already satisfied: requests>=2.23.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from cookiecutter) (2.28.1) Requirement already satisfied: chardet>=3.0.2 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from binaryornot>=0.4.4->cookiecutter) (5.0.0) Requirement already satisfied: MarkupSafe>=2.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from Jinja2<4.0.0,>=2.7->cookiecutter) (2.1.1) Requirement already satisfied: arrow in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from jinja2-time>=0.2.0->cookiecutter) (1.2.3) Requirement already satisfied: text-unidecode>=1.3 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from python-slugify>=4.0.0->cookiecutter) (1.3) Requirement already satisfied: charset-normalizer<3,>=2 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from requests>=2.23.0->cookiecutter) (2.1.1) Requirement already 
satisfied: idna<4,>=2.5 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from requests>=2.23.0->cookiecutter) (3.4) Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from requests>=2.23.0->cookiecutter) (1.26.12) Requirement already satisfied: certifi>=2017.4.17 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from requests>=2.23.0->cookiecutter) (2022.9.24) Requirement already satisfied: python-dateutil>=2.7.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from arrow->jinja2-time>=0.2.0->cookiecutter) (2.8.2) Requirement already satisfied: six>=1.5 in /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages (from python-dateutil>=2.7.0->arrow->jinja2-time>=0.2.0->cookiecutter) (1.15.0) Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: pip in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (23.2.1) Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: watermark in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (2.3.1) Requirement already satisfied: ipython in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from watermark) (8.5.0) Requirement already satisfied: backcall in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (0.2.0) Requirement already satisfied: decorator in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (5.1.1) Requirement already satisfied: jedi>=0.16 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (0.18.1) Requirement already satisfied: matplotlib-inline in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (0.1.6) Requirement already satisfied: pickleshare in 
/Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (0.7.5) Requirement already satisfied: prompt-toolkit<3.1.0,>3.0.1 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (3.0.31) Requirement already satisfied: pygments>=2.4.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (2.13.0) Requirement already satisfied: stack-data in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (0.5.1) Requirement already satisfied: traitlets>=5 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (5.4.0) Requirement already satisfied: pexpect>4.3 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (4.8.0) Requirement already satisfied: appnope in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from ipython->watermark) (0.1.3) Requirement already satisfied: parso<0.9.0,>=0.8.0 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from jedi>=0.16->ipython->watermark) (0.8.3) Requirement already satisfied: ptyprocess>=0.5 in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from pexpect>4.3->ipython->watermark) (0.7.0) Requirement already satisfied: wcwidth in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from prompt-toolkit<3.1.0,>3.0.1->ipython->watermark) (0.2.5) Requirement already satisfied: executing in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from stack-data->ipython->watermark) (1.1.0) Requirement already satisfied: asttokens in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from stack-data->ipython->watermark) (2.0.8) Requirement already satisfied: pure-eval in /Users/pauljacob/Library/Python/3.8/lib/python/site-packages (from stack-data->ipython->watermark) (0.2.2) Requirement already satisfied: six in 
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages (from asttokens->stack-data->ipython->watermark) (1.15.0)
#load the autotime extension (from the ipython-autotime package installed above);
#once loaded, each executed cell prints a "time: ..." line with its run time and start timestamp
%load_ext autotime
time: 283 µs (started: 2023-09-28 17:19:55 -07:00)
#get libraries
import pandas as pd
import os
import numpy as np
import itertools
from itertools import combinations
import warnings
#data wrangling
from functools import reduce
#get visualization libraries
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import matplotlib.patches as mpatches
from matplotlib.ticker import FuncFormatter
import matplotlib.ticker as mtick
from matplotlib.ticker import PercentFormatter
from matplotlib.patches import Patch
#ML preprocessing
from sklearn.preprocessing import StandardScaler
#get ML functions
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, GridSearchCV, StratifiedKFold, learning_curve
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import __version__ as sklearn_version
import datetime
#get ML metric functions
from sklearn.metrics import accuracy_score, precision_score, recall_score, auc, precision_recall_curve, confusion_matrix
#get icr module and associated common functions
import in_vehicle_coupon_recommendation as icr
from in_vehicle_coupon_recommendation import p, rcp, rpp, rcr, pl, pdc, save_and_return_data_frame, initialize_custom_notebook_settings
time: 2.26 s (started: 2023-09-28 17:19:55 -07:00)
#initialize notebook
st='no'; number_of_replicates=100 if st=='yes' else 10000
filename_version='4dot3'
initialize_custom_notebook_settings()
%load_ext autoreload
%autoreload 1
%aimport in_vehicle_coupon_recommendation
%load_ext watermark
time: 31.4 ms (started: 2023-09-28 17:19:57 -07:00)
#read the raw coupon data, then shuffle all rows (frac=1) with a fixed seed so the order is reproducible
df = (
    pd.read_csv(os.path.join('..', 'data', 'raw', 'in-vehicle-coupon-recommendation.csv'))
    .sample(frac=1, random_state=200) #row shuffle DataFrame
)
#p(df)
time: 48.1 ms (started: 2023-09-28 17:19:57 -07:00)
#data cleaning: rename columns, relabel category values, drop redundant columns, fill missing values

#rename 'passanger' and 'coupon' column
df = df.rename(columns={'passanger': 'passenger', 'coupon': 'coupon_venue_type'})

#decode string 'age' values to (explicit) string age ranges
#the mapping is spelled out per raw label: pairing df['age'].unique() (first-appearance order, which
#changes with the row shuffle) with a fixed list of ranges attaches the wrong range to an age
#whenever the row order differs from the one the list was written for
#NOTE(review): raw labels below are assumed to be the ones in the published CSV -- confirm against the file
age_range_by_raw_label = {'below21': '<21', '21': '21-25', '26': '26-30', '31': '31-35', '36': '36-40', '41': '41-45', '46': '46-49', '50plus': '50+'}
df.loc[:, 'age'] = df.loc[:, 'age'].replace(age_range_by_raw_label)
del age_range_by_raw_label

#drop column 'toCoupon_GEQ5min' because all 1's
df = df.loc[:, [column_name for column_name in df.columns if column_name != 'toCoupon_GEQ5min']]

#rename values in columns 'Bar', 'CoffeeHouse', 'CarryAway', 'RestaurantLessThan20', and 'Restaurant20To50' to math language...
#get column name list for column values uniques that are the same as column name 'CarryAway' value uniques
column_name_list_same_unique_values = [
    column_name
    for column_name in df.columns
    if icr.column_name_value_sets_equal(df, column_name1='CarryAway', column_name2=column_name) == 1
]
#rename values in columns 'Bar', 'CoffeeHouse', 'CarryAway', 'RestaurantLessThan20', and 'Restaurant20To50' to math language
#explicit per-label mapping for the same reason as 'age'; 'never' and missing values are left unchanged
#NOTE(review): raw labels below are assumed to be the ones in the published CSV -- confirm against the file
visit_frequency_by_raw_label = {'less1': '<1', '1~3': '1-3', '4~8': '4-8', 'gt8': '>8'}
df.loc[:, column_name_list_same_unique_values] = df.loc[:, column_name_list_same_unique_values].replace(visit_frequency_by_raw_label)
del visit_frequency_by_raw_label, column_name_list_same_unique_values

#clean up columns 'direction_same' and 'direction_opp' to 'direction_same_or_opposite'
#drop column 'direction_same'
df = df.loc[:, [column_name for column_name in df.columns if column_name != 'direction_same']]
#rename 'direction_opp' to 'direction_same_or_opposite'
df = df.rename(columns={'direction_opp': 'direction_same_or_opposite'})

#solution1: fill in missing values with 'no response' or 'unknown'
column_name_list = ['Bar', 'CoffeeHouse', 'CarryAway', 'RestaurantLessThan20', 'Restaurant20To50', 'car']
df.loc[:, column_name_list] = df.loc[:, column_name_list].fillna('no response')

#fix income values: put a backslash in front of every '$'
#(presumably so the notebook does not render '$...$' as math -- confirm)
#a literal, per-value replacement does not depend on the row order and avoids the invalid '\$' escape sequence in string literals
df.loc[:, 'income'] = df.loc[:, 'income'].str.replace('$', r'\$', regex=False)
p(df)
(12684, 24)
destination | passenger | weather | temperature | time | coupon_venue_type | expiration | gender | age | maritalStatus | has_children | education | occupation | income | car | Bar | CoffeeHouse | CarryAway | RestaurantLessThan20 | Restaurant20To50 | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | Y | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11199 | Home | Alone | Sunny | 80 | 6PM | Carry out & Take away | 2h | Male | 21-25 | Single | 0 | Bachelors degree | Student | \$12500 - \$24999 | no response | never | no response | no response | no response | never | 1 | 0 | 0 | 1 |
1474 | Work | Alone | Sunny | 55 | 7AM | Bar | 1d | Male | 46-49 | Married partner | 1 | Graduate degree (Masters or Doctorate) | Management | \$87500 - \$99999 | no response | never | <1 | <1 | never | <1 | 1 | 1 | 1 | 0 |
10836 | Home | Alone | Sunny | 30 | 6PM | Carry out & Take away | 1d | Male | 26-30 | Single | 0 | Some college - no degree | Sales & Related | \$37500 - \$49999 | no response | <1 | no response | >8 | <1 | never | 0 | 0 | 1 | 1 |
4567 | No Urgent Place | Alone | Sunny | 80 | 10AM | Bar | 1d | Female | 21-25 | Unmarried partner | 0 | Graduate degree (Masters or Doctorate) | Education&Training&Library | \$37500 - \$49999 | no response | 1-3 | <1 | <1 | <1 | never | 0 | 0 | 1 | 0 |
5658 | No Urgent Place | Alone | Sunny | 80 | 2PM | Restaurant(<20) | 2h | Female | 31-35 | Single | 1 | Bachelors degree | Production Occupations | \$37500 - \$49999 | no response | never | never | >8 | 4-8 | never | 1 | 0 | 1 | 1 |
11343 | Work | Alone | Sunny | 80 | 7AM | Restaurant(20-50) | 2h | Female | 36-40 | Single | 1 | Bachelors degree | Food Preparation & Serving Related | \$12500 - \$24999 | no response | 1-3 | <1 | >8 | never | never | 0 | 0 | 0 | 0 |
9036 | Home | Alone | Snowy | 30 | 10PM | Restaurant(<20) | 2h | Male | 26-30 | Single | 0 | Some college - no degree | Student | \$12500 - \$24999 | no response | <1 | never | >8 | never | 1-3 | 1 | 1 | 1 | 0 |
11050 | Home | Alone | Sunny | 80 | 6PM | Restaurant(20-50) | 1d | Male | 46-49 | Single | 0 | Some college - no degree | Sales & Related | Less than \$12500 | no response | <1 | <1 | >8 | >8 | >8 | 0 | 0 | 0 | 0 |
784 | Work | Alone | Sunny | 80 | 7AM | Carry out & Take away | 2h | Female | 21-25 | Single | 0 | Graduate degree (Masters or Doctorate) | Legal | \$25000 - \$37499 | no response | <1 | <1 | <1 | no response | <1 | 0 | 0 | 0 | 1 |
1818 | No Urgent Place | Kid(s) | Sunny | 80 | 10AM | Bar | 1d | Female | 36-40 | Married partner | 1 | Bachelors degree | Retired | \$50000 - \$62499 | no response | 1-3 | never | >8 | <1 | never | 1 | 0 | 1 | 0 |
time: 93.4 ms (started: 2023-09-28 17:19:57 -07:00)
#takeaway: the most represented scenario is heading to Work, alone, in sunny weather, at 55 degrees, at 7AM, with a Bar coupon, 1 day until expiration, more than 25 minutes away in the direction opposite to the destination
column_name_list_scenario = ['destination', 'passenger', 'weather', 'temperature', 'time', 'coupon_venue_type', 'expiration', 'toCoupon_GEQ15min', 'toCoupon_GEQ25min', 'direction_same_or_opposite']
#number of distinct scenarios (rows) and scenario columns
print(df.loc[:, column_name_list_scenario].drop_duplicates().shape)
#ten most frequent scenarios; reset_index(name='Count') labels the counts column on every pandas version,
#whereas rename(columns={0: 'Count'}) silently does nothing on pandas >= 2.0 where that column is called 'count'
df.loc[:, column_name_list_scenario].value_counts().reset_index(name='Count').head(10)
(201, 10)
destination | passenger | weather | temperature | time | coupon_venue_type | expiration | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | Count | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Work | Alone | Sunny | 55 | 7AM | Bar | 1d | 1 | 1 | 1 | 194 |
1 | No Urgent Place | Friend(s) | Sunny | 55 | 2PM | Carry out & Take away | 1d | 1 | 0 | 1 | 181 |
2 | No Urgent Place | Friend(s) | Sunny | 80 | 2PM | Coffee House | 2h | 1 | 0 | 1 | 181 |
3 | Work | Alone | Sunny | 80 | 7AM | Carry out & Take away | 2h | 0 | 0 | 0 | 181 |
4 | No Urgent Place | Alone | Sunny | 55 | 2PM | Restaurant(<20) | 1d | 0 | 0 | 1 | 181 |
5 | No Urgent Place | Friend(s) | Sunny | 80 | 10AM | Carry out & Take away | 2h | 1 | 0 | 1 | 181 |
6 | Work | Alone | Sunny | 80 | 7AM | Restaurant(20-50) | 1d | 1 | 0 | 1 | 181 |
7 | No Urgent Place | Friend(s) | Sunny | 80 | 6PM | Restaurant(<20) | 2h | 1 | 0 | 1 | 181 |
8 | No Urgent Place | Friend(s) | Sunny | 80 | 10AM | Coffee House | 2h | 0 | 0 | 1 | 180 |
9 | Home | Alone | Sunny | 80 | 6PM | Coffee House | 2h | 0 | 0 | 1 | 180 |
time: 23.8 ms (started: 2023-09-28 17:19:57 -07:00)
#takeaway: the most represented demographic is Female, 31-35, Married partner, 0 children, Some college - no degree, Computer & Mathematical, income $100000 or More, car no response, Bar never, CoffeeHouse never, CarryAway no response, low-cost restaurant >8, mid-range restaurant never
column_name_list_not_demographic = ['destination', 'passenger', 'weather', 'temperature', 'time', 'coupon_venue_type', 'expiration', 'toCoupon_GEQ15min', 'toCoupon_GEQ25min', 'direction_same_or_opposite', 'Y']
#every remaining column describes the respondent rather than the scenario or the target 'Y'
column_name_list_demographic = [column_name for column_name in df.columns if column_name not in column_name_list_not_demographic]
#ten most frequent demographic profiles; reset_index(name='Count') labels the counts column on every pandas version,
#whereas rename(columns={0: 'Count'}) silently does nothing on pandas >= 2.0 where that column is called 'count'
df.loc[:, column_name_list_demographic].value_counts().reset_index(name='Count').head(10)
gender | age | maritalStatus | has_children | education | occupation | income | car | Bar | CoffeeHouse | CarryAway | RestaurantLessThan20 | Restaurant20To50 | Count | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Female | 31-35 | Married partner | 0 | Some college - no degree | Computer & Mathematical | \$100000 or More | no response | never | never | no response | >8 | never | 110 |
1 | Male | 46-49 | Married partner | 1 | Graduate degree (Masters or Doctorate) | Management | \$87500 - \$99999 | no response | never | <1 | <1 | never | <1 | 66 |
2 | Female | 26-30 | Married partner | 1 | Associates degree | Unemployed | \$50000 - \$62499 | no response | 1-3 | <1 | <1 | <1 | <1 | 66 |
3 | Male | 21-25 | Single | 0 | High School Graduate | Unemployed | \$37500 - \$49999 | no response | no response | no response | no response | no response | no response | 66 |
4 | Female | 31-35 | Married partner | 0 | Some college - no degree | Arts Design Entertainment Sports & Media | \$100000 or More | no response | never | never | no response | >8 | never | 44 |
5 | Female | 21-25 | Single | 0 | Bachelors degree | Unemployed | Less than \$12500 | no response | >8 | <1 | no response | >8 | <1 | 44 |
6 | Female | 41-45 | Married partner | 1 | Graduate degree (Masters or Doctorate) | Computer & Mathematical | \$75000 - \$87499 | no response | never | >8 | no response | <1 | never | 44 |
7 | Male | <21 | Married partner | 1 | Associates degree | Computer & Mathematical | \$100000 or More | no response | 1-3 | never | >8 | <1 | never | 44 |
8 | Female | <21 | Divorced | 1 | Graduate degree (Masters or Doctorate) | Student | \$12500 - \$24999 | no response | 1-3 | no response | no response | no response | <1 | 44 |
9 | Female | 50+ | Unmarried partner | 0 | High School Graduate | Student | Less than \$12500 | no response | 1-3 | >8 | <1 | >8 | never | 44 |
time: 23 ms (started: 2023-09-28 17:19:57 -07:00)
#category representative numeric encoding: expiration, time, age, income
#each category label is replaced by one representative number:
#  expiration -> hours until expiry, time -> hour on a 24-hour clock,
#  age -> one age per range, income -> one dollar amount per bracket (midpoint for the closed brackets)
#income keys are raw strings: '\$' is not a valid escape sequence in a normal string literal
#(the key text itself, backslash followed by '$', is unchanged)
category_representative_numeric_encoding_dictionary = {
    'expiration': {'2h': 2, '1d': 24},
    'time': {'7AM': 7, '10AM': 10, '2PM': 14, '6PM': 18, '10PM': 22},
    'age': {'<21': 18, '21-25': 23, '26-30': 28, '31-35': 33, '36-40': 38, '41-45': 43, '46-49': 48, '50+': 56},
    'income': {
        r'Less than \$12500': 6250,
        r'\$12500 - \$24999': 18749.5,
        r'\$25000 - \$37499': 31249.5,
        r'\$37500 - \$49999': 43749.5,
        r'\$50000 - \$62499': 56249.5,
        r'\$62500 - \$74999': 68749.5,
        r'\$75000 - \$87499': 81249.5,
        r'\$87500 - \$99999': 93749.5,
        r'\$100000 or More': 150000,
    },
}
#encode a copy of the mapped columns, then tag the new columns with the encoding name
df_category_representative_numeric_encoding = (
    df.loc[:, list(category_representative_numeric_encoding_dictionary)]
    .replace(category_representative_numeric_encoding_dictionary)
    .rename(columns=lambda column_name: str(column_name) + '_category_representative_numeric_encoding')
)
del category_representative_numeric_encoding_dictionary
#p(df_category_representative_numeric_encoding)
time: 27.8 ms (started: 2023-09-28 17:19:57 -07:00)
#binary encoding: gender, expiration
#two-valued columns are mapped to 0/1
binary_encoding_dictionary = {
    'gender': {'Female': 0, 'Male': 1},
    'expiration': {'2h': 0, '1d': 1},
}
#encode a copy of the mapped columns, then tag the new columns with the encoding name
df_binary_encoding = (
    df.loc[:, list(binary_encoding_dictionary)]
    .replace(binary_encoding_dictionary)
    .rename(columns=lambda column_name: str(column_name) + '_binary_encoding')
)
del binary_encoding_dictionary
#p(df_binary_encoding)
time: 11.9 ms (started: 2023-09-28 17:19:57 -07:00)
#ordinal integer encoding: coupon_venue_type, education, income, age, time, temperature
#every label is replaced by its rank (1, 2, 3, ...) in the order listed below
#income keys are raw strings: '\$' is not a valid escape sequence in a normal string literal
#(the key text itself, backslash followed by '$', is unchanged)
ordinal_integer_encoding_dictionary = {
    #category ordinal features to ordinal integer encoding
    'coupon_venue_type': {'Coffee House': 1, 'Bar': 2, 'Carry out & Take away': 3, 'Restaurant(<20)': 4, 'Restaurant(20-50)': 5},
    'education': {'Some High School': 1, 'High School Graduate': 2, 'Some college - no degree': 3, 'Associates degree': 4, 'Bachelors degree': 5, 'Graduate degree (Masters or Doctorate)': 6},
    'income': {
        r'Less than \$12500': 1,
        r'\$12500 - \$24999': 2,
        r'\$25000 - \$37499': 3,
        r'\$37500 - \$49999': 4,
        r'\$50000 - \$62499': 5,
        r'\$62500 - \$74999': 6,
        r'\$75000 - \$87499': 7,
        r'\$87500 - \$99999': 8,
        r'\$100000 or More': 9,
    },
    'age': {'<21': 1, '21-25': 2, '26-30': 3, '31-35': 4, '36-40': 5, '41-45': 6, '46-49': 7, '50+': 8},
    'time': {'7AM': 1, '10AM': 2, '2PM': 3, '6PM': 4, '10PM': 5},
    #numeric to ordinal integer encoding
    'temperature': {30: 1, 55: 2, 80: 3},
}
#encode a copy of the mapped columns, then tag the new columns with the encoding name
df_ordinal_integer_encoding = (
    df.loc[:, list(ordinal_integer_encoding_dictionary)]
    .replace(ordinal_integer_encoding_dictionary)
    .rename(columns=lambda column_name: str(column_name) + '_ordinal_integer_encoding')
)
del ordinal_integer_encoding_dictionary
#p(df_ordinal_integer_encoding)
time: 32.8 ms (started: 2023-09-28 17:19:57 -07:00)
#venue type visits per month yes response to ordinal integer encoding: bar, coffeehouse, carryaway, restaurantlessthan20, restaurant20to50
#all five venue columns share one coding: answered frequencies rank 1 ('never') to 5 ('>8'), 'no response' is 0
venue_visit_frequency_rank_by_label = {'never': 1, '<1': 2, '1-3': 3, '4-8': 4, '>8': 5, 'no response': 0}
venue_visit_frequency_yes_response_ordinal_integer_encoding_dictionary = {
    venue_column_name: dict(venue_visit_frequency_rank_by_label)
    for venue_column_name in ['Bar', 'CoffeeHouse', 'CarryAway', 'RestaurantLessThan20', 'Restaurant20To50']
}
#note: this single-column mapping stays defined after the cell; it is not part of the del below
coffeehouse_venue_visit_frequency_yes_response_ordinal_integer_encoding = {'CoffeeHouse': dict(venue_visit_frequency_rank_by_label)}
#encode a copy of the venue columns, then tag the new columns with the encoding name
df_venue_visit_frequency_yes_response_ordinal_integer_encoding = (
    df.loc[:, list(venue_visit_frequency_yes_response_ordinal_integer_encoding_dictionary)]
    .replace(venue_visit_frequency_yes_response_ordinal_integer_encoding_dictionary)
    .rename(columns=lambda column_name: str(column_name) + '_venue_visit_frequency_yes_response_ordinal_integer_encoding')
)
del venue_visit_frequency_rank_by_label, venue_visit_frequency_yes_response_ordinal_integer_encoding_dictionary
#p(df_venue_visit_frequency_yes_response_ordinal_integer_encoding)
time: 31.6 ms (started: 2023-09-28 17:19:57 -07:00)
#venue type visits per month no response to indicator variable: bar, coffeehouse, carryaway, restaurantlessthan20, restaurant20to50
#all five venue columns share one coding: 1 when the value is 'no response', 0 for every answered frequency
venue_visit_frequency_no_response_flag_by_label = {'never': 0, '<1': 0, '1-3': 0, '4-8': 0, '>8': 0, 'no response': 1}
venue_visit_frequency_no_response_indicator_variable_dictionary = {
    venue_column_name: dict(venue_visit_frequency_no_response_flag_by_label)
    for venue_column_name in ['Bar', 'CoffeeHouse', 'CarryAway', 'RestaurantLessThan20', 'Restaurant20To50']
}
#encode a copy of the venue columns, then tag the new columns with the indicator name
df_venue_visit_frequency_no_response_indicator_variable = (
    df.loc[:, list(venue_visit_frequency_no_response_indicator_variable_dictionary)]
    .replace(venue_visit_frequency_no_response_indicator_variable_dictionary)
    .rename(columns=lambda column_name: str(column_name) + '_venue_visit_frequency_no_response_indicator')
)
del venue_visit_frequency_no_response_flag_by_label, venue_visit_frequency_no_response_indicator_variable_dictionary
#p(df_venue_visit_frequency_no_response_indicator_variable)
time: 31.6 ms (started: 2023-09-28 17:19:57 -07:00)
#concatenate the data frames
#append every encoded data frame to the cleaned columns, side by side (axis=1 -> new columns, rows aligned on the index)
df = pd.concat(
    [
        df,
        df_category_representative_numeric_encoding,
        df_binary_encoding,
        df_ordinal_integer_encoding,
        df_venue_visit_frequency_yes_response_ordinal_integer_encoding,
        df_venue_visit_frequency_no_response_indicator_variable,
    ],
    axis=1,
)
#the separate encoded frames are now part of df, so drop their names
del df_category_representative_numeric_encoding
del df_binary_encoding
del df_ordinal_integer_encoding
del df_venue_visit_frequency_yes_response_ordinal_integer_encoding
del df_venue_visit_frequency_no_response_indicator_variable
p(df)
(12684, 46)
destination | passenger | weather | temperature | time | coupon_venue_type | expiration | gender | age | maritalStatus | has_children | education | occupation | income | car | Bar | CoffeeHouse | CarryAway | RestaurantLessThan20 | Restaurant20To50 | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | Y | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11199 | Home | Alone | Sunny | 80 | 6PM | Carry out & Take away | 2h | Male | 21-25 | Single | 0 | Bachelors degree | Student | \$12500 - \$24999 | no response | never | no response | no response | no response | never | 1 | 0 | 0 | 1 | 2 | 18 | 23 | 18749.5 | 1 | 0 | 3 | 5 | 2 | 2 | 4 | 3 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 0 |
1474 | Work | Alone | Sunny | 55 | 7AM | Bar | 1d | Male | 46-49 | Married partner | 1 | Graduate degree (Masters or Doctorate) | Management | \$87500 - \$99999 | no response | never | <1 | <1 | never | <1 | 1 | 1 | 1 | 0 | 24 | 7 | 48 | 93749.5 | 1 | 1 | 2 | 6 | 8 | 7 | 1 | 2 | 1 | 2 | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 0 |
10836 | Home | Alone | Sunny | 30 | 6PM | Carry out & Take away | 1d | Male | 26-30 | Single | 0 | Some college - no degree | Sales & Related | \$37500 - \$49999 | no response | <1 | no response | >8 | <1 | never | 0 | 0 | 1 | 1 | 24 | 18 | 28 | 43749.5 | 1 | 1 | 3 | 3 | 4 | 3 | 4 | 1 | 2 | 0 | 5 | 2 | 1 | 0 | 1 | 0 | 0 | 0 |
4567 | No Urgent Place | Alone | Sunny | 80 | 10AM | Bar | 1d | Female | 21-25 | Unmarried partner | 0 | Graduate degree (Masters or Doctorate) | Education&Training&Library | \$37500 - \$49999 | no response | 1-3 | <1 | <1 | <1 | never | 0 | 0 | 1 | 0 | 24 | 10 | 23 | 43749.5 | 0 | 1 | 2 | 6 | 4 | 2 | 2 | 3 | 3 | 2 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
5658 | No Urgent Place | Alone | Sunny | 80 | 2PM | Restaurant(<20) | 2h | Female | 31-35 | Single | 1 | Bachelors degree | Production Occupations | \$37500 - \$49999 | no response | never | never | >8 | 4-8 | never | 1 | 0 | 1 | 1 | 2 | 14 | 33 | 43749.5 | 0 | 0 | 4 | 5 | 4 | 4 | 3 | 3 | 1 | 1 | 5 | 4 | 1 | 0 | 0 | 0 | 0 | 0 |
11343 | Work | Alone | Sunny | 80 | 7AM | Restaurant(20-50) | 2h | Female | 36-40 | Single | 1 | Bachelors degree | Food Preparation & Serving Related | \$12500 - \$24999 | no response | 1-3 | <1 | >8 | never | never | 0 | 0 | 0 | 0 | 2 | 7 | 38 | 18749.5 | 0 | 0 | 5 | 5 | 2 | 5 | 1 | 3 | 3 | 2 | 5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
9036 | Home | Alone | Snowy | 30 | 10PM | Restaurant(<20) | 2h | Male | 26-30 | Single | 0 | Some college - no degree | Student | \$12500 - \$24999 | no response | <1 | never | >8 | never | 1-3 | 1 | 1 | 1 | 0 | 2 | 22 | 28 | 18749.5 | 1 | 0 | 4 | 3 | 2 | 3 | 5 | 1 | 2 | 1 | 5 | 1 | 3 | 0 | 0 | 0 | 0 | 0 |
11050 | Home | Alone | Sunny | 80 | 6PM | Restaurant(20-50) | 1d | Male | 46-49 | Single | 0 | Some college - no degree | Sales & Related | Less than \$12500 | no response | <1 | <1 | >8 | >8 | >8 | 0 | 0 | 0 | 0 | 24 | 18 | 48 | 6250.0 | 1 | 1 | 5 | 3 | 1 | 7 | 4 | 3 | 2 | 2 | 5 | 5 | 5 | 0 | 0 | 0 | 0 | 0 |
784 | Work | Alone | Sunny | 80 | 7AM | Carry out & Take away | 2h | Female | 21-25 | Single | 0 | Graduate degree (Masters or Doctorate) | Legal | \$25000 - \$37499 | no response | <1 | <1 | <1 | no response | <1 | 0 | 0 | 0 | 1 | 2 | 7 | 23 | 31249.5 | 0 | 0 | 3 | 6 | 3 | 2 | 1 | 3 | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 0 | 1 | 0 |
1818 | No Urgent Place | Kid(s) | Sunny | 80 | 10AM | Bar | 1d | Female | 36-40 | Married partner | 1 | Bachelors degree | Retired | \$50000 - \$62499 | no response | 1-3 | never | >8 | <1 | never | 1 | 0 | 1 | 0 | 24 | 10 | 38 | 56249.5 | 0 | 1 | 2 | 5 | 5 | 5 | 2 | 3 | 3 | 1 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
time: 24.5 ms (started: 2023-09-28 17:19:57 -07:00)
#load the saved train/test collection when it exists; otherwise build it and save it
#the collection holds four objects: X_train, X_test, Y_train, Y_test
df_collection_filename = 'df_collection_train_test_v' + filename_version + '.pkl'
df_readback = icr.return_processed_collection_if_it_exists(filename=df_collection_filename)
#identity test against None: "!= None" would defer to the returned object's own comparison operator
if df_readback is not None:
    df_collection = df_readback
else:
    df_collection = {}
    #split the data into train and test: 20% held out, fixed random_state so the split is reproducible
    df_collection['X_train'], df_collection['X_test'], df_collection['Y_train'], df_collection['Y_test'] = \
        train_test_split(df.drop(columns=['Y']), df.loc[:, 'Y'], test_size=.2, random_state=200)
    #save preprocessed data frame collection
    #NOTE(review): the else-branch extent is reconstructed (indentation was lost); saving is assumed to belong to the build path only - confirm
    df_collection = icr.save_and_return_collection(data_frame_collection=df_collection, filename=df_collection_filename)
p(df_collection['X_train'])
This file already exists (10147, 45)
destination | passenger | weather | temperature | time | coupon_venue_type | expiration | gender | age | maritalStatus | has_children | education | occupation | income | car | Bar | CoffeeHouse | CarryAway | RestaurantLessThan20 | Restaurant20To50 | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8630 | No Urgent Place | Friend(s) | Sunny | 80 | 6PM | Coffee House | 1d | Female | 21-25 | Married partner | 0 | Bachelors degree | Unemployed | \$87500 - \$99999 | no response | <1 | never | 4-8 | never | never | 0 | 0 | 1 | 24 | 18 | 23 | 93749.5 | 0 | 1 | 1 | 5 | 8 | 2 | 4 | 3 | 2 | 1 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
2418 | No Urgent Place | Friend(s) | Sunny | 55 | 2PM | Coffee House | 2h | Female | 26-30 | Single | 0 | Bachelors degree | Arts Design Entertainment Sports & Media | \$12500 - \$24999 | no response | >8 | >8 | >8 | >8 | never | 0 | 0 | 1 | 2 | 14 | 28 | 18749.5 | 0 | 0 | 1 | 5 | 2 | 3 | 3 | 2 | 5 | 5 | 5 | 5 | 1 | 0 | 0 | 0 | 0 | 0 |
10804 | No Urgent Place | Alone | Rainy | 55 | 2PM | Carry out & Take away | 2h | Female | 31-35 | Single | 0 | Graduate degree (Masters or Doctorate) | Legal | \$75000 - \$87499 | no response | no response | >8 | >8 | >8 | >8 | 0 | 0 | 1 | 2 | 14 | 33 | 81249.5 | 0 | 0 | 3 | 6 | 7 | 4 | 3 | 2 | 0 | 5 | 5 | 5 | 5 | 1 | 0 | 0 | 0 | 0 |
747 | No Urgent Place | Friend(s) | Sunny | 80 | 2PM | Coffee House | 1d | Female | 36-40 | Married partner | 0 | Some college - no degree | Healthcare Support | \$25000 - \$37499 | no response | <1 | <1 | >8 | >8 | <1 | 1 | 0 | 1 | 24 | 14 | 38 | 31249.5 | 0 | 1 | 1 | 3 | 3 | 5 | 3 | 3 | 2 | 2 | 5 | 5 | 2 | 0 | 0 | 0 | 0 | 0 |
7333 | No Urgent Place | Friend(s) | Sunny | 80 | 6PM | Coffee House | 1d | Male | <21 | Unmarried partner | 1 | Some college - no degree | Building & Grounds Cleaning & Maintenance | \$25000 - \$37499 | no response | <1 | never | <1 | <1 | never | 0 | 0 | 1 | 24 | 18 | 18 | 31249.5 | 1 | 1 | 1 | 3 | 3 | 1 | 4 | 3 | 2 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
10949 | No Urgent Place | Kid(s) | Snowy | 30 | 6PM | Bar | 1d | Female | 31-35 | Married partner | 1 | Bachelors degree | Student | \$37500 - \$49999 | no response | 1-3 | never | no response | >8 | never | 0 | 0 | 1 | 24 | 18 | 33 | 43749.5 | 0 | 1 | 2 | 5 | 4 | 4 | 4 | 1 | 3 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 0 | 0 |
11937 | No Urgent Place | Alone | Rainy | 55 | 10AM | Bar | 1d | Female | 31-35 | Married partner | 0 | Some college - no degree | Computer & Mathematical | \$100000 or More | no response | never | never | no response | >8 | never | 1 | 0 | 1 | 24 | 10 | 33 | 150000.0 | 0 | 1 | 2 | 3 | 9 | 4 | 2 | 2 | 1 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 0 | 0 |
735 | Home | Alone | Sunny | 55 | 6PM | Restaurant(20-50) | 1d | Male | 41-45 | Single | 0 | Some college - no degree | Sales & Related | \$37500 - \$49999 | no response | 1-3 | 1-3 | >8 | <1 | never | 1 | 0 | 1 | 24 | 18 | 43 | 43749.5 | 1 | 1 | 5 | 3 | 4 | 6 | 4 | 2 | 3 | 3 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
67 | No Urgent Place | Friend(s) | Sunny | 80 | 10AM | Coffee House | 2h | Male | 46-49 | Married partner | 1 | Bachelors degree | Education&Training&Library | \$75000 - \$87499 | no response | never | <1 | <1 | <1 | never | 0 | 0 | 1 | 2 | 10 | 48 | 81249.5 | 1 | 0 | 1 | 5 | 7 | 7 | 2 | 3 | 1 | 2 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
3251 | No Urgent Place | Friend(s) | Sunny | 80 | 10AM | Coffee House | 2h | Female | 26-30 | Married partner | 1 | Some college - no degree | Unemployed | \$25000 - \$37499 | no response | 1-3 | never | <1 | never | never | 0 | 0 | 1 | 2 | 10 | 28 | 31249.5 | 0 | 0 | 1 | 3 | 3 | 3 | 2 | 3 | 3 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
time: 30 ms (started: 2023-09-28 17:19:57 -07:00)
#shared plot palette, addressed by position in the plotting cells
#NOTE(review): the hex values look like the first nine ColorBrewer 'Paired' colors - confirm if the source matters
color_list = [
    '#a6cee3',
    '#1f78b4',
    '#b2df8a',
    '#33a02c',
    '#fb9a99',
    '#e31a1c',
    '#fdbf6f',
    '#ff7f00',
    '#cab2d6',
]
time: 575 µs (started: 2023-09-28 17:19:57 -07:00)
#bar charts of the coupon acceptance (Y) percentage distribution and frequency distribution
bar_orientation='vertical'
dpi=100
figure_filename = '../reports/figures/figure_coupon_acceptance_percentage_and_frequency_distribution_'+ str(bar_orientation) + '_bar_plot_all_data_dpi_' + str(dpi) + '_v' + filename_version + '.png'
figure_filename_axes1 = '../reports/figures/figure_coupon_acceptance_percentage_and_frequency_distribution_'+ str(bar_orientation) + '_bar_plot_axes1_all_data_dpi_' + str(dpi) + '_v' + filename_version + '.png'
figure_filename_axes2 = '../reports/figures/figure_coupon_acceptance_percentage_and_frequency_distribution_'+ str(bar_orientation) + '_bar_plot_axes2_all_data_dpi_' + str(dpi) + '_v' + filename_version + '.png'
#'together' draws both charts side by side in one figure; 'separate' draws and saves one figure per chart
plot_save_together_separate = 'together'
figsize=(12, 6.2)
bar_width=0.8
xlabel = 'Coupon Acceptance'
ylabel = 'Percentage'
title = 'Coupon Acceptance '+ str(ylabel) + ' Distribution'
target_value_dictionary = {1: 'Yes', 0:'No'}
#bind each color to its label so the bars keep their color whichever class is more frequent
target_color_dictionary = {'Yes': color_list[3], 'No': color_list[0]}

#count each target value, most frequent first
#the count column is named explicitly because the default label left by reset_index() differs between pandas versions
df_Y_frequency_percentage = df.loc[:, 'Y'].value_counts().rename_axis('Y').reset_index(name='frequency')
df_Y_frequency_percentage['percentage'] = df_Y_frequency_percentage['frequency'] / df_Y_frequency_percentage['frequency'].sum(axis=0) * 100
x_label_list = df_Y_frequency_percentage.loc[:, 'Y'].replace(target_value_dictionary).to_list()
bar_color_list = [target_color_dictionary[x_label] for x_label in x_label_list]

#create the figure(s) and remember them so layout can be applied to every one
if plot_save_together_separate == 'together':
    figure, (axes1, axes2) = plt.subplots(nrows=1, ncols=2, figsize=figsize)
    figure_list = [figure]
elif plot_save_together_separate == 'separate':
    figure1, axes1 = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    figure2, axes2 = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    figure_list = [figure1, figure2]
else:
    #fail with a clear message instead of a NameError on axes1 further down
    raise ValueError("plot_save_together_separate must be 'together' or 'separate', got " + repr(plot_save_together_separate))

#make percentage subplot
annotation_round_by_number = 0
percentage_list = df_Y_frequency_percentage.loc[:, 'percentage'].to_list()
axes1.bar(x=x_label_list, height=percentage_list, width=bar_width, color=bar_color_list)
axes1.set_title(label=title, fontsize=18)
axes1.set_xlabel(xlabel=xlabel, fontsize=17)
axes1.set_ylabel(ylabel=ylabel, fontsize=17)
axes1.tick_params(axis='both', which='major', labelsize=15)
#write the whole-number percentage centered above each bar
for rectangle, annotation in zip(axes1.patches, percentage_list):
    axes1.text(rectangle.get_x() + rectangle.get_width() / 2, rectangle.get_height(), format(round(annotation, annotation_round_by_number), '.0f') + '%', ha="center", va="bottom", fontsize=15)

#make frequency subplot
ylabel = 'Frequency'
title = 'Coupon Acceptance '+ str(ylabel) + ' Distribution'
#-2 rounds the annotated counts to the nearest hundred
annotation_round_by_number = -2
frequency_list = df_Y_frequency_percentage.loc[:, 'frequency'].to_list()
axes2.bar(x=x_label_list, height=frequency_list, width=bar_width, color=bar_color_list)
axes2.set_title(label=title, fontsize=18)
axes2.set_xlabel(xlabel=xlabel, fontsize=17)
axes2.set_ylabel(ylabel=ylabel, fontsize=17)
axes2.tick_params(axis='both', which='major', labelsize=15)
#write the rounded count slightly above each bar
for rectangle, annotation in zip(axes2.patches, frequency_list):
    axes2.text(rectangle.get_x() + rectangle.get_width() / 2, rectangle.get_height() + 5, round(annotation, annotation_round_by_number), ha="center", va="bottom", fontsize=15)

#plt.tight_layout() only adjusts the current figure, so lay out each figure explicitly
for figure_to_layout in figure_list:
    figure_to_layout.tight_layout()

if plot_save_together_separate == 'together':
    figure.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
elif plot_save_together_separate == 'separate':
    figure1.savefig(figure_filename_axes1, bbox_inches='tight', dpi=dpi)
    figure2.savefig(figure_filename_axes2, bbox_inches='tight', dpi=dpi)
plt.show()
time: 262 ms (started: 2023-09-28 17:19:57 -07:00)
#sample size, population size, margin of error, confidence interval
#initialize parameters
z_score = 1.96
confidence_interval = .95
#coupon acceptance proportion; the name p_value is kept for compatibility, but this is a proportion, not a hypothesis-test p-value
p_value = 0.5684326710816777
#margin of error is the half-width of the confidence interval
margin_of_error = .0381
N_population_size = 2.4 * 10**8
#known 652 surveys acceptance, https://jmlr.org/papers/volume18/16-003/16-003.pdf
survey_participant_count = 652

#calculate sample size
#sample size for an unlimited population: z^2 * p * (1 - p) / e^2
unbounded_sample_size = z_score**2*p_value*(1-p_value)/margin_of_error**2
#finite population correction
sample_size = unbounded_sample_size/(1+(unbounded_sample_size/N_population_size))

confidence_percentage = round(confidence_interval*100, None)
required_sample_size = round(sample_size)
print(f'The {confidence_percentage}% confidence interval for a {required_sample_size} sample size has a '
      f'{margin_of_error} margin of error and {round(p_value, 5)} sample proportion representing a '
      f'{round(N_population_size):,} population size'
      f'. The sample of {survey_participant_count} survey participants is slightly more than {required_sample_size}.\n')

round_by_number = None
#the interval is proportion +/- the full margin of error, because margin_of_error enters the
#sample size formula above as the half-width; halving it again would understate the interval
confidence_interval_lower_bound = p_value - margin_of_error
confidence_interval_upper_bound = p_value + margin_of_error
print(f'Therefore, {confidence_percentage}% of the time, we expect a coupon acceptance rate between '
      f'{round(confidence_interval_lower_bound*100, round_by_number)}% and '
      f'{round(confidence_interval_upper_bound*100, round_by_number)}%')
The 95% confidence interval for a 649 sample size has a 0.0381 margin of error and 0.56843 p-value representing a 240,000,000 population size. The sample of 652 survey participants is slightly more than 649. Therefore, 95% of the time, we expect a coupon acceptance rate between 55% and 59% time: 1.66 ms (started: 2023-09-28 17:19:58 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per expiration value, annotated with frequencies
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'expiration'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = feature_column_name_label, 'Frequency'
xtick_rotation = 0
xtick_dictionary = {'1d': '1 day', '2h': '2 hours'}
feature_value_order_list = ['1d', '2h']
bar_orientation, annotation_type = 'vertical', 'frequency'
frequency_annotation_round_by_number = -2
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
)
time: 170 ms (started: 2023-09-28 17:19:58 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per income bracket, annotated with percentages
feature_column_name = 'income'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
#NOTE(review): ylabel is 'Percentage' here while the other percentage-annotated cells label the y axis 'Frequency' - confirm which is intended
xlabel, ylabel = feature_column_name_label, 'Percentage'
#income bracket labels are long, so the x tick labels are rotated
xtick_rotation = 90
xtick_dictionary = None
#income brackets from lowest to highest
feature_value_order_list = [
    'Less than \$12500',
    '\$12500 - \$24999',
    '\$25000 - \$37499',
    '\$37500 - \$49999',
    '\$50000 - \$62499',
    '\$62500 - \$74999',
    '\$75000 - \$87499',
    '\$87500 - \$99999',
    '\$100000 or More',
]
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -45
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
)
time: 305 ms (started: 2023-09-28 17:19:58 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per travel direction value, annotated with percentages
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'direction_same_or_opposite'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = feature_column_name_label, 'Frequency'
xtick_rotation = 0
#tick labels shown in place of the 0/1 codes
xtick_dictionary = {0: 'Same', 1: 'Opposite'}
feature_value_order_list = [0, 1]
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
y_upper_limit = None
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
)
time: 150 ms (started: 2023-09-28 17:19:58 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per coupon expiration value, annotated with percentages
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'expiration'
#labels carry a 'Coupon ' prefix in this cell
feature_column_name_label = 'Coupon ' + feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = 'Coupon ' + feature_column_name, 'Frequency'
xtick_rotation = 0
xtick_dictionary = {'1d': '1 day', '2h': '2 hours'}
feature_value_order_list = ['1d', '2h']
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
y_upper_limit = None
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
)
time: 159 ms (started: 2023-09-28 17:19:58 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per gender, annotated with percentages
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'gender'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = feature_column_name_label, 'Frequency'
xtick_rotation = 0
xtick_dictionary = None
feature_value_order_list = ['Female', 'Male']
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
#explicit upper limit passed to the plot helper for this chart
y_upper_limit = 7500
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
)
time: 164 ms (started: 2023-09-28 17:19:58 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per destination, annotated with percentages
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'destination'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = feature_column_name_label, 'Frequency'
xtick_rotation = 0
#tick labels in sentence case
xtick_dictionary = {'No Urgent Place': 'No urgent place', 'Work': 'Work', 'Home': 'Home'}
feature_value_order_list = ['No Urgent Place', 'Work', 'Home']
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
y_upper_limit = None
rectangle_annotation_y_offset = -100
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
)
time: 172 ms (started: 2023-09-28 17:19:59 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per passenger type, annotated with percentages
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'passenger'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = feature_column_name_label, 'Frequency'
xtick_rotation = 0
xtick_dictionary = None
feature_value_order_list = ['Alone', 'Friend(s)', 'Partner', 'Kid(s)']
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
y_upper_limit = None
rectangle_annotation_y_offset = -140
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
#feature_column_name was missing from this call only; it is passed explicitly here so the call
#matches every other plot_vertical_stacked_bar_graph call in the notebook
#NOTE(review): the helper's default for feature_column_name is not visible from this cell - confirm the figure is unchanged
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
)
time: 188 ms (started: 2023-09-28 17:19:59 -07:00)
#stacked bar chart of coupon acceptance and coupon refusal per weather value, annotated with percentages
#NOTE: the data frame names below say 'income' but hold the table for feature_column_name; the names are shared by the plotting cells
feature_column_name = 'weather'
feature_column_name_label = feature_column_name.capitalize()
title = f'{feature_column_name_label} Frequency Distribution'
xlabel, ylabel = feature_column_name_label, 'Frequency'
xtick_rotation = 0
xtick_dictionary = None
feature_value_order_list = ['Sunny', 'Snowy', 'Rainy']
bar_orientation, annotation_type = 'vertical', 'percentage'
frequency_annotation_round_by_number = -2
y_upper_limit = None
rectangle_annotation_y_offset = -140
dpi = 100
figsize = (8, 8)
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]
figure_filename = f'../reports/figures/figure_{feature_column_name}_frequency_distribution_{bar_orientation}_stacked_bar_coupon_acceptance_coupon_refusal_{annotation_type}_annotation_all_data_dpi_{dpi}_v{filename_version}.png'

#get column name, coupon acceptance (frequency), coupon refusal (frequency)
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
#sort by value order list, then drop the '<feature>_rank' column left by the sort helper
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[feature_column_name + '_rank'])
#index feature column
df_index_income_coupon_refusal_coupon_acceptance = df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
)
time: 166 ms (started: 2023-09-28 17:19:59 -07:00)
# Time of day: vertical stacked bar chart of coupon acceptance vs. refusal frequency.
# Fix: figsize was assigned but never forwarded to the plot call; it is now passed,
# matching the later feature cells in this notebook that pass figsize=figsize.

# --- labels and axis settings ---
feature_column_name = 'time'
feature_column_name_label = feature_column_name.capitalize()
xlabel = feature_column_name_label
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
xtick_dictionary = None  # no tick relabeling for this feature
feature_value_order_list = ['7AM', '10AM', '2PM', '6PM', '10PM']

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -80
y_upper_limit = None
figsize = (8, 8)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# NOTE: the data frame names below keep the 'income' wording from an earlier cell,
# although they hold the time-of-day breakdown here.
# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
)
time: 206 ms (started: 2023-09-28 17:19:59 -07:00)
# Coupon venue type: vertical stacked bar chart of coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'coupon_venue_type'
feature_column_name_label = 'Coupon Venue Type'
xlabel = 'Coupon venue type'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 90
feature_value_order_list = [
    'Coffee House',
    'Restaurant(<20)',
    'Carry out & Take away',
    'Bar',
    'Restaurant(20-50)',
]
# Raw category value -> tick label.
xtick_dictionary = {
    'Coffee House': 'Coffee house',
    'Restaurant(<20)': 'Low-cost restaurant',
    'Carry out & Take away': 'Takeout',
    'Bar': 'Bar',
    'Restaurant(20-50)': 'Mid-range restaurant',
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -80
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
)
time: 240 ms (started: 2023-09-28 17:19:59 -07:00)
# Age group: vertical stacked bar chart of coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'age'
feature_column_name_label = 'Age Group'
xlabel = 'Age'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
xtick_dictionary = None  # no tick relabeling for this feature
feature_value_order_list = ['<21', '21-25', '26-30', '31-35', '36-40', '41-45', '46-49', '50+']

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -40
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
)
time: 236 ms (started: 2023-09-28 17:20:00 -07:00)
# Marital status: vertical stacked bar chart of coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'maritalStatus'
feature_column_name_label = 'Marital Status'
xlabel = 'Marital status'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 90
xtick_dictionary = None  # no tick relabeling for this feature
feature_value_order_list = ['Married partner', 'Single', 'Unmarried partner', 'Divorced', 'Widowed']

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -40
y_upper_limit = None
figsize = (8, 8)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
)
time: 217 ms (started: 2023-09-28 17:20:00 -07:00)
# Education: vertical stacked bar chart of coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'education'
feature_column_name_label = feature_column_name.capitalize()
xlabel = feature_column_name_label
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 90
xtick_dictionary = None  # no tick relabeling for this feature
feature_value_order_list = [
    'Some High School',
    'High School Graduate',
    'Some college - no degree',
    'Associates degree',
    'Bachelors degree',
    'Graduate degree (Masters or Doctorate)',
]

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -40
y_upper_limit = None
figsize = (8, 8)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=14,
    dpi=dpi,
    xtick_rotation=xtick_rotation,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
)
time: 253 ms (started: 2023-09-28 17:20:00 -07:00)
# Occupation: vertical stacked bar chart of coupon acceptance vs. refusal frequency.
# Uses a wide figure and smaller annotation text than the preceding feature cells.

# --- labels and axis settings ---
feature_column_name = 'occupation'
feature_column_name_label = feature_column_name.capitalize()
xlabel = feature_column_name_label
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 90
xtick_dictionary = None  # no tick relabeling for this feature
feature_value_order_list = [
    'Unemployed',
    'Student',
    'Computer & Mathematical',
    'Sales & Related',
    'Education&Training&Library',
    'Management',
    'Office & Administrative Support',
    'Arts Design Entertainment Sports & Media',
    'Business & Financial',
    'Retired',
    'Food Preparation & Serving Related',
    'Healthcare Practitioners & Technical',
    'Healthcare Support',
    'Community & Social Services',
    'Legal',
    'Transportation & Material Moving',
    'Architecture & Engineering',
    'Personal Care & Service',
    'Protective Service',
    'Life Physical Social Science',
    'Construction & Extraction',
    'Installation Maintenance & Repair',
    'Production Occupations',
    'Farming Fishing & Forestry',
    'Building & Grounds Cleaning & Maintenance',
]

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -1
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (19, 10)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 787 ms (started: 2023-09-28 17:20:00 -07:00)
# Invert the visit-frequency label -> ordinal code mapping so the result can be
# copied into xtick_dictionary (ordinal code -> label) in the cells that follow.
icr.reverse_key_value_of_dictionary(
    {
        'never': 1,
        '<1': 2,
        '1-3': 3,
        '4-8': 4,
        '>8': 5,
        'no response': 0,
    }
)
{1: 'never', 2: '<1', 3: '1-3', 4: '4-8', 5: '>8', 0: 'no response'}
time: 2.36 ms (started: 2023-09-28 17:20:01 -07:00)
# Bar monthly visits (ordinal encoding): vertical stacked bar chart of coupon
# acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding'
feature_column_name_label = 'Bar Monthly Visit'
xlabel = 'Bar monthly visits'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
feature_value_order_list = [0, 1, 2, 3, 4, 5]
# Ordinal code -> tick label.
xtick_dictionary = {
    1: 'never',
    2: '<1',
    3: '1-3',
    4: '4-8',
    5: '>8',
    0: 'no response',
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 202 ms (started: 2023-09-28 17:20:01 -07:00)
# Coffee house monthly visits (ordinal encoding): vertical stacked bar chart of
# coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding'
feature_column_name_label = 'Coffee House Monthly Visit'
xlabel = 'Coffee house monthly visits'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
feature_value_order_list = [0, 1, 2, 3, 4, 5]
# Ordinal code -> tick label.
xtick_dictionary = {
    1: 'never',
    2: '<1',
    3: '1-3',
    4: '4-8',
    5: '>8',
    0: 'no response',
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 199 ms (started: 2023-09-28 17:20:01 -07:00)
# Takeout monthly visits (ordinal encoding): vertical stacked bar chart of coupon
# acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding'
feature_column_name_label = 'Takeout Monthly Visit'
xlabel = 'Takeout monthly visits'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
feature_value_order_list = [0, 1, 2, 3, 4, 5]
# Ordinal code -> tick label.
xtick_dictionary = {
    1: 'never',
    2: '<1',
    3: '1-3',
    4: '4-8',
    5: '>8',
    0: 'no response',
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 190 ms (started: 2023-09-28 17:20:01 -07:00)
# Low-cost restaurant monthly visits (ordinal encoding): vertical stacked bar chart
# of coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding'
feature_column_name_label = 'Low-Cost Restaurant Monthly Visit'
xlabel = 'Low-cost restaurant monthly visits'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
feature_value_order_list = [0, 1, 2, 3, 4, 5]
# Ordinal code -> tick label.
xtick_dictionary = {
    1: 'never',
    2: '<1',
    3: '1-3',
    4: '4-8',
    5: '>8',
    0: 'no response',
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 195 ms (started: 2023-09-28 17:20:02 -07:00)
# Mid-range restaurant monthly visits (ordinal encoding): vertical stacked bar chart
# of coupon acceptance vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding'
feature_column_name_label = 'Mid-Range Restaurant Monthly Visit'
xlabel = 'Mid-Range restaurant monthly visits'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
feature_value_order_list = [0, 1, 2, 3, 4, 5]
# Ordinal code -> tick label.
xtick_dictionary = {
    1: 'never',
    2: '<1',
    3: '1-3',
    4: '4-8',
    5: '>8',
    0: 'no response',
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 199 ms (started: 2023-09-28 17:20:02 -07:00)
# Temperature (ordinal encoding): vertical stacked bar chart of coupon acceptance
# vs. refusal frequency.

# --- labels and axis settings ---
feature_column_name = 'temperature_ordinal_integer_encoding'
feature_column_name_label = 'Temperature'
xlabel = 'Temperature (F)'
ylabel = 'Frequency'
title = f'{feature_column_name_label} Frequency Distribution'  # not passed to the plot call below
xtick_rotation = 0
feature_value_order_list = [1, 2, 3]
# Ordinal code -> tick label.
xtick_dictionary = {
    1: 30,
    2: 55,
    3: 80,
}

# --- annotation and figure settings ---
bar_orientation = 'vertical'  # only used to build the output filename
annotation_type = 'percentage'
annotation_text_size = 13
frequency_annotation_round_by_number = -2
rectangle_annotation_y_offset = -20
y_upper_limit = None
figsize = (8, 6)
dpi = 100
color_index_list = [3, 0]
colors = [color_list[color_index] for color_index in color_index_list]

# Output path encodes feature, orientation, annotation type, dpi and version.
figure_filename = ''.join([
    '../reports/figures/figure_', str(feature_column_name),
    '_frequency_distribution_', str(bar_orientation),
    '_stacked_bar_coupon_acceptance_coupon_refusal_', str(annotation_type),
    '_annotation_all_data_dpi_', str(dpi),
    '_v', filename_version, '.png',
])

# Frequency table for the feature against the target (percentage columns requested).
df_income_coupon_acceptance_coupon_refusal = icr.get_feature_target_frequency_data_frame(
    df,
    feature_column_name=feature_column_name,
    append_percentage_true_false=True,
)
# Order rows by feature_value_order_list, then drop the '<feature>_rank' column.
df_income_coupon_acceptance_coupon_refusal = icr.sort_data_frame(
    df=df_income_coupon_acceptance_coupon_refusal,
    feature_column_name=feature_column_name,
    feature_value_order_list=feature_value_order_list,
    ascending_true_false=True,
).drop(columns=[f'{feature_column_name}_rank'])
# Feature values become the index of the frame handed to the plot function.
df_index_income_coupon_refusal_coupon_acceptance = (
    df_income_coupon_acceptance_coupon_refusal.set_index(feature_column_name)
)

icr.plot_vertical_stacked_bar_graph(
    df=df_index_income_coupon_refusal_coupon_acceptance,
    feature_column_name=feature_column_name,
    feature_column_name_label=feature_column_name_label,
    ylabel=ylabel,
    xlabel=xlabel,
    figure_filename=figure_filename,
    annotation_text_size=annotation_text_size,
    dpi=dpi,
    xtick_dictionary=xtick_dictionary,
    colors=colors,
    annotation_type=annotation_type,
    frequency_annotation_round_by_number=frequency_annotation_round_by_number,
    y_upper_limit=y_upper_limit,
    rectangle_annotation_y_offset=rectangle_annotation_y_offset,
    figsize=figsize,
    xtick_rotation=xtick_rotation,
)
time: 161 ms (started: 2023-09-28 17:20:02 -07:00)
# Build the model feature matrix: one-hot indicators for the non-numeric columns,
# side by side with the numeric columns (target 'Y' excluded).

# Partition df's column names by dtype.
column_name_list_numeric = df.select_dtypes('number').columns.tolist()
column_name_list_not_numeric = [
    column_name
    for column_name in df.columns
    if column_name not in column_name_list_numeric
]

# Target, numeric features (without the target column) and categorical features.
df_target = df[['Y']]
df_numeric_features = df[column_name_list_numeric].drop(columns=['Y'])
df_categorical_features = df[column_name_list_not_numeric]

# One indicator column per category value; the source frame is released afterwards.
df_category_indicator_features = pd.get_dummies(df_categorical_features)
del df_categorical_features

# Indicator columns first, numeric columns after, aligned on the row index.
df_indicator_numeric_features = pd.concat(
    [df_category_indicator_features, df_numeric_features],
    axis='columns',
)
del df_numeric_features

# p is a helper defined elsewhere in the notebook; presumably it displays the frame.
p(df_indicator_numeric_features)
(12684, 140)
destination_Home | destination_No Urgent Place | destination_Work | passenger_Alone | passenger_Friend(s) | passenger_Kid(s) | passenger_Partner | weather_Rainy | weather_Snowy | weather_Sunny | time_10AM | time_10PM | time_2PM | time_6PM | time_7AM | coupon_venue_type_Bar | coupon_venue_type_Carry out & Take away | coupon_venue_type_Coffee House | coupon_venue_type_Restaurant(20-50) | coupon_venue_type_Restaurant(<20) | expiration_1d | expiration_2h | gender_Female | gender_Male | age_21-25 | age_26-30 | age_31-35 | age_36-40 | age_41-45 | age_46-49 | age_50+ | age_<21 | maritalStatus_Divorced | maritalStatus_Married partner | maritalStatus_Single | maritalStatus_Unmarried partner | maritalStatus_Widowed | education_Associates degree | education_Bachelors degree | education_Graduate degree (Masters or Doctorate) | education_High School Graduate | education_Some High School | education_Some college - no degree | occupation_Architecture & Engineering | occupation_Arts Design Entertainment Sports & Media | occupation_Building & Grounds Cleaning & Maintenance | occupation_Business & Financial | occupation_Community & Social Services | occupation_Computer & Mathematical | occupation_Construction & Extraction | occupation_Education&Training&Library | occupation_Farming Fishing & Forestry | occupation_Food Preparation & Serving Related | occupation_Healthcare Practitioners & Technical | occupation_Healthcare Support | occupation_Installation Maintenance & Repair | occupation_Legal | occupation_Life Physical Social Science | occupation_Management | occupation_Office & Administrative Support | occupation_Personal Care & Service | occupation_Production Occupations | occupation_Protective Service | occupation_Retired | occupation_Sales & Related | occupation_Student | occupation_Transportation & Material Moving | occupation_Unemployed | income_Less than \$12500 | income_\$100000 or More | income_\$12500 - \$24999 | income_\$25000 - \$37499 | income_\$37500 - \$49999 | 
income_\$50000 - \$62499 | income_\$62500 - \$74999 | income_\$75000 - \$87499 | income_\$87500 - \$99999 | car_Car that is too old to install Onstar :D | car_Mazda5 | car_Scooter and motorcycle | car_crossover | car_do not drive | car_no response | Bar_1-3 | Bar_4-8 | Bar_<1 | Bar_>8 | Bar_never | Bar_no response | CoffeeHouse_1-3 | CoffeeHouse_4-8 | CoffeeHouse_<1 | CoffeeHouse_>8 | CoffeeHouse_never | CoffeeHouse_no response | CarryAway_1-3 | CarryAway_4-8 | CarryAway_<1 | CarryAway_>8 | CarryAway_never | CarryAway_no response | RestaurantLessThan20_1-3 | RestaurantLessThan20_4-8 | RestaurantLessThan20_<1 | RestaurantLessThan20_>8 | RestaurantLessThan20_never | RestaurantLessThan20_no response | Restaurant20To50_1-3 | Restaurant20To50_4-8 | Restaurant20To50_<1 | Restaurant20To50_>8 | Restaurant20To50_never | Restaurant20To50_no response | temperature | has_children | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | 
RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11199 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 0 | 1 | 0 | 0 | 2 | 18 | 23 | 18749.5 | 1 | 0 | 3 | 5 | 2 | 2 | 4 | 3 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 0 |
1474 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 55 | 1 | 1 | 1 | 1 | 24 | 7 | 48 | 93749.5 | 1 | 1 | 2 | 6 | 8 | 7 | 1 | 2 | 1 | 2 | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 0 |
10836 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 30 | 0 | 0 | 0 | 1 | 24 | 18 | 28 | 43749.5 | 1 | 1 | 3 | 3 | 4 | 3 | 4 | 1 | 2 | 0 | 5 | 2 | 1 | 0 | 1 | 0 | 0 | 0 |
4567 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 0 | 0 | 0 | 1 | 24 | 10 | 23 | 43749.5 | 0 | 1 | 2 | 6 | 4 | 2 | 2 | 3 | 3 | 2 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
5658 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 1 | 0 | 1 | 2 | 14 | 33 | 43749.5 | 0 | 0 | 4 | 5 | 4 | 4 | 3 | 3 | 1 | 1 | 5 | 4 | 1 | 0 | 0 | 0 | 0 | 0 |
11343 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 0 | 0 | 0 | 2 | 7 | 38 | 18749.5 | 0 | 0 | 5 | 5 | 2 | 5 | 1 | 3 | 3 | 2 | 5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
9036 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 30 | 0 | 1 | 1 | 1 | 2 | 22 | 28 | 18749.5 | 1 | 0 | 4 | 3 | 2 | 3 | 5 | 1 | 2 | 1 | 5 | 1 | 3 | 0 | 0 | 0 | 0 | 0 |
11050 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 80 | 0 | 0 | 0 | 0 | 24 | 18 | 48 | 6250.0 | 1 | 1 | 5 | 3 | 1 | 7 | 4 | 3 | 2 | 2 | 5 | 5 | 5 | 0 | 0 | 0 | 0 | 0 |
784 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 80 | 0 | 0 | 0 | 0 | 2 | 7 | 23 | 31249.5 | 0 | 0 | 3 | 6 | 3 | 2 | 1 | 3 | 2 | 2 | 2 | 0 | 2 | 0 | 0 | 0 | 1 | 0 |
1818 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 1 | 0 | 1 | 24 | 10 | 38 | 56249.5 | 0 | 1 | 2 | 5 | 5 | 5 | 2 | 3 | 3 | 1 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
time: 61 ms (started: 2023-09-28 17:20:02 -07:00)
#file name of the pickled train/test data frame collection for this notebook version
data_frame_collection_filename = 'data_frame_collection_train_test_v' + filename_version + '.pkl'

#try to read a previously saved collection back from disk
#NOTE(review): the helper presumably returns None when no saved file exists -- confirm against icr
df_readback = icr.return_processed_collection_if_it_exists(filename=data_frame_collection_filename, parse_dates=False)

#identity test against None: an equality test ('!= None') would be evaluated elementwise
#and make the 'if' raise if the helper ever handed back a pandas/numpy object
if df_readback is not None:
    #reuse the saved split so train/test membership stays identical across runs
    data_frame_collection = df_readback
else:
    data_frame_collection = {}

    #split the data into train (80%) and test (20%) with a fixed seed for reproducibility
    data_frame_collection['X_train'], data_frame_collection['X_test'], data_frame_collection['Y_train'], data_frame_collection['Y_test'] = \
        train_test_split(df_indicator_numeric_features, df_target, test_size=.2, random_state=200)
    #the full feature frame is no longer needed once it has been split
    del df_indicator_numeric_features

    #save preprocessed data frame collection and keep what the helper returns
    data_frame_collection = icr.save_and_return_collection(data_frame_collection=data_frame_collection, filename=data_frame_collection_filename)

p(data_frame_collection['X_train'])
This file already exists (10147, 140)
destination_Home | destination_No Urgent Place | destination_Work | passenger_Alone | passenger_Friend(s) | passenger_Kid(s) | passenger_Partner | weather_Rainy | weather_Snowy | weather_Sunny | time_10AM | time_10PM | time_2PM | time_6PM | time_7AM | coupon_venue_type_Bar | coupon_venue_type_Carry out & Take away | coupon_venue_type_Coffee House | coupon_venue_type_Restaurant(20-50) | coupon_venue_type_Restaurant(<20) | expiration_1d | expiration_2h | gender_Female | gender_Male | age_21-25 | age_26-30 | age_31-35 | age_36-40 | age_41-45 | age_46-49 | age_50+ | age_<21 | maritalStatus_Divorced | maritalStatus_Married partner | maritalStatus_Single | maritalStatus_Unmarried partner | maritalStatus_Widowed | education_Associates degree | education_Bachelors degree | education_Graduate degree (Masters or Doctorate) | education_High School Graduate | education_Some High School | education_Some college - no degree | occupation_Architecture & Engineering | occupation_Arts Design Entertainment Sports & Media | occupation_Building & Grounds Cleaning & Maintenance | occupation_Business & Financial | occupation_Community & Social Services | occupation_Computer & Mathematical | occupation_Construction & Extraction | occupation_Education&Training&Library | occupation_Farming Fishing & Forestry | occupation_Food Preparation & Serving Related | occupation_Healthcare Practitioners & Technical | occupation_Healthcare Support | occupation_Installation Maintenance & Repair | occupation_Legal | occupation_Life Physical Social Science | occupation_Management | occupation_Office & Administrative Support | occupation_Personal Care & Service | occupation_Production Occupations | occupation_Protective Service | occupation_Retired | occupation_Sales & Related | occupation_Student | occupation_Transportation & Material Moving | occupation_Unemployed | income_Less than \$12500 | income_\$100000 or More | income_\$12500 - \$24999 | income_\$25000 - \$37499 | income_\$37500 - \$49999 | 
income_\$50000 - \$62499 | income_\$62500 - \$74999 | income_\$75000 - \$87499 | income_\$87500 - \$99999 | car_Car that is too old to install Onstar :D | car_Mazda5 | car_Scooter and motorcycle | car_crossover | car_do not drive | car_no response | Bar_1-3 | Bar_4-8 | Bar_<1 | Bar_>8 | Bar_never | Bar_no response | CoffeeHouse_1-3 | CoffeeHouse_4-8 | CoffeeHouse_<1 | CoffeeHouse_>8 | CoffeeHouse_never | CoffeeHouse_no response | CarryAway_1-3 | CarryAway_4-8 | CarryAway_<1 | CarryAway_>8 | CarryAway_never | CarryAway_no response | RestaurantLessThan20_1-3 | RestaurantLessThan20_4-8 | RestaurantLessThan20_<1 | RestaurantLessThan20_>8 | RestaurantLessThan20_never | RestaurantLessThan20_no response | Restaurant20To50_1-3 | Restaurant20To50_4-8 | Restaurant20To50_<1 | Restaurant20To50_>8 | Restaurant20To50_never | Restaurant20To50_no response | temperature | has_children | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | 
RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8630 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 0 | 0 | 0 | 1 | 24 | 18 | 23 | 93749.5 | 0 | 1 | 1 | 5 | 8 | 2 | 4 | 3 | 2 | 1 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
2418 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 55 | 0 | 0 | 0 | 1 | 2 | 14 | 28 | 18749.5 | 0 | 0 | 1 | 5 | 2 | 3 | 3 | 2 | 5 | 5 | 5 | 5 | 1 | 0 | 0 | 0 | 0 | 0 |
10804 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 55 | 0 | 0 | 0 | 1 | 2 | 14 | 33 | 81249.5 | 0 | 0 | 3 | 6 | 7 | 4 | 3 | 2 | 0 | 5 | 5 | 5 | 5 | 1 | 0 | 0 | 0 | 0 |
747 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 80 | 0 | 1 | 0 | 1 | 24 | 14 | 38 | 31249.5 | 0 | 1 | 1 | 3 | 3 | 5 | 3 | 3 | 2 | 2 | 5 | 5 | 2 | 0 | 0 | 0 | 0 | 0 |
7333 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 0 | 0 | 1 | 24 | 18 | 18 | 31249.5 | 1 | 1 | 1 | 3 | 3 | 1 | 4 | 3 | 2 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
10949 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 30 | 1 | 0 | 0 | 1 | 24 | 18 | 33 | 43749.5 | 0 | 1 | 2 | 5 | 4 | 4 | 4 | 1 | 3 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 0 | 0 |
11937 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 55 | 0 | 1 | 0 | 1 | 24 | 10 | 33 | 150000.0 | 0 | 1 | 2 | 3 | 9 | 4 | 2 | 2 | 1 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 0 | 0 |
735 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 55 | 0 | 1 | 0 | 1 | 24 | 18 | 43 | 43749.5 | 1 | 1 | 5 | 3 | 4 | 6 | 4 | 2 | 3 | 3 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
67 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 0 | 0 | 1 | 2 | 10 | 48 | 81249.5 | 1 | 0 | 1 | 5 | 7 | 7 | 2 | 3 | 1 | 2 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
3251 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 0 | 0 | 1 | 2 | 10 | 28 | 31249.5 | 0 | 0 | 1 | 3 | 3 | 3 | 2 | 3 | 3 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
time: 36.8 ms (started: 2023-09-28 17:20:02 -07:00)
#stratified 5-fold cross validation: pick the train/test rows of every fold out of the training split

#splitter with 5 folds; shuffle=False, so rows are not shuffled before splitting
StratifiedKFold_5_splits = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)

#training features and the 'Y' target column (as a Series) that every fold is cut from
_X_train_all_folds = data_frame_collection['X_train']
_Y_train_all_folds = data_frame_collection['Y_train'].loc[:, 'Y']

#collection layout: {'fold <k>': {'X_train', 'X_test', 'Y_train', 'Y_test'}}
stratified_fold_number_X_train_X_test_Y_train_Y_test_collection = {}
for index, (train_index, test_index) in enumerate(StratifiedKFold_5_splits.split(_X_train_all_folds, _Y_train_all_folds)):
    _fold_name = 'fold ' + str(index)
    print(_fold_name + " TRAIN:", train_index, "TEST:", test_index)

    #split() yields row positions, not index labels, hence .iloc (location) rather than .loc (label)
    stratified_fold_number_X_train_X_test_Y_train_Y_test_collection[_fold_name] = {
        'X_train': _X_train_all_folds.iloc[train_index, :],
        'X_test': _X_train_all_folds.iloc[test_index, :],
        'Y_train': _Y_train_all_folds.iloc[train_index],
        'Y_test': _Y_train_all_folds.iloc[test_index],
    }

print()
p(stratified_fold_number_X_train_X_test_Y_train_Y_test_collection['fold 0']['X_train'])
fold 0 TRAIN: [ 2028 2030 2031 ... 10144 10145 10146] TEST: [ 0 1 2 ... 2027 2029 2032] fold 1 TRAIN: [ 0 1 2 ... 10144 10145 10146] TEST: [2028 2030 2031 ... 4095 4101 4102] fold 2 TRAIN: [ 0 1 2 ... 10144 10145 10146] TEST: [4009 4010 4012 ... 6131 6133 6135] fold 3 TRAIN: [ 0 1 2 ... 10144 10145 10146] TEST: [6036 6037 6043 ... 8126 8127 8128] fold 4 TRAIN: [ 0 1 2 ... 8126 8127 8128] TEST: [ 8099 8105 8109 ... 10144 10145 10146] (8117, 140)
destination_Home | destination_No Urgent Place | destination_Work | passenger_Alone | passenger_Friend(s) | passenger_Kid(s) | passenger_Partner | weather_Rainy | weather_Snowy | weather_Sunny | time_10AM | time_10PM | time_2PM | time_6PM | time_7AM | coupon_venue_type_Bar | coupon_venue_type_Carry out & Take away | coupon_venue_type_Coffee House | coupon_venue_type_Restaurant(20-50) | coupon_venue_type_Restaurant(<20) | expiration_1d | expiration_2h | gender_Female | gender_Male | age_21-25 | age_26-30 | age_31-35 | age_36-40 | age_41-45 | age_46-49 | age_50+ | age_<21 | maritalStatus_Divorced | maritalStatus_Married partner | maritalStatus_Single | maritalStatus_Unmarried partner | maritalStatus_Widowed | education_Associates degree | education_Bachelors degree | education_Graduate degree (Masters or Doctorate) | education_High School Graduate | education_Some High School | education_Some college - no degree | occupation_Architecture & Engineering | occupation_Arts Design Entertainment Sports & Media | occupation_Building & Grounds Cleaning & Maintenance | occupation_Business & Financial | occupation_Community & Social Services | occupation_Computer & Mathematical | occupation_Construction & Extraction | occupation_Education&Training&Library | occupation_Farming Fishing & Forestry | occupation_Food Preparation & Serving Related | occupation_Healthcare Practitioners & Technical | occupation_Healthcare Support | occupation_Installation Maintenance & Repair | occupation_Legal | occupation_Life Physical Social Science | occupation_Management | occupation_Office & Administrative Support | occupation_Personal Care & Service | occupation_Production Occupations | occupation_Protective Service | occupation_Retired | occupation_Sales & Related | occupation_Student | occupation_Transportation & Material Moving | occupation_Unemployed | income_Less than \$12500 | income_\$100000 or More | income_\$12500 - \$24999 | income_\$25000 - \$37499 | income_\$37500 - \$49999 | 
income_\$50000 - \$62499 | income_\$62500 - \$74999 | income_\$75000 - \$87499 | income_\$87500 - \$99999 | car_Car that is too old to install Onstar :D | car_Mazda5 | car_Scooter and motorcycle | car_crossover | car_do not drive | car_no response | Bar_1-3 | Bar_4-8 | Bar_<1 | Bar_>8 | Bar_never | Bar_no response | CoffeeHouse_1-3 | CoffeeHouse_4-8 | CoffeeHouse_<1 | CoffeeHouse_>8 | CoffeeHouse_never | CoffeeHouse_no response | CarryAway_1-3 | CarryAway_4-8 | CarryAway_<1 | CarryAway_>8 | CarryAway_never | CarryAway_no response | RestaurantLessThan20_1-3 | RestaurantLessThan20_4-8 | RestaurantLessThan20_<1 | RestaurantLessThan20_>8 | RestaurantLessThan20_never | RestaurantLessThan20_no response | Restaurant20To50_1-3 | Restaurant20To50_4-8 | Restaurant20To50_<1 | Restaurant20To50_>8 | Restaurant20To50_never | Restaurant20To50_no response | temperature | has_children | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | 
RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11981 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 30 | 1 | 0 | 0 | 1 | 2 | 10 | 28 | 43749.5 | 0 | 0 | 3 | 5 | 4 | 3 | 2 | 1 | 3 | 1 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
3996 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 55 | 0 | 0 | 0 | 1 | 24 | 7 | 18 | 150000.0 | 1 | 1 | 4 | 5 | 9 | 1 | 1 | 2 | 1 | 2 | 0 | 0 | 2 | 0 | 0 | 1 | 1 | 0 |
1301 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 55 | 1 | 0 | 0 | 1 | 24 | 7 | 38 | 56249.5 | 1 | 1 | 4 | 4 | 5 | 5 | 1 | 2 | 3 | 5 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
9720 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 55 | 0 | 1 | 1 | 1 | 24 | 18 | 48 | 43749.5 | 0 | 1 | 2 | 3 | 4 | 7 | 4 | 2 | 3 | 2 | 0 | 5 | 2 | 0 | 0 | 1 | 0 | 0 |
752 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 80 | 0 | 0 | 0 | 1 | 2 | 10 | 38 | 31249.5 | 0 | 0 | 2 | 3 | 3 | 5 | 2 | 3 | 2 | 2 | 5 | 5 | 2 | 0 | 0 | 0 | 0 | 0 |
10949 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 30 | 1 | 0 | 0 | 1 | 24 | 18 | 33 | 43749.5 | 0 | 1 | 2 | 5 | 4 | 4 | 4 | 1 | 3 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 0 | 0 |
11937 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 55 | 0 | 1 | 0 | 1 | 24 | 10 | 33 | 150000.0 | 0 | 1 | 2 | 3 | 9 | 4 | 2 | 2 | 1 | 1 | 0 | 5 | 1 | 0 | 0 | 1 | 0 | 0 |
735 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 55 | 0 | 1 | 0 | 1 | 24 | 18 | 43 | 43749.5 | 1 | 1 | 5 | 3 | 4 | 6 | 4 | 2 | 3 | 3 | 5 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
67 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 0 | 0 | 1 | 2 | 10 | 48 | 81249.5 | 1 | 0 | 1 | 5 | 7 | 7 | 2 | 3 | 1 | 2 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
3251 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 80 | 1 | 0 | 0 | 1 | 2 | 10 | 28 | 31249.5 | 0 | 0 | 1 | 3 | 3 | 3 | 2 | 3 | 3 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
time: 68.8 ms (started: 2023-09-28 17:20:02 -07:00)
# Standardize and scale the train and test feature sets of folds 0 through 4.
# For each fold a fresh StandardScaler is fit on that fold's X_train only and the
# same fitted scaler is then applied to that fold's X_test.
number_of_folds = 5
for fold_number in range(number_of_folds):
    fold_key = 'fold ' + str(fold_number)
    fold_collection = stratified_fold_number_X_train_X_test_Y_train_Y_test_collection[fold_key]
    standard_scaler = StandardScaler()

    # Train split: fit + transform, then rewrap the ndarray with the
    # column labels and row index of the frame it came from.
    fold_X_train = fold_collection['X_train']
    standardized_scaled_stratified_fold_number_X_train_ndarray = standard_scaler.fit_transform(fold_X_train)
    fold_collection['X_train'] = pd.DataFrame(
        standardized_scaled_stratified_fold_number_X_train_ndarray,
        columns=fold_X_train.columns,
        index=fold_X_train.index,
    )

    # Test split: transform only (no refit), then rewrap the same way.
    fold_X_test = fold_collection['X_test']
    standardized_scaled_stratified_fold_number_X_test_ndarray = standard_scaler.transform(fold_X_test)
    fold_collection['X_test'] = pd.DataFrame(
        standardized_scaled_stratified_fold_number_X_test_ndarray,
        columns=fold_X_test.columns,
        index=fold_X_test.index,
    )

# NOTE(review): the notebook export dropped indentation; print() and p() are
# assumed to run once after the loop -- confirm against the original cell.
print()
p(stratified_fold_number_X_train_X_test_Y_train_Y_test_collection['fold 0']['X_test'])
(2030, 140)
destination_Home | destination_No Urgent Place | destination_Work | passenger_Alone | passenger_Friend(s) | passenger_Kid(s) | passenger_Partner | weather_Rainy | weather_Snowy | weather_Sunny | time_10AM | time_10PM | time_2PM | time_6PM | time_7AM | coupon_venue_type_Bar | coupon_venue_type_Carry out & Take away | coupon_venue_type_Coffee House | coupon_venue_type_Restaurant(20-50) | coupon_venue_type_Restaurant(<20) | expiration_1d | expiration_2h | gender_Female | gender_Male | age_21-25 | age_26-30 | age_31-35 | age_36-40 | age_41-45 | age_46-49 | age_50+ | age_<21 | maritalStatus_Divorced | maritalStatus_Married partner | maritalStatus_Single | maritalStatus_Unmarried partner | maritalStatus_Widowed | education_Associates degree | education_Bachelors degree | education_Graduate degree (Masters or Doctorate) | education_High School Graduate | education_Some High School | education_Some college - no degree | occupation_Architecture & Engineering | occupation_Arts Design Entertainment Sports & Media | occupation_Building & Grounds Cleaning & Maintenance | occupation_Business & Financial | occupation_Community & Social Services | occupation_Computer & Mathematical | occupation_Construction & Extraction | occupation_Education&Training&Library | occupation_Farming Fishing & Forestry | occupation_Food Preparation & Serving Related | occupation_Healthcare Practitioners & Technical | occupation_Healthcare Support | occupation_Installation Maintenance & Repair | occupation_Legal | occupation_Life Physical Social Science | occupation_Management | occupation_Office & Administrative Support | occupation_Personal Care & Service | occupation_Production Occupations | occupation_Protective Service | occupation_Retired | occupation_Sales & Related | occupation_Student | occupation_Transportation & Material Moving | occupation_Unemployed | income_Less than \$12500 | income_\$100000 or More | income_\$12500 - \$24999 | income_\$25000 - \$37499 | income_\$37500 - \$49999 | 
income_\$50000 - \$62499 | income_\$62500 - \$74999 | income_\$75000 - \$87499 | income_\$87500 - \$99999 | car_Car that is too old to install Onstar :D | car_Mazda5 | car_Scooter and motorcycle | car_crossover | car_do not drive | car_no response | Bar_1-3 | Bar_4-8 | Bar_<1 | Bar_>8 | Bar_never | Bar_no response | CoffeeHouse_1-3 | CoffeeHouse_4-8 | CoffeeHouse_<1 | CoffeeHouse_>8 | CoffeeHouse_never | CoffeeHouse_no response | CarryAway_1-3 | CarryAway_4-8 | CarryAway_<1 | CarryAway_>8 | CarryAway_never | CarryAway_no response | RestaurantLessThan20_1-3 | RestaurantLessThan20_4-8 | RestaurantLessThan20_<1 | RestaurantLessThan20_>8 | RestaurantLessThan20_never | RestaurantLessThan20_no response | Restaurant20To50_1-3 | Restaurant20To50_4-8 | Restaurant20To50_<1 | Restaurant20To50_>8 | Restaurant20To50_never | Restaurant20To50_no response | temperature | has_children | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | 
RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8630 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | -0.434490 | 1.707507 | -0.571612 | -0.438291 | -0.481562 | 1.466632 | -0.363860 | -0.525411 | 0.884749 | -0.884749 | 0.977221 | -0.977221 | 1.991112 | -0.511448 | -0.437092 | -0.412026 | -0.304561 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | 1.212198 | -0.766297 | -0.459687 | -0.104691 | -0.314360 | 1.390123 | -0.415089 | -0.280739 | -0.085568 | -0.720544 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | 2.388038 | -0.301167 | -0.39489 | -0.405457 | -0.430677 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | 3.588205 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | 2.022924 | -0.297502 | -0.612995 | -0.166909 | -0.551684 | -0.131026 | -0.587546 | -0.404426 | 1.669106 | -0.311513 | -0.109976 | 8.864448 | -0.765487 | -0.706780 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | -0.620607 | 2.251900 | -0.338398 | -0.445063 | -0.128077 | -0.597604 | -0.243404 | 1.042931 | -0.144492 | 0.875073 | -0.842996 | -1.149405 | -0.367756 | 0.520827 | 0.884749 | 0.768378 | -0.905191 | 0.807280 | -0.977221 | 0.884749 | -1.171215 | 0.688807 | 1.282399 | -0.920802 | 0.768470 | 0.875073 | -0.290234 | -0.856444 | 0.749330 | -0.885148 | -0.750207 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
2418 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | 2.301549 | -0.585649 | -0.571612 | -0.438291 | -0.481562 | 1.466632 | -0.363860 | -0.525411 | -1.130264 | 1.130264 | 0.977221 | -0.977221 | -0.502232 | 1.955233 | -0.437092 | -0.412026 | -0.304561 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | -0.824947 | 1.304977 | -0.459687 | -0.104691 | -0.314360 | 1.390123 | -0.415089 | -0.280739 | -0.085568 | -0.720544 | -0.111685 | 4.409744 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | 2.466353 | -0.430677 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | -0.494334 | 3.361322 | -0.612995 | -0.166909 | -0.551684 | -0.131026 | -0.587546 | 2.472637 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | -0.765487 | 1.414867 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | 1.611325 | -0.444069 | -0.338398 | -0.445063 | -0.128077 | -0.597604 | -0.243404 | 1.042931 | -0.144492 | -0.433506 | -0.842996 | -1.149405 | -0.367756 | 0.520827 | -1.130264 | 0.028112 | -0.389169 | -0.932032 | -0.977221 | -1.130264 | -1.171215 | 0.688807 | -1.080025 | -0.388227 | 0.069571 | -0.433506 | 2.206229 | 1.889114 | 1.300065 | 1.469349 | -0.750207 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
10804 | -0.587167 | 1.005312 | -0.571612 | 0.862173 | -0.593998 | -0.296765 | -0.304077 | 3.069164 | -0.350262 | -1.967778 | -0.469292 | -0.434891 | 2.301549 | -0.585649 | -0.571612 | -0.438291 | 2.076577 | -0.681834 | -0.363860 | -0.525411 | -1.130264 | 1.130264 | 0.977221 | -0.977221 | -0.502232 | -0.511448 | 2.287849 | -0.412026 | -0.304561 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | -0.824947 | 1.304977 | -0.459687 | -0.104691 | -0.314360 | -0.719361 | 2.409119 | -0.280739 | -0.085568 | -0.720544 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | 7.777688 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | -0.430677 | -0.413048 | -0.389454 | -0.269074 | 3.723748 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | -0.494334 | -0.297502 | -0.612995 | 5.991282 | -0.551684 | -0.131026 | -0.587546 | 2.472637 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | -0.765487 | 1.414867 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | 1.611325 | -0.444069 | -0.338398 | -0.445063 | -0.128077 | -0.597604 | 4.108388 | -0.958836 | -0.144492 | -0.433506 | -0.842996 | -1.149405 | -0.367756 | 0.520827 | -1.130264 | 0.028112 | 0.126854 | 0.517395 | -0.977221 | -1.130264 | 0.244745 | 1.476279 | 0.888662 | 0.144347 | 0.069571 | -0.433506 | -1.954543 | 1.889114 | 1.300065 | 1.469349 | 2.798930 | 5.991282 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
747 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | 2.301549 | -0.585649 | -0.571612 | -0.438291 | -0.481562 | 1.466632 | -0.363860 | -0.525411 | 0.884749 | -0.884749 | 0.977221 | -0.977221 | -0.502232 | -0.511448 | -0.437092 | 2.427034 | -0.304561 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | 1.212198 | -0.766297 | -0.459687 | -0.104691 | -0.314360 | -0.719361 | -0.415089 | -0.280739 | -0.085568 | 1.387840 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | 7.238875 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | 2.321926 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | 2.022924 | -0.297502 | -0.612995 | -0.166909 | -0.551684 | -0.131026 | 1.701994 | -0.404426 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | -0.765487 | 1.414867 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | 1.611325 | -0.444069 | -0.338398 | -0.445063 | -0.128077 | 1.673348 | -0.243404 | -0.958836 | -0.144492 | 0.875073 | -0.842996 | 0.870015 | -0.367756 | 0.520827 | 0.884749 | 0.028112 | 0.642876 | -0.642147 | -0.977221 | 0.884749 | -1.171215 | -0.886136 | -0.686288 | 0.676921 | 0.069571 | 0.875073 | -0.290234 | -0.170054 | 1.300065 | 1.469349 | 0.137077 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
7333 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | -0.434490 | 1.707507 | -0.571612 | -0.438291 | -0.481562 | 1.466632 | -0.363860 | -0.525411 | 0.884749 | -0.884749 | -1.023310 | 1.023310 | -0.502232 | -0.511448 | -0.437092 | -0.412026 | -0.304561 | -0.242267 | -0.209411 | 2.945270 | -0.203573 | -0.824947 | -0.766297 | 2.175395 | -0.104691 | -0.314360 | -0.719361 | -0.415089 | -0.280739 | -0.085568 | 1.387840 | -0.111685 | -0.226771 | 15.895164 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | 2.321926 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | 2.022924 | -0.297502 | -0.612995 | -0.166909 | -0.551684 | -0.131026 | -0.587546 | -0.404426 | 1.669106 | -0.311513 | -0.109976 | -0.112810 | 1.306357 | -0.706780 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | 1.161615 | -0.620607 | -0.444069 | -0.338398 | -0.445063 | -0.128077 | -0.597604 | -0.243404 | 1.042931 | -0.144492 | 0.875073 | 1.186245 | -1.149405 | -0.367756 | 0.520827 | 0.884749 | 0.768378 | -1.421214 | -0.642147 | 1.023310 | 0.884749 | -1.171215 | -0.886136 | -0.686288 | -1.453376 | 0.768470 | 0.875073 | -0.290234 | -0.856444 | -0.352139 | -0.296524 | -0.750207 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
1460 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | 2.301549 | -0.585649 | -0.571612 | -0.438291 | -0.481562 | 1.466632 | -0.363860 | -0.525411 | -1.130264 | 1.130264 | -1.023310 | 1.023310 | -0.502232 | -0.511448 | -0.437092 | -0.412026 | -0.304561 | 4.127684 | -0.209411 | -0.339527 | -0.203573 | 1.212198 | -0.766297 | -0.459687 | -0.104691 | -0.314360 | -0.719361 | 2.409119 | -0.280739 | -0.085568 | -0.720544 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | 3.791188 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | -0.430677 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | 3.588205 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | -0.494334 | -0.297502 | 1.631335 | -0.166909 | -0.551684 | -0.131026 | 1.701994 | -0.404426 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | 1.306357 | -0.706780 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | -0.620607 | 2.251900 | -0.338398 | -0.445063 | -0.128077 | 1.673348 | -0.243404 | -0.958836 | -0.144492 | 0.875073 | 1.186245 | 0.870015 | -0.367756 | 0.520827 | -1.130264 | 0.028112 | 1.674921 | 0.807280 | 1.023310 | -1.130264 | -1.171215 | 1.476279 | 1.282399 | 1.742070 | 0.069571 | 0.875073 | -1.122388 | -0.170054 | -0.352139 | -0.885148 | 0.137077 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
8449 | -0.587167 | -0.994716 | 1.749437 | 0.862173 | -0.593998 | -0.296765 | -0.304077 | -0.325822 | 2.855008 | -1.967778 | -0.469292 | -0.434891 | -0.434490 | -0.585649 | 1.749437 | -0.438291 | -0.481562 | -0.681834 | 2.748311 | -0.525411 | 0.884749 | -0.884749 | -1.023310 | 1.023310 | -0.502232 | -0.511448 | -0.437092 | -0.412026 | 3.283419 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | 1.212198 | -0.766297 | -0.459687 | -0.104691 | 3.181066 | -0.719361 | -0.415089 | -0.280739 | -0.085568 | -0.720544 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | 17.991109 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | 2.321926 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | 1.194710 | -0.101022 | -0.494334 | -0.297502 | -0.612995 | -0.166909 | 1.812633 | -0.131026 | -0.587546 | -0.404426 | -0.599123 | -0.311513 | 9.092877 | -0.112810 | -0.765487 | -0.706780 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | 1.161615 | -0.620607 | -0.444069 | -0.338398 | 2.246872 | -0.128077 | -0.597604 | -0.243404 | -0.958836 | -0.144492 | -1.742085 | 1.186245 | 0.870015 | 2.719191 | 0.520827 | 0.884749 | -1.267352 | 1.158899 | -0.642147 | 1.023310 | 0.884749 | 1.660704 | -0.098664 | -0.686288 | 1.209496 | -1.328226 | -1.742085 | 0.541920 | 0.516335 | 0.198596 | -0.296524 | 1.024361 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
11224 | 1.703094 | -0.994716 | -0.571612 | 0.862173 | -0.593998 | -0.296765 | -0.304077 | 3.069164 | -0.350262 | -1.967778 | -0.469292 | 2.299429 | -0.434490 | -0.585649 | -0.571612 | -0.438291 | -0.481562 | 1.466632 | -0.363860 | -0.525411 | -1.130264 | 1.130264 | -1.023310 | 1.023310 | -0.502232 | -0.511448 | -0.437092 | -0.412026 | -0.304561 | -0.242267 | 4.775303 | -0.339527 | -0.203573 | -0.824947 | -0.766297 | 2.175395 | -0.104691 | -0.314360 | -0.719361 | -0.415089 | -0.280739 | -0.085568 | 1.387840 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | 2.664732 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | 2.321926 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | 1.194710 | -0.101022 | -0.494334 | -0.297502 | -0.612995 | -0.166909 | 1.812633 | -0.131026 | -0.587546 | -0.404426 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | 1.306357 | -0.706780 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | -0.620607 | 2.251900 | -0.338398 | 2.246872 | -0.128077 | -0.597604 | -0.243404 | -0.958836 | -0.144492 | -0.433506 | -0.842996 | -1.149405 | -0.367756 | -1.920023 | -1.130264 | 1.508643 | 2.500558 | -0.642147 | 1.023310 | -1.130264 | -1.171215 | -0.886136 | -0.686288 | 2.274644 | 1.467368 | -0.433506 | 0.541920 | 0.516335 | -0.352139 | -0.885148 | 1.024361 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
5085 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | 2.301549 | -0.585649 | -0.571612 | -0.438291 | -0.481562 | -0.681834 | -0.363860 | 1.903270 | -1.130264 | 1.130264 | -1.023310 | 1.023310 | -0.502232 | 1.955233 | -0.437092 | -0.412026 | -0.304561 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | -0.824947 | 1.304977 | -0.459687 | -0.104691 | -0.314360 | -0.719361 | -0.415089 | 3.562026 | -0.085568 | -0.720544 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | -0.115578 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | 2.388038 | -0.301167 | -0.39489 | -0.405457 | 2.321926 | -0.413048 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | -0.837023 | -0.101022 | -0.494334 | -0.297502 | 1.631335 | -0.166909 | -0.551684 | -0.131026 | 1.701994 | -0.404426 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | -0.765487 | 1.414867 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | 1.161615 | -0.620607 | -0.444069 | -0.338398 | -0.445063 | -0.128077 | -0.597604 | -0.243404 | 1.042931 | -0.144492 | 0.875073 | -0.842996 | 0.870015 | -0.367756 | 0.520827 | -1.130264 | 0.028112 | -0.389169 | -0.642147 | 1.023310 | -1.130264 | 0.952725 | -1.673608 | -0.686288 | -0.388227 | 0.069571 | 0.875073 | -1.122388 | -0.170054 | 1.300065 | -0.296524 | -0.750207 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
7156 | -0.587167 | 1.005312 | -0.571612 | -1.159860 | 1.683508 | -0.296765 | -0.304077 | -0.325822 | -0.350262 | 0.508187 | -0.469292 | -0.434891 | 2.301549 | -0.585649 | -0.571612 | -0.438291 | -0.481562 | -0.681834 | 2.748311 | -0.525411 | -1.130264 | 1.130264 | -1.023310 | 1.023310 | -0.502232 | -0.511448 | 2.287849 | -0.412026 | -0.304561 | -0.242267 | -0.209411 | -0.339527 | -0.203573 | 1.212198 | -0.766297 | -0.459687 | -0.104691 | -0.314360 | -0.719361 | -0.415089 | -0.280739 | -0.085568 | 1.387840 | -0.111685 | -0.226771 | -0.062912 | -0.211327 | -0.143153 | -0.3549 | 8.652157 | -0.281759 | -0.055583 | -0.154401 | -0.141803 | -0.138143 | -0.102259 | -0.128573 | -0.122497 | -0.263770 | -0.226169 | -0.114479 | -0.096571 | -0.116668 | -0.203573 | -0.310798 | -0.375272 | -0.132959 | -0.418754 | -0.301167 | -0.39489 | -0.405457 | -0.430677 | 2.421027 | -0.389454 | -0.269074 | -0.268547 | -0.278691 | -0.045812 | -0.036838 | -0.041566 | -0.041566 | -0.040052 | 0.092594 | 1.194710 | -0.101022 | -0.494334 | -0.297502 | -0.612995 | -0.166909 | 1.812633 | -0.131026 | -0.587546 | -0.404426 | -0.599123 | -0.311513 | -0.109976 | -0.112810 | -0.765487 | 1.414867 | -0.417941 | -0.376553 | -0.134865 | -0.105291 | -0.860871 | 1.611325 | -0.444069 | -0.338398 | -0.445063 | -0.128077 | 1.673348 | -0.243404 | -0.958836 | -0.144492 | 0.875073 | 1.186245 | 0.870015 | -0.367756 | 0.520827 | -1.130264 | 0.028112 | 0.126854 | -0.352261 | 1.023310 | -1.130264 | 1.660704 | -0.886136 | -0.292550 | 0.144347 | 0.069571 | 0.875073 | 0.541920 | 0.516335 | 1.300065 | 1.469349 | 0.137077 | -0.166909 | -0.311513 | -0.376553 | -0.338398 | -0.144492 |
time: 135 ms (started: 2023-09-28 17:20:02 -07:00)
# Standardize and scale the full train/test feature frames, reusing a previously
# saved collection when one is found under the versioned pickle filename.
data_frame_collection_filename = 'data_frame_collection_train_test_standardize_scale_v' + filename_version + '.pkl'
# Presumably returns the saved collection, or None when no such file exists --
# confirm against the icr helper.
df_readback = icr.return_processed_collection_if_it_exists(filename=data_frame_collection_filename, parse_dates=False)
# Identity check against None: `!= None` dispatches to the object's __ne__, which
# is elementwise for pandas/numpy objects and would make this `if` raise.
if df_readback is not None:
    data_frame_collection = df_readback
else:
    # Fit the scaler on the training features only, then apply the same fitted
    # scaler to the test features.
    standard_scaler = StandardScaler()
    standardized_scaled_data_frame_collection_X_train_ndarray = standard_scaler.fit_transform(data_frame_collection['X_train'])
    # Rewrap the ndarray with the original column labels and row index.
    data_frame_collection['X_train'] = pd.DataFrame(
        standardized_scaled_data_frame_collection_X_train_ndarray,
        columns=data_frame_collection['X_train'].columns,
        index=data_frame_collection['X_train'].index,
    )
    standardized_scaled_df_collection_X_test_ndarray = standard_scaler.transform(data_frame_collection['X_test'])
    data_frame_collection['X_test'] = pd.DataFrame(
        standardized_scaled_df_collection_X_test_ndarray,
        columns=data_frame_collection['X_test'].columns,
        index=data_frame_collection['X_test'].index,
    )
    data_frame_collection = icr.save_and_return_collection(data_frame_collection=data_frame_collection, filename=data_frame_collection_filename)
    # These temporaries only exist on this branch, so they are released here;
    # deleting them after the if/else would fail on the cached path.
    del standardized_scaled_data_frame_collection_X_train_ndarray, standardized_scaled_df_collection_X_test_ndarray, standard_scaler
print(data_frame_collection['X_train'].shape, data_frame_collection['X_test'].shape, data_frame_collection['Y_train'].shape, data_frame_collection['Y_test'].shape)
p(data_frame_collection['X_train'])
This file already exists (10147, 140) (2537, 140) (10147, 1) (2537, 1) (10147, 140)
destination_Home | destination_No Urgent Place | destination_Work | passenger_Alone | passenger_Friend(s) | passenger_Kid(s) | passenger_Partner | weather_Rainy | weather_Snowy | weather_Sunny | time_10AM | time_10PM | time_2PM | time_6PM | time_7AM | coupon_venue_type_Bar | coupon_venue_type_Carry out & Take away | coupon_venue_type_Coffee House | coupon_venue_type_Restaurant(20-50) | coupon_venue_type_Restaurant(<20) | expiration_1d | expiration_2h | gender_Female | gender_Male | age_21-25 | age_26-30 | age_31-35 | age_36-40 | age_41-45 | age_46-49 | age_50+ | age_<21 | maritalStatus_Divorced | maritalStatus_Married partner | maritalStatus_Single | maritalStatus_Unmarried partner | maritalStatus_Widowed | education_Associates degree | education_Bachelors degree | education_Graduate degree (Masters or Doctorate) | education_High School Graduate | education_Some High School | education_Some college - no degree | occupation_Architecture & Engineering | occupation_Arts Design Entertainment Sports & Media | occupation_Building & Grounds Cleaning & Maintenance | occupation_Business & Financial | occupation_Community & Social Services | occupation_Computer & Mathematical | occupation_Construction & Extraction | occupation_Education&Training&Library | occupation_Farming Fishing & Forestry | occupation_Food Preparation & Serving Related | occupation_Healthcare Practitioners & Technical | occupation_Healthcare Support | occupation_Installation Maintenance & Repair | occupation_Legal | occupation_Life Physical Social Science | occupation_Management | occupation_Office & Administrative Support | occupation_Personal Care & Service | occupation_Production Occupations | occupation_Protective Service | occupation_Retired | occupation_Sales & Related | occupation_Student | occupation_Transportation & Material Moving | occupation_Unemployed | income_Less than \$12500 | income_\$100000 or More | income_\$12500 - \$24999 | income_\$25000 - \$37499 | income_\$37500 - \$49999 | 
income_\$50000 - \$62499 | income_\$62500 - \$74999 | income_\$75000 - \$87499 | income_\$87500 - \$99999 | car_Car that is too old to install Onstar :D | car_Mazda5 | car_Scooter and motorcycle | car_crossover | car_do not drive | car_no response | Bar_1-3 | Bar_4-8 | Bar_<1 | Bar_>8 | Bar_never | Bar_no response | CoffeeHouse_1-3 | CoffeeHouse_4-8 | CoffeeHouse_<1 | CoffeeHouse_>8 | CoffeeHouse_never | CoffeeHouse_no response | CarryAway_1-3 | CarryAway_4-8 | CarryAway_<1 | CarryAway_>8 | CarryAway_never | CarryAway_no response | RestaurantLessThan20_1-3 | RestaurantLessThan20_4-8 | RestaurantLessThan20_<1 | RestaurantLessThan20_>8 | RestaurantLessThan20_never | RestaurantLessThan20_no response | Restaurant20To50_1-3 | Restaurant20To50_4-8 | Restaurant20To50_<1 | Restaurant20To50_>8 | Restaurant20To50_never | Restaurant20To50_no response | temperature | has_children | toCoupon_GEQ15min | toCoupon_GEQ25min | direction_same_or_opposite | expiration_category_representative_numeric_encoding | time_category_representative_numeric_encoding | age_category_representative_numeric_encoding | income_category_representative_numeric_encoding | gender_binary_encoding | expiration_binary_encoding | coupon_venue_type_ordinal_integer_encoding | education_ordinal_integer_encoding | income_ordinal_integer_encoding | age_ordinal_integer_encoding | time_ordinal_integer_encoding | temperature_ordinal_integer_encoding | Bar_venue_visit_frequency_yes_response_ordinal_integer_encoding | CoffeeHouse_venue_visit_frequency_yes_response_ordinal_integer_encoding | CarryAway_venue_visit_frequency_yes_response_ordinal_integer_encoding | RestaurantLessThan20_venue_visit_frequency_yes_response_ordinal_integer_encoding | Restaurant20To50_venue_visit_frequency_yes_response_ordinal_integer_encoding | Bar_venue_visit_frequency_no_response_indicator | CoffeeHouse_venue_visit_frequency_no_response_indicator | CarryAway_venue_visit_frequency_no_response_indicator | 
RestaurantLessThan20_venue_visit_frequency_no_response_indicator | Restaurant20To50_venue_visit_frequency_no_response_indicator | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8630 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | 1.678978 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | -0.468919 | -0.436673 | -0.434912 | 1.705934 | -0.569346 | -0.435552 | -0.480608 | 1.478469 | -0.364148 | -0.534082 | 0.886994 | -0.886994 | 0.976717 | -0.976717 | 1.987399 | -0.513302 | -0.438271 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | 1.216187 | -0.768725 | -0.459509 | -0.105167 | -0.315953 | 1.385337 | -0.409960 | -0.277188 | -0.085126 | -0.724533 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | 2.401912 | -0.301398 | -0.398243 | -0.410943 | -0.429612 | -0.40799 | -0.387547 | -0.267797 | -0.270954 | 3.618433 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | 2.031585 | -0.301787 | -0.615056 | -0.166278 | -0.551733 | -0.132087 | -0.582547 | -0.408319 | 1.661606 | -0.309306 | -0.11303 | 8.918057 | -0.768562 | -0.702825 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | -0.862943 | -0.619167 | 2.249784 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | 0.873060 | -0.839015 | -1.140802 | -0.367266 | 0.519885 | 0.886994 | 0.764335 | -0.907620 | 0.800884 | -0.976717 | 0.886994 | -1.181367 | 0.694779 | 1.277192 | -0.923430 | 0.764521 | 0.873060 | -0.291221 | -0.858924 | 0.754965 | -0.882039 | -0.750685 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
2418 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | 1.678978 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | -0.468919 | -0.436673 | 2.299318 | -0.586189 | -0.569346 | -0.435552 | -0.480608 | 1.478469 | -0.364148 | -0.534082 | -1.127403 | 1.127403 | 0.976717 | -0.976717 | -0.503170 | 1.948171 | -0.438271 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | -0.822242 | 1.300856 | -0.459509 | -0.105167 | -0.315953 | 1.385337 | -0.409960 | -0.277188 | -0.085126 | -0.724533 | -0.115245 | 4.38788 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | 2.433425 | -0.429612 | -0.40799 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | -0.492227 | 3.313598 | -0.615056 | -0.166278 | -0.551733 | -0.132087 | -0.582547 | 2.449067 | -0.601827 | -0.309306 | -0.11303 | -0.112132 | -0.768562 | 1.422829 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | -0.862943 | 1.615074 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | -0.437240 | -0.839015 | -1.140802 | -0.367266 | 0.519885 | -1.127403 | 0.024145 | -0.391530 | -0.929130 | -0.976717 | -1.127403 | -1.181367 | 0.694779 | -1.075831 | -0.390648 | 0.065576 | -0.437240 | 2.193379 | 1.876853 | 1.305812 | 1.473237 | -0.750685 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
10804 | -0.587858 | 1.003257 | -0.569346 | 0.866424 | -0.595600 | -0.298862 | -0.304693 | 3.054995 | -0.350292 | -1.962830 | -0.468919 | -0.436673 | 2.299318 | -0.586189 | -0.569346 | -0.435552 | 2.080699 | -0.676375 | -0.364148 | -0.534082 | -1.127403 | 1.127403 | 0.976717 | -0.976717 | -0.503170 | -0.513302 | 2.281692 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | -0.822242 | 1.300856 | -0.459509 | -0.105167 | -0.315953 | -0.721846 | 2.439262 | -0.277188 | -0.085126 | -0.724533 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | 7.730490 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | -0.410943 | -0.429612 | -0.40799 | -0.387547 | -0.267797 | 3.690669 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | -0.492227 | -0.301787 | -0.615056 | 6.014025 | -0.551733 | -0.132087 | -0.582547 | 2.449067 | -0.601827 | -0.309306 | -0.11303 | -0.112132 | -0.768562 | 1.422829 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | -0.862943 | 1.615074 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | 4.106633 | -0.960094 | -0.143595 | -0.437240 | -0.839015 | -1.140802 | -0.367266 | 0.519885 | -1.127403 | 0.024145 | 0.124559 | 0.512548 | -0.976717 | -1.127403 | 0.234404 | 1.485928 | 0.885022 | 0.142135 | 0.065576 | -0.437240 | -1.947620 | 1.876853 | 1.305812 | 1.473237 | 2.798803 | 6.014025 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
747 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | 1.678978 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | -0.468919 | -0.436673 | 2.299318 | -0.586189 | -0.569346 | -0.435552 | -0.480608 | 1.478469 | -0.364148 | -0.534082 | 0.886994 | -0.886994 | 0.976717 | -0.976717 | -0.503170 | -0.513302 | -0.438271 | 2.452028 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | 1.216187 | -0.768725 | -0.459509 | -0.105167 | -0.315953 | -0.721846 | -0.409960 | -0.277188 | -0.085126 | 1.380200 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | 7.200622 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | -0.410943 | 2.327679 | -0.40799 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | 2.031585 | -0.301787 | -0.615056 | -0.166278 | -0.551733 | -0.132087 | 1.716599 | -0.408319 | -0.601827 | -0.309306 | -0.11303 | -0.112132 | -0.768562 | 1.422829 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | -0.862943 | 1.615074 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | 1.679407 | -0.243508 | -0.960094 | -0.143595 | 0.873060 | -0.839015 | 0.876576 | -0.367266 | 0.519885 | 0.886994 | 0.024145 | 0.640649 | -0.640794 | -0.976717 | 0.886994 | -1.181367 | -0.887518 | -0.683661 | 0.674918 | 0.065576 | 0.873060 | -0.291221 | -0.174980 | 1.305812 | 1.473237 | 0.136687 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
7333 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | 1.678978 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | -0.468919 | -0.436673 | -0.434912 | 1.705934 | -0.569346 | -0.435552 | -0.480608 | 1.478469 | -0.364148 | -0.534082 | 0.886994 | -0.886994 | -1.023838 | 1.023838 | -0.503170 | -0.513302 | -0.438271 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | 2.973533 | -0.203894 | -0.822242 | -0.768725 | 2.176237 | -0.105167 | -0.315953 | -0.721846 | -0.409960 | -0.277188 | -0.085126 | 1.380200 | -0.115245 | -0.22790 | 16.099052 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | -0.410943 | 2.327679 | -0.40799 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | 2.031585 | -0.301787 | -0.615056 | -0.166278 | -0.551733 | -0.132087 | -0.582547 | -0.408319 | 1.661606 | -0.309306 | -0.11303 | -0.112132 | 1.301130 | -0.702825 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | 1.158826 | -0.619167 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | 0.873060 | 1.191874 | -1.140802 | -0.367266 | 0.519885 | 0.886994 | 0.764335 | -1.423709 | -0.640794 | 1.023838 | 0.886994 | -1.181367 | -0.887518 | -0.683661 | -1.456213 | 0.764521 | 0.873060 | -0.291221 | -0.858924 | -0.346729 | -0.293220 | -0.750685 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
10949 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | -0.595600 | 3.346030 | -0.304693 | -0.327333 | 2.854765 | -1.962830 | -0.468919 | -0.436673 | -0.434912 | 1.705934 | -0.569346 | 2.295936 | -0.480608 | -0.676375 | -0.364148 | -0.534082 | 0.886994 | -0.886994 | 0.976717 | -0.976717 | -0.503170 | -0.513302 | 2.281692 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | 1.216187 | -0.768725 | -0.459509 | -0.105167 | -0.315953 | 1.385337 | -0.409960 | -0.277188 | -0.085126 | -0.724533 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | 2.665452 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | -0.410943 | -0.429612 | 2.45104 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | 1.197707 | -0.097216 | -0.492227 | -0.301787 | -0.615056 | -0.166278 | -0.551733 | -0.132087 | -0.582547 | -0.408319 | 1.661606 | -0.309306 | -0.11303 | -0.112132 | -0.768562 | -0.702825 | -0.413561 | 2.622614 | -0.131701 | -0.103721 | -0.862943 | 1.615074 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | -1.747541 | 1.191874 | -1.140802 | -0.367266 | 0.519885 | 0.886994 | 0.764335 | 0.124559 | -0.352458 | -0.976717 | 0.886994 | -0.473481 | 0.694779 | -0.291490 | 0.142135 | 0.764521 | -1.747541 | 0.536979 | -0.858924 | -1.448423 | 1.473237 | -0.750685 | -0.166278 | -0.309306 | 2.622614 | -0.339559 | -0.143595 |
11937 | -0.587858 | 1.003257 | -0.569346 | 0.866424 | -0.595600 | -0.298862 | -0.304693 | 3.054995 | -0.350292 | -1.962830 | 2.132567 | -0.436673 | -0.434912 | -0.586189 | -0.569346 | 2.295936 | -0.480608 | -0.676375 | -0.364148 | -0.534082 | 0.886994 | -0.886994 | 0.976717 | -0.976717 | -0.503170 | -0.513302 | 2.281692 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | 1.216187 | -0.768725 | -0.459509 | -0.105167 | -0.315953 | -0.721846 | -0.409960 | -0.277188 | -0.085126 | 1.380200 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | 2.820615 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | 2.511027 | -0.410943 | -0.429612 | -0.40799 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | -0.492227 | -0.301787 | 1.625868 | -0.166278 | -0.551733 | -0.132087 | -0.582547 | -0.408319 | 1.661606 | -0.309306 | -0.11303 | -0.112132 | -0.768562 | -0.702825 | -0.413561 | 2.622614 | -0.131701 | -0.103721 | -0.862943 | 1.615074 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | -0.437240 | -0.839015 | 0.876576 | -0.367266 | 0.519885 | 0.886994 | -0.716044 | 0.124559 | 2.098405 | -0.976717 | 0.886994 | -0.473481 | -0.887518 | 1.669363 | 0.142135 | -0.633370 | -0.437240 | -1.119421 | -0.858924 | -1.448423 | 1.473237 | -0.750685 | -0.166278 | -0.309306 | 2.622614 | -0.339559 | -0.143595 |
735 | 1.701090 | -0.996753 | -0.569346 | 0.866424 | -0.595600 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | -0.468919 | -0.436673 | -0.434912 | 1.705934 | -0.569346 | -0.435552 | -0.480608 | -0.676375 | 2.746133 | -0.534082 | 0.886994 | -0.886994 | -1.023838 | 1.023838 | -0.503170 | -0.513302 | -0.438271 | -0.407826 | 3.247122 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | -0.822242 | 1.300856 | -0.459509 | -0.105167 | -0.315953 | -0.721846 | -0.409960 | -0.277188 | -0.085126 | 1.380200 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | 3.24107 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | -0.410943 | -0.429612 | 2.45104 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | 1.197707 | -0.097216 | -0.492227 | -0.301787 | -0.615056 | -0.166278 | 1.812471 | -0.132087 | -0.582547 | -0.408319 | -0.601827 | -0.309306 | -0.11303 | -0.112132 | -0.768562 | 1.422829 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | 1.158826 | -0.619167 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | -0.437240 | -0.839015 | 0.876576 | -0.367266 | 0.519885 | 0.886994 | 0.764335 | 1.156738 | -0.352458 | 1.023838 | 0.886994 | 1.650174 | -0.887518 | -0.291490 | 1.207700 | 0.764521 | -0.437240 | 0.536979 | 0.508964 | 1.305812 | -0.293220 | -0.750685 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
67 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | 1.678978 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | 2.132567 | -0.436673 | -0.434912 | -0.586189 | -0.569346 | -0.435552 | -0.480608 | 1.478469 | -0.364148 | -0.534082 | -1.127403 | 1.127403 | -1.023838 | 1.023838 | -0.503170 | -0.513302 | -0.438271 | -0.407826 | -0.307965 | 4.141504 | -0.210362 | -0.336300 | -0.203894 | 1.216187 | -0.768725 | -0.459509 | -0.105167 | -0.315953 | 1.385337 | -0.409960 | -0.277188 | -0.085126 | -0.724533 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | 3.529527 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | -0.416335 | -0.301398 | -0.398243 | -0.410943 | -0.429612 | -0.40799 | -0.387547 | -0.267797 | 3.690669 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | -0.834929 | -0.097216 | -0.492227 | -0.301787 | 1.625868 | -0.166278 | -0.551733 | -0.132087 | 1.716599 | -0.408319 | -0.601827 | -0.309306 | -0.11303 | -0.112132 | 1.301130 | -0.702825 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | 1.158826 | -0.619167 | -0.444487 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | 0.873060 | 1.191874 | -1.140802 | -0.367266 | 0.519885 | -1.127403 | -0.716044 | 1.672828 | 0.512548 | 1.023838 | -1.127403 | -1.181367 | 0.694779 | 0.885022 | 1.740483 | -0.633370 | 0.873060 | -1.119421 | -0.174980 | -0.346729 | -0.293220 | -0.750685 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
3251 | -0.587858 | 1.003257 | -0.569346 | -1.154170 | 1.678978 | -0.298862 | -0.304693 | -0.327333 | -0.350292 | 0.509468 | 2.132567 | -0.436673 | -0.434912 | -0.586189 | -0.569346 | -0.435552 | -0.480608 | 1.478469 | -0.364148 | -0.534082 | -1.127403 | 1.127403 | 0.976717 | -0.976717 | -0.503170 | 1.948171 | -0.438271 | -0.407826 | -0.307965 | -0.241458 | -0.210362 | -0.336300 | -0.203894 | 1.216187 | -0.768725 | -0.459509 | -0.105167 | -0.315953 | -0.721846 | -0.409960 | -0.277188 | -0.085126 | 1.380200 | -0.115245 | -0.22790 | -0.062115 | -0.209851 | -0.140709 | -0.354533 | -0.112132 | -0.283324 | -0.057121 | -0.156638 | -0.137022 | -0.138877 | -0.104205 | -0.129358 | -0.120404 | -0.270534 | -0.225256 | -0.117851 | -0.095654 | -0.116989 | -0.202051 | -0.30854 | -0.375171 | -0.134005 | 2.401912 | -0.301398 | -0.398243 | -0.410943 | 2.327679 | -0.40799 | -0.387547 | -0.267797 | -0.270954 | -0.276363 | -0.042155 | -0.040966 | -0.042155 | -0.038477 | -0.040966 | 0.091911 | 1.197707 | -0.097216 | -0.492227 | -0.301787 | -0.615056 | -0.166278 | -0.551733 | -0.132087 | -0.582547 | -0.408319 | 1.661606 | -0.309306 | -0.11303 | -0.112132 | 1.301130 | -0.702825 | -0.413561 | -0.381299 | -0.131701 | -0.103721 | -0.862943 | -0.619167 | 2.249784 | -0.339559 | -0.447346 | -0.126171 | -0.595448 | -0.243508 | 1.041565 | -0.143595 | 0.873060 | 1.191874 | -1.140802 | -0.367266 | 0.519885 | -1.127403 | -0.716044 | -0.391530 | -0.640794 | -0.976717 | -1.127403 | -1.181367 | -0.887518 | -0.683661 | -0.390648 | -0.633370 | 0.873060 | 0.536979 | -0.858924 | -0.346729 | -0.882039 | -0.750685 | -0.166278 | -0.309306 | -0.381299 | -0.339559 | -0.143595 |
time: 66.8 ms (started: 2023-09-28 17:20:03 -07:00)
# Build the y_actual data frame: take the 'Y_test' entry of each of the five
# stratified folds, stack them row-wise (original index labels are kept), and
# turn the concatenated result into a data frame.
data_frame_list_stratified_fold_number_Y_test = [
    stratified_fold_number_X_train_X_test_Y_train_Y_test_collection['fold ' + str(fold_number)]['Y_test']
    for fold_number in range(5)
]
df_stratified_fold_number_y_test = pd.concat(
    data_frame_list_stratified_fold_number_Y_test,
    axis=0,
    join='outer',
    ignore_index=False,
    copy=True,
).to_frame()
# p is a notebook helper defined elsewhere -- presumably prints the shape and a preview.
p(df_stratified_fold_number_y_test)
(10147, 1)
Y | |
---|---|
8630 | 1 |
2418 | 1 |
10804 | 0 |
747 | 1 |
7333 | 1 |
10949 | 0 |
11937 | 0 |
735 | 1 |
67 | 1 |
3251 | 1 |
time: 4.08 ms (started: 2023-09-28 17:20:03 -07:00)
# Classifiers for which artifact filenames are generated.
classifier_name_list = ['random_forest_classifier', 'gradient_boosting_classifier']

# One lookup per artifact kind, each keyed by classifier name.
grid_search_models_filename_collection = {}
grid_search_models_local_optimum_filename_collection = {}
model_filename_collection = {}
model_cross_validation_results_filename_collection = {}
cross_validation_model_collection_filename_collection = {}
model_cross_validation_prediction_probability_collection_filename_collection = {}
model_cross_validation_prediction_collection_filename_collection = {}
learning_curve_results_filename_collection = {}

# Every filename embeds the classifier name and ends in '_v<filename_version>.pkl'.
for classifier_name in classifier_name_list:
    grid_search_models_filename_collection[classifier_name] = (
        'stratified_5_fold_grid_search_cross_validation_' + classifier_name
        + '_v' + filename_version + '.pkl'
    )
    grid_search_models_local_optimum_filename_collection[classifier_name] = (
        'stratified_5_fold_grid_search_cross_validation_' + classifier_name
        + '_local_optimum_v' + filename_version + '.pkl'
    )
    model_filename_collection[classifier_name] = (
        'best_stratified_5_fold_grid_search_cross_validation_' + classifier_name
        + '_v' + filename_version + '.pkl'
    )
    model_cross_validation_results_filename_collection[classifier_name] = (
        'best_' + classifier_name
        + '_stratified_5_fold_cross_validation_results_v' + filename_version + '.pkl'
    )
    cross_validation_model_collection_filename_collection[classifier_name] = (
        'stratified_5_fold_cross_validation_' + classifier_name
        + '_collection_v' + filename_version + '.pkl'
    )
    model_cross_validation_prediction_probability_collection_filename_collection[classifier_name] = (
        'stratified_5_fold_cross_validation_' + classifier_name
        + '_prediction_probability_collection_v' + filename_version + '.pkl'
    )
    model_cross_validation_prediction_collection_filename_collection[classifier_name] = (
        'stratified_5_fold_cross_validation_' + classifier_name
        + '_prediction_collection_v' + filename_version + '.pkl'
    )
    learning_curve_results_filename_collection[classifier_name] = (
        'learning_curve_results_best_' + classifier_name
        + '_v' + filename_version + '.pkl'
    )

# CSV filenames for the random forest test-metric replicates, keyed by column name.
test_random_forest_metric_replicate_filename_collection = {}
column_name_list = ['Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant']
model_name = 'random_forest'
for column_name in column_name_list:
    # The column name is lower-cased and its spaces become underscores.
    test_random_forest_metric_replicate_filename_collection[column_name] = (
        'df_test_' + str(model_name)
        + '_number_metric_estimated_' + str(number_of_replicates)
        + '_metric_replicates_from_' + str(number_of_replicates)
        + '_nonparametric_subsamples_' + str(column_name.lower().replace(' ', '_'))
        + '_v' + str(filename_version) + '.csv'
    )

# The model version stamped onto saved models is the filename version.
model_version = filename_version
time: 1.76 ms (started: 2023-09-28 17:20:03 -07:00)
# Shared 5-split stratified splitter; with shuffle=False the fold assignment is
# deterministic, so no random_state is needed.
StratifiedKFold_5_splits = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)

# Empty result holders, filled in by later cells (keyed by classifier name
# where visible in this notebook).
model_stratified_5_fold_cross_validation_results_collection = {}
stratified_5_fold_cross_validation_model_classifier_prediction_data_frame_collection = {}
stratified_5_fold_cross_validation_model_classifier_prediction_probability_data_frame_collection = {}
stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection = {}
time: 903 µs (started: 2023-09-28 17:20:03 -07:00)
# Get the "global optimum" stratified 5-fold grid search cross validation results
# for the random forest classifier: reuse the saved search if icr finds one,
# otherwise run the search on the training data and save it.
models_readback = icr.return_saved_model_if_it_exists(filename=grid_search_models_filename_collection['random_forest_classifier'])
if models_readback is not None:
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier = models_readback
else:
    # Random forest hyperparameter tuning using 5-fold cross validation.
    # Create the parameter grid based on the results of random search.
    param_grid = {
        'n_estimators': [200],  # more is better, otherwise these are just random variation
        'criterion': ['gini'],
        'max_depth': [3, 5, 10, 20, 25, None],
        # An integer min_samples_split must be >= 2; the integer 1 is rejected
        # by scikit-learn, so every candidate using it would fail to fit.
        'min_samples_split': [2, 10, 50, 100],
        'min_samples_leaf': [1],
        'min_weight_fraction_leaf': [0.0],
        # 'sqrt' is what 'auto' meant for classifiers; 'auto' is deprecated and
        # removed in newer scikit-learn releases.
        'max_features': ['sqrt'],
        'max_leaf_nodes': [None],
        'min_impurity_decrease': [0.0],
        'bootstrap': [False, True],
        'oob_score': [False],
        'n_jobs': [None],
        'warm_start': [False],
        'class_weight': [None],
        'ccp_alpha': [0.0],
        'max_samples': [None],
    }
    # Create a base model
    random_forest_classifier = RandomForestClassifier(random_state=200)
    # Instantiate the Stratified 5-Fold Grid Search Cross Validation
    # (scoring=None uses the estimator's own score method).
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier = GridSearchCV(
        estimator=random_forest_classifier,
        param_grid=param_grid,
        cv=StratifiedKFold_5_splits,
        n_jobs=-1,
        verbose=0,
        scoring=None,
    )
    # Fit the grid search to the data
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier.fit(
        data_frame_collection['X_train'],
        data_frame_collection['Y_train'].loc[:, 'Y'],
    )
    # Save it
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier = icr.save_and_return_model(
        stratified_5_fold_grid_search_cross_validation_random_forest_classifier,
        filename=grid_search_models_filename_collection['random_forest_classifier'],
    )
# Report the search object and its best estimator / best score.
print()
print('Global Optimum Grid Search Cross Validation Object:')
print(stratified_5_fold_grid_search_cross_validation_random_forest_classifier)
print()
print('Best Random Forest Classifier by GridSearchCV Global Optimum:')
print(stratified_5_fold_grid_search_cross_validation_random_forest_classifier.best_estimator_)
print('Accuracy Score: '+str(stratified_5_fold_grid_search_cross_validation_random_forest_classifier.best_score_))
This file already exists Global Optimum Grid Search Cross Validation Object: GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False), estimator=RandomForestClassifier(random_state=200), n_jobs=-1, param_grid={'bootstrap': [False, True], 'ccp_alpha': [0.0], 'class_weight': [None], 'criterion': ['gini'], 'max_depth': [3, 5, 10, 20, 25, None], 'max_features': ['auto'], 'max_leaf_nodes': [None], 'max_samples': [None], 'min_impurity_decrease': [0.0], 'min_samples_leaf': [1], 'min_samples_split': [1, 10, 50, 100], 'min_weight_fraction_leaf': [0.0], 'n_estimators': [200], 'n_jobs': [None], 'oob_score': [False], 'warm_start': [False]}) Best Random Forest Classifier by GridSearchCV Global Optimum: RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=10, n_estimators=200, random_state=200) Accuracy Score: 0.7606187133849818 time: 72.2 ms (started: 2023-09-28 17:20:03 -07:00)
# Get the "local optimum" stratified 5-fold grid search cross validation results
# for the random forest classifier (finer min_samples_split grid): reuse the
# saved search if icr finds one, otherwise run the search and save it.
models_readback = icr.return_saved_model_if_it_exists(filename=grid_search_models_local_optimum_filename_collection['random_forest_classifier'])
if models_readback is not None:
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum = models_readback
else:
    # Random forest hyperparameter tuning using 5-fold cross validation.
    param_grid = {
        'bootstrap': [False, True],
        'criterion': ['gini'],
        # 'sqrt' is what 'auto' meant for classifiers; 'auto' is deprecated and
        # removed in newer scikit-learn releases.
        'max_features': ['sqrt'],
        'max_depth': [3, 5, 10, 20, 25, None],  # lower numbers reduce growth
        # Higher numbers reduce growth. An integer value must be >= 2; the
        # integer 1 is rejected by scikit-learn, so 2 is the smallest candidate.
        'min_samples_split': [2, 5, 10, 15, 20],
        'min_samples_leaf': [1],
        'n_estimators': [200],  # more is better, otherwise these are just random variation
        'warm_start': [False],
    }
    # min_samples_leaf can be used as an alternative to 'min_samples_split'
    # Create a base model
    random_forest_classifier = RandomForestClassifier(random_state=200)
    # Instantiate the Stratified 5-Fold Grid Search Cross Validation
    # (scoring=None uses the estimator's own score method).
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum = GridSearchCV(
        estimator=random_forest_classifier,
        param_grid=param_grid,
        cv=StratifiedKFold_5_splits,
        n_jobs=-1,
        verbose=0,
        scoring=None,
    )
    # Fit the grid search to the data
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum.fit(
        data_frame_collection['X_train'],
        data_frame_collection['Y_train'].loc[:, 'Y'],
    )
    # Save it
    stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum = icr.save_and_return_model(
        stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum,
        filename=grid_search_models_local_optimum_filename_collection['random_forest_classifier'],
    )
#del stratified_5_fold_grid_search_cross_validation_random_forest_classifier
# Report the search object and its best estimator / best score.
print()
print('Local Optimum Grid Search Cross Validation Object:')
print(stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum)
print()
print('Best Random Forest Classifier by GridSearchCV Local Optimum:')
print(stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum.best_estimator_)
print('Accuracy Score: '+str(stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum.best_score_))
This file already exists Local Optimum Grid Search Cross Validation Object: GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False), estimator=RandomForestClassifier(random_state=200), n_jobs=-1, param_grid={'bootstrap': [False, True], 'criterion': ['gini'], 'max_depth': [3, 5, 10, 20, 25, None], 'max_features': ['auto'], 'min_samples_leaf': [1], 'min_samples_split': [1, 5, 10, 15, 20], 'n_estimators': [200], 'warm_start': [False]}) Best Random Forest Classifier by GridSearchCV Local Optimum: RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200) Accuracy Score: 0.7640680575012079 time: 114 ms (started: 2023-09-28 17:20:03 -07:00)
# Get the best random forest classifier: reuse the saved model if icr finds one,
# otherwise take the best estimator of the local-optimum grid search, stamp it
# with environment metadata, and save it.
model_readback = icr.return_saved_model_if_it_exists(filename=model_filename_collection['random_forest_classifier'])
if model_readback is not None:
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier = model_readback
else:
    # Get best random forest classifier from grid search cross validation
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier = stratified_5_fold_grid_search_cross_validation_random_forest_classifier_local_optimum.best_estimator_
    # Add model environment data to the model (plain attributes set on the
    # estimator object; this is the same object the grid search holds).
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.version = model_version
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.pandas_version = pd.__version__
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.numpy_version = np.__version__
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.sklearn_version = sklearn_version
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.X_columns = list(data_frame_collection['X_train'].columns)
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.build_datetime = datetime.datetime.now()
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier = icr.save_and_return_model(
        best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier,
        filename=model_filename_collection['random_forest_classifier'],
    )
# Display the model as the cell result.
best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier
This file already exists
RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200)
time: 96.7 ms (started: 2023-09-28 17:20:03 -07:00)
# Get one random forest classifier per stratified fold: reuse the saved
# collection if icr finds one, otherwise train a classifier on each fold's
# training split and save the collection (keyed 'fold 0' .. 'fold 4').
models_readback = icr.return_saved_model_if_it_exists(filename=cross_validation_model_collection_filename_collection['random_forest_classifier'])
if models_readback is not None:
    stratified_5_fold_cross_validation_random_forest_classifier_collection = models_readback
else:
    stratified_5_fold_cross_validation_random_forest_classifier_collection = {}
    for index in range(5):
        # Create random forest classifier with fixed hyperparameters.
        random_forest_classifier = RandomForestClassifier(
            bootstrap=False,
            max_depth=25,
            min_samples_split=5,
            n_estimators=200,
            min_samples_leaf=1,
            # 'sqrt' is what 'auto' meant for classifiers; 'auto' is deprecated
            # and removed in newer scikit-learn releases.
            max_features='sqrt',
            criterion='gini',
            # Must be the boolean False: the string 'False' is truthy and is
            # rejected by scikit-learn's parameter validation in newer releases.
            warm_start=False,
            random_state=200,
        )
        # Train random forest classifier and save random forest classifier per fold
        random_forest_classifier.fit(
            X=stratified_fold_number_X_train_X_test_Y_train_Y_test_collection['fold ' + str(index)]['X_train'],
            y=stratified_fold_number_X_train_X_test_Y_train_Y_test_collection['fold ' + str(index)]['Y_train'],
        )
        stratified_5_fold_cross_validation_random_forest_classifier_collection['fold ' + str(index)] = random_forest_classifier
    # Save stratified 5-fold cross validation random forest classifier collection
    stratified_5_fold_cross_validation_random_forest_classifier_collection = icr.save_and_return_model(
        stratified_5_fold_cross_validation_random_forest_classifier_collection,
        filename=cross_validation_model_collection_filename_collection['random_forest_classifier'],
    )
# Display the collection as the cell result.
stratified_5_fold_cross_validation_random_forest_classifier_collection
This file already exists
{'fold 0': RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200, warm_start='False'), 'fold 1': RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200, warm_start='False'), 'fold 2': RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200, warm_start='False'), 'fold 3': RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200, warm_start='False'), 'fold 4': RandomForestClassifier(bootstrap=False, max_depth=25, min_samples_split=5, n_estimators=200, random_state=200, warm_start='False')}
time: 333 ms (started: 2023-09-28 17:20:03 -07:00)
# Get the per-fold predicted class probabilities of the random forest
# classifiers: reuse the saved collection if icr finds one, otherwise call
# predict_proba on each fold's test split and save the results
# (keyed 'fold 0' .. 'fold 4').
prediction_probability_ndarray_collection = icr.return_processed_collection_if_it_exists(filename=model_cross_validation_prediction_probability_collection_filename_collection['random_forest_classifier'])
# Identity check: comparing with != could be evaluated element-wise if the
# readback were ever an array-like instead of a dict.
if prediction_probability_ndarray_collection is not None:
    stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection = prediction_probability_ndarray_collection
else:
    stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection = {}
    for index in range(5):
        # Get prediction probabilities for the test split of this fold, using
        # the classifier trained on the same fold.
        stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection['fold ' + str(index)] = (
            stratified_5_fold_cross_validation_random_forest_classifier_collection['fold ' + str(index)]
            .predict_proba(stratified_fold_number_X_train_X_test_Y_train_Y_test_collection['fold ' + str(index)]['X_test'])
        )
    # Save stratified 5-fold cross validation random forest classifier prediction probability collection
    stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection = icr.save_and_return_collection(
        data_frame_collection=stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection,
        filename=model_cross_validation_prediction_probability_collection_filename_collection['random_forest_classifier'],
    )
# Display the first fold's probabilities as the cell result.
stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection['fold 0']
This file already exists
array([[0.22768785, 0.77231215], [0.28875 , 0.71125 ], [0.59625 , 0.40375 ], ..., [0.75791667, 0.24208333], [0.14683716, 0.85316284], [0.51916667, 0.48083333]])
time: 3.91 ms (started: 2023-09-28 17:20:03 -07:00)
# For each fold, wrap the predict_proba array in a data frame and keep column 1
# (presumably the probability of the positive class -- verify against classes_).
data_frame_list_stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability = [
    pd.DataFrame(
        stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability_collection['fold ' + str(number)]
    )[1]
    for number in range(5)
]
# Stack the five per-fold series into one; each fold keeps its own positional
# index, so index labels repeat across folds.
stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['random_forest_classifier'] = pd.concat(
    data_frame_list_stratified_5_fold_cross_validation_random_forest_classifier_prediction_probability
)
# p is a notebook helper defined elsewhere -- presumably prints the shape and a preview.
p(stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['random_forest_classifier'])
(10147,)
0 0.772312 1 0.711250 2 0.403750 3 0.824488 4 0.710913 2024 0.261250 2025 0.652119 2026 0.669310 2027 0.911190 2028 0.640171 Name: 1, dtype: float64
time: 3.71 ms (started: 2023-09-28 17:20:03 -07:00)
# Silence every warning from here on (no category or module filter is given),
# so warnings raised by the cells below will not be shown.
warnings.filterwarnings('ignore')
time: 501 µs (started: 2023-09-28 17:20:03 -07:00)
# Get the stratified 5-fold grid search cross validation results for the
# gradient boosting classifier: reuse the saved search if icr finds one,
# otherwise run the search on the training data and save it.
models_readback = icr.return_saved_model_if_it_exists(filename=grid_search_models_filename_collection['gradient_boosting_classifier'])
if models_readback is not None:
    stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier = models_readback
else:
    # Create a base model
    gradient_boosting_classifier = GradientBoostingClassifier(random_state=200)
    # Only loss, learning_rate, n_estimators, criterion and max_depth are
    # actually varied; the remaining entries pin a single value each.
    param_grid = {
        'loss': ['log_loss', 'exponential'],
        'learning_rate': [0.01, 0.1, 1, 10, 100],
        'n_estimators': [5, 50, 250, 500],
        'subsample': [1.0],
        'criterion': ['friedman_mse', 'squared_error'],
        'min_samples_split': [2],
        'min_samples_leaf': [1],
        'min_weight_fraction_leaf': [0.0],
        'max_depth': [1, 3, 5, 7, 9, None],
        'min_impurity_decrease': [0.0],
        'init': [None],
        'max_features': [None],
        'max_leaf_nodes': [None],
        'warm_start': [False],
        'n_iter_no_change': [None],
    }
    # Instantiate the Stratified 5-Fold Grid Search Cross Validation
    # (scoring=None uses the estimator's own score method).
    stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier = GridSearchCV(
        estimator=gradient_boosting_classifier,
        param_grid=param_grid,
        cv=StratifiedKFold_5_splits,
        n_jobs=-1,
        verbose=0,
        scoring=None,
        pre_dispatch="2*n_jobs",
    )
    # Fit the grid search to the data
    stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.fit(
        X=data_frame_collection['X_train'],
        y=data_frame_collection['Y_train'].loc[:, 'Y'],
        groups=None,
    )
    # Save it
    stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier = icr.save_and_return_model(
        stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier,
        filename=grid_search_models_filename_collection['gradient_boosting_classifier'],
        add_compressed_file=False,
    )
# Display the search object as the cell result.
stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier
This file already exists
GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False), estimator=GradientBoostingClassifier(random_state=200), n_jobs=-1, param_grid={'criterion': ['friedman_mse', 'squared_error'], 'init': [None], 'learning_rate': [0.01, 0.1, 1, 10, 100], 'loss': ['log_loss', 'exponential'], 'max_depth': [1, 3, 5, 7, 9, None], 'max_features': [None], 'max_leaf_nodes': [None], 'min_impurity_decrease': [0.0], 'min_samples_leaf': [1], 'min_samples_split': [2], 'min_weight_fraction_leaf': [0.0], 'n_estimators': [5, 50, 250, 500], 'n_iter_no_change': [None], 'subsample': [1.0], 'warm_start': [False]})
time: 23.1 ms (started: 2023-09-28 17:20:03 -07:00)
# Get the best gradient boosting classifier: reuse the saved model if icr finds
# one, otherwise take the best estimator of the grid search, stamp it with
# environment metadata, and save it.
model_readback = icr.return_saved_model_if_it_exists(filename=model_filename_collection['gradient_boosting_classifier'])
if model_readback is not None:
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier = model_readback
else:
    # Get best gradient boosting classifier from grid search cross validation
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier = stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.best_estimator_
    # Add model environment data to the model (plain attributes set on the
    # estimator object; this is the same object the grid search holds).
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.version = model_version
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.pandas_version = pd.__version__
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.numpy_version = np.__version__
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.sklearn_version = sklearn_version
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.X_columns = list(data_frame_collection['X_train'].columns)
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.build_datetime = datetime.datetime.now()
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier = icr.save_and_return_model(
        best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier,
        filename=model_filename_collection['gradient_boosting_classifier'],
        add_compressed_file=False,
    )
# Display the model as the cell result.
best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier
This file already exists
GradientBoostingClassifier(loss='exponential', max_depth=9, n_estimators=250, random_state=200)
time: 19.3 ms (started: 2023-09-28 17:20:03 -07:00)
# One gradient boosting classifier per cross-validation fold, keyed 'fold 0' ..
# 'fold 4'. The collection returned by the readback helper is reused when there
# is one; otherwise the five models are trained and passed to the save helper.
models_readback = icr.return_saved_model_if_it_exists(filename=cross_validation_model_collection_filename_collection['gradient_boosting_classifier'])
if models_readback is not None:
    stratified_5_fold_cross_validation_gradient_boosting_classifier_collection = models_readback
else:
    stratified_5_fold_cross_validation_gradient_boosting_classifier_collection = {}
    for index in range(5):
        fold_key = 'fold ' + str(index)
        #create gradient boosting classifier
        # NOTE(review): criterion='squared_error' is set here, while the repr of the
        # saved best estimator shows no non-default criterion — confirm this is intended.
        gradient_boosting_classifier = GradientBoostingClassifier(criterion='squared_error',
                                                                  loss='exponential',
                                                                  learning_rate=0.1,
                                                                  max_depth=9,
                                                                  n_estimators=250,
                                                                  random_state=200,
                                                                  subsample=1.0,
                                                                  min_samples_split=2,
                                                                  min_samples_leaf=1,
                                                                  min_weight_fraction_leaf=0.0,
                                                                  min_impurity_decrease=0.0,
                                                                  init=None,
                                                                  max_features=None,
                                                                  verbose=0,
                                                                  max_leaf_nodes=None,
                                                                  warm_start=False,
                                                                  n_iter_no_change=None)
        #train gradient boosting classifier and save gradient boosting classifier per fold
        gradient_boosting_classifier.fit(X=stratified_fold_number_X_train_X_test_Y_train_Y_test_collection[fold_key]['X_train'],
                                         y=stratified_fold_number_X_train_X_test_Y_train_Y_test_collection[fold_key]['Y_train'])
        stratified_5_fold_cross_validation_gradient_boosting_classifier_collection[fold_key] = gradient_boosting_classifier
    #save stratified 5-fold cross validation gradient boosting classifier collection
    stratified_5_fold_cross_validation_gradient_boosting_classifier_collection = icr.save_and_return_model(
        stratified_5_fold_cross_validation_gradient_boosting_classifier_collection,
        filename=cross_validation_model_collection_filename_collection['gradient_boosting_classifier'])
# display the collection as the cell output
stratified_5_fold_cross_validation_gradient_boosting_classifier_collection
This file already exists
{'fold 0': GradientBoostingClassifier(criterion='squared_error', loss='exponential', max_depth=9, n_estimators=250, random_state=200), 'fold 1': GradientBoostingClassifier(criterion='squared_error', loss='exponential', max_depth=9, n_estimators=250, random_state=200), 'fold 2': GradientBoostingClassifier(criterion='squared_error', loss='exponential', max_depth=9, n_estimators=250, random_state=200), 'fold 3': GradientBoostingClassifier(criterion='squared_error', loss='exponential', max_depth=9, n_estimators=250, random_state=200), 'fold 4': GradientBoostingClassifier(criterion='squared_error', loss='exponential', max_depth=9, n_estimators=250, random_state=200)}
time: 89.6 ms (started: 2023-09-28 17:20:03 -07:00)
# Per-fold predict_proba output of the fold-specific gradient boosting
# classifiers on each fold's X_test, keyed 'fold 0' .. 'fold 4'. The collection
# returned by the readback helper is reused when there is one.
prediction_probability_ndarray_collection = icr.return_processed_collection_if_it_exists(filename=model_cross_validation_prediction_probability_collection_filename_collection['gradient_boosting_classifier'])
# identity check: an equality test against None is ambiguous for array-like returns
if prediction_probability_ndarray_collection is not None:
    stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection = prediction_probability_ndarray_collection
else:
    stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection = {}
    for index in range(5):
        fold_key = 'fold ' + str(index)
        #get predictions for test per fold
        stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection[fold_key] = \
            stratified_5_fold_cross_validation_gradient_boosting_classifier_collection[fold_key]\
            .predict_proba(stratified_fold_number_X_train_X_test_Y_train_Y_test_collection[fold_key]['X_test'])
    #save stratified 5-fold cross validation gradient boosting classifier prediction probability collection
    stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection = icr.save_and_return_collection(
        data_frame_collection=stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection,
        filename=model_cross_validation_prediction_probability_collection_filename_collection['gradient_boosting_classifier'])
# display the first fold's probabilities as the cell output
stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection['fold 0']
This file already exists
array([[2.48407057e-01, 7.51592943e-01], [2.05103256e-02, 9.79489674e-01], [9.98658831e-01, 1.34116928e-03], ..., [9.99920319e-01, 7.96805318e-05], [3.63162218e-05, 9.99963684e-01], [4.67138033e-02, 9.53286197e-01]])
time: 3.76 ms (started: 2023-09-28 17:20:03 -07:00)
# Pull column 1 of every fold's probability array into a Series (one per fold,
# in fold order) and stack them into a single Series for the gradient boosting model.
# NOTE(review): each fold keeps its own 0-based index, so the stacked index repeats.
data_frame_list_stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability = [
    pd.DataFrame(stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability_collection['fold ' + str(fold_number)])[1]
    for fold_number in range(5)
]
stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['gradient_boosting_classifier'] = pd.concat(
    data_frame_list_stratified_5_fold_cross_validation_gradient_boosting_classifier_prediction_probability
)
p(stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['gradient_boosting_classifier'])
(10147,)
0 0.751593 1 0.979490 2 0.001341 3 0.993639 4 0.826168 2024 0.577856 2025 0.947631 2026 0.991084 2027 0.999991 2028 0.938312 Name: 1, dtype: float64
time: 5.61 ms (started: 2023-09-28 17:20:03 -07:00)
# Learning curves (accuracy vs. training-set size) for a random forest and a
# gradient boosting classifier, drawn in the two columns of a 3x2 grid. The PNG
# acts as a cache: when it already exists it is displayed instead of recomputed.
dpi = 100
figure_filename = '../reports/figures/figure_train_size_score_random_forest_classifier_gradient_boosting_classifier_dpi_' + str(dpi) + '_v' + filename_version + '.png'
figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists:
    img = mpimg.imread(figure_filename)
    plt.figure(figsize=(60, 15))
    plt.grid(False)
    plt.axis('off')
    plt.imshow(img)
else:
    StratifiedKFold_5_splits = StratifiedKFold(n_splits=5, random_state=None, shuffle=False)
    fig, axes = plt.subplots(3, 2, figsize=(10, 15))

    title = r"Learning Curves (Random Forest)"
    # warm_start must be the boolean False: the string 'False' is truthy and is
    # rejected by scikit-learn's parameter validation in newer releases.
    # max_features='sqrt' is what the removed 'auto' alias meant for classifiers.
    estimator = RandomForestClassifier(bootstrap=False,
                                       max_depth=25,
                                       min_samples_split=5,
                                       n_estimators=200,
                                       random_state=200,
                                       min_samples_leaf=1,
                                       max_features='sqrt',
                                       criterion='gini',
                                       warm_start=False)
    # NOTE(review): the helper's first return value is rebound to the name `plt`;
    # presumably it returns the pyplot module itself — confirm.
    plt, learning_curve_random_forest_classifier = icr.plot_learning_curve(estimator=estimator,
                                                                           title=title,
                                                                           X=data_frame_collection['X_train'],
                                                                           y=data_frame_collection['Y_train'].loc[:, 'Y'],
                                                                           filename=learning_curve_results_filename_collection['random_forest_classifier'],
                                                                           axes=axes[:, 0],
                                                                           ylim=(0.65, 1.01),
                                                                           cv=StratifiedKFold_5_splits,
                                                                           n_jobs=4,
                                                                           scoring="accuracy",
                                                                           train_sizes=np.linspace(0.1, 1.0, 5))

    title = r"Learning Curves (Gradient Boosting)"
    # NOTE(review): loss is left at its default here, whereas the per-fold models
    # elsewhere in this notebook use loss='exponential' — confirm which is intended.
    estimator = GradientBoostingClassifier(learning_rate=0.1,
                                           max_depth=9,
                                           n_estimators=250,
                                           random_state=200,
                                           max_features=None,
                                           verbose=0,
                                           max_leaf_nodes=None,
                                           warm_start=False,
                                           n_iter_no_change=None)
    plt, learning_curve_gradient_boosting_classifier = icr.plot_learning_curve(estimator=estimator,
                                                                               title=title,
                                                                               X=data_frame_collection['X_train'],
                                                                               y=data_frame_collection['Y_train'].loc[:, 'Y'],
                                                                               filename=learning_curve_results_filename_collection['gradient_boosting_classifier'],
                                                                               axes=axes[:, 1],
                                                                               ylim=(0.65, 1.01),
                                                                               cv=StratifiedKFold_5_splits,
                                                                               n_jobs=4,
                                                                               scoring="accuracy",
                                                                               train_sizes=np.linspace(0.1, 1.0, 5))
    plt.title('Model Learning Curve')
    fig.subplots_adjust(wspace=.4)
    # save it so the next run takes the cached-image branch
    plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 192 ms (started: 2023-09-28 17:20:03 -07:00)
# Precision-recall curves of the random forest and gradient boosting
# cross-validation probabilities against Y_train, plus a flat "no skill"
# baseline at the positive-class rate. The figure is written to `filename`.
xlabel_string = 'Percentage of Coupon Acceptances Captured'
ylabel_string = 'Coupon Acceptance Rate'
filename = '../reports/figures/figure_precision_recall_curve_random_forest_gradient_boosting_metric_auc_v' + filename_version + '.png'
markersize = 1
linewidth = 1
figure, axes = plt.subplots(ncols=1, nrows=1, figsize=(12, 9))
plt.rcParams.update({'font.size': 16})

# actual target, shared by both curves and the baseline
y_train_actual = data_frame_collection['Y_train'].loc[:, 'Y']

#calculate precision-recall points
# The scores are passed positionally: the keyword was renamed from `probas_pred`
# to `y_score` in newer scikit-learn, and the positional form works with both.
random_forest_classifier_precision_array, random_forest_classifier_recall_array, random_forest_classifier_decision_threshold_array = \
    precision_recall_curve(y_train_actual,
                           stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['random_forest_classifier'])
#calculate precision-recall curve auc
random_forest_classifier_auc = auc(random_forest_classifier_recall_array, random_forest_classifier_precision_array)
# plot the precision-recall curve
plt.plot(random_forest_classifier_recall_array,
         random_forest_classifier_precision_array,
         marker='.',
         markersize=markersize,
         linewidth=linewidth,
         label='Random Forest AUC=' + str(round(random_forest_classifier_auc, 3)))

#calculate precision-recall points
gradient_boosting_classifier_precision_array, gradient_boosting_classifier_recall_array, gradient_boosting_classifier_threshold_array = \
    precision_recall_curve(y_train_actual,
                           stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['gradient_boosting_classifier'])
#calculate precision-recall curve auc
gradient_boosting_classifier_auc = auc(gradient_boosting_classifier_recall_array, gradient_boosting_classifier_precision_array)
# plot the precision-recall curve
plt.plot(gradient_boosting_classifier_recall_array,
         gradient_boosting_classifier_precision_array,
         marker='.',
         markersize=markersize,
         linewidth=linewidth,
         label='Gradient Boosting AUC=' + str(round(gradient_boosting_classifier_auc, 3)))

#calculate no skill classifier curve: share of rows whose actual Y is 1
no_skill_classifier_auc = y_train_actual.loc[y_train_actual == 1].shape[0] / y_train_actual.shape[0]
plt.plot([0, 1],
         [no_skill_classifier_auc, no_skill_classifier_auc],
         linestyle='--',
         label='No Skill AUC=' + str(round(no_skill_classifier_auc, 3)))

# axis ticks are proportions, rendered as percentages
plt.xticks([.0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1])
plt.yticks([.6, .7, .8, .9, 1])
plt.gca().xaxis.set_major_formatter(PercentFormatter(1))
plt.gca().set_yticklabels(['{:.0f}%'.format(y*100) for y in plt.gca().get_yticks()])
plt.xlabel(xlabel_string)
plt.ylabel(ylabel_string)
plt.title(str(ylabel_string) + ' vs. ' + str(xlabel_string))
plt.legend()
#save it
plt.savefig(filename, bbox_inches='tight', dpi=100)
plt.show()
time: 247 ms (started: 2023-09-28 17:20:04 -07:00)
# Tabulate the random forest precision-recall points next to their decision
# thresholds. A leading 0 is put in front of the thresholds so the array has as
# many entries as the precision and recall arrays.
random_forest_classifier_decision_threshold_array = np.concatenate(
    ([0], random_forest_classifier_decision_threshold_array)
)
df_random_forest_decision_threshold_precision_recall = pd.DataFrame({
    'random_forest_decision_threshold': random_forest_classifier_decision_threshold_array,
    'random_forest_precision': random_forest_classifier_precision_array,
    'random_forest_recall': random_forest_classifier_recall_array,
})
# the padded threshold array now lives in the frame; drop the loose copy
del random_forest_classifier_decision_threshold_array
# recall of the first row (in table order) whose precision is at least 90%
random_forest_90_precision_estimated_recall = df_random_forest_decision_threshold_precision_recall.loc[
    df_random_forest_decision_threshold_precision_recall['random_forest_precision'] >= .9,
    'random_forest_recall',
].iloc[0]
p(df_random_forest_decision_threshold_precision_recall)
(8701, 3)
random_forest_decision_threshold | random_forest_precision | random_forest_recall | |
---|---|---|---|
0 | 0.000000 | 0.568763 | 1.000000 |
1 | 0.034167 | 0.568720 | 0.999826 |
2 | 0.035000 | 0.568777 | 0.999826 |
3 | 0.036347 | 0.568833 | 0.999826 |
4 | 0.036667 | 0.568889 | 0.999826 |
8696 | 0.987500 | 1.000000 | 0.000694 |
8697 | 0.987708 | 1.000000 | 0.000521 |
8698 | 0.988750 | 1.000000 | 0.000347 |
8699 | 0.990833 | 1.000000 | 0.000174 |
8700 | 0.993750 | 1.000000 | 0.000000 |
time: 7.32 ms (started: 2023-09-28 17:20:04 -07:00)
# Tabulate the gradient boosting precision-recall points next to their decision
# thresholds. A leading 0 is put in front of the thresholds so the array has as
# many entries as the precision and recall arrays.
gradient_boosting_classifier_threshold_array = np.concatenate(
    ([0], gradient_boosting_classifier_threshold_array)
)
df_gradient_boosting_decision_threshold_precision_recall = pd.DataFrame({
    'gradient_boosting_decision_threshold': gradient_boosting_classifier_threshold_array,
    'gradient_boosting_precision': gradient_boosting_classifier_precision_array,
    'gradient_boosting_recall': gradient_boosting_classifier_recall_array,
})
# the padded threshold array now lives in the frame; drop the loose copy
del gradient_boosting_classifier_threshold_array
p(df_gradient_boosting_decision_threshold_precision_recall)
(10108, 3)
gradient_boosting_decision_threshold | gradient_boosting_precision | gradient_boosting_recall | |
---|---|---|---|
0 | 0.000000 | 0.569494 | 1.000000 |
1 | 0.000004 | 0.569451 | 0.999826 |
2 | 0.000004 | 0.569508 | 0.999826 |
3 | 0.000004 | 0.569564 | 0.999826 |
4 | 0.000005 | 0.569620 | 0.999826 |
10103 | 0.999999 | 1.000000 | 0.000694 |
10104 | 1.000000 | 1.000000 | 0.000521 |
10105 | 1.000000 | 1.000000 | 0.000347 |
10106 | 1.000000 | 1.000000 | 0.000174 |
10107 | 1.000000 | 1.000000 | 0.000000 |
time: 5.06 ms (started: 2023-09-28 17:20:04 -07:00)
# Horizontal bar chart of the random forest's largest feature importances,
# written to figure_filename. The full importance Series (sorted descending,
# indexed by the model's X_columns) is kept for later cells.
top_number_features = 50
figsize = (6, 10)
dpi = 100
figure_filename = f'../reports/figures/figure_random_forest_classifier_train_top_{top_number_features}_feature_importances_v' + filename_version + '.png'
plt.subplots(figsize=figsize)
feature_importances = best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.feature_importances_
series_random_forest_classifier_feature_importances = pd.Series(
    feature_importances,
    index=best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.X_columns,
).sort_values(ascending=False)
# keep the top N, then flip to ascending so the largest bar is drawn at the top
top_feature_importances_ascending = series_random_forest_classifier_feature_importances.iloc[:top_number_features].sort_values(ascending=True)
top_feature_importances_ascending.plot(kind='barh')
plt.xlabel('Importance')
plt.ylabel('Features')
plt.title(f'Random Forest Classifier Top {top_number_features} Feature Importances')
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 1.1 s (started: 2023-09-28 17:20:04 -07:00)
# Names of the first 50 entries of the (descending) importance Series, followed
# by the target column 'Y'. No intermediate list is left behind.
column_name_list_random_forest_classifier_top_50_by_feature_importance_target = [
    *series_random_forest_classifier_feature_importances.iloc[:50].index.to_list(),
    'Y',
]
time: 609 µs (started: 2023-09-28 17:20:05 -07:00)
# Correlation heatmap of the selected random forest feature columns and the
# target. The PNG acts as a cache: when it already exists it is displayed
# instead of being redrawn.
dpi = 100
figure_filename = f'../reports/figures/figure_correlation_heatmap_random_forest_classifier_train_top_50_and_target_dpi_{dpi}_v' + filename_version + '.png'
figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists:
    img = mpimg.imread(figure_filename)
    plt.figure(figsize=(35, 28))
    plt.grid(False)
    plt.axis('off')
    plt.imshow(img)
else:
    #correlation heatmap of top 50 features and target
    df_train = pd.concat([data_frame_collection['X_train'], data_frame_collection['Y_train']], axis=1)
    df_corr = df_train[column_name_list_random_forest_classifier_top_50_by_feature_importance_target].corr()
    fig, ax = plt.subplots(figsize=(30, 24))
    # upper-triangle mask, then trim the first row and last column from both the
    # mask and the matrix
    mask = np.triu(np.ones_like(df_corr, dtype=bool))[1:, :-1]
    corr = df_corr.iloc[1:, :-1].copy()
    sns.set(font_scale=1.4)
    sns.set_style("white")
    res = sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        fmt=".2f",
        annot_kws={"size": 12},
        cmap='YlOrBr',
        vmin=-1,
        vmax=1,
        cbar_kws={"shrink": .8},
    )
    res.set_xticklabels(res.get_xmajorticklabels(), fontsize=14, rotation=90)
    res.set_yticklabels(res.get_ymajorticklabels(), fontsize=14, rotation=0)
    plt.title('Correlation Heatmap Random Forest of Top 50 Features and Target', fontsize=18)
    #save it
    plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 1.05 s (started: 2023-09-28 17:20:05 -07:00)
feature_column_name_list = ['coupon_venue_type']

## Get Prediction Probabilities for Gradient Boosting, Prediction Probabilities for Random Forest, Y_actual, and coupon venue type
# Each probability Series (named 1) becomes a one-column frame with a
# descriptive column name and a fresh 0..n-1 index.
df_Y_train_random_forest_prediction_probability = (
    stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['random_forest_classifier']
    .to_frame()
    .rename(columns={1: 'Y_train_random_forest_prediction_probability'})
    .reset_index(drop=True)
)
df_Y_train_gradient_boosting_prediction_probability = (
    stratified_5_fold_cross_validation_model_classifier_prediction_probability_series_collection['gradient_boosting_classifier']
    .to_frame()
    .rename(columns={1: 'Y_train_gradient_boosting_prediction_probability'})
    .reset_index(drop=True)
)
# place the pieces side by side; every piece has its index reset so that the
# column-wise concat pairs rows by position
df_y_train_model_name_prediction_probability_y_actual_coupon_venue_type = pd.concat(
    [
        df_Y_train_random_forest_prediction_probability,
        df_Y_train_gradient_boosting_prediction_probability,
        data_frame_collection['Y_train'].reset_index(drop=True),
        df_collection['X_train'].loc[:, feature_column_name_list].reset_index(drop=True),
    ],
    axis=1,
)
p(df_y_train_model_name_prediction_probability_y_actual_coupon_venue_type)
(10147, 4)
Y_train_random_forest_prediction_probability | Y_train_gradient_boosting_prediction_probability | Y | coupon_venue_type | |
---|---|---|---|---|
0 | 0.772312 | 0.751593 | 1 | Coffee House |
1 | 0.711250 | 0.979490 | 1 | Coffee House |
2 | 0.403750 | 0.001341 | 0 | Carry out & Take away |
3 | 0.824488 | 0.993639 | 1 | Coffee House |
4 | 0.710913 | 0.826168 | 1 | Coffee House |
10142 | 0.261250 | 0.577856 | 0 | Bar |
10143 | 0.652119 | 0.947631 | 0 | Bar |
10144 | 0.669310 | 0.991084 | 1 | Restaurant(20-50) |
10145 | 0.911190 | 0.999991 | 1 | Coffee House |
10146 | 0.640171 | 0.938312 | 1 | Coffee House |
time: 8.93 ms (started: 2023-09-28 17:20:06 -07:00)
# Get Random Forest Classifier Y Predicted from Y Prediction Probabilities and Decision Threshold .9 Precision Estimated
model_type = 'random_forest'
df_Y_train_random_forest_predicted = icr.get_model_predictions_from_prediction_probabilities_and_decision_threshold_proportion_metric_estimated(
    df=df_random_forest_decision_threshold_precision_recall,
    model_proportion_precision=.9,
    model_proportion_recall=None,
    model_precision_column_name=f'{model_type}_precision',
    model_recall_column_name=f'{model_type}_recall',
    model_decision_threshold_column_name=f'{model_type}_decision_threshold',
    df_Y_train_test_model_prediction_probability=df_Y_train_random_forest_prediction_probability.iloc[:, 0],
    train_test='train',
    filename_version=filename_version,
).rename(columns={'Y_train_predicted': f'Y_train_{model_type}_predicted'})

#get gradient boosting 80% recall estimate predictions
model_type = 'gradient_boosting'
model_proportion_precision = None
model_proportion_recall = .8
df_Y_train_gradient_boosting_predicted = icr.get_model_predictions_from_prediction_probabilities_and_decision_threshold_proportion_metric_estimated(
    df=df_gradient_boosting_decision_threshold_precision_recall,
    model_proportion_precision=model_proportion_precision,
    model_proportion_recall=model_proportion_recall,
    model_precision_column_name=f'{model_type}_precision',
    model_recall_column_name=f'{model_type}_recall',
    model_decision_threshold_column_name=f'{model_type}_decision_threshold',
    df_Y_train_test_model_prediction_probability=df_Y_train_gradient_boosting_prediction_probability.iloc[:, 0],
    train_test='train',
    filename_version=filename_version,
).rename(columns={'Y_train_predicted': f'Y_train_{model_type}_predicted'})

# one survey recommendation is requested per training row
data_fold_type = 'train'
number_of_predictions = data_frame_collection['X_' + data_fold_type].shape[0]

#get survey predictions at the recall the random forest reaches at 90% precision
# (this is the column named "27" below)
recall_estimated = random_forest_90_precision_estimated_recall
df_Y_train_survey_27_recall_estimate_predicted = icr.get_survey_coupon_recommendations_by_recall_estimate(
    number_of_predictions=number_of_predictions,
    recall_estimated=recall_estimated,
    random_state=200,
    train_test='train',
)

#get survey 80% recall estimate predictions
recall_estimated = .8
df_Y_train_survey_80_recall_estimate_predicted = icr.get_survey_coupon_recommendations_by_recall_estimate(
    number_of_predictions=number_of_predictions,
    recall_estimated=recall_estimated,
    random_state=200,
    train_test='train',
)

#get survey 100% recall estimate predictions
recall_estimated = 1
df_Y_train_survey_100_recall_estimate_predicted = icr.get_survey_coupon_recommendations_by_recall_estimate(
    number_of_predictions=number_of_predictions,
    recall_estimated=recall_estimated,
    random_state=200,
    train_test='train',
)

### Get Data Frame Y Train Random Forest Predicted, Y Train Gradient Boosting Predicted, Y Train Survey Predicted, Y Actual, and Coupon Venue Type
feature_column_name_list = ['coupon_venue_type']
df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type = pd.concat(
    [
        df_Y_train_random_forest_predicted,
        df_Y_train_gradient_boosting_predicted,
        df_Y_train_survey_27_recall_estimate_predicted,
        df_Y_train_survey_80_recall_estimate_predicted,
        df_Y_train_survey_100_recall_estimate_predicted,
        data_frame_collection['Y_train'].reset_index(drop=True),
        df_collection['X_train'].loc[:, feature_column_name_list].reset_index(drop=True),
    ],
    axis=1,
)
p(df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type)
This file already exists. This file already exists. 0.26870335011282764 0.7312966498871724 0.8 0.19999999999999996 1 0 (10147, 7)
Y_train_random_forest_predicted | Y_train_gradient_boosting_predicted | Y_train_survey_27_recall_estimate_predicted | Y_train_survey_80_recall_estimate_predicted | Y_train_survey_100_recall_estimate_predicted | Y | coupon_venue_type | |
---|---|---|---|---|---|---|---|
0 | 0 | 1 | 1 | 1 | 1 | 1 | Coffee House |
1 | 0 | 1 | 0 | 1 | 1 | 1 | Coffee House |
2 | 0 | 0 | 0 | 1 | 1 | 0 | Carry out & Take away |
3 | 0 | 1 | 0 | 1 | 1 | 1 | Coffee House |
4 | 0 | 1 | 1 | 1 | 1 | 1 | Coffee House |
10142 | 0 | 0 | 0 | 1 | 1 | 0 | Bar |
10143 | 0 | 1 | 0 | 1 | 1 | 0 | Bar |
10144 | 0 | 1 | 0 | 0 | 1 | 1 | Restaurant(20-50) |
10145 | 1 | 1 | 0 | 1 | 1 | 1 | Coffee House |
10146 | 0 | 1 | 0 | 0 | 1 | 1 | Coffee House |
time: 22.1 ms (started: 2023-09-28 17:20:06 -07:00)
# Shared configuration for the per-venue metric tables that follow.
multiple_index = icr.get_metric_multiple_index(proportion_or_percentage='proportion')

# display label -> raw coupon_venue_type values that belong to it
feature_column_name_filter_value_list_dictionary = {
    'Overall': ['Coffee House', 'Bar', 'Carry out & Take away', 'Restaurant(<20)', 'Restaurant(20-50)'],
    'Coffee House': ['Coffee House'],
    'Bar': ['Bar'],
    'Takeout': ['Carry out & Take away'],
    'Low-Cost Restaurant': ['Restaurant(<20)'],
    'Mid-Range Restaurant': ['Restaurant(20-50)'],
}
# the labels and the value lists are also kept as two parallel lists
feature_column_name_filter_value_list_dictionary_key_list = list(feature_column_name_filter_value_list_dictionary)
feature_column_name_filter_value_two_dimensional_list = list(feature_column_name_filter_value_list_dictionary.values())

feature_column_name_filter = 'coupon_venue_type'
y_predicted_column_name_base_survey = 'Y_train_survey_100_recall_estimate_predicted'

# average sale value per venue label, each wrapped in a one-element list
venue_type_average_sale_dictionary = {
    'Coffee House': [5.50],
    'Bar': [15],
    'Takeout': [15],
    'Low-Cost Restaurant': [12],
    'Mid-Range Restaurant': [35],
}
time: 1.85 ms (started: 2023-09-28 17:20:06 -07:00)
# Arguments that are identical for all three cost-estimate calls below; only the
# predicted-column name and the Control/Treatment label differ between them.
column_name_y_actual = 'Y'
shared_cost_estimate_arguments = dict(
    df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type=df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type,
    column_name_y_actual=column_name_y_actual,
    feature_column_name_filter=feature_column_name_filter,
    feature_column_name_filter_value_two_dimensional_list=feature_column_name_filter_value_two_dimensional_list,
    feature_column_name_filter_value_list_dictionary_key_list=feature_column_name_filter_value_list_dictionary_key_list,
    venue_type_average_sale_dictionary=venue_type_average_sale_dictionary,
)

### Get Average Coupon Recommendation Cost Estimated (Per Coupon Venue Type) From Survey 100% Recall Metrics and Average Sale Estimated
column_name_y_predicted = 'Y_train_survey_100_recall_estimate_predicted'
df_train_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated = icr.get_survey_or_model_average_coupon_recommendation_cost_estimated(
    column_name_y_predicted=column_name_y_predicted,
    model_survey='Control',
    **shared_cost_estimate_arguments,
)

### Get Average Coupon Recommendation Cost Estimated (Per Coupon Venue Type) From Random Forest 90% Coupon Acceptance Rate Estimated Metrics and Average Sale Estimated
column_name_y_predicted = 'Y_train_random_forest_predicted'
df_train_random_forest_coupon_recommendation_cost_estimated_sale_estimated = icr.get_survey_or_model_average_coupon_recommendation_cost_estimated(
    column_name_y_predicted=column_name_y_predicted,
    model_survey='Treatment',
    **shared_cost_estimate_arguments,
)

### Get Average Coupon Recommendation Cost Estimated (Per Coupon Venue Type) From Gradient Boosting 80% Recall Estimated Metrics and Average Sale Estimated
column_name_y_predicted = 'Y_train_gradient_boosting_predicted'
df_train_gradient_boosting_coupon_recommendation_cost_estimated_sale_estimated = icr.get_survey_or_model_average_coupon_recommendation_cost_estimated(
    column_name_y_predicted=column_name_y_predicted,
    model_survey='Treatment',
    **shared_cost_estimate_arguments,
)

# stack the random forest rows on top of the survey (control) rows
df_train_random_forest_29_precision_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated = pd.concat(
    [
        df_train_random_forest_coupon_recommendation_cost_estimated_sale_estimated,
        df_train_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
    ],
    axis=0,
)

# stack the gradient boosting rows on top of the survey (control) rows
df_train_gradient_boosting_80_recall_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated = pd.concat(
    [
        df_train_gradient_boosting_coupon_recommendation_cost_estimated_sale_estimated,
        df_train_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
    ],
    axis=0,
)
# display the gradient boosting vs. control table as the cell output
df_train_gradient_boosting_80_recall_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Average Coupon Recommendation Cost Estimated | NaN | 0.848469 | 2.223881 | 2.456336 | 1.976224 | 4.638037 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 | |
Control | Average Coupon Recommendation Cost Estimated | NaN | 0.541538 | 1.244129 | 2.201155 | 1.707282 | 3.081650 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 |
time: 54.3 ms (started: 2023-09-28 17:20:06 -07:00)
# Random forest vs. survey metrics per venue label, extended with the cost and
# sale estimates and the derived ad/ROI metrics, then displayed in a fixed row order.
model_type = 'random_forest'
survey_number_recall_estimated_y_predicted_column_name = 'Y_train_survey_27_recall_estimate_predicted'

# one list of metrics per venue label, in dictionary order
metrics_coupon_venue_type_list = []
for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary:
    metric_list = icr.get_model_and_survey_metrics(
        df=df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type,
        model_y_predicted_column_name='Y_train_' + model_type + '_predicted',
        survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
        y_predicted_column_name_base_survey=y_predicted_column_name_base_survey,
        y_actual_column_name='Y',
        feature_column_name_filter=feature_column_name_filter,
        feature_column_name_filter_value_list=feature_column_name_filter_value_list_dictionary[feature_column_name_filter_value_list_dictionary_key],
        metrics_column_name_list=None,
    )
    metrics_coupon_venue_type_list.append(metric_list)

# venue labels become columns after the transpose; only the first two thirds of
# multiple_index are used as row labels at this point
df_train_random_forest_model_survey_metrics = pd.DataFrame(
    metrics_coupon_venue_type_list,
    index=feature_column_name_filter_value_list_dictionary_key_list,
    columns=multiple_index[:int(len(multiple_index) * 2 / 3)],
).T
df_train_random_forest_metrics = icr.calculate_and_add_model_survey_difference(df_train_random_forest_model_survey_metrics, multiple_index)

#add Venue Type Coupon Recommendation Cost Estimated, Sale Estimated
df_train_random_forest_metrics_coupon_recommendation_cost_estimated_sale_estimated = pd.concat(
    [
        df_train_random_forest_metrics,
        df_train_random_forest_29_precision_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
    ],
    axis=0,
)

#get and add Ad Revenue, Ad Spend, ROAS, Profit, Spend, ROI
df_train_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = icr.get_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI(
    df=df_train_random_forest_metrics_coupon_recommendation_cost_estimated_sale_estimated
)
del df_train_random_forest_metrics_coupon_recommendation_cost_estimated_sale_estimated

#select and reorder basic metrics, recommendation cost estimated, and average sale estimate
multiindex_basic_metrics = icr.get_the_multiindex_object_with_basic_metrics()
multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated = icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()
multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated = pd.MultiIndex.from_tuples(
    [*multiindex_basic_metrics, *multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated]
)

#display combined metrics
df_train_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated, :]
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 90.000000 | 89.411765 | 79.487179 | 88.888889 | 91.480996 | 95.000000 |
Percentage of Coupon Acceptances Captured | 26.870335 | 19.387755 | 4.619970 | 35.504653 | 43.570537 | 3.632887 | |
Coupon Acceptances | 1548.000000 | 304.000000 | 31.000000 | 496.000000 | 698.000000 | 19.000000 | |
Coupon Acceptances Possible | 5761.000000 | 1568.000000 | 671.000000 | 1397.000000 | 1602.000000 | 523.000000 | |
Coupon Recommendations | 1720.000000 | 340.000000 | 39.000000 | 558.000000 | 763.000000 | 20.000000 | |
Coupon Recommendations Possible | 10147.000000 | 3185.000000 | 1618.000000 | 1904.000000 | 2252.000000 | 1188.000000 | |
Ad Revenue | 18618.000000 | 1672.000000 | 465.000000 | 7440.000000 | 8376.000000 | 665.000000 | |
Ad Spend | 3723.600000 | 334.400000 | 93.000000 | 1488.000000 | 1675.200000 | 133.000000 | |
ROAS | 500.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 | |
Control | Coupon Acceptance Rate | 57.722988 | 51.113716 | 41.630901 | 76.388889 | 69.666667 | 47.462687 |
Percentage of Coupon Acceptances Captured | 27.634091 | 27.806122 | 28.912072 | 27.559055 | 26.092385 | 30.401530 | |
Coupon Acceptances | 1592.000000 | 436.000000 | 194.000000 | 385.000000 | 418.000000 | 159.000000 | |
Coupon Acceptances Possible | 5761.000000 | 1568.000000 | 671.000000 | 1397.000000 | 1602.000000 | 523.000000 | |
Coupon Recommendations | 2758.000000 | 853.000000 | 466.000000 | 504.000000 | 600.000000 | 335.000000 | |
Coupon Recommendations Possible | 10147.000000 | 3185.000000 | 1618.000000 | 1904.000000 | 2252.000000 | 1188.000000 | |
Ad Revenue | 21664.000000 | 2398.000000 | 2910.000000 | 5775.000000 | 5016.000000 | 5565.000000 | |
Ad Spend | 6839.257701 | 838.950588 | 1111.230769 | 1344.000000 | 1317.326343 | 2227.750000 | |
ROAS | 316.759522 | 285.833282 | 261.871798 | 429.687500 | 380.771251 | 249.803614 | |
Uplift | Coupon Acceptance Rate | 32.277012 | 38.298048 | 37.856278 | 12.500000 | 21.814329 | 47.537313 |
Percentage of Coupon Acceptances Captured | -0.763756 | -8.418367 | -24.292101 | 7.945598 | 17.478152 | -26.768642 | |
Coupon Acceptances | -44.000000 | -132.000000 | -163.000000 | 111.000000 | 280.000000 | -140.000000 | |
Coupon Acceptances Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Coupon Recommendations | -1038.000000 | -513.000000 | -427.000000 | 54.000000 | 163.000000 | -315.000000 | |
Coupon Recommendations Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Ad Revenue | -3046.000000 | -726.000000 | -2445.000000 | 1665.000000 | 3360.000000 | -4900.000000 | |
Ad Spend | -3115.657701 | -504.550588 | -1018.230769 | 144.000000 | 357.873657 | -2094.750000 | |
ROAS | 183.240478 | 214.166718 | 238.128202 | 70.312500 | 119.228749 | 250.196386 | |
Treatment | Average Coupon Recommendation Cost Estimated | NaN | 0.983529 | 2.384615 | 2.666667 | 2.195544 | 6.650000 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 | |
Control | Average Coupon Recommendation Cost Estimated | NaN | 0.541538 | 1.244129 | 2.201155 | 1.707282 | 3.081650 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 |
time: 96.3 ms (started: 2023-09-28 17:20:06 -07:00)
#display the full model/survey metric table (venue types as columns) built from the current metrics_coupon_venue_type_list
pd.DataFrame(
    data=metrics_coupon_venue_type_list,
    index=feature_column_name_filter_value_list_dictionary_key_list,
    columns=multiple_index[:len(multiple_index) * 2 // 3],
).T
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 90.000000 | 89.411765 | 79.487179 | 88.888889 | 91.480996 | 95.000000 |
Percentage of Coupon Acceptances Captured | 26.870335 | 19.387755 | 4.619970 | 35.504653 | 43.570537 | 3.632887 | |
Proportion of Coupon Acceptances | 1.000000 | 0.196382 | 0.020026 | 0.320413 | 0.450904 | 0.012274 | |
Coupon Acceptances | 1548.000000 | 304.000000 | 31.000000 | 496.000000 | 698.000000 | 19.000000 | |
Coupon Acceptances Possible | 5761.000000 | 1568.000000 | 671.000000 | 1397.000000 | 1602.000000 | 523.000000 | |
Proportion of Coupon Recommendations | 1.000000 | 0.197674 | 0.022674 | 0.324419 | 0.443605 | 0.011628 | |
Coupon Recommendations | 1720.000000 | 340.000000 | 39.000000 | 558.000000 | 763.000000 | 20.000000 | |
Coupon Recommendations Possible | 10147.000000 | 3185.000000 | 1618.000000 | 1904.000000 | 2252.000000 | 1188.000000 | |
Coupon Acceptances to Base Survey Coupon Recommendations Ratio | 0.152557 | 0.095447 | 0.019159 | 0.260504 | 0.309947 | 0.015993 | |
Coupon Acceptances to Survey Coupon Acceptances Ratio | 0.972362 | 0.697248 | 0.159794 | 1.288312 | 1.669856 | 0.119497 | |
Coupon Recommendations to Survey Coupon Recommendations Ratio | 0.623640 | 0.398593 | 0.083691 | 1.107143 | 1.271667 | 0.059701 | |
Coupon Recommendations to Base Survey Coupon Recommendations Ratio | 0.169508 | 0.106750 | 0.024104 | 0.293067 | 0.338810 | 0.016835 | |
Control | Coupon Acceptance Rate | 57.722988 | 51.113716 | 41.630901 | 76.388889 | 69.666667 | 47.462687 |
Percentage of Coupon Acceptances Captured | 27.634091 | 27.806122 | 28.912072 | 27.559055 | 26.092385 | 30.401530 | |
Proportion of Coupon Acceptances | 1.000000 | 0.273869 | 0.121859 | 0.241834 | 0.262563 | 0.099874 | |
Coupon Acceptances | 1592.000000 | 436.000000 | 194.000000 | 385.000000 | 418.000000 | 159.000000 | |
Coupon Acceptances Possible | 5761.000000 | 1568.000000 | 671.000000 | 1397.000000 | 1602.000000 | 523.000000 | |
Proportion of Coupon Recommendations | 1.000000 | 0.309282 | 0.168963 | 0.182741 | 0.217549 | 0.121465 | |
Coupon Recommendations | 2758.000000 | 853.000000 | 466.000000 | 504.000000 | 600.000000 | 335.000000 | |
Coupon Recommendations Possible | 10147.000000 | 3185.000000 | 1618.000000 | 1904.000000 | 2252.000000 | 1188.000000 | |
Coupon Acceptances to Base Survey Coupon Recommendations Ratio | 0.156894 | 0.136892 | 0.119901 | 0.202206 | 0.185613 | 0.133838 | |
Coupon Acceptances to Survey Coupon Acceptances Ratio | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | |
Coupon Recommendations to Survey Coupon Recommendations Ratio | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | |
Coupon Recommendations to Base Survey Coupon Recommendations Ratio | 0.271804 | 0.267818 | 0.288010 | 0.264706 | 0.266430 | 0.281987 |
time: 9 ms (started: 2023-09-28 17:20:06 -07:00)
#ROI metrics: Profit, Spend and ROI table derived from the random forest train metrics frame
icr.profit_spend_roi_number_table(
    df=df_train_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI,
)
#del df_train_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI
Additional Production Cost | 200 | 2000 | 20000 | ||||||
---|---|---|---|---|---|---|---|---|---|
Metric | Profit | Spend | ROI | Profit | Spend | ROI | Profit | Spend | ROI |
Group | |||||||||
Control | 14624.742299 | 7039.257701 | 207.759723 | 12824.742299 | 8839.257701 | 145.088454 | -5175.257701 | 26839.257701 | -19.282417 |
Treatment | 14694.400000 | 3923.600000 | 374.513202 | 12894.400000 | 5723.600000 | 225.284786 | -5105.600000 | 23723.600000 | -21.521186 |
Uplift | 69.657701 | -3115.657701 | 166.753479 | 69.657701 | -3115.657701 | 80.196332 | 69.657701 | -3115.657701 | -2.238768 |
time: 11.6 ms (started: 2023-09-28 17:20:06 -07:00)
model_type='random_forest'
### Get Train Random Forest 90% Coupon Acceptance Rate Estimated 95% Confidence Interval Metrics (Per Coupon Venue Type) Table
#purpose: for every coupon venue type key, call the icr confidence-interval helper on the train prediction frame and display the resulting interval table
#NOTE(review): `st` is assigned outside this chunk, and the indentation of this export was lost, so which of the statements below are guarded by this `if` cannot be determined here -- confirm against the notebook
if st != 'yes':
#replicate count and the lower/upper quantiles (2.5%, 97.5%) used as the bounds of the 95% interval
number_of_replicates=10000
quantile_lower_upper_list=[0.025, 0.975]
feature_column_name_filter='coupon_venue_type'
#venue type keys handed to the helper as save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list (all six keys here)
save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list=['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant']
#the helper returns two values per venue type key: the first is stored in the confidence interval collection, the second in the replicate metrics collection
random_forest_model_train_survey_number_confidence_interval_metric_collection={}
df_train_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection={}
#NOTE(review): sample_size=2537 equals the row count of the test frames shown later in this file ((2537, 4)); presumably the train replicates are drawn at test-set size on purpose -- confirm
#NOTE(review): multiple_index, survey_number_recall_estimated_y_predicted_column_name and filename_version are not assigned in this cell; they come from earlier cells
for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary_key_list:
random_forest_model_train_survey_number_confidence_interval_metric_collection[feature_column_name_filter_value_list_dictionary_key],\
df_train_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection[feature_column_name_filter_value_list_dictionary_key]=\
icr.get_metric_confidence_interval_table_by_feature_column_name_filter_value_list_dictionary_key(df_y_train_test_model_name_predicted_y_train_test_survey_recall_estimate_predicted_y_actual_feature_column_name_filter=df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type,
feature_column_name_filter=feature_column_name_filter,
feature_column_name_filter_value_list_dictionary_key=feature_column_name_filter_value_list_dictionary_key,
feature_column_name_filter_value_list_dictionary=feature_column_name_filter_value_list_dictionary,
multiple_index=multiple_index,
number_of_replicates=number_of_replicates,
quantile_lower_upper_list=quantile_lower_upper_list,
model_type=model_type,
survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list=save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list,
filename_version=filename_version,
train_test='train',
sample_size=2537)
#merge the per-venue-type interval tables into a single frame
df_random_forest_model_train_survey_number_confidence_interval_metric_feature_column_name_filter_value_sample_size_2537=\
icr.convert_collection_to_data_frame_and_drop_top_column_level(random_forest_model_train_survey_number_confidence_interval_metric_collection)
#TODO: the Mid-Range Restaurant Treatment 'Coupon Acceptances' 95% confidence interval (100, 100) looks wrong.
#the displayed table has more suspicious Treatment/Uplift cells of the same kind: Mid-Range Restaurant 'Coupon Recommendations' (100, 100) and 'Percentage of Coupon Acceptances Captured' (78.1%, 714.3%), Bar 'Percentage of Coupon Acceptances Captured' (100%, 100%), Uplift Takeout 'Percentage of Coupon Acceptances Captured' (100%, 100%) -- investigate together
#select and reorder the basic metrics only; the cost estimated / sale estimated rows are not selected here (see the commented-out lines below)
metric_list_refined=['Coupon Acceptance Rate', 'Percentage of Coupon Acceptances Captured', 'Coupon Acceptances', 'Coupon Acceptances Possible', 'Coupon Recommendations', 'Coupon Recommendations Possible']
multiindex_basic_metrics=icr.get_the_multiindex_object_with_basic_metrics(metric_list_refined=metric_list_refined)
#multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated=icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()
#multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated=pd.MultiIndex.from_tuples(list(multiindex_basic_metrics)+list(multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated))
#display the selected confidence interval rows
df_random_forest_model_train_survey_number_confidence_interval_metric_feature_column_name_filter_value_sample_size_2537.loc[multiindex_basic_metrics,:]
This file already exists This file already exists This file already exists This file already exists This file already exists This file already exists
95% Confidence Interval | |||||||
---|---|---|---|---|---|---|---|
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
Treatment | Coupon Acceptance Rate | (87%, 92%) | (82%, 95%) | (50%, 100%) | (83%, 93%) | (87%, 95%) | (66%, 100%) |
Percentage of Coupon Acceptances Captured | (24%, 29%) | (15%, 23%) | (100%, 100%) | (30%, 40%) | (38%, 48%) | (78.1%, 714.3%) | |
Coupon Acceptances | (352, 422) | (60, 93) | (3, 14) | (103, 145) | (150, 200) | (100, 100) | |
Coupon Acceptances Possible | (1391, 1489) | (356, 428) | (143, 193) | (315, 384) | (365, 437) | (109, 153) | |
Coupon Recommendations | (393, 467) | (68, 103) | (4, 16) | (118, 162) | (164, 217) | (100, 100) | |
Coupon Recommendations Possible | (2537, 2537) | (750, 842) | (369, 441) | (438, 515) | (522, 604) | (266, 328) | |
Control | Coupon Acceptance Rate | (53%, 61%) | (44%, 57%) | (32%, 50%) | (68%, 83%) | (62%, 76%) | (36%, 58%) |
Percentage of Coupon Acceptances Captured | (25%, 29%) | (23%, 32%) | (22%, 35%) | (23%, 32%) | (21%, 30%) | (22%, 38%) | |
Coupon Acceptances | (363, 434) | (89, 130) | (36, 63) | (78, 116) | (85, 124) | (28, 52) | |
Coupon Acceptances Possible | (1391, 1489) | (356, 428) | (143, 193) | (315, 384) | (365, 437) | (109, 153) | |
Coupon Recommendations | (645, 733) | (186, 241) | (97, 137) | (105, 148) | (127, 174) | (66, 101) | |
Coupon Recommendations Possible | (2537, 2537) | (750, 842) | (369, 441) | (438, 515) | (522, 604) | (266, 328) | |
Uplift | Coupon Acceptance Rate | (27%, 36%) | (29%, 46%) | (7%, 62%) | (4%, 20%) | (14%, 29%) | (14%, 62%) |
Percentage of Coupon Acceptances Captured | (-4%, 2%) | (-14%, -2%) | (-31%, -16%) | (100%, 100%) | (10%, 23%) | (-35%, -18%) | |
Coupon Acceptances | (-58, 36) | (-57, -10) | (-55, -27) | (3, 52) | (43, 97) | (-48, -23) | |
Coupon Acceptances Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | |
Coupon Recommendations | (-316, -203) | (-159, -98) | (-128, -86) | (-13, 40) | (11, 71) | (-97, -61) | |
Coupon Recommendations Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) |
time: 1.59 s (started: 2023-09-28 17:20:06 -07:00)
model_type = 'gradient_boosting'
survey_number_recall_estimated_y_predicted_column_name = 'Y_train_survey_80_recall_estimate_predicted'

#collect one metric row per venue type key (dictionary order), comparing the model predictions with the survey predictions
metrics_coupon_venue_type_list = []
for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary:
    metric_list = icr.get_model_and_survey_metrics(
        df=df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type,
        model_y_predicted_column_name=f'Y_train_{model_type}_predicted',
        survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
        y_predicted_column_name_base_survey=y_predicted_column_name_base_survey,
        y_actual_column_name='Y',
        feature_column_name_filter=feature_column_name_filter,
        feature_column_name_filter_value_list=feature_column_name_filter_value_list_dictionary[
            feature_column_name_filter_value_list_dictionary_key
        ],
        metrics_column_name_list=None,
    )
    metrics_coupon_venue_type_list.append(metric_list)

#rows become venue-type columns after the transpose; only the first two thirds of multiple_index label the rows here
df_train_gradient_boosting_model_survey_metrics = pd.DataFrame(
    metrics_coupon_venue_type_list,
    index=feature_column_name_filter_value_list_dictionary_key_list,
    columns=multiple_index[:len(multiple_index) * 2 // 3],
).T
df_train_gradient_boosting_metrics = icr.calculate_and_add_model_survey_difference(
    df_train_gradient_boosting_model_survey_metrics,
    multiple_index,
)

#add Venue Type Coupon Recommendation Cost Estimated, Sale Estimated (stacked below the metric rows)
df_train_gradient_boosting_metrics_coupon_recommendation_cost_estimated_sale_estimated = pd.concat(
    [
        df_train_gradient_boosting_metrics,
        df_train_gradient_boosting_80_recall_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
    ],
    axis=0,
)

#get and add Ad Revenue, Ad Spend, ROAS, Profit, Spend, ROI
df_train_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = (
    icr.get_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI(
        df=df_train_gradient_boosting_metrics_coupon_recommendation_cost_estimated_sale_estimated,
    )
)
#the stacked intermediate frame is not needed once the financial metrics are derived
del df_train_gradient_boosting_metrics_coupon_recommendation_cost_estimated_sale_estimated

#select and reorder basic metrics, recommendation cost estimated, and average sale estimate
multiindex_basic_metrics = icr.get_the_multiindex_object_with_basic_metrics()
multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated = (
    icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()
)
multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated = pd.MultiIndex.from_tuples(
    [
        *multiindex_basic_metrics,
        *multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated,
    ]
)

#display combined metrics (basic metric rows first, cost/sale estimate rows after)
df_train_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[
    multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated, :
]
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 78.673713 | 77.133550 | 74.129353 | 81.877873 | 82.342657 | 66.257669 |
Percentage of Coupon Acceptances Captured | 80.107620 | 75.510204 | 66.616990 | 89.262706 | 88.202247 | 61.950287 | |
Coupon Acceptances | 4615.000000 | 1184.000000 | 447.000000 | 1247.000000 | 1413.000000 | 324.000000 | |
Coupon Acceptances Possible | 5761.000000 | 1568.000000 | 671.000000 | 1397.000000 | 1602.000000 | 523.000000 | |
Coupon Recommendations | 5866.000000 | 1535.000000 | 603.000000 | 1523.000000 | 1716.000000 | 489.000000 | |
Coupon Recommendations Possible | 10147.000000 | 3185.000000 | 1618.000000 | 1904.000000 | 2252.000000 | 1188.000000 | |
Ad Revenue | 60218.000000 | 6512.000000 | 6705.000000 | 18705.000000 | 16956.000000 | 11340.000000 | |
Ad Spend | 12043.600000 | 1302.400000 | 1341.000000 | 3741.000000 | 3391.200000 | 2268.000000 | |
ROAS | 500.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 | 500.000000 | |
Control | Coupon Acceptance Rate | 56.995074 | 49.764151 | 41.812643 | 72.727273 | 71.412556 | 44.947368 |
Percentage of Coupon Acceptances Captured | 80.333275 | 80.739796 | 81.818182 | 79.599141 | 79.525593 | 81.644359 | |
Coupon Acceptances | 4628.000000 | 1266.000000 | 549.000000 | 1112.000000 | 1274.000000 | 427.000000 | |
Coupon Acceptances Possible | 5761.000000 | 1568.000000 | 671.000000 | 1397.000000 | 1602.000000 | 523.000000 | |
Coupon Recommendations | 8120.000000 | 2544.000000 | 1313.000000 | 1529.000000 | 1784.000000 | 950.000000 | |
Coupon Recommendations Possible | 10147.000000 | 3185.000000 | 1618.000000 | 1904.000000 | 2252.000000 | 1188.000000 | |
Ad Revenue | 62111.000000 | 6963.000000 | 8235.000000 | 16680.000000 | 15288.000000 | 14945.000000 | |
Ad Spend | 16765.916704 | 2158.505277 | 2919.955224 | 3755.738017 | 3525.583217 | 4406.134969 | |
ROAS | 370.459910 | 322.584340 | 282.024873 | 444.120434 | 433.630383 | 339.186160 | |
Uplift | Coupon Acceptance Rate | 21.678639 | 27.369400 | 32.316710 | 9.150600 | 10.930101 | 21.310300 |
Percentage of Coupon Acceptances Captured | -0.225655 | -5.229592 | -15.201192 | 9.663565 | 8.676654 | -19.694073 | |
Coupon Acceptances | -13.000000 | -82.000000 | -102.000000 | 135.000000 | 139.000000 | -103.000000 | |
Coupon Acceptances Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Coupon Recommendations | -2254.000000 | -1009.000000 | -710.000000 | -6.000000 | -68.000000 | -461.000000 | |
Coupon Recommendations Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Ad Revenue | -1893.000000 | -451.000000 | -1530.000000 | 2025.000000 | 1668.000000 | -3605.000000 | |
Ad Spend | -4722.316704 | -856.105277 | -1578.955224 | -14.738017 | -134.383217 | -2138.134969 | |
ROAS | 129.540090 | 177.415660 | 217.975127 | 55.879566 | 66.369617 | 160.813840 | |
Treatment | Average Coupon Recommendation Cost Estimated | NaN | 0.848469 | 2.223881 | 2.456336 | 1.976224 | 4.638037 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 | |
Control | Average Coupon Recommendation Cost Estimated | NaN | 0.541538 | 1.244129 | 2.201155 | 1.707282 | 3.081650 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 |
time: 92.6 ms (started: 2023-09-28 17:20:08 -07:00)
#ROI metrics: Profit, Spend and ROI table derived from the gradient boosting train metrics frame
icr.profit_spend_roi_number_table(
    df=df_train_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI,
)
Additional Production Cost | 200 | 2000 | 20000 | ||||||
---|---|---|---|---|---|---|---|---|---|
Metric | Profit | Spend | ROI | Profit | Spend | ROI | Profit | Spend | ROI |
Group | |||||||||
Control | 45145.083296 | 16965.916704 | 266.092803 | 43345.083296 | 18765.916704 | 230.977703 | 25345.083296 | 36765.916704 | 68.936356 |
Treatment | 47974.400000 | 12243.600000 | 391.832468 | 46174.400000 | 14043.600000 | 328.793187 | 28174.400000 | 32043.600000 | 87.925202 |
Uplift | 2829.316704 | -4722.316704 | 125.739665 | 2829.316704 | -4722.316704 | 97.815484 | 2829.316704 | -4722.316704 | 18.988846 |
time: 11 ms (started: 2023-09-28 17:20:08 -07:00)
#purpose: for every coupon venue type key, call the icr confidence-interval helper on the train prediction frame and display the resulting interval table
#NOTE(review): `st` is assigned outside this chunk, and the indentation of this export was lost, so which of the statements below are guarded by this `if` cannot be determined here -- confirm against the notebook
if st != 'yes':
#replicate count and the lower/upper quantiles (2.5%, 97.5%) used as the bounds of the 95% interval
number_of_replicates=10000
quantile_lower_upper_list=[0.025, 0.975]
feature_column_name_filter='coupon_venue_type'
#venue type keys handed to the helper as save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list (all six keys here)
save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list=['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant']
#the helper returns two values per venue type key: the first is stored in the confidence interval collection, the second in the replicate metrics collection
gradient_boosting_model_train_survey_95_confidence_interval_metric_collection={}
df_train_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection={}
#NOTE(review): model_type and survey_number_recall_estimated_y_predicted_column_name are not assigned in this cell; presumably they still hold the values set by the gradient boosting train metrics cell ('gradient_boosting', 'Y_train_survey_80_recall_estimate_predicted') -- confirm execution order
#NOTE(review): sample_size=2537 equals the row count of the test frames shown later in this file ((2537, 4)); presumably the train replicates are drawn at test-set size on purpose -- confirm
for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary_key_list:
gradient_boosting_model_train_survey_95_confidence_interval_metric_collection[feature_column_name_filter_value_list_dictionary_key],\
df_train_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection[feature_column_name_filter_value_list_dictionary_key]=\
icr.get_metric_confidence_interval_table_by_feature_column_name_filter_value_list_dictionary_key(df_y_train_test_model_name_predicted_y_train_test_survey_recall_estimate_predicted_y_actual_feature_column_name_filter=df_y_train_model_name_predicted_y_train_survey_recall_estimate_predicted_y_actual_coupon_venue_type,
feature_column_name_filter=feature_column_name_filter,
feature_column_name_filter_value_list_dictionary_key=feature_column_name_filter_value_list_dictionary_key,
feature_column_name_filter_value_list_dictionary=feature_column_name_filter_value_list_dictionary,
multiple_index=multiple_index,
number_of_replicates=number_of_replicates,
quantile_lower_upper_list=quantile_lower_upper_list,
model_type=model_type,
survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list=save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list,
filename_version=filename_version,
train_test='train',
sample_size=2537)
#merge the per-venue-type interval tables into a single frame
df_gradient_boosting_model_train_survey_95_confidence_interval_metric_feature_column_name_filter_value=icr.convert_collection_to_data_frame_and_drop_top_column_level(gradient_boosting_model_train_survey_95_confidence_interval_metric_collection)
#select and reorder the basic metrics only; no cost estimated / sale estimated rows are selected in this cell
metric_list_refined=['Coupon Acceptance Rate', 'Percentage of Coupon Acceptances Captured', 'Coupon Acceptances', 'Coupon Acceptances Possible', 'Coupon Recommendations', 'Coupon Recommendations Possible']
multiindex_basic_metrics=icr.get_the_multiindex_object_with_basic_metrics(metric_list_refined=metric_list_refined)
#display the selected confidence interval rows
df_gradient_boosting_model_train_survey_95_confidence_interval_metric_feature_column_name_filter_value.loc[multiindex_basic_metrics,:]
This file already exists This file already exists This file already exists This file already exists This file already exists This file already exists
95% Confidence Interval | |||||||
---|---|---|---|---|---|---|---|
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
Treatment | Coupon Acceptance Rate | (76%, 80%) | (72%, 81%) | (66%, 81%) | (77%, 85%) | (78%, 85%) | (57%, 74%) |
Percentage of Coupon Acceptances Captured | (78%, 82%) | (71%, 79%) | (59%, 73%) | (85%, 92%) | (84%, 91%) | (53%, 70%) | |
Coupon Acceptances | (1105, 1203) | (265, 328) | (92, 133) | (280, 345) | (320, 388) | (64, 99) | |
Coupon Acceptances Possible | (1391, 1489) | (356, 428) | (143, 193) | (315, 384) | (365, 437) | (109, 153) | |
Coupon Recommendations | (1418, 1516) | (349, 419) | (129, 175) | (346, 417) | (393, 467) | (102, 144) | |
Coupon Recommendations Possible | (2537, 2537) | (750, 842) | (369, 441) | (438, 515) | (522, 604) | (266, 328) | |
Control | Coupon Acceptance Rate | (54%, 59%) | (45%, 53%) | (36%, 47%) | (68%, 77%) | (67%, 75%) | (38%, 51%) |
Percentage of Coupon Acceptances Captured | (78%, 82%) | (76%, 84%) | (75%, 87%) | (75%, 83%) | (75%, 83%) | (74%, 87%) | |
Coupon Acceptances | (1108, 1207) | (284, 350) | (115, 160) | (248, 310) | (286, 351) | (87, 127) | |
Coupon Acceptances Possible | (1391, 1489) | (356, 428) | (143, 193) | (315, 384) | (365, 437) | (109, 153) | |
Coupon Recommendations | (1991, 2069) | (593, 678) | (296, 362) | (348, 419) | (408, 484) | (209, 266) | |
Coupon Recommendations Possible | (2537, 2537) | (750, 842) | (369, 441) | (438, 515) | (522, 604) | (266, 328) | |
Uplift | Coupon Acceptance Rate | (19%, 23%) | (23%, 31%) | (26%, 38%) | (5%, 12%) | (7%, 14%) | (14%, 28%) |
Percentage of Coupon Acceptances Captured | (-3%, 2%) | (-11%, 0%) | (-24%, -5%) | (4%, 15%) | (3%, 13%) | (-30%, -8%) | |
Coupon Acceptances | (-47, 40) | (-43, 2) | (-42, -10) | (15, 53) | (14, 55) | (-41, -11) | |
Coupon Acceptances Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | |
Coupon Recommendations | (-627, -500) | (-291, -214) | (-208, -149) | (-26, 23) | (-44, 11) | (-140, -91) | |
Coupon Recommendations Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) |
time: 1.55 s (started: 2023-09-28 17:20:08 -07:00)
feature_column_name_list = ['coupon_venue_type']

## Get Prediction Probabilities for Gradient Boosting, Prediction Probabilities for Random Forest, Y_actual, and coupon venue type
#predict_proba output is wrapped in a frame and only column 1 is kept (the second column of the class0/class1 array)
Y_test_random_forest_prediction_probability_class0_class1_ndarray = (
    best_stratified_5_fold_grid_search_cross_validation_random_forest_classifier.predict_proba(
        data_frame_collection['X_test']
    )
)
df_Y_test_random_forest_prediction_probability = pd.DataFrame(
    Y_test_random_forest_prediction_probability_class0_class1_ndarray
)[1]

Y_test_gradient_boosting_prediction_probability_class0_class1_ndarray = (
    best_stratified_5_fold_grid_search_cross_validation_gradient_boosting_classifier.predict_proba(
        data_frame_collection['X_test']
    )
)
df_Y_test_gradient_boosting_prediction_probability = pd.DataFrame(
    Y_test_gradient_boosting_prediction_probability_class0_class1_ndarray
)[1]

#side-by-side frame: both probability columns, the actual Y and the venue type; indexes are reset so rows align by position
df_y_test_model_name_prediction_probability_y_actual_coupon_venue_type = pd.concat(
    [
        df_Y_test_random_forest_prediction_probability.to_frame(
            name='Y_test_random_forest_prediction_probability'
        ),
        df_Y_test_gradient_boosting_prediction_probability.to_frame(
            name='Y_test_gradient_boosting_prediction_probability'
        ),
        data_frame_collection['Y_test'].reset_index(drop=True),
        df_collection['X_test'].loc[:, feature_column_name_list].reset_index(drop=True),
    ],
    axis=1,
)
p(df_y_test_model_name_prediction_probability_y_actual_coupon_venue_type)
(2537, 4)
Y_test_random_forest_prediction_probability | Y_test_gradient_boosting_prediction_probability | Y | coupon_venue_type | |
---|---|---|---|---|
0 | 0.107202 | 0.000190 | 0 | Coffee House |
1 | 0.053351 | 0.000020 | 0 | Coffee House |
2 | 0.457736 | 0.745344 | 0 | Coffee House |
3 | 0.186667 | 0.070547 | 0 | Restaurant(20-50) |
4 | 0.593144 | 0.962529 | 1 | Coffee House |
2532 | 0.208086 | 0.991149 | 0 | Restaurant(<20) |
2533 | 0.522917 | 0.277567 | 1 | Coffee House |
2534 | 0.203686 | 0.000110 | 0 | Bar |
2535 | 0.655833 | 0.916642 | 1 | Carry out & Take away |
2536 | 0.851667 | 0.996387 | 1 | Carry out & Take away |
time: 135 ms (started: 2023-09-28 17:20:10 -07:00)
### Get Random Forest Classifier Y Predicted from Y Prediction Probabilities and Decision Threshold .9 Precision Estimated
model_type = 'random_forest'
#precision target of .90 is given and no recall target; the helper's 'Y_test_predicted' column is renamed to carry the model name
df_Y_test_random_forest_predicted = (
    icr.get_model_predictions_from_prediction_probabilities_and_decision_threshold_proportion_metric_estimated(
        df=df_random_forest_decision_threshold_precision_recall,
        model_proportion_precision=0.90,
        model_proportion_recall=None,
        model_precision_column_name=f'{model_type}_precision',
        model_recall_column_name=f'{model_type}_recall',
        model_decision_threshold_column_name=f'{model_type}_decision_threshold',
        df_Y_train_test_model_prediction_probability=df_Y_test_random_forest_prediction_probability,
        train_test='test',
        filename_version=filename_version,
    )
    .rename(columns={'Y_test_predicted': f'Y_test_{model_type}_predicted'})
)
### Get Gradient Boosting Classifier Y Predicted from Y Prediction Probabilities and Decision Threshold .8 Recall Estimated
model_type = 'gradient_boosting'
#recall target of .8 is given and no precision target
model_proportion_precision = None
model_proportion_recall = 0.8
#the helper's 'Y_test_predicted' column is renamed to carry the model name
df_Y_test_gradient_boosting_predicted = (
    icr.get_model_predictions_from_prediction_probabilities_and_decision_threshold_proportion_metric_estimated(
        df=df_gradient_boosting_decision_threshold_precision_recall,
        model_proportion_precision=model_proportion_precision,
        model_proportion_recall=model_proportion_recall,
        model_precision_column_name=f'{model_type}_precision',
        model_recall_column_name=f'{model_type}_recall',
        model_decision_threshold_column_name=f'{model_type}_decision_threshold',
        df_Y_train_test_model_prediction_probability=df_Y_test_gradient_boosting_prediction_probability,
        train_test='test',
        filename_version=filename_version,
    )
    .rename(columns={'Y_test_predicted': f'Y_test_{model_type}_predicted'})
)
#initialize variables
data_fold_type = 'test'
number_of_predictions = data_frame_collection[f'X_{data_fold_type}'].shape[0]

#all three survey prediction sets below are requested with the same random_state (200) and the same number of predictions

### Get Survey 27% Recall Predictions
recall_estimated = random_forest_90_precision_estimated_recall
df_Y_test_survey_27_recall_estimate_predicted = icr.get_survey_coupon_recommendations_by_recall_estimate(
    number_of_predictions=number_of_predictions,
    recall_estimated=recall_estimated,
    random_state=200,
)

### Get Survey 80% Recall Predictions
recall_estimated = 0.8
df_Y_test_survey_80_recall_estimate_predicted = icr.get_survey_coupon_recommendations_by_recall_estimate(
    number_of_predictions=number_of_predictions,
    recall_estimated=recall_estimated,
    random_state=200,
)

### Get Survey 100% Recall Predictions
recall_estimated = 1
df_Y_test_survey_100_recall_estimate_predicted = icr.get_survey_coupon_recommendations_by_recall_estimate(
    number_of_predictions=number_of_predictions,
    recall_estimated=recall_estimated,
    random_state=200,
)
### Get Data Frame Y Test Random Forest Predicted, Y Test Gradient Boosting Predicted, Y Test Survey Predicted, Y Actual, and Coupon Venue Type
feature_column_name_list = ['coupon_venue_type']
#column-wise concat of the model predictions, the three survey prediction sets, the actual Y and the venue type
#Y_test and the X_test feature slice get their indexes reset so that rows align by position
df_y_test_model_name_predicted_y_test_survey_recall_estimate_predicted_y_actual_coupon_venue_type = pd.concat(
    [
        df_Y_test_random_forest_predicted,
        df_Y_test_gradient_boosting_predicted,
        df_Y_test_survey_27_recall_estimate_predicted,
        df_Y_test_survey_80_recall_estimate_predicted,
        df_Y_test_survey_100_recall_estimate_predicted,
        data_frame_collection['Y_test'].reset_index(drop=True),
        df_collection['X_test'].loc[:, feature_column_name_list].reset_index(drop=True),
    ],
    axis=1,
)
p(df_y_test_model_name_predicted_y_test_survey_recall_estimate_predicted_y_actual_coupon_venue_type)
This file already exists. This file already exists. 0.26870335011282764 0.7312966498871724 0.8 0.19999999999999996 1 0 (2537, 7)
Y_test_random_forest_predicted | Y_test_gradient_boosting_predicted | Y_test_survey_27_recall_estimate_predicted | Y_test_survey_80_recall_estimate_predicted | Y_test_survey_100_recall_estimate_predicted | Y | coupon_venue_type | |
---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1 | 1 | 1 | 0 | Coffee House |
1 | 0 | 0 | 0 | 1 | 1 | 0 | Coffee House |
2 | 0 | 1 | 0 | 1 | 1 | 0 | Coffee House |
3 | 0 | 0 | 0 | 1 | 1 | 0 | Restaurant(20-50) |
4 | 0 | 1 | 1 | 1 | 1 | 1 | Coffee House |
2532 | 0 | 1 | 0 | 1 | 1 | 0 | Restaurant(<20) |
2533 | 0 | 0 | 0 | 0 | 1 | 1 | Coffee House |
2534 | 0 | 0 | 0 | 1 | 1 | 0 | Bar |
2535 | 0 | 1 | 0 | 0 | 1 | 1 | Carry out & Take away |
2536 | 1 | 1 | 0 | 1 | 1 | 1 | Carry out & Take away |
time: 20.7 ms (started: 2023-09-28 17:20:10 -07:00)
# initialize variables
multiple_index = icr.get_metric_multiple_index(proportion_or_percentage='proportion')

# Display label for each filter; kept as a list because later cells use it
# as the row/column order of the metric tables.
feature_column_name_filter_value_list_dictionary_key_list = [
    'Overall',
    'Coffee House',
    'Bar',
    'Takeout',
    'Low-Cost Restaurant',
    'Mid-Range Restaurant',
]

# Raw 'coupon_venue_type' values selected by each label, in the same order
# as the label list above ('Overall' selects every venue type).
feature_column_name_filter_value_two_dimensional_list = [
    ['Coffee House', 'Bar', 'Carry out & Take away', 'Restaurant(<20)', 'Restaurant(20-50)'],
    ['Coffee House'],
    ['Bar'],
    ['Carry out & Take away'],
    ['Restaurant(<20)'],
    ['Restaurant(20-50)'],
]

# label -> list of raw filter values
feature_column_name_filter_value_list_dictionary = {
    label: filter_value_list
    for label, filter_value_list in zip(
        feature_column_name_filter_value_list_dictionary_key_list,
        feature_column_name_filter_value_two_dimensional_list,
    )
}
pdc(feature_column_name_filter_value_list_dictionary)
6
{'Overall': ['Coffee House', 'Bar', 'Carry out & Take away', 'Restaurant(<20)', 'Restaurant(20-50)'], 'Coffee House': ['Coffee House'], 'Bar': ['Bar'], 'Takeout': ['Carry out & Take away'], 'Low-Cost Restaurant': ['Restaurant(<20)'], 'Mid-Range Restaurant': ['Restaurant(20-50)']}
time: 3.55 ms (started: 2023-09-28 17:20:10 -07:00)
# Name of the column passed as `feature_column_name_filter` to the metric helpers.
feature_column_name_filter = 'coupon_venue_type'
# Survey prediction column passed as `y_predicted_column_name_base_survey`.
y_predicted_column_name_base_survey = 'Y_test_survey_100_recall_estimate_predicted'
time: 525 µs (started: 2023-09-28 17:20:10 -07:00)
model_type = 'random_forest'
survey_number_recall_estimated_y_predicted_column_name = 'Y_test_survey_27_recall_estimate_predicted'

# Collect one metric list per venue-type label, in dictionary (insertion) order.
metrics_coupon_venue_type_list = []
for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary:
    metric_list = icr.get_model_and_survey_metrics(
        df=df_y_test_model_name_predicted_y_test_survey_recall_estimate_predicted_y_actual_coupon_venue_type,
        model_y_predicted_column_name=f'Y_test_{model_type}_predicted',
        survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
        y_predicted_column_name_base_survey=y_predicted_column_name_base_survey,
        y_actual_column_name='Y',
        feature_column_name_filter=feature_column_name_filter,
        feature_column_name_filter_value_list=feature_column_name_filter_value_list_dictionary[feature_column_name_filter_value_list_dictionary_key],
        metrics_column_name_list=None,
    )
    metrics_coupon_venue_type_list.append(metric_list)

# Rows are venue-type labels and columns are the first two thirds of
# multiple_index; the transpose puts the metrics on the rows.
df_test_random_forest_model_survey_metrics = pd.DataFrame(
    metrics_coupon_venue_type_list,
    index=feature_column_name_filter_value_list_dictionary_key_list,
    columns=multiple_index[:len(multiple_index) * 2 // 3],
).T
df_test_random_forest_metrics = icr.calculate_and_add_model_survey_difference(
    df_test_random_forest_model_survey_metrics,
    multiple_index,
)

# add Venue Type Coupon Recommendation Cost Estimated, Sale Estimated
df_test_random_forest_metrics_coupon_recommendation_cost_estimated_sale_estimated = pd.concat(
    [
        df_test_random_forest_metrics,
        df_train_random_forest_29_precision_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
    ],
    axis='index',
)

# get and add Total Ad Spend, Total Revenue, ROAS
df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = icr.get_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI(
    df=df_test_random_forest_metrics_coupon_recommendation_cost_estimated_sale_estimated,
)
# the intermediate frame is not referenced again in this cell
del df_test_random_forest_metrics_coupon_recommendation_cost_estimated_sale_estimated

# select and reorder basic metrics, recommendation cost estimated, and average sale estimate
multiindex_basic_metrics = icr.get_the_multiindex_object_with_basic_metrics()
multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated = icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()
multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated = pd.MultiIndex.from_tuples(
    [
        *multiindex_basic_metrics,
        *multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated,
    ]
)

# display combined metrics (must stay the last expression of the cell)
df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated, :]
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 90.604027 | 89.795918 | 92.307692 | 91.194969 | 90.116279 | 100.000000 |
Percentage of Coupon Acceptances Captured | 27.950311 | 20.608899 | 7.692308 | 39.944904 | 42.119565 | 3.703704 | |
Coupon Acceptances | 405.000000 | 88.000000 | 12.000000 | 145.000000 | 155.000000 | 5.000000 | |
Coupon Acceptances Possible | 1449.000000 | 427.000000 | 156.000000 | 363.000000 | 368.000000 | 135.000000 | |
Coupon Recommendations | 447.000000 | 98.000000 | 13.000000 | 159.000000 | 172.000000 | 5.000000 | |
Coupon Recommendations Possible | 2537.000000 | 811.000000 | 399.000000 | 489.000000 | 534.000000 | 304.000000 | |
Ad Revenue | 4874.000000 | 484.000000 | 180.000000 | 2175.000000 | 1860.000000 | 175.000000 | |
Ad Spend | 962.269434 | 96.385882 | 31.000000 | 424.000000 | 377.633552 | 33.250000 | |
ROAS | 506.510945 | 502.148228 | 580.645161 | 512.971698 | 492.540981 | 526.315789 | |
Control | Coupon Acceptance Rate | 56.005789 | 53.879310 | 34.188034 | 73.553719 | 65.562914 | 48.571429 |
Percentage of Coupon Acceptances Captured | 26.708075 | 29.274005 | 25.641026 | 24.517906 | 26.902174 | 25.185185 | |
Coupon Acceptances | 387.000000 | 125.000000 | 40.000000 | 89.000000 | 99.000000 | 34.000000 | |
Coupon Acceptances Possible | 1449.000000 | 427.000000 | 156.000000 | 363.000000 | 368.000000 | 135.000000 | |
Coupon Recommendations | 691.000000 | 232.000000 | 117.000000 | 121.000000 | 151.000000 | 70.000000 | |
Coupon Recommendations Possible | 2537.000000 | 811.000000 | 399.000000 | 489.000000 | 534.000000 | 304.000000 | |
Ad Revenue | 5000.500000 | 687.500000 | 600.000000 | 1335.000000 | 1188.000000 | 1190.000000 | |
Ad Spend | 1626.872620 | 228.178824 | 279.000000 | 322.666667 | 331.527130 | 465.500000 | |
ROAS | 307.368871 | 301.298775 | 215.053763 | 413.739669 | 358.341714 | 255.639098 | |
Uplift | Coupon Acceptance Rate | 34.598238 | 35.916608 | 58.119658 | 17.641250 | 24.553365 | 51.428571 |
Percentage of Coupon Acceptances Captured | 1.242236 | -8.665105 | -17.948718 | 15.426997 | 15.217391 | -21.481481 | |
Coupon Acceptances | 18.000000 | -37.000000 | -28.000000 | 56.000000 | 56.000000 | -29.000000 | |
Coupon Acceptances Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Coupon Recommendations | -244.000000 | -134.000000 | -104.000000 | 38.000000 | 21.000000 | -65.000000 | |
Coupon Recommendations Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Ad Revenue | -126.500000 | -203.500000 | -420.000000 | 840.000000 | 672.000000 | -1015.000000 | |
Ad Spend | -664.603186 | -131.792941 | -248.000000 | 101.333333 | 46.106422 | -432.250000 | |
ROAS | 199.142074 | 200.849453 | 365.591398 | 99.232029 | 134.199267 | 270.676692 | |
Treatment | Average Coupon Recommendation Cost Estimated | NaN | 0.983529 | 2.384615 | 2.666667 | 2.195544 | 6.650000 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 | |
Control | Average Coupon Recommendation Cost Estimated | NaN | 0.541538 | 1.244129 | 2.201155 | 1.707282 | 3.081650 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 |
time: 71.4 ms (started: 2023-09-28 17:20:10 -07:00)
#show ROI table
# Renders the Profit / Spend / ROI table from the random forest test metrics
# frame built in the preceding cell; the call is the cell's displayed value.
icr.profit_spend_roi_number_table(df=df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI)
Additional Production Cost | 200 | 2000 | 20000 | ||||||
---|---|---|---|---|---|---|---|---|---|
Metric | Profit | Spend | ROI | Profit | Spend | ROI | Profit | Spend | ROI |
Group | |||||||||
Control | 3173.627380 | 1826.872620 | 173.719139 | 1373.627380 | 3626.872620 | 37.873604 | -16626.372620 | 21626.872620 | -76.878303 |
Treatment | 3711.730566 | 1162.269434 | 319.351990 | 1911.730566 | 2962.269434 | 64.536012 | -16088.269434 | 20962.269434 | -76.748701 |
Uplift | 538.103186 | -664.603186 | 145.632851 | 538.103186 | -664.603186 | 26.662408 | 538.103186 | -664.603186 | 0.129602 |
time: 10.9 ms (started: 2023-09-28 17:20:10 -07:00)
######################################################################################################################################################################################################
# Get Random Forest 90% Coupon Acceptance Rate Estimated 95% Confidence Interval Metrics (Per Coupon Venue Type) DataFrame (and calculate bootstrap)
# filename_list[0]: the confidence-interval table; filename_list[1]: the bootstrap replicate collection.
filename_list = [
    'df_test_random_forest_model_survey_95_confidence_interval_metric_feature_column_name_filter_value_v' + filename_version + '.pkl',
    'df_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection_v' + filename_version + '.pkl',
]
model_type = 'random_forest'

# Reuse the saved table when the reader returns a non-empty frame.
df_readback = icr.return_processed_data_file_if_it_exists_v2(
    filename=filename_list[0],
    column_name_row_integer_location_list=[0, 1],
    index_column_integer_location_list=[0, 1],
)
if not df_readback.empty:
    df_test_random_forest_model_survey_95_confidence_interval_metric_feature_column_name_filter_value = df_readback
else:
    quantile_lower_upper_list = [0.025, 0.975]
    feature_column_name_filter = 'coupon_venue_type'
    save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list = [
        'Overall',
        'Coffee House',
        'Bar',
        'Takeout',
        'Low-Cost Restaurant',
        'Mid-Range Restaurant',
    ]
    random_forest_model_survey_95_confidence_interval_metric_collection = {}
    df_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection = {}

    # The helper returns a pair per venue-type label; the first element goes
    # into the interval collection and the second into the replicate collection.
    for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary_key_list:
        (
            random_forest_model_survey_95_confidence_interval_metric_collection[feature_column_name_filter_value_list_dictionary_key],
            df_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection[feature_column_name_filter_value_list_dictionary_key],
        ) = icr.get_metric_confidence_interval_table_by_feature_column_name_filter_value_list_dictionary_key(
            df_y_train_test_model_name_predicted_y_train_test_survey_recall_estimate_predicted_y_actual_feature_column_name_filter=df_y_test_model_name_predicted_y_test_survey_recall_estimate_predicted_y_actual_coupon_venue_type.copy(),
            feature_column_name_filter=feature_column_name_filter,
            feature_column_name_filter_value_list_dictionary_key=feature_column_name_filter_value_list_dictionary_key,
            feature_column_name_filter_value_list_dictionary=feature_column_name_filter_value_list_dictionary,
            multiple_index=multiple_index,
            number_of_replicates=number_of_replicates,
            quantile_lower_upper_list=quantile_lower_upper_list,
            model_type=model_type,
            survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
            save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list=save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list,
            filename_version=filename_version,
            sample_size=None,
        )

    df_test_random_forest_model_survey_95_confidence_interval_metric_feature_column_name_filter_value = icr.convert_collection_to_data_frame_and_drop_top_column_level(
        random_forest_model_survey_95_confidence_interval_metric_collection
    )

    # save the confidence-interval table
    df_test_random_forest_model_survey_95_confidence_interval_metric_feature_column_name_filter_value = icr.save_and_return_data_frame_v2(
        df_test_random_forest_model_survey_95_confidence_interval_metric_feature_column_name_filter_value,
        filename=filename_list[0],
    )

    # save the replicate collection
    df_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection = icr.save_and_return_collection(
        df_random_forest_90_precision_estimated_feature_filter_number_bootstrap_replicates_metrics_collection,
        filename=filename_list[1],
    )
######################################################################################################################################################################################################
######################################################################################################################################################################################################
#Get Random Forest 90% Coupon Acceptance Rate Estimated 95% Confidence Interval Ad Revenue, Ad Spend, ROAS, Profit, Spend, and ROI (Per Coupon Venue Type) Table
# Flow: try to read the saved table; if the reader returns an empty frame,
# rebuild the table from the replicate collections and save it under `filename`.
filename='df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_v'+filename_version+'.pkl'
df_readback=icr.return_processed_data_file_if_it_exists_v2(filename=filename, column_name_row_integer_location_list=[0, 1], index_column_integer_location_list=[0, 1])
if df_readback.empty == False:
df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI=df_readback
else:
#get Random Forest Model and Survey Coupon Recommendation Cost Estimated and Sale Estimated Replicate Collection by Venue Type
# NOTE(review): number_of_replicates is consumed here, before the
# `st!='yes'` override a few lines below -- confirm that both calls are
# meant to see the same replicate count.
df_random_forest_model_survey_coupon_recommendation_cost_estimated_sale_estimated_replicate_collection=\
icr.get_model_survey_coupon_recommendation_cost_estimated_and_sale_estimated_replicate_collection_venue_type(df=df_train_random_forest_29_precision_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
column_name_list=['Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant'],
column_name_drop_list=['Overall'],
number_of_replicates=number_of_replicates)
#calculate 95% confidence interval for Ad Revenue, Ad Spend, ROAS, Profit, Spend, and ROI
# `st` is defined outside this cell; any value other than 'yes' forces 10000 replicates.
if st!='yes':
number_of_replicates=10000
# NOTE(review): model_type is given `model_name` (defined outside this cell),
# not the `model_type` variable used by the neighbouring cells -- verify.
# `test_random_forest_metric_replicate_filename_collection` also comes from outside this cell.
df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI=\
icr.calculate_Overall_and_Coupon_Venue_Type_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_95_Confidence_Intervals_from_metric_replicates_and_append_to_metric_confidence_interval_table(
df_model_name_model_survey_coupon_recommendation_cost_estimated_sale_estimated_replicate_collection=df_random_forest_model_survey_coupon_recommendation_cost_estimated_sale_estimated_replicate_collection,
df_test_model_name_model_survey_95_confidence_interval_metric_feature_column_name_filter_value=df_test_random_forest_model_survey_95_confidence_interval_metric_feature_column_name_filter_value,
test_model_name_metric_replicate_filename_collection=test_random_forest_metric_replicate_filename_collection,
model_type=model_name,
filename_version=filename_version,
number_of_replicates=number_of_replicates,)
#save it
df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI=\
icr.save_and_return_data_frame_v2(df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI, filename=filename)
######################################################################################################################################################################################################
# select and reorder basic metrics, recommendation cost estimated, and average sale estimate
multiindex_basic_metrics = icr.get_the_multiindex_object_with_basic_metrics()
multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated = icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()

# Row selector: the basic-metric tuples followed by the cost/sale-estimate tuples.
multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated = pd.MultiIndex.from_tuples(
    [
        *multiindex_basic_metrics,
        *multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated,
    ]
)

# display combined metrics (must stay the last expression of the cell)
df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated, :]
This file already exists This file already exists
95% Confidence Interval | |||||||
---|---|---|---|---|---|---|---|
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
Treatment | Coupon Acceptance Rate | (87%, 93%) | (83%, 95%) | (75%, 100%) | (86%, 95%) | (85%, 94%) | (100%, 100%) |
Percentage of Coupon Acceptances Captured | (25%, 30%) | (16%, 24%) | (3%, 12%) | (34%, 45%) | (37%, 47%) | (77.5%, 714.3%) | |
Coupon Acceptances | (369, 441) | (70, 107) | (6, 19) | (122, 168) | (132, 179) | (100, 100) | |
Coupon Acceptances Possible | (1401, 1498) | (390, 464) | (133, 181) | (328, 398) | (334, 403) | (113, 158) | |
Coupon Recommendations | (410, 485) | (79, 118) | (6, 21) | (135, 183) | (147, 197) | (100, 100) | |
Coupon Recommendations Possible | (2537, 2537) | (765, 858) | (362, 435) | (450, 529) | (494, 575) | (271, 337) | |
Ad Revenue | (\$4410.49, \$5348.0) | (\$385.0, \$588.5) | (\$90.0, \$285.0) | (\$1830.0, \$2520.0) | (\$1584.0, \$2148.0) | (\$35.0, \$350.0) | |
Ad Spend | (\$875.96, \$1049.24) | (\$77.7, \$116.06) | (\$14.31, \$50.08) | (\$360.0, \$488.0) | (\$322.74, \$432.52) | (\$6.65, \$66.5) | |
ROAS | (490.68%, 521.11%) | (466.91%, 533.79%) | (471.77%, 629.03%) | (486.58%, 536.25%) | (467.06%, 516.03%) | (526.32%, 526.32%) | |
Control | Coupon Acceptance Rate | (52%, 59%) | (47%, 60%) | (25%, 43%) | (65%, 81%) | (57%, 73%) | (36%, 60%) |
Percentage of Coupon Acceptances Captured | (24%, 28%) | (25%, 33%) | (19%, 32%) | (20%, 29%) | (22%, 31%) | (18%, 32%) | |
Coupon Acceptances | (352, 422) | (104, 147) | (28, 53) | (72, 108) | (80, 119) | (23, 46) | |
Coupon Acceptances Possible | (1401, 1498) | (390, 464) | (133, 181) | (328, 398) | (334, 403) | (113, 158) | |
Coupon Recommendations | (647, 735) | (204, 261) | (97, 138) | (100, 142) | (128, 174) | (54, 86) | |
Coupon Recommendations Possible | (2537, 2537) | (765, 858) | (362, 435) | (450, 529) | (494, 575) | (271, 337) | |
Ad Revenue | (\$4462.99, \$5564.01) | (\$572.0, \$808.5) | (\$420.0, \$795.0) | (\$1080.0, \$1620.0) | (\$960.0, \$1428.0) | (\$805.0, \$1610.0) | |
Ad Spend | (\$1498.3, \$1757.92) | (\$200.64, \$256.7) | (\$231.31, \$329.08) | (\$266.67, \$378.67) | (\$281.03, \$382.08) | (\$359.1, \$571.9) | |
ROAS | (283.02%, 331.81%) | (265.16%, 336.4%) | (161.43%, 272.42%) | (369.93%, 457.03%) | (316.63%, 400.02%) | (194.33%, 317.34%) | |
Uplift | Coupon Acceptance Rate | (30%, 38%) | (27%, 44%) | (39%, 72%) | (9%, 25%) | (16%, 32%) | (38%, 63%) |
Percentage of Coupon Acceptances Captured | (-2%, 4%) | (-14%, -2%) | (-26%, -9%) | (8%, 22%) | (8%, 21%) | (-29%, -13%) | |
Coupon Acceptances | (-29, 65) | (-62, -11) | (-42, -15) | (30, 81) | (31, 81) | (-41, -17) | |
Coupon Acceptances Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | |
Coupon Recommendations | (-302, -185) | (-167, -101) | (-126, -83) | (10, 65) | (-8, 50) | (-82, -48) | |
Coupon Recommendations Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | |
Ad Revenue | (\$-837.02, \$542.52) | (\$-341.0, \$-60.5) | (\$-630.0, \$-225.0) | (\$450.0, \$1215.0) | (\$372.0, \$972.0) | (\$-1435.0, \$-595.0) | |
Ad Spend | (\$-822.91, \$-507.03) | (\$-164.25, \$-99.34) | (\$-300.46, \$-197.92) | (\$26.67, \$173.33) | (\$-17.56, \$109.78) | (\$-545.3, \$-319.2) | |
ROAS | (172.28%, 226.21%) | (154.54%, 246.39%) | (246.23%, 454.93%) | (55.82%, 143.48%) | (89.93%, 177.59%) | (208.98%, 331.98%) | |
Treatment | Average Coupon Recommendation Cost Estimated | NaN | (\$0.98, \$0.98) | (\$2.38, \$2.38) | (\$2.67, \$2.67) | (\$2.2, \$2.2) | (\$6.65, \$6.65) |
Average Sale Estimated | NaN | (\$5.5, \$5.5) | (\$15.0, \$15.0) | (\$15.0, \$15.0) | (\$12.0, \$12.0) | (\$35.0, \$35.0) | |
Control | Average Coupon Recommendation Cost Estimated | NaN | (\$0.54, \$0.54) | (\$1.24, \$1.24) | (\$2.2, \$2.2) | (\$1.71, \$1.71) | (\$3.08, \$3.08) |
Average Sale Estimated | NaN | (\$5.5, \$5.5) | (\$15.0, \$15.0) | (\$15.0, \$15.0) | (\$12.0, \$12.0) | (\$35.0, \$35.0) |
time: 21.2 ms (started: 2023-09-28 17:20:10 -07:00)
#show ROI 95% Confidence Interval table
# Same table helper as the point-estimate ROI table, here fed the
# 95% confidence interval frame; the call is the cell's displayed value.
icr.profit_spend_roi_number_table(df=df_random_forest_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI)
95% Confidence Interval | |||||||||
---|---|---|---|---|---|---|---|---|---|
$200 Additional Production Cost | $2,000 Additional Production Cost | $20,000 Additional Production Cost | |||||||
Profit | Spend | ROI | Profit | Spend | ROI | Profit | Spend | ROI | |
Group | |||||||||
Treatment | (\$3334.66, \$4102.85) | (\$1075.96, \$1249.24) | (304.45%, 333.08%) | (\$1534.66, \$2302.85) | (\$2875.96, \$3049.24) | (53.3%, 75.7%) | (\$-16465.34, \$-15697.15) | (\$20875.96, \$21049.24) | (-78.87%, -74.59%) |
Control | (\$2716.56, \$3657.38) | (\$1698.3, \$1957.92) | (151.84%, 195.44%) | (\$916.56, \$1857.38) | (\$3498.3, \$3757.92) | (25.77%, 50.2%) | (\$-17083.44, \$-16142.62) | (\$21498.3, \$21757.92) | (-79.28%, -74.37%) |
Uplift | (\$-48.19, \$1101.09) | (\$-822.91, \$-507.03) | (121.24%, 169.45%) | (\$-48.19, \$1101.09) | (\$-822.91, \$-507.03) | (10.66%, 42.0%) | (\$-48.19, \$1101.09) | (\$-822.91, \$-507.03) | (-3.05%, 3.12%) |
time: 17.3 ms (started: 2023-09-28 17:20:10 -07:00)
# Combine the model metric replicates with the Ad Revenue / Ad Spend / ROAS /
# Profit / Spend / ROI replicates for the overall (all venue types) case.
model_type = 'random_forest'
number_metric = '90_precision'
# NOTE(review): the variable name says 10000, but the replicate count actually
# passed is whatever `number_of_replicates` currently holds.
df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall = icr.combine_model_metric_replicates_and_ad_revenue_ad_spend_roas_profit_spend_roi_replicates(
    model_type=model_type,
    number_metric=number_metric,
    filename_version=filename_version,
    number_of_replicates=number_of_replicates,
)
This file already exists. time: 596 ms (started: 2023-09-28 17:20:10 -07:00)
# (group, metric) rows of the overall replicate frame to summarise.
row_index_tuple_list = [
    ('Treatment', 'Coupon Acceptance Rate'),
    ('Treatment', 'Percentage of Coupon Acceptances Captured'),
    ('Uplift', 'Coupon Acceptance Rate'),
    ('Treatment', 'Ad Revenue'),
    ('Uplift', 'Ad Revenue'),
    ('Treatment', 'Ad Spend'),
    ('Uplift', 'Ad Spend'),
    ('Treatment', 'ROAS'),
    ('Uplift', 'ROAS'),
    ('Treatment', 'ROI 2000'),
    ('Uplift', 'ROI 2000'),
]
# Rows whose quantiles are multiplied by 100 after being computed.
# NOTE(review): confirm these rows are stored as proportions; if they are
# already percentages this scaling over-scales them.
multiply_by_100_tuple_list = [
    ('Treatment', 'Coupon Acceptance Rate'),
    ('Uplift', 'Coupon Acceptance Rate'),
]

# row tuple -> Series of the 0 / 0.1 / 0.9 / 1 quantiles across replicates
random_forest_model_metric_replicate_quantile_series_collection_overall = {}
for row_index_tuple in row_index_tuple_list:
    random_forest_model_metric_replicate_quantile_series_collection_overall[row_index_tuple] = (
        df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall
        .loc[row_index_tuple, :]
        .quantile(q=[0, .1, .9, 1])
    )
    if row_index_tuple in multiply_by_100_tuple_list:
        random_forest_model_metric_replicate_quantile_series_collection_overall[row_index_tuple] *= 100
time: 12.2 ms (started: 2023-09-28 17:20:11 -07:00)
# Load the per-venue-type replicate collection through the `rpp` helper.
# The file name embeds the current replicate count and file version.
df_test_random_forest_10000_metric_replicates_Ad_Revenue_Ad_Spend_ROAS_replicates_collection = rpp(
    f'df_test_random_forest_{number_of_replicates}_metric_replicates_Ad_Revenue_Ad_Spend_ROAS_replicates_collection_coupon_venue_type_v{filename_version}.pkl'
)
time: 13.3 ms (started: 2023-09-28 17:20:11 -07:00)
# (group, metric) rows of each venue-type replicate frame to summarise.
row_index_tuple_list = [
    ('Treatment', 'Coupon Acceptance Rate'),
    ('Uplift', 'Coupon Acceptance Rate'),
    ('Treatment', 'Ad Revenue'),
    ('Uplift', 'Ad Revenue'),
    ('Treatment', 'Ad Spend'),
    ('Uplift', 'Ad Spend'),
    ('Treatment', 'ROAS'),
    ('Uplift', 'ROAS'),
]
# Rows whose quantiles are multiplied by 100 after being computed.
# NOTE(review): confirm these rows are stored as proportions; if they are
# already percentages this scaling over-scales them.
multiply_by_100_tuple_list = [
    ('Treatment', 'Coupon Acceptance Rate'),
    ('Uplift', 'Coupon Acceptance Rate'),
]
coupon_venue_type_list = ['Coffee House', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant', 'Bar']

# venue type -> {row tuple -> Series of the 0 / 0.1 / 0.9 / 1 quantiles}
#
# The scaling decision is made per row *inside* the comprehension. The
# previous version tested `row_index_tuple` before the comprehension ran, so
# it read a stale value left over from an earlier cell's loop and never
# applied the x100 scaling to the listed rows.
random_forest_model_metric_replicate_quantile_series_collection = {}
for coupon_venue_type in coupon_venue_type_list:
    random_forest_model_metric_replicate_quantile_series_collection[coupon_venue_type] = {
        row_index_tuple: (
            df_test_random_forest_10000_metric_replicates_Ad_Revenue_Ad_Spend_ROAS_replicates_collection[coupon_venue_type]
            .loc[row_index_tuple, :]
            .quantile(q=[0, .1, .9, 1])
            * (100 if row_index_tuple in multiply_by_100_tuple_list else 1)
        )
        for row_index_tuple in row_index_tuple_list
    }

# displayed value of the cell
random_forest_model_metric_replicate_quantile_series_collection['Coffee House'][('Uplift', 'ROAS')]
0.0 97.677705 0.1 170.437496 0.9 231.005771 1.0 289.450148 Name: (Uplift, ROAS), dtype: float64
time: 46.9 ms (started: 2023-09-28 17:20:11 -07:00)
def format_percentage(value, tick_position):
    """Return a proportion as a whole-number percent label, e.g. 0.5 -> '50%'.

    The (value, tick_position) signature is the one matplotlib's
    FuncFormatter calls with; tick_position is not used.
    """
    percent = value * 100
    return '{:.0f}%'.format(percent)
def format_percentage_without_multiplier(value, tick_position):
    """Return a value that is already in percent as a whole-number label, e.g. 50 -> '50%'.

    The (value, tick_position) signature is the one matplotlib's
    FuncFormatter calls with; tick_position is not used.
    """
    return '{:.0f}%'.format(value)
def usd_format(value, tick_position):
    """Return value as a whole-dollar label, e.g. 1500 -> '$1,500', -250 -> '-$250'.

    The (value, tick_position) signature is the one matplotlib's
    FuncFormatter calls with; tick_position is not used.

    The sign is placed before the dollar symbol and the magnitude is
    comma-grouped. Formatting the absolute value in one place keeps the
    thousands separator consistent when rounding crosses 1000
    (999.6 -> '$1,000'), avoids '$-0' for negative zero, and always returns a
    string (the previous branch chain fell through and returned None for NaN).
    """
    sign = '-' if value < 0 else ''
    return f'{sign}${abs(value):,.0f}'
time: 1.03 ms (started: 2023-09-28 17:20:11 -07:00)
# Two-level header for the interval table.
multiindex_object = pd.MultiIndex.from_tuples(
    (
        ('90% Confidence Interval', 'lower limit'),
        ('90% Confidence Interval', 'upper limit'),
    )
)

# venue type -> (lower, upper) for ROAS uplift.
# NOTE(review): the pair is the 0.1 quantile and the 1.0 quantile (the
# maximum replicate), i.e. a one-sided range -- confirm this is the intended
# definition of the "90% Confidence Interval".
pilot_campaign_model_roas_uplift_90_percent_confidence_interval_dictionary = {
    coupon_venue_type: (
        random_forest_model_metric_replicate_quantile_series_collection[coupon_venue_type][('Uplift', 'ROAS')][.1],
        random_forest_model_metric_replicate_quantile_series_collection[coupon_venue_type][('Uplift', 'ROAS')][1],
    )
    for coupon_venue_type in coupon_venue_type_list
}

# Built with venue types as columns, then transposed so each venue type is a row.
df_random_forest_roas_uplift_90_percent_confidence_interval = pd.DataFrame(
    pilot_campaign_model_roas_uplift_90_percent_confidence_interval_dictionary
)
df_random_forest_roas_uplift_90_percent_confidence_interval.index = multiindex_object
df_random_forest_roas_uplift_90_percent_confidence_interval = df_random_forest_roas_uplift_90_percent_confidence_interval.transpose()
df_random_forest_roas_uplift_90_percent_confidence_interval
90% Confidence Interval | ||
---|---|---|
lower limit | upper limit | |
Coffee House | 170.437496 | 289.450148 |
Takeout | 70.465187 | 194.376305 |
Low-Cost Restaurant | 105.154252 | 220.155957 |
Mid-Range Restaurant | 230.263158 | 398.293030 |
Bar | 292.050691 | 504.536290 |
time: 5.55 ms (started: 2023-09-28 17:20:11 -07:00)
# Histogram of the overall ('Treatment', 'Coupon Acceptance Rate') replicates.
# If the figure file already exists it is loaded and shown as an image;
# otherwise the histogram is drawn, saved to figure_filename, and shown.
model_type='random_forest'
row_name_tuple=('Treatment', 'Coupon Acceptance Rate')
xlabel_string='Coupon Acceptance Rate'
ylabel_string='Frequency'
title_string='Coupon Acceptance Rate Distribution'
xaxis_interval=.02
dpi=100
# file name is derived from the model type, the x label, dpi and the file version
figure_filename = '../reports/figures/figure_'+model_type+'_classifier_'+xlabel_string.lower().replace(' ', '_')+'_overall_90_percent_confidence_dpi_'+str(dpi)+'_v'+filename_version+'.png'
number_of_bins = 88
# bars with index below this value are drawn gray, the rest tab:green
bin_number_color_split=32
figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists == True:
# cached figure: display the saved PNG without axes or grid
img = mpimg.imread(figure_filename)
plt.figure(figsize=(10, 8))
plt.grid(False)
plt.axis('off')
plt.imshow(img)
else:
figsize=(6,4)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
# tick labels are value*100 followed by '%'
ax.xaxis.set_major_formatter(FuncFormatter(format_percentage))
# NOTE(review): the tick range stops at max+1 while the step is 0.02;
# if the data are proportions this yields ticks far past the data -- confirm.
plt.xticks(np.arange(min(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:]),
max(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:])+1,
xaxis_interval))
bin_count_array, bin_array, patches = ax.hist(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:],
linewidth=1,
bins=number_of_bins,
rwidth=1,
alpha=1,
edgecolor='green',)
# recolor the bars: first bin_number_color_split gray, remaining tab:green
for i in range(0,bin_number_color_split):
patches[i].set_facecolor('gray')
patches[i].set_edgecolor('gray')
for i in range(bin_number_color_split,len(patches)):
patches[i].set_facecolor('tab:green')
patches[i].set_edgecolor('tab:green')
plt.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1,)
plt.xlabel(xlabel_string, fontsize=15)
plt.ylabel(ylabel_string, fontsize=15)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.title(title_string, fontsize=15);
# NOTE(review): dpi is hard-coded to 100 here although the `dpi` variable
# above is what names the file -- keep the two in sync if dpi changes.
plt.savefig(figure_filename, bbox_inches='tight', dpi=100)
plt.show()
# displayed value of the cell: the 0.1 and 1.0 quantiles of the overall replicates
(random_forest_model_metric_replicate_quantile_series_collection_overall[('Treatment', 'Coupon Acceptance Rate')][.1], random_forest_model_metric_replicate_quantile_series_collection_overall[('Treatment', 'Coupon Acceptance Rate')][1])
(8886.132347411958, 9567.307692307691)
time: 71.6 ms (started: 2023-09-28 17:20:11 -07:00)
# Histogram of the overall ('Uplift', 'Coupon Acceptance Rate') replicates.
# If the figure file already exists it is loaded and shown as an image;
# otherwise the histogram is drawn, saved to figure_filename, and shown.
model_type='random_forest'
row_name_tuple=('Uplift', 'Coupon Acceptance Rate')
xlabel_string='Coupon Acceptance Rate Uplift'
ylabel_string='Frequency'
title_string='Coupon Acceptance Rate Uplift Distribution'
xaxis_interval=.02
dpi=100
# file name is derived from the model type, the x label, dpi and the file version
figure_filename='../reports/figures/figure_'+model_type+'_classifier_'+xlabel_string.lower().replace(' ', '_')+'_overall_90_percent_confidence_dpi_'+str(dpi)+'_v'+filename_version+'.png'
number_of_bins = 88
# bars with index below this value are drawn gray, the rest tab:green
bin_number_color_split=32
figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists == True:
# cached figure: display the saved PNG without axes or grid
img = mpimg.imread(figure_filename)
plt.figure(figsize=(10, 8))
plt.grid(False)
plt.axis('off')
plt.imshow(img)
else:
figsize=(6,4)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
# tick labels are value*100 followed by '%'
ax.xaxis.set_major_formatter(FuncFormatter(format_percentage))
# NOTE(review): the tick range stops at max+1 while the step is 0.02;
# if the data are proportions this yields ticks far past the data -- confirm.
plt.xticks(np.arange(min(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:]),
max(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:])+1,
xaxis_interval))
bin_count_array, bin_array, patches = ax.hist(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:],
linewidth=1,
bins=number_of_bins,
rwidth=1,
alpha=1,
edgecolor='green',)
# recolor the bars: first bin_number_color_split gray, remaining tab:green
for i in range(0,bin_number_color_split):
patches[i].set_facecolor('gray')
patches[i].set_edgecolor('gray')
for i in range(bin_number_color_split,len(patches)):
patches[i].set_facecolor('tab:green')
patches[i].set_edgecolor('tab:green')
plt.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1,)
plt.xlabel(xlabel_string, fontsize=15)
plt.ylabel(ylabel_string, fontsize=15)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.title(title_string, fontsize=15);
# NOTE(review): dpi is hard-coded to 100 here although the `dpi` variable
# above is what names the file -- keep the two in sync if dpi changes.
plt.savefig(figure_filename, bbox_inches='tight', dpi=100)
plt.show()
# displayed value of the cell: the 0.1 and 1.0 quantiles of the overall replicates
(random_forest_model_metric_replicate_quantile_series_collection_overall[('Uplift', 'Coupon Acceptance Rate')][.1], random_forest_model_metric_replicate_quantile_series_collection_overall[('Uplift', 'Coupon Acceptance Rate')][1])
(3182.543273985793, 4178.305835149251)
time: 56.9 ms (started: 2023-09-28 17:20:11 -07:00)
# Histogram of the overall ('Treatment', 'Percentage of Coupon Acceptances Captured') replicates.
# If the figure file already exists it is loaded and shown as an image;
# otherwise the histogram is drawn, saved to figure_filename, and shown.
model_type='random_forest'
row_name_tuple=('Treatment', 'Percentage of Coupon Acceptances Captured')
xlabel_string='Percentage of Coupon Acceptances Captured'
ylabel_string='Frequency'
title_string='Percentage of Coupon Acceptances Captured Distribution'
xaxis_interval=.02
dpi=100
# file name is derived from the model type, the x label, dpi and the file version
figure_filename='../reports/figures/figure_'+model_type+'_classifier_'+xlabel_string.lower().replace(' ', '_')+'_overall_90_percent_confidence_dpi_'+str(dpi)+'_v'+filename_version+'.png'
number_of_bins = 88
# bars with index below this value are drawn gray, the rest tab:green
bin_number_color_split=25
figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists == True:
# cached figure: display the saved PNG without axes or grid
img = mpimg.imread(figure_filename)
plt.figure(figsize=(10, 8))
plt.grid(False)
plt.axis('off')
plt.imshow(img)
else:
figsize=(6,4)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
# tick labels are value*100 followed by '%'
# NOTE(review): this row is not in multiply_by_100_tuple_list in the quantile
# cell, yet it is plotted with the x100 formatter -- confirm its scale.
ax.xaxis.set_major_formatter(FuncFormatter(format_percentage))
# NOTE(review): the tick range stops at max+1 while the step is 0.02;
# if the data are proportions this yields ticks far past the data -- confirm.
plt.xticks(np.arange(min(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:]),
max(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:])+1,
xaxis_interval))
bin_count_array, bin_array, patches = ax.hist(df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple,:],
linewidth=1,
bins=number_of_bins,
rwidth=1,
alpha=1,
edgecolor='green',)
# recolor the bars: first bin_number_color_split gray, remaining tab:green
for i in range(0,bin_number_color_split):
patches[i].set_facecolor('gray')
patches[i].set_edgecolor('gray')
for i in range(bin_number_color_split,len(patches)):
patches[i].set_facecolor('tab:green')
patches[i].set_edgecolor('tab:green')
plt.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1,)
plt.xlabel(xlabel_string, fontsize=15)
plt.ylabel(ylabel_string, fontsize=15)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.title(title_string, fontsize=15);
# NOTE(review): dpi is hard-coded to 100 here although the `dpi` variable
# above is what names the file -- keep the two in sync if dpi changes.
plt.savefig(figure_filename, bbox_inches='tight', dpi=100)
plt.show()
# displayed value of the cell: the 0.1 and 1.0 quantiles of the overall replicates
(random_forest_model_metric_replicate_quantile_series_collection_overall[('Treatment', 'Percentage of Coupon Acceptances Captured')][.1], random_forest_model_metric_replicate_quantile_series_collection_overall[('Treatment', 'Percentage of Coupon Acceptances Captured')][1])
(26.425553786324812, 32.765667574931875)
time: 65.9 ms (started: 2023-09-28 17:20:11 -07:00)
#revenue 90% confidence interval
# Shows the distribution of the ('Treatment', 'Ad Revenue') replicate row.
# When the figure file already exists it is displayed as an image; otherwise
# the histogram is drawn, styled and saved. The cell's value is the pair of
# entries keyed .1 and 1 in the replicate quantile collection for that row.
model_type = 'random_forest'
row_name_tuple = ('Treatment', 'Ad Revenue')
xlabel_string = 'Ad Revenue'
ylabel_string = 'Frequency'
title_string = 'Ad Revenue Distribution'
xaxis_interval = 500
round_to_nearest = 500
dpi = 100
figure_filename = (
    '../reports/figures/figure_' + model_type + '_classifier_'
    + xlabel_string.lower().replace(' ', '_')
    + '_overall_90_percent_confidence_dpi_' + str(dpi)
    + '_v' + filename_version + '.png'
)
number_of_bins = 88
# Bars with an index below this value are drawn gray, the rest green.
# NOTE(review): the value looks hand-picked per figure - confirm it still
# matches the lower interval bound whenever the replicates change.
bin_number_color_split = 27

figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists:
    # Re-display the previously saved figure instead of redrawing it.
    img = mpimg.imread(figure_filename)
    plt.figure(figsize=(10, 8))
    plt.grid(False)
    plt.axis('off')
    plt.imshow(img)
else:
    # Select the replicate row once instead of repeating the lookup.
    replicate_values = df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple, :]

    figsize = (6, 4)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    ax.xaxis.set_major_formatter(FuncFormatter(usd_format))

    # Tick positions run between the data extremes rounded to round_to_nearest.
    first_tick = round(replicate_values.min() / round_to_nearest) * round_to_nearest
    last_tick = round(replicate_values.max() / round_to_nearest) * round_to_nearest
    plt.xticks(np.arange(first_tick, last_tick + 1, xaxis_interval))

    bin_count_array, bin_array, patches = ax.hist(
        replicate_values,
        linewidth=1,
        bins=number_of_bins,
        rwidth=1,
        alpha=1,
        edgecolor='green',
    )

    # Two-tone colouring: gray below the split index, green from it onwards.
    for bin_index, patch in enumerate(patches):
        bar_color = 'gray' if bin_index < bin_number_color_split else 'tab:green'
        patch.set_facecolor(bar_color)
        patch.set_edgecolor(bar_color)

    plt.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1)
    plt.xlabel(xlabel_string, fontsize=15)
    plt.ylabel(ylabel_string, fontsize=15)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.title(title_string, fontsize=15)
    # Save with the same dpi that is embedded in the file name.
    plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()

# Cell value: the entries keyed .1 and 1 for the plotted row.
(random_forest_model_metric_replicate_quantile_series_collection_overall[row_name_tuple][.1],
 random_forest_model_metric_replicate_quantile_series_collection_overall[row_name_tuple][1])
(4568.45, 5818.5)
time: 64 ms (started: 2023-09-28 17:20:11 -07:00)
#roas 90% confidence interval
# Shows the distribution of the ('Treatment', 'ROAS') replicate row.
# When the figure file already exists it is displayed as an image; otherwise
# the histogram is drawn, styled and saved. The cell's value is the pair of
# entries keyed .1 and 1 in the replicate quantile collection for that row.
model_type = 'random_forest'
row_name_tuple = ('Treatment', 'ROAS')
xlabel_string = 'ROAS'
ylabel_string = 'Count'
title_string = 'ROAS Distribution'
xaxis_interval = 10
round_to_nearest = 10
dpi = 100
figure_filename = (
    '../reports/figures/figure_' + model_type + '_classifier_'
    + xlabel_string.lower().replace(' ', '_')
    + '_overall_90_percent_confidence_dpi_' + str(dpi)
    + '_v' + filename_version + '.png'
)
number_of_bins = 88
# Bars with an index below this value are drawn gray, the rest green.
# NOTE(review): the value looks hand-picked per figure - confirm it still
# matches the lower interval bound whenever the replicates change.
bin_number_color_split = 33

figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists:
    # Re-display the previously saved figure instead of redrawing it.
    img = mpimg.imread(figure_filename)
    plt.figure(figsize=(10, 8))
    plt.grid(False)
    plt.axis('off')
    plt.imshow(img)
else:
    # Select the replicate row once instead of repeating the lookup.
    replicate_values = df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple, :]

    figsize = (6, 4)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    ax.xaxis.set_major_formatter(FuncFormatter(format_percentage_without_multiplier))

    # Tick positions run between the data extremes rounded to round_to_nearest.
    first_tick = round(replicate_values.min() / round_to_nearest) * round_to_nearest
    last_tick = round(replicate_values.max() / round_to_nearest) * round_to_nearest
    plt.xticks(np.arange(first_tick, last_tick + 1, xaxis_interval))

    bin_count_array, bin_array, patches = ax.hist(
        replicate_values,
        linewidth=1,
        bins=number_of_bins,
        rwidth=1,
        alpha=1,
        edgecolor='green',
    )

    # Two-tone colouring: gray below the split index, green from it onwards.
    for bin_index, patch in enumerate(patches):
        bar_color = 'gray' if bin_index < bin_number_color_split else 'tab:green'
        patch.set_facecolor(bar_color)
        patch.set_edgecolor(bar_color)

    plt.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1)
    plt.xlabel(xlabel_string, fontsize=15)
    plt.ylabel(ylabel_string, fontsize=15)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.title(title_string, fontsize=15)
    # Save with the same dpi that is embedded in the file name.
    plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()

# Cell value: the entries keyed .1 and 1 for the plotted row.
(random_forest_model_metric_replicate_quantile_series_collection_overall[row_name_tuple][.1],
 random_forest_model_metric_replicate_quantile_series_collection_overall[row_name_tuple][1])
(496.56230745474505, 531.9878764150473)
time: 66.4 ms (started: 2023-09-28 17:20:11 -07:00)
#roas uplift 90% confidence interval
# Shows the distribution of the ('Uplift', 'ROAS') replicate row.
# When the figure file already exists it is displayed as an image; otherwise
# the histogram is drawn, styled and saved. The cell's value is the pair of
# entries keyed .1 and 1 in the replicate quantile collection for that row.
model_type = 'random_forest'
row_name_tuple = ('Uplift', 'ROAS')
xlabel_string = 'ROAS Uplift'
ylabel_string = 'Count'
title_string = 'ROAS Uplift Distribution'
xaxis_interval = 20
round_to_nearest = 20
dpi = 100
figure_filename = (
    '../reports/figures/figure_' + model_type + '_classifier_'
    + xlabel_string.lower().replace(' ', '_')
    + '_overall_90_percent_confidence_dpi_' + str(dpi)
    + '_v' + filename_version + '.png'
)
number_of_bins = 88
# Bars with an index below this value are drawn gray, the rest green.
# NOTE(review): the value looks hand-picked per figure - confirm it still
# matches the lower interval bound whenever the replicates change.
bin_number_color_split = 30

figure_filename_exists = os.path.isfile(figure_filename)
if figure_filename_exists:
    # Re-display the previously saved figure instead of redrawing it.
    img = mpimg.imread(figure_filename)
    plt.figure(figsize=(10, 8))
    plt.grid(False)
    plt.axis('off')
    plt.imshow(img)
else:
    # Select the replicate row once instead of repeating the lookup.
    replicate_values = df_test_random_forest_90_precision_estimated_10000_metric_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_replicates_overall.loc[row_name_tuple, :]

    figsize = (6, 4)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    ax.xaxis.set_major_formatter(FuncFormatter(format_percentage_without_multiplier))

    # Tick positions run between the data extremes rounded to round_to_nearest.
    first_tick = round(replicate_values.min() / round_to_nearest) * round_to_nearest
    last_tick = round(replicate_values.max() / round_to_nearest) * round_to_nearest
    plt.xticks(np.arange(first_tick, last_tick + 1, xaxis_interval))

    bin_count_array, bin_array, patches = ax.hist(
        replicate_values,
        linewidth=1,
        bins=number_of_bins,
        rwidth=1,
        alpha=1,
        edgecolor='green',
    )

    # Two-tone colouring: gray below the split index, green from it onwards.
    for bin_index, patch in enumerate(patches):
        bar_color = 'gray' if bin_index < bin_number_color_split else 'tab:green'
        patch.set_facecolor(bar_color)
        patch.set_edgecolor(bar_color)

    plt.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1)
    plt.xlabel(xlabel_string, fontsize=15)
    plt.ylabel(ylabel_string, fontsize=15)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.title(title_string, fontsize=15)
    # Save with the same dpi that is embedded in the file name.
    plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()

# Cell value: the entries keyed .1 and 1 for the plotted row.
(random_forest_model_metric_replicate_quantile_series_collection_overall[row_name_tuple][.1],
 random_forest_model_metric_replicate_quantile_series_collection_overall[row_name_tuple][1])
(181.33783599369275, 252.37496977458187)
time: 64.7 ms (started: 2023-09-28 17:20:11 -07:00)
# Build the gradient boosting test metrics table: one column per entry of
# feature_column_name_filter_value_list_dictionary, then append the estimated
# cost / sale rows and the Ad Revenue, Ad Spend, ROAS figures derived by icr.
model_type = 'gradient_boosting'
survey_number_recall_estimated_y_predicted_column_name = 'Y_test_survey_80_recall_estimate_predicted'

# Collect one metric list per filter-value list; each call gets its own copy
# of the prediction DataFrame.
metrics_coupon_venue_type_list = []
for (feature_column_name_filter_value_list_dictionary_key,
     _filter_value_list) in feature_column_name_filter_value_list_dictionary.items():
    metrics_coupon_venue_type_list.append(
        icr.get_model_and_survey_metrics(
            df=df_y_test_model_name_predicted_y_test_survey_recall_estimate_predicted_y_actual_coupon_venue_type.copy(),
            model_y_predicted_column_name='Y_test_' + model_type + '_predicted',
            survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
            y_predicted_column_name_base_survey=y_predicted_column_name_base_survey,
            y_actual_column_name='Y',
            feature_column_name_filter=feature_column_name_filter,
            feature_column_name_filter_value_list=_filter_value_list,
            metrics_column_name_list=None,
        )
    )

# Only the first two thirds of multiple_index label the collected metrics;
# the transpose puts the metrics on the rows.
_metric_label_count = len(multiple_index) * 2 // 3
df_test_gradient_boosting_model_survey_metrics = pd.DataFrame(
    metrics_coupon_venue_type_list,
    index=feature_column_name_filter_value_list_dictionary_key_list,
    columns=multiple_index[:_metric_label_count],
).T
df_test_gradient_boosting_metrics = icr.calculate_and_add_model_survey_difference(
    df_test_gradient_boosting_model_survey_metrics,
    multiple_index,
)

#add Venue Type Coupon Recommendation Cost Estimated, Sale Estimated
df_test_gradient_boosting_metrics_coupon_recommendation_cost_estimated_sale_estimated = pd.concat(
    [
        df_test_gradient_boosting_metrics,
        df_train_gradient_boosting_80_recall_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
    ],
    axis=0,
)

#get and add Total Ad Spend, Total Revenue, ROAS
df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = icr.get_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI(
    df=df_test_gradient_boosting_metrics_coupon_recommendation_cost_estimated_sale_estimated,
)

#select and reorder basic metrics, recommendation cost estimated, and average sale estimate
multiindex_basic_metrics = icr.get_the_multiindex_object_with_basic_metrics()
multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated = icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()
multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated = pd.MultiIndex.from_tuples(
    list(multiindex_basic_metrics)
    + list(multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated)
)

#display combined metrics
df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[
    multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated, :
]
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 79.876374 | 80.100756 | 75.159236 | 83.418367 | 83.163265 | 62.711864 |
Percentage of Coupon Acceptances Captured | 80.262250 | 74.473068 | 75.641026 | 90.082645 | 88.586957 | 54.814815 | |
Coupon Acceptances | 1163.000000 | 318.000000 | 118.000000 | 327.000000 | 326.000000 | 74.000000 | |
Coupon Acceptances Possible | 1449.000000 | 427.000000 | 156.000000 | 363.000000 | 368.000000 | 135.000000 | |
Coupon Recommendations | 1456.000000 | 397.000000 | 157.000000 | 392.000000 | 392.000000 | 118.000000 | |
Coupon Recommendations Possible | 2537.000000 | 811.000000 | 399.000000 | 489.000000 | 534.000000 | 304.000000 | |
Ad Revenue | 14926.000000 | 1749.000000 | 1770.000000 | 4905.000000 | 3912.000000 | 2590.000000 | |
Ad Spend | 2970.843315 | 336.842215 | 349.149254 | 962.883782 | 774.679720 | 547.288344 | |
ROAS | 502.416264 | 519.234206 | 506.946522 | 509.407271 | 504.982885 | 473.242310 | |
Control | Coupon Acceptance Rate | 56.597222 | 52.715655 | 37.537538 | 73.809524 | 67.840376 | 46.640316 |
Percentage of Coupon Acceptances Captured | 78.743961 | 77.283372 | 80.128205 | 76.859504 | 78.532609 | 87.407407 | |
Coupon Acceptances | 1141.000000 | 330.000000 | 125.000000 | 279.000000 | 289.000000 | 118.000000 | |
Coupon Acceptances Possible | 1449.000000 | 427.000000 | 156.000000 | 363.000000 | 368.000000 | 135.000000 | |
Coupon Recommendations | 2016.000000 | 626.000000 | 333.000000 | 378.000000 | 426.000000 | 253.000000 | |
Coupon Recommendations Possible | 2537.000000 | 811.000000 | 399.000000 | 489.000000 | 534.000000 | 304.000000 | |
Ad Revenue | 15473.000000 | 1815.000000 | 1875.000000 | 4185.000000 | 3468.000000 | 4130.000000 | |
Ad Spend | 4215.483585 | 531.141629 | 740.552239 | 928.495076 | 841.871329 | 1173.423313 | |
ROAS | 367.051601 | 341.716767 | 253.189431 | 450.729370 | 411.939436 | 351.961645 | |
Uplift | Coupon Acceptance Rate | 23.279151 | 27.385101 | 37.621698 | 9.608844 | 15.322890 | 16.071548 |
Percentage of Coupon Acceptances Captured | 1.518288 | -2.810304 | -4.487179 | 13.223140 | 10.054348 | -32.592593 | |
Coupon Acceptances | 22.000000 | -12.000000 | -7.000000 | 48.000000 | 37.000000 | -44.000000 | |
Coupon Acceptances Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Coupon Recommendations | -560.000000 | -229.000000 | -176.000000 | 14.000000 | -34.000000 | -135.000000 | |
Coupon Recommendations Possible | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
Ad Revenue | -547.000000 | -66.000000 | -105.000000 | 720.000000 | 444.000000 | -1540.000000 | |
Ad Spend | -1244.640270 | -194.299414 | -391.402985 | 34.388707 | -67.191608 | -626.134969 | |
ROAS | 135.364663 | 177.517439 | 253.757091 | 58.677902 | 93.043449 | 121.280665 | |
Treatment | Average Coupon Recommendation Cost Estimated | NaN | 0.848469 | 2.223881 | 2.456336 | 1.976224 | 4.638037 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 | |
Control | Average Coupon Recommendation Cost Estimated | NaN | 0.541538 | 1.244129 | 2.201155 | 1.707282 | 3.081650 |
Average Sale Estimated | NaN | 5.500000 | 15.000000 | 15.000000 | 12.000000 | 35.000000 |
time: 62.4 ms (started: 2023-09-28 17:20:11 -07:00)
#Dot Plot for Coupon Acceptance Rate, Percentage of Coupon Acceptances Captured, and Coupon Acceptances
# Prepares the random forest rows used by the dot plot and displays them.

#initialize variables
column_name_list = [
    'Overall',
    'Coffee House',
    'Bar',
    'Takeout',
    'Low-Cost Restaurant',
    'Mid-Range Restaurant',
]

#get Coupon Acceptance Rate, Percentage of Coupon Acceptances Captured and Coupon Acceptances DataFrame
multiindex_metrics_list = [
    ('Treatment', metric_name)
    for metric_name in (
        'Coupon Acceptance Rate',
        'Percentage of Coupon Acceptances Captured',
        'Coupon Acceptances',
    )
]
df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances = (
    df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI
    .loc[multiindex_metrics_list, column_name_list]
)

#build the Coupon Acceptance Group DataFrame
df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances_coupon_acceptances_group = icr.extract_and_add_metric_coupon_acceptances_group(
    df=df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances,
)

# Last expression of the cell: shown as the cell output.
df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances_coupon_acceptances_group
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 90.604027 | 89.795918 | 92.307692 | 91.194969 | 90.116279 | 100.000000 |
Percentage of Coupon Acceptances Captured | 27.950311 | 20.608899 | 7.692308 | 39.944904 | 42.119565 | 3.703704 | |
Coupon Acceptances | 405.000000 | 88.000000 | 12.000000 | 145.000000 | 155.000000 | 5.000000 | |
Coupon Acceptances Group | 400.000000 | 140.000000 | 40.000000 | 140.000000 | 140.000000 | 40.000000 |
time: 9.13 ms (started: 2023-09-28 17:20:11 -07:00)
# Dot plot of the Pilot (random forest) campaign model: one dot per venue
# type, x = Percentage of Coupon Acceptances Captured, y = Coupon Acceptance
# Rate, dot area = 'Coupon Acceptances Group' value. The figure is saved to
# figure_filename and shown.
xlabel_string = 'Percentage of Coupon Acceptances Captured'
ylabel_string = 'Coupon Acceptance Rate'
title_string = 'Pilot Campaign Model Metrics'
model_type = 'random_forest'
# Set explicitly so the cell does not depend on a value left behind by an
# earlier cell.
dpi = 100
figure_filename = '../reports/figures/figure_'+str(model_type)+'_'+title_string.replace(' ','_').lower()+'_in_plot_dot_labels'+'_v'+'_B_'+filename_version+'.png'
campaign_model_metric_label = ['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant',]

_dot_metrics = df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances_coupon_acceptances_group
x = _dot_metrics.loc[('Treatment','Percentage of Coupon Acceptances Captured')].values
y = _dot_metrics.loc[('Treatment','Coupon Acceptance Rate')].values
sizes = _dot_metrics.loc[('Treatment','Coupon Acceptances Group')].values
color_list = ['#a6cee3',]*6
plt.rcParams.update({'font.size': 18})

# Create the Plot
figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
scatter = axes.scatter(x, y, c=color_list, s=sizes, alpha=0.7, edgecolors='k', linewidths=1)

# Add Dot Labels: (dx, dy) data-unit offsets, in campaign_model_metric_label order
label_offset_list = [
    (-4.5, 1.5),   # Overall
    (-10, -1.8),   # Coffee House
    (1, 0),        # Bar
    (1, 0.2),      # Takeout
    (1, -1),       # Low-Cost Restaurant
    (1, -1),       # Mid-Range Restaurant
]
for string_label, (x_offset, y_offset), x_value, y_value in zip(campaign_model_metric_label, label_offset_list, x, y):
    axes.annotate(string_label, (x_value + x_offset, y_value + y_offset))

# Add the "Coupon Acceptances" legend
# legend_elements(prop='sizes') yields one handle per distinct marker size in
# ascending order, so the labels are derived from the sizes actually plotted
# instead of a fixed four-item list that can outnumber the handles.
# NOTE(review): the size -> range pairing mirrors the four-bucket legend used
# for the combined campaign plot; confirm against
# icr.extract_and_add_metric_coupon_acceptances_group.
coupon_acceptances_range_by_marker_size = {40: '1-20', 140: '21-200', 400: '201-500', 750: '501-1500'}
size_legend_handles = scatter.legend_elements(prop='sizes')[0]
size_legend_labels = [
    coupon_acceptances_range_by_marker_size.get(marker_size, '{:g}'.format(marker_size))
    for marker_size in np.unique(sizes)
]
legend_coupon_acceptances_group = plt.legend(handles=size_legend_handles, title="Coupon Acceptances", labels=size_legend_labels, loc='lower right')
plt.gca().add_artist(legend_coupon_acceptances_group)

# Add plot labels
plt.xlabel(xlabel_string)
plt.ylabel(ylabel_string)
plt.title(title_string)
plt.xticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.yticks([60, 70, 80, 90, 100])
# Render the fixed tick positions as whole-number percentages.
plt.gca().set_xticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_xticks()])
plt.gca().set_yticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_yticks()])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 215 ms (started: 2023-09-28 17:20:11 -07:00)
#Dot Plot for Coupon Acceptance Rate, Percentage of Coupon Acceptances Captured, and Coupon Acceptances
# Prepares the gradient boosting rows used by the dot plot and displays them.

#initialize variables
column_name_list = [
    'Overall',
    'Coffee House',
    'Bar',
    'Takeout',
    'Low-Cost Restaurant',
    'Mid-Range Restaurant',
]

#get Coupon Acceptance Rate, Percentage of Coupon Acceptances Captured and Coupon Acceptances DataFrame
multiindex_metrics_list = [
    ('Treatment', metric_name)
    for metric_name in (
        'Coupon Acceptance Rate',
        'Percentage of Coupon Acceptances Captured',
        'Coupon Acceptances',
    )
]
df_test_gradient_boosting_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances = (
    df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI
    .loc[multiindex_metrics_list, column_name_list]
)

#build the Coupon Acceptance Group DataFrame
df_test_gradient_boosting_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances_coupon_acceptances_group = icr.extract_and_add_metric_coupon_acceptances_group(
    df=df_test_gradient_boosting_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances,
)

# Last expression of the cell: shown as the cell output.
df_test_gradient_boosting_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances_coupon_acceptances_group
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 79.876374 | 80.100756 | 75.159236 | 83.418367 | 83.163265 | 62.711864 |
Percentage of Coupon Acceptances Captured | 80.262250 | 74.473068 | 75.641026 | 90.082645 | 88.586957 | 54.814815 | |
Coupon Acceptances | 1163.000000 | 318.000000 | 118.000000 | 327.000000 | 326.000000 | 74.000000 | |
Coupon Acceptances Group | 750.000000 | 400.000000 | 140.000000 | 400.000000 | 400.000000 | 140.000000 |
time: 10.4 ms (started: 2023-09-28 17:20:11 -07:00)
# Dot plot of the Drive Sales (gradient boosting) campaign model: one dot per
# venue type, x = Percentage of Coupon Acceptances Captured, y = Coupon
# Acceptance Rate, dot area = 'Coupon Acceptances Group' value. The figure is
# saved to figure_filename and shown.
xlabel_string = 'Percentage of Coupon Acceptances Captured'
ylabel_string = 'Coupon Acceptance Rate'
title_string = 'Drive Sales Campaign Model Metrics'
model_type = 'gradient_boosting'
# Set explicitly so the cell does not depend on a value left behind by an
# earlier cell.
dpi = 100
figure_filename = '../reports/figures/figure_'+str(model_type)+'_'+title_string.replace(' ','_').lower()+'_in_plot_dot_labels'+'_v'+filename_version+'.png'
campaign_model_metric_label = ['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant',]

_dot_metrics = df_test_gradient_boosting_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances_coupon_acceptances_group
x = _dot_metrics.loc[('Treatment','Percentage of Coupon Acceptances Captured')].values
y = _dot_metrics.loc[('Treatment','Coupon Acceptance Rate')].values
sizes = _dot_metrics.loc[('Treatment','Coupon Acceptances Group')].values
color_list = ['#a6cee3',]*6
plt.rcParams.update({'font.size': 18})

# Create the Plot
figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
scatter = axes.scatter(x, y, c=color_list, s=sizes, alpha=0.7, edgecolors='k', linewidths=1)

# Add Dot Labels: (dx, dy) data-unit offsets, in campaign_model_metric_label order
label_offset_list = [
    (2.5, -0.7),   # Overall
    (-20.2, -.4),  # Coffee House
    (1, 0),        # Bar
    (-.9, 1.2),    # Takeout
    (-30, 1),      # Low-Cost Restaurant
    (-31, 0),      # Mid-Range Restaurant
]
for string_label, (x_offset, y_offset), x_value, y_value in zip(campaign_model_metric_label, label_offset_list, x, y):
    axes.annotate(string_label, (x_value + x_offset, y_value + y_offset))

# Add the "Coupon Acceptances" legend
# legend_elements(prop='sizes') yields one handle per distinct marker size in
# ascending order, so the labels are derived from the sizes actually plotted
# rather than from a hand-trimmed list.
# NOTE(review): the size -> range pairing mirrors the four-bucket legend used
# for the combined campaign plot; confirm against
# icr.extract_and_add_metric_coupon_acceptances_group.
coupon_acceptances_range_by_marker_size = {40: '1-20', 140: '21-200', 400: '201-500', 750: '501-1500'}
size_legend_handles = scatter.legend_elements(prop='sizes')[0]
size_legend_labels = [
    coupon_acceptances_range_by_marker_size.get(marker_size, '{:g}'.format(marker_size))
    for marker_size in np.unique(sizes)
]
legend_coupon_acceptances_group = plt.legend(handles=size_legend_handles, title="Coupon Acceptances", labels=size_legend_labels, loc='lower right')
plt.gca().add_artist(legend_coupon_acceptances_group)

# Add plot labels
plt.xlabel(xlabel_string)
plt.ylabel(ylabel_string)
plt.title(title_string)
plt.xticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.yticks([60, 70, 80, 90, 100])
# Render the fixed tick positions as whole-number percentages.
plt.gca().set_xticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_xticks()])
plt.gca().set_yticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_yticks()])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 211 ms (started: 2023-09-28 17:20:11 -07:00)
#Dot Plot for Coupon Acceptance Rate, Percentage of Coupon Acceptances Captured, and Coupon Acceptances
# Combines the random forest columns with the gradient boosting columns
# (renamed with a 'Drive Sales Campaign ' prefix) into one table, adds the
# 'Coupon Acceptances Group' row, and keeps two versions: every column
# (..._group_all) and the first seven columns only (..._group).

#initialize variables
column_name_list = ['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant']

#get Coupon Acceptance Rate, Percentage of Coupon Acceptances Captured and Coupon Acceptances DataFrame
multiindex_metrics_list = [
    ('Treatment', 'Coupon Acceptance Rate'),
    ('Treatment', 'Percentage of Coupon Acceptances Captured'),
    ('Treatment', 'Coupon Acceptances'),
]
df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances = df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[multiindex_metrics_list, column_name_list]

# Prefix the gradient boosting columns so they stay distinct after the concat.
column_name_list_rename = ['Drive Sales Campaign ' + column_name for column_name in column_name_list]
column_name_list_dictionary = dict(zip(column_name_list, column_name_list_rename))
# NOTE(review): despite the name, this frame holds all six renamed columns,
# not just the overall one.
df_drive_sales_campaign_model_overall = df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[multiindex_metrics_list, column_name_list].rename(columns=column_name_list_dictionary)
df_campaign_model_metrics = pd.concat(
    [
        df_test_random_forest_metrics_coupon_acceptance_rate_percentage_of_coupon_acceptances_captured_coupon_acceptances,
        df_drive_sales_campaign_model_overall,
    ],
    axis=1,
)

#build the Coupon Acceptance Group DataFrame
# Called once; the result is the only thing used from here on.
df_campaign_model_metrics_coupon_acceptances_group = icr.extract_and_add_metric_coupon_acceptances_group(df=df_campaign_model_metrics)
del df_campaign_model_metrics

# Keep the full table before trimming to the first seven columns.
df_campaign_model_metrics_coupon_acceptances_group_all = df_campaign_model_metrics_coupon_acceptances_group.copy()
df_campaign_model_metrics_coupon_acceptances_group = df_campaign_model_metrics_coupon_acceptances_group.iloc[:, 0:7]

# Last expression of the cell: shown as the cell output.
df_campaign_model_metrics_coupon_acceptances_group
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | Drive Sales Campaign Overall | ||
---|---|---|---|---|---|---|---|---|
Treatment | Coupon Acceptance Rate | 90.604027 | 89.795918 | 92.307692 | 91.194969 | 90.116279 | 100.000000 | 79.876374 |
Percentage of Coupon Acceptances Captured | 27.950311 | 20.608899 | 7.692308 | 39.944904 | 42.119565 | 3.703704 | 80.262250 | |
Coupon Acceptances | 405.000000 | 88.000000 | 12.000000 | 145.000000 | 155.000000 | 5.000000 | 1163.000000 | |
Coupon Acceptances Group | 400.000000 | 140.000000 | 40.000000 | 140.000000 | 140.000000 | 40.000000 | 750.000000 |
time: 14.7 ms (started: 2023-09-28 17:20:12 -07:00)
# Combined campaign dot plot: the columns of
# df_campaign_model_metrics_coupon_acceptances_group as dots, with the last
# dot hatched, and both legends anchored to the right of the axes (x > 1).
xlabel_string = 'Percentage of Coupon Acceptances Captured'
ylabel_string = 'Coupon Acceptance Rate'
title_string = 'Campaign Model Metrics'
model_types = 'random_forest_gradient_boosting'
# Set explicitly so the cell does not depend on a value left behind by an
# earlier cell.
dpi = 100
# NOTE(review): the following campaign-plot cells build this exact same path,
# so whichever of them saves last overwrites this figure - confirm whether
# this variant should be kept under its own file name.
figure_filename = '../reports/figures/figure_'+str(model_types)+'_'+title_string.replace(' ','_').lower()+'_v'+filename_version+'.png'
campaign_model_metric_label = ['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant', 'Overall']
x = df_campaign_model_metrics_coupon_acceptances_group.loc[('Treatment','Percentage of Coupon Acceptances Captured')].values
y = df_campaign_model_metrics_coupon_acceptances_group.loc[('Treatment','Coupon Acceptance Rate')].values
sizes = df_campaign_model_metrics_coupon_acceptances_group.loc[('Treatment','Coupon Acceptances Group')].values
color_list = ['#a6cee3',]*7
# pattern_list drives the legend patches, pattern_list_2 the per-dot hatch.
pattern_list = [' ','xxx',]
pattern_list_2 = [' ', ' ', ' ', ' ', ' ', ' ','xxx',]
plt.rcParams.update({'font.size': 18})
coupon_acceptances_group_legend_location = (1.0222, 0.5)
campaign_model_legend_location = (1.0222, 0.8)

# Create the Plot
figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
scatter = axes.scatter(x, y, c=color_list, s=sizes, alpha=0.7, edgecolors='k', linewidths=1)

# Add Dot Labels: (dx, dy) data-unit offsets, in campaign_model_metric_label order
label_offset_list = [
    (-4.5, 1.5),   # Overall
    (-10, -1.8),   # Coffee House
    (1, 0),        # Bar
    (1, 0.2),      # Takeout
    (1, -1),       # Low-Cost Restaurant
    (1, -1),       # Mid-Range Restaurant
    (1, 1),        # Overall (last column)
]
for string_label, (x_offset, y_offset), x_value, y_value in zip(campaign_model_metric_label, label_offset_list, x, y):
    axes.annotate(string_label, (x_value + x_offset, y_value + y_offset))

# Add Dot Campaign Model Type Hatch
# Each dot is drawn a second time, individually, so it can carry its own hatch.
for x_value, y_value, color, marker_size, hatch_pattern in zip(x, y, color_list, sizes, pattern_list_2):
    axes.scatter(x_value, y_value, c=color, s=marker_size, alpha=0.7, edgecolors='k', linewidths=1, hatch=hatch_pattern)

# Add the "Campaign Model" Legend
legend_elements_patterns = [
    mpatches.Patch(facecolor='white', edgecolor='black', hatch=pattern, label=campaign_model_name)
    for pattern, campaign_model_name in zip(pattern_list, ['Pilot', 'Drive Sales'])
]
legend_patterns = axes.legend(handles=legend_elements_patterns, loc=campaign_model_legend_location, title='Campaign Model')
# Register the first legend as an artist so the second legend does not replace it.
axes.add_artist(legend_patterns)

# Add the "Coupon Acceptances" legend
legend_coupon_acceptances_group = plt.legend(handles=scatter.legend_elements(prop='sizes')[0], title="Coupon Acceptances", labels=['1-20', '21-200', '201-500', '501-1500'], loc=coupon_acceptances_group_legend_location)
plt.gca().add_artist(legend_coupon_acceptances_group)

# Add plot labels
plt.xlabel(xlabel_string)
plt.ylabel(ylabel_string)
plt.title(title_string)
plt.xticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.yticks([60, 70, 80, 90, 100])
# Render the fixed tick positions as whole-number percentages.
plt.gca().set_xticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_xticks()])
plt.gca().set_yticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_yticks()])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 261 ms (started: 2023-09-28 17:20:12 -07:00)
# Combined campaign dot plot: the columns of
# df_campaign_model_metrics_coupon_acceptances_group as dots, with the last
# dot hatched, and both legends anchored inside the axes near the lower left.
xlabel_string = 'Percentage of Coupon Acceptances Captured'
ylabel_string = 'Coupon Acceptance Rate'
title_string = 'Campaign Model Metrics'
model_types = 'random_forest_gradient_boosting'
# Set explicitly so the cell does not depend on a value left behind by an
# earlier cell.
dpi = 100
# NOTE(review): the neighbouring campaign-plot cells build this exact same
# path, so this cell overwrites the previous figure and may itself be
# overwritten - confirm whether each variant should get its own file name.
figure_filename = '../reports/figures/figure_'+str(model_types)+'_'+title_string.replace(' ','_').lower()+'_v'+filename_version+'.png'
campaign_model_metric_label = ['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant', 'Overall']
x = df_campaign_model_metrics_coupon_acceptances_group.loc[('Treatment','Percentage of Coupon Acceptances Captured')].values
y = df_campaign_model_metrics_coupon_acceptances_group.loc[('Treatment','Coupon Acceptance Rate')].values
sizes = df_campaign_model_metrics_coupon_acceptances_group.loc[('Treatment','Coupon Acceptances Group')].values
color_list = ['#a6cee3',]*7
# pattern_list drives the legend patches, pattern_list_2 the per-dot hatch.
pattern_list = [' ','xxx',]
pattern_list_2 = [' ', ' ', ' ', ' ', ' ', ' ','xxx',]
plt.rcParams.update({'font.size': 18})
coupon_acceptances_group_legend_location = (0.0222, 0.06)
campaign_model_legend_location = (0.0222, 0.36)

# Create the Plot
figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
scatter = axes.scatter(x, y, c=color_list, s=sizes, alpha=0.7, edgecolors='k', linewidths=1)

# Add Dot Labels: (dx, dy) data-unit offsets, in campaign_model_metric_label order
label_offset_list = [
    (-4.5, 1.5),   # Overall
    (-10, -1.8),   # Coffee House
    (1, 0),        # Bar
    (1, 0.2),      # Takeout
    (1, -1),       # Low-Cost Restaurant
    (1, -1),       # Mid-Range Restaurant
    (1, 1),        # Overall (last column)
]
for string_label, (x_offset, y_offset), x_value, y_value in zip(campaign_model_metric_label, label_offset_list, x, y):
    axes.annotate(string_label, (x_value + x_offset, y_value + y_offset))

# Add Dot Campaign Model Type Hatch
# Each dot is drawn a second time, individually, so it can carry its own hatch.
for x_value, y_value, color, marker_size, hatch_pattern in zip(x, y, color_list, sizes, pattern_list_2):
    axes.scatter(x_value, y_value, c=color, s=marker_size, alpha=0.7, edgecolors='k', linewidths=1, hatch=hatch_pattern)

# Add the "Campaign Model" Legend
legend_elements_patterns = [
    mpatches.Patch(facecolor='white', edgecolor='black', hatch=pattern, label=campaign_model_name)
    for pattern, campaign_model_name in zip(pattern_list, ['Pilot', 'Drive Sales'])
]
legend_patterns = axes.legend(handles=legend_elements_patterns, loc=campaign_model_legend_location, title='Campaign Model')
# Register the first legend as an artist so the second legend does not replace it.
axes.add_artist(legend_patterns)

# Add the "Coupon Acceptances" legend
legend_coupon_acceptances_group = plt.legend(handles=scatter.legend_elements(prop='sizes')[0], title="Coupon Acceptances", labels=['1-20', '21-200', '201-500', '501-1500'], loc=coupon_acceptances_group_legend_location)
plt.gca().add_artist(legend_coupon_acceptances_group)

# Add plot labels
plt.xlabel(xlabel_string)
plt.ylabel(ylabel_string)
plt.title(title_string)
plt.xticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.yticks([60, 70, 80, 90, 100])
# Render the fixed tick positions as whole-number percentages.
plt.gca().set_xticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_xticks()])
plt.gca().set_yticklabels(['{:.0f}%'.format(tick) for tick in plt.gca().get_yticks()])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 248 ms (started: 2023-09-28 17:20:12 -07:00)
# ----------------------------------------------------------------------------
# Figure: "Campaign Model Metrics"
# One dot per (campaign model, coupon venue type). The first six dots are
# drawn without a hatch and the last six with an 'xxx' hatch; the legend maps
# these to the 'Pilot' and 'Drive Sales' campaign models respectively.
# ----------------------------------------------------------------------------
title_string = 'Campaign Model Metrics'
xlabel_string = 'Percentage of Coupon Acceptances Captured'
ylabel_string = 'Coupon Acceptance Rate'
model_types = 'random_forest_gradient_boosting'
figure_filename = f"../reports/figures/figure_{model_types}_{title_string.replace(' ', '_').lower()}_v{filename_version}.png"

# Dot labels: the same six venue names, once per campaign model
campaign_model_metric_label = 2 * ['Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant']

# Dot coordinates and sizes come from the 'Treatment' rows of the metrics table
x = df_campaign_model_metrics_coupon_acceptances_group_all.loc[('Treatment', 'Percentage of Coupon Acceptances Captured')].values
y = df_campaign_model_metrics_coupon_acceptances_group_all.loc[('Treatment', 'Coupon Acceptance Rate')].values
sizes = df_campaign_model_metrics_coupon_acceptances_group_all.loc[('Treatment', 'Coupon Acceptances Group')].values

# Every dot shares one fill color; the campaign model is encoded by the hatch
color = '#a6cee3'
dot_count = 12
color_list = dot_count * [color]
pattern_list = [' ', 'xxx']
pattern_list_2 = 6 * [' '] + 6 * ['xxx']

plt.rcParams.update({'font.size': 18})

# Legend anchors
coupon_acceptances_group_legend_location = (0.0222, 0.06)
campaign_model_legend_location = (0.0222, 0.36)

# Hand-tuned (dx, dy) text offsets, one per dot, in the same order as
# campaign_model_metric_label
label_offsets = [
    (-4.5, 3),     # Overall
    (-10, -4.8),   # Coffee House
    (1, 0),        # Bar
    (0.0, 1.7),    # Takeout
    (1.5, -1.5),   # Low-Cost Restaurant
    (1, -1.8),     # Mid-Range Restaurant
    (2.3, -2.9),   # Overall
    (-20.2, -.4),  # Coffee House
    (1, -3),       # Bar
    (-.9, 3.0),    # Takeout
    (-29.5, 1),    # Low-Cost Restaurant
    (-32.0, 0),    # Mid-Range Restaurant
]

# Create the plot; this first scatter also supplies the size-legend handles
figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
scatter = axes.scatter(x, y, c=color_list, s=sizes, alpha=0.7, edgecolors='k', linewidths=1)

# Add dot labels
for index, (string_label, (dx, dy)) in enumerate(zip(campaign_model_metric_label, label_offsets)):
    axes.annotate(string_label, (x[index] + dx, y[index] + dy))

# Redraw each dot individually so that it can carry its own hatch pattern
for index, dot_color in enumerate(color_list):
    axes.scatter(x[index], y[index], c=dot_color, s=sizes[index], alpha=0.7, edgecolors='k', linewidths=1, hatch=pattern_list_2[index])

# Add the "Campaign Model" legend (registered as an artist so that the next
# legend call does not replace it)
legend_elements_patterns = [
    mpatches.Patch(facecolor=color, edgecolor='black', hatch=pattern, label=campaign_model_name)
    for pattern, campaign_model_name in zip(pattern_list, ['Pilot', 'Drive Sales'])
]
legend_patterns = axes.legend(handles=legend_elements_patterns, loc=campaign_model_legend_location, title='Campaign Model')
axes.add_artist(legend_patterns)

# Add the "Coupon Acceptances" legend (dot size)
size_handles = scatter.legend_elements(prop='sizes')[0]
legend_coupon_acceptances_group = axes.legend(handles=size_handles, title="Coupon Acceptances", labels=['1-20', '21-200', '201-500', '501-1500'], loc=coupon_acceptances_group_legend_location)
axes.add_artist(legend_coupon_acceptances_group)

# Add plot labels
axes.set_xlabel(xlabel_string)
axes.set_ylabel(ylabel_string)
axes.set_title(title_string)

# Both axes run from 0% to 100% in steps of 10
axes.set_xticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
axes.set_yticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
axes.set_xticklabels([f'{tick:.0f}%' for tick in axes.get_xticks()])
axes.set_yticklabels([f'{tick:.0f}%' for tick in axes.get_yticks()])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 336 ms (started: 2023-09-28 17:24:18 -07:00)
# Show the Profit / Spend / ROI table for the gradient boosting test metrics.
# The rendered output has one Profit/Spend/ROI column group per additional
# production cost and one row each for Control, Treatment and Uplift.
icr.profit_spend_roi_number_table(df=df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI)
Additional Production Cost | 200 | 2000 | 20000 | ||||||
---|---|---|---|---|---|---|---|---|---|
Metric | Profit | Spend | ROI | Profit | Spend | ROI | Profit | Spend | ROI |
Group | |||||||||
Control | 11057.516415 | 4415.483585 | 250.425943 | 9257.516415 | 6215.483585 | 148.942818 | -8742.483585 | 24215.483585 | -36.102866 |
Treatment | 11755.156685 | 3170.843315 | 370.726508 | 9955.156685 | 4970.843315 | 200.270981 | -8044.843315 | 22970.843315 | -35.021976 |
Uplift | 697.640270 | -1244.640270 | 120.300564 | 697.640270 | -1244.640270 | 51.328163 | 697.640270 | -1244.640270 | 1.080890 |
time: 9.78 ms (started: 2023-09-28 17:20:12 -07:00)
#V-- add second legend with distinction between the Pilot Campaign Model and Drive Sales Campaign Model
time: 287 µs (started: 2023-09-28 17:20:12 -07:00)
########################################################################################################################################################################################################
# Gradient boosting model vs. survey: 95% confidence interval metric table per
# coupon venue type. Reuse the saved table when it exists; otherwise build it
# from metric replicates and save both the table and the replicate collection.
filename = f'df_test_gradient_boosting_model_survey_95_confidence_interval_metric_feature_column_name_filter_value_v{filename_version}.pkl'
df_readback = icr.return_processed_data_file_if_it_exists_v2(
    filename=filename,
    column_name_row_integer_location_list=[0, 1],
    index_column_integer_location_list=[0, 1],
)

if not df_readback.empty:
    # A previously saved table was found: use it as-is
    df_test_gradient_boosting_model_survey_95_confidence_interval_metric_feature_column_name_filter_value = df_readback
else:
    model_type = 'gradient_boosting'
    survey_number_recall_estimated_y_predicted_column_name = 'Y_test_survey_80_recall_estimate_predicted'

    # Outside of the `st == 'yes'` mode, use 10000 replicates
    if st != 'yes':
        number_of_replicates = 10000

    # 2.5% and 97.5% quantiles bound the 95% interval
    quantile_lower_upper_list = [0.025, 0.975]
    feature_column_name_filter = 'coupon_venue_type'
    save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list = [
        'Overall', 'Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant',
    ]

    # One entry per venue-type key: the interval table and the replicate metrics
    gradient_boosting_model_survey_95_confidence_interval_metric_collection = {}
    df_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection = {}
    for feature_column_name_filter_value_list_dictionary_key in feature_column_name_filter_value_list_dictionary_key_list:
        confidence_interval_table, replicate_metrics = icr.get_metric_confidence_interval_table_by_feature_column_name_filter_value_list_dictionary_key(
            df_y_train_test_model_name_predicted_y_train_test_survey_recall_estimate_predicted_y_actual_feature_column_name_filter=df_y_test_model_name_predicted_y_test_survey_recall_estimate_predicted_y_actual_coupon_venue_type.copy(),
            feature_column_name_filter=feature_column_name_filter,
            feature_column_name_filter_value_list_dictionary_key=feature_column_name_filter_value_list_dictionary_key,
            feature_column_name_filter_value_list_dictionary=feature_column_name_filter_value_list_dictionary,
            multiple_index=multiple_index,
            number_of_replicates=number_of_replicates,
            quantile_lower_upper_list=quantile_lower_upper_list,
            model_type=model_type,
            survey_number_recall_estimated_y_predicted_column_name=survey_number_recall_estimated_y_predicted_column_name,
            save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list=save_metric_replicates_feature_column_name_filter_value_list_dictionary_key_list,
            filename_version=filename_version,
            sample_size=None,
        )
        gradient_boosting_model_survey_95_confidence_interval_metric_collection[feature_column_name_filter_value_list_dictionary_key] = confidence_interval_table
        df_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection[feature_column_name_filter_value_list_dictionary_key] = replicate_metrics

    # Combine the per-key interval tables into a single data frame
    df_test_gradient_boosting_model_survey_95_confidence_interval_metric_feature_column_name_filter_value = icr.convert_collection_to_data_frame_and_drop_top_column_level(
        gradient_boosting_model_survey_95_confidence_interval_metric_collection
    )

    # Save the interval table
    df_test_gradient_boosting_model_survey_95_confidence_interval_metric_feature_column_name_filter_value = icr.save_and_return_data_frame_v2(
        df_test_gradient_boosting_model_survey_95_confidence_interval_metric_feature_column_name_filter_value,
        filename=filename,
    )

    # Save the replicate metrics collection under its own file name
    filename = f'df_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection_v{filename_version}.pkl'
    df_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection = icr.save_and_return_collection(
        df_gradient_boosting_80_recall_estimated_feature_filter_number_bootstrap_replicates_metrics_collection,
        filename=filename,
    )
########################################################################################################################################################################################################
########################################################################################################################################################################################################
### Get Gradient Boosting 80% Recall Estimated 95% Confidence Interval Ad Revenue, Ad Spend, ROAS, Profit, Spend, and ROI (Per Coupon Venue Type) Table
# Reuse the saved table when it exists; otherwise compute it from the metric
# replicates and save it.
filename = 'df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_v' + filename_version + '.pkl'
df_readback = icr.return_processed_data_file_if_it_exists_v2(
    filename=filename,
    column_name_row_integer_location_list=[0, 1],
    index_column_integer_location_list=[0, 1],
)

if not df_readback.empty:
    df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = df_readback
else:
    # Resolve the replicate count BEFORE anything consumes it. It is used for
    # the replicate collection, for the replicate file names and for the
    # confidence interval call; setting it only before the last of those let
    # the first two run with a stale (or undefined) value.
    if st != 'yes':
        number_of_replicates = 10000

    # Venue types that have per-venue replicates ('Overall' is dropped below)
    column_name_list = ['Coffee House', 'Bar', 'Takeout', 'Low-Cost Restaurant', 'Mid-Range Restaurant']
    model_name = 'gradient_boosting'

    # Get the gradient boosting model and survey coupon recommendation cost
    # estimated and sale estimated replicate collection by venue type.
    # NOTE(review): the input frame is named df_train_... although the other
    # inputs of this cell are test-set objects - confirm this is intended.
    df_gradient_boosting_model_survey_coupon_recommendation_cost_estimated_sale_estimated_replicate_collection = \
        icr.get_model_survey_coupon_recommendation_cost_estimated_and_sale_estimated_replicate_collection_venue_type(
            df=df_train_gradient_boosting_80_recall_survey_100_recall_coupon_recommendation_cost_estimated_sale_estimated,
            column_name_list=list(column_name_list),  # pass a copy so the helper cannot alter the list reused below
            column_name_drop_list=['Overall'],
            number_of_replicates=number_of_replicates,
        )

    # Build the file name of the gradient boosting metric replicates for each venue type
    test_gradient_boosting_metric_replicate_filename_collection = {}
    for column_name in column_name_list:
        test_gradient_boosting_metric_replicate_filename_collection[column_name] = (
            'df_test_' + str(model_name)
            + '_number_metric_estimated_' + str(number_of_replicates)
            + '_metric_replicates_from_' + str(number_of_replicates)
            + '_nonparametric_subsamples_' + str(column_name.lower().replace(' ', '_'))
            + '_v' + str(filename_version) + '.csv'
        )
        print(test_gradient_boosting_metric_replicate_filename_collection[column_name])

    # Calculate the 95% confidence intervals for Ad Revenue, Ad Spend, ROAS,
    # Profit, Spend and ROI and append them to the metric interval table
    df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = \
        icr.calculate_Overall_and_Coupon_Venue_Type_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI_95_Confidence_Intervals_from_metric_replicates_and_append_to_metric_confidence_interval_table(
            df_model_name_model_survey_coupon_recommendation_cost_estimated_sale_estimated_replicate_collection=df_gradient_boosting_model_survey_coupon_recommendation_cost_estimated_sale_estimated_replicate_collection,
            df_test_model_name_model_survey_95_confidence_interval_metric_feature_column_name_filter_value=df_test_gradient_boosting_model_survey_95_confidence_interval_metric_feature_column_name_filter_value,
            test_model_name_metric_replicate_filename_collection=test_gradient_boosting_metric_replicate_filename_collection,
            model_type=model_name,
            filename_version=filename_version,
            number_of_replicates=number_of_replicates,
        )

    # Save it
    df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI = \
        icr.save_and_return_data_frame_v2(df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI, filename=filename)
# Row selector for the display: the basic metric rows first, followed by the
# coupon recommendation cost estimated / sale estimated rows
multiindex_basic_metrics = icr.get_the_multiindex_object_with_basic_metrics()
multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated = icr.get_the_multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated()
multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated = pd.MultiIndex.from_tuples(
    [
        *multiindex_basic_metrics,
        *multiindex_metrics_coupon_recommendation_cost_estimate_sale_estimated,
    ]
)
# Display the selected rows (all columns) in that order
df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[multiindex_basic_metrics_coupon_recommendation_cost_estimate_sale_estimated, :]
########################################################################################################################################################################################################
This file already exists This file already exists
95% Confidence Interval | |||||||
---|---|---|---|---|---|---|---|
Overall | Coffee House | Bar | Takeout | Low-Cost Restaurant | Mid-Range Restaurant | ||
Treatment | Coupon Acceptance Rate | (77%, 81%) | (76%, 83%) | (68%, 81%) | (79%, 87%) | (79%, 86%) | (53%, 71%) |
Percentage of Coupon Acceptances Captured | (78%, 82%) | (70%, 78%) | (68%, 82%) | (86%, 93%) | (85%, 91%) | (46%, 63%) | |
Coupon Acceptances | (1115, 1213) | (286, 351) | (98, 140) | (294, 361) | (294, 360) | (58, 91) | |
Coupon Acceptances Possible | (1401, 1498) | (390, 464) | (133, 181) | (328, 398) | (334, 403) | (113, 158) | |
Coupon Recommendations | (1407, 1505) | (361, 433) | (133, 182) | (356, 428) | (357, 429) | (98, 139) | |
Coupon Recommendations Possible | (2537, 2537) | (765, 858) | (362, 435) | (450, 529) | (494, 575) | (271, 337) | |
Ad Revenue | (\$14132.98, \$15725.56) | (\$1573.0, \$1930.5) | (\$1470.0, \$2100.0) | (\$4410.0, \$5415.0) | (\$3528.0, \$4320.0) | (\$2030.0, \$3185.0) | |
Ad Spend | (\$2846.93, \$3098.58) | (\$306.3, \$367.39) | (\$295.78, \$404.75) | (\$874.46, \$1051.31) | (\$705.51, \$847.8) | (\$454.53, \$644.69) | |
ROAS | (485.46%, 518.55%) | (493.8%, 544.38%) | (460.09%, 553.01%) | (486.65%, 531.53%) | (481.97%, 526.35%) | (405.85%, 538.14%) | |
Control | Coupon Acceptance Rate | (54%, 58%) | (48%, 56%) | (32%, 42%) | (69%, 78%) | (63%, 72%) | (40%, 52%) |
Percentage of Coupon Acceptances Captured | (76%, 80%) | (73%, 81%) | (73%, 86%) | (72%, 81%) | (74%, 82%) | (81%, 92%) | |
Coupon Acceptances | (1093, 1190) | (297, 364) | (104, 147) | (248, 310) | (258, 321) | (98, 140) | |
Coupon Acceptances Possible | (1401, 1498) | (390, 464) | (133, 181) | (328, 398) | (334, 403) | (113, 158) | |
Coupon Recommendations | (1976, 2055) | (584, 669) | (299, 366) | (343, 414) | (389, 463) | (223, 284) | |
Coupon Recommendations Possible | (2537, 2537) | (765, 858) | (362, 435) | (450, 529) | (494, 575) | (271, 337) | |
Ad Revenue | (\$14602.48, \$16338.54) | (\$1633.5, \$2002.0) | (\$1560.0, \$2205.0) | (\$3720.0, \$4650.0) | (\$3096.0, \$3852.0) | (\$3430.0, \$4900.0) | |
Ad Spend | (\$4083.83, \$4349.14) | (\$495.51, \$567.63) | (\$664.94, \$813.94) | (\$842.52, \$1016.92) | (\$768.75, \$914.99) | (\$1034.28, \$1317.2) | |
ROAS | (350.06%, 384.03%) | (316.86%, 367.33%) | (218.47%, 289.66%) | (423.55%, 477.54%) | (385.1%, 438.71%) | (304.81%, 398.62%) | |
Uplift | Coupon Acceptance Rate | (21%, 25%) | (23%, 31%) | (31%, 43%) | (6%, 12%) | (11%, 18%) | (9%, 23%) |
Percentage of Coupon Acceptances Captured | (-1%, 4%) | (-8%, 3%) | (-13%, 4%) | (8%, 18%) | (4%, 15%) | (-42%, -23%) | |
Coupon Acceptances | (-20, 64) | (-37, 13) | (-21, 7) | (29, 68) | (18, 57) | (-59, -30) | |
Coupon Acceptances Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | |
Coupon Recommendations | (-624, -497) | (-268, -190) | (-204, -148) | (-10, 39) | (-61, -7) | (-159, -111) | |
Coupon Recommendations Possible | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | (0.0, 0.0) | |
Ad Revenue | (\$-1256.5, \$139.54) | (\$-203.5, \$71.5) | (\$-315.0, \$105.0) | (\$435.0, \$1020.0) | (\$216.0, \$684.0) | (\$-2065.0, \$-1050.0) | |
Ad Spend | (\$-1398.25, \$-1097.03) | (\$-227.39, \$-161.21) | (\$-453.67, \$-329.13) | (\$-24.56, \$95.8) | (\$-120.55, \$-13.83) | (\$-737.45, \$-514.82) | |
ROAS | (120.31%, 150.38%) | (151.62%, 203.11%) | (214.2%, 294.6%) | (39.23%, 78.83%) | (71.26%, 114.84%) | (68.5%, 174.18%) | |
Treatment | Average Coupon Recommendation Cost Estimated | NaN | (\$0.85, \$0.85) | (\$2.22, \$2.22) | (\$2.46, \$2.46) | (\$1.98, \$1.98) | (\$4.64, \$4.64) |
Average Sale Estimated | NaN | (\$5.5, \$5.5) | (\$15.0, \$15.0) | (\$15.0, \$15.0) | (\$12.0, \$12.0) | (\$35.0, \$35.0) | |
Control | Average Coupon Recommendation Cost Estimated | NaN | (\$0.54, \$0.54) | (\$1.24, \$1.24) | (\$2.2, \$2.2) | (\$1.71, \$1.71) | (\$3.08, \$3.08) |
Average Sale Estimated | NaN | (\$5.5, \$5.5) | (\$15.0, \$15.0) | (\$15.0, \$15.0) | (\$12.0, \$12.0) | (\$35.0, \$35.0) |
time: 20.6 ms (started: 2023-09-28 17:20:12 -07:00)
# Show the Profit / Spend / ROI table built from the gradient boosting 95%
# confidence interval metrics. The rendered output has one Profit/Spend/ROI
# column group per additional production cost ($200, $2,000, $20,000) and one
# row each for Treatment, Control and Uplift.
icr.profit_spend_roi_number_table(df=df_gradient_boosting_95_confidence_interval_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI)
95% Confidence Interval | |||||||||
---|---|---|---|---|---|---|---|---|---|
$200 Additional Production Cost | $2,000 Additional Production Cost | $20,000 Additional Production Cost | |||||||
Profit | Spend | ROI | Profit | Spend | ROI | Profit | Spend | ROI | |
Group | |||||||||
Treatment | (\$11062.41, \$12460.41) | (\$3046.93, \$3298.58) | (354.69%, 385.84%) | (\$9262.41, \$10660.41) | (\$4846.93, \$5098.58) | (189.07%, 211.21%) | (\$-8737.59, \$-7339.59) | (\$22846.93, \$23098.58) | (-38.21%, -31.83%) |
Control | (\$10263.34, \$11855.73) | (\$4283.83, \$4549.14) | (234.3%, 266.61%) | (\$8463.34, \$10055.73) | (\$6083.83, \$6349.14) | (137.16%, 160.91%) | (\$-9536.66, \$-7944.27) | (\$24083.83, \$24349.14) | (-39.5%, -32.73%) |
Uplift | (\$70.49, \$1295.19) | (\$-1398.25, \$-1097.03) | (106.33%, 134.08%) | (\$70.49, \$1295.19) | (\$-1398.25, \$-1097.03) | (42.23%, 60.22%) | (\$70.49, \$1295.19) | (\$-1398.25, \$-1097.03) | (-1.67%, 3.71%) |
time: 15.8 ms (started: 2023-09-28 17:20:12 -07:00)
# ----------------------------------------------------------------------------
# Figure: campaign ROI as a function of the additional production cost, one
# curve for the Pilot campaign (random forest metrics) and one for the Drive
# Sales campaign (gradient boosting metrics).
# ROI = (ad revenue - ad spend - additional cost) / (ad spend + additional cost)
# ----------------------------------------------------------------------------
model_types = 'random_forest_gradient_boosting'
xlabel_string = 'Additional Production Cost'
ylabel_string = 'ROI'
title_string = f'Campaign {ylabel_string} Per {xlabel_string}'
figure_filename = f"../reports/figures/figure_{model_types}_{title_string.replace(' ', '_').lower()}_v{filename_version}.png"

# Additional production cost grid: 1000 points from $0 to $20,000
additional_production_cost = np.linspace(0, 20000, 1000)

# Pilot campaign ROI curve ('Treatment' rows, 'Overall' column)
pilot_campaign_model_ad_revenue = df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[('Treatment', 'Ad Revenue'), 'Overall']
pilot_campaign_model_ad_spend = df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[('Treatment', 'Ad Spend'), 'Overall']
pilot_campaign_model_profit = pilot_campaign_model_ad_revenue - pilot_campaign_model_ad_spend - additional_production_cost
pilot_campaign_model_total_spend = pilot_campaign_model_ad_spend + additional_production_cost
pilot_campaign_model_roi = pilot_campaign_model_profit / pilot_campaign_model_total_spend

# Drive sales campaign ROI curve ('Treatment' rows, 'Overall' column)
drive_sales_campaign_model_ad_revenue = df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[('Treatment', 'Ad Revenue'), 'Overall']
drive_sales_campaign_model_ad_spend = df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI.loc[('Treatment', 'Ad Spend'), 'Overall']
drive_sales_campaign_model_profit = drive_sales_campaign_model_ad_revenue - drive_sales_campaign_model_ad_spend - additional_production_cost
drive_sales_campaign_model_total_spend = drive_sales_campaign_model_ad_spend + additional_production_cost
drive_sales_campaign_model_roi = drive_sales_campaign_model_profit / drive_sales_campaign_model_total_spend

plt.rcParams.update({'font.size': 16})
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))

# ROI curves, plotted in percent
axes.plot(additional_production_cost, pilot_campaign_model_roi * 100, label='Pilot Campaign')
axes.plot(additional_production_cost, drive_sales_campaign_model_roi * 100, label='Drive Sales Campaign')

# Reference lines: break-even (ROI = 0%) and zero additional cost
axes.axhline(y=0, color='k', linewidth=.6)
axes.axvline(x=0, color='k')

# Titles, formatters and legend
axes.set_title(title_string)
axes.set_xlabel(xlabel_string)
axes.set_ylabel(ylabel_string)
axes.xaxis.set_major_formatter('${x:1.0f}')
axes.yaxis.set_major_formatter(mtick.PercentFormatter())
axes.legend()
axes.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1)

# Axis ranges and explicit tick labels
axes.set_xlim(-0, 20000)
axes.set_ylim(-100, 480)
axes.set_xticks([0, 5000, 10000, 15000, 20000])
axes.set_yticks([-100, 0, 100, 200, 300, 400])
axes.set_xticklabels(['$0', '$5K', '$10K', '$15K', '$20K'])
axes.set_yticklabels(['-100%', '0%', '100%', '200%', '300%', '400%'])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 137 ms (started: 2023-09-28 17:20:13 -07:00)
# ----------------------------------------------------------------------------
# Figure: ROI uplift estimate (model ROI minus survey ROI) as a function of
# the additional production cost, for the Pilot campaign (random forest
# metrics) and the Drive Sales campaign (gradient boosting metrics).
# ----------------------------------------------------------------------------
model_types = 'random_forest_gradient_boosting'
xlabel_string = 'Additional Production Cost'
ylabel_string = 'ROI Uplift Estimate'
title_string = f'Campaign {ylabel_string} Per {xlabel_string}'
figure_filename = f"../reports/figures/figure_{model_types}_{title_string.replace(' ', '_').lower()}_v{filename_version}.png"

# Additional production cost grid: 1000 points from $0 to $10,000
additional_production_cost = np.linspace(0, 10000, 1000)

# Short aliases for the two metric tables and the ROI helper
random_forest_metrics = df_test_random_forest_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI
gradient_boosting_metrics = df_test_gradient_boosting_metrics_Ad_Revenue_Ad_Spend_ROAS_Profit_Spend_ROI
campaign_roi = icr.get_campaign_roi_from_ad_revenue_ad_spend_additional_production_cost

# Pilot campaign: model ('Treatment') and survey ('Control') ad revenue / ad spend
pilot_campaign_model_ad_revenue = random_forest_metrics.loc[('Treatment', 'Ad Revenue'), 'Overall']
pilot_campaign_model_ad_spend = random_forest_metrics.loc[('Treatment', 'Ad Spend'), 'Overall']
pilot_campaign_survey_ad_revenue = random_forest_metrics.loc[('Control', 'Ad Revenue'), 'Overall']
pilot_campaign_survey_ad_spend = random_forest_metrics.loc[('Control', 'Ad Spend'), 'Overall']

# Drive sales campaign: model ('Treatment') and survey ('Control') ad revenue / ad spend
drive_sales_campaign_model_ad_revenue = gradient_boosting_metrics.loc[('Treatment', 'Ad Revenue'), 'Overall']
drive_sales_campaign_model_ad_spend = gradient_boosting_metrics.loc[('Treatment', 'Ad Spend'), 'Overall']
drive_sales_campaign_survey_ad_revenue = gradient_boosting_metrics.loc[('Control', 'Ad Revenue'), 'Overall']
drive_sales_campaign_survey_ad_spend = gradient_boosting_metrics.loc[('Control', 'Ad Spend'), 'Overall']

# Pilot campaign ROI per additional production cost, then the uplift (model - survey)
pilot_campaign_model_roi = campaign_roi(
    ad_revenue=pilot_campaign_model_ad_revenue,
    ad_spend=pilot_campaign_model_ad_spend,
    additional_production_cost=additional_production_cost,
)
pilot_campaign_survey_roi = campaign_roi(
    ad_revenue=pilot_campaign_survey_ad_revenue,
    ad_spend=pilot_campaign_survey_ad_spend,
    additional_production_cost=additional_production_cost,
)
pilot_campaign_model_survey_difference_roi = pilot_campaign_model_roi - pilot_campaign_survey_roi

# Drive sales campaign ROI per additional production cost, then the uplift (model - survey)
drive_sales_campaign_model_roi = campaign_roi(
    ad_revenue=drive_sales_campaign_model_ad_revenue,
    ad_spend=drive_sales_campaign_model_ad_spend,
    additional_production_cost=additional_production_cost,
)
drive_sales_campaign_survey_roi = campaign_roi(
    ad_revenue=drive_sales_campaign_survey_ad_revenue,
    ad_spend=drive_sales_campaign_survey_ad_spend,
    additional_production_cost=additional_production_cost,
)
drive_sales_campaign_model_survey_difference_roi = drive_sales_campaign_model_roi - drive_sales_campaign_survey_roi

plt.rcParams.update({'font.size': 16})
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))

# Uplift curves for both campaigns, plotted in percent
axes.plot(additional_production_cost, pilot_campaign_model_survey_difference_roi * 100, label='Pilot Campaign')
axes.plot(additional_production_cost, drive_sales_campaign_model_survey_difference_roi * 100, label='Drive Sales Campaign')

# Marker at the first grid point of each curve: (cost, uplift %) truncated to
# integers and shown in the legend as the ROAS uplift estimate
pilot_campaign_ROAS_two_tuple = (
    int(additional_production_cost[0]),
    int(pilot_campaign_model_survey_difference_roi[0] * 100),
)
drive_sales_campaign_ROAS_two_tuple = (
    int(additional_production_cost[0]),
    int(drive_sales_campaign_model_survey_difference_roi[0] * 100),
)
pilot_marker_cost, pilot_marker_percent = pilot_campaign_ROAS_two_tuple
drive_sales_marker_cost, drive_sales_marker_percent = drive_sales_campaign_ROAS_two_tuple
axes.plot(0, pilot_marker_percent, 'ro', label=f'ROAS Uplift Estimate (${pilot_marker_cost}, {pilot_marker_percent}%)')
axes.plot(0, drive_sales_marker_percent, 'bo', label=f'ROAS Uplift Estimate (${drive_sales_marker_cost}, {drive_sales_marker_percent}%)')

# Titles, formatters and legend
axes.set_title(title_string)
axes.set_xlabel(xlabel_string)
axes.set_ylabel(ylabel_string)
axes.xaxis.set_major_formatter('${x:1.0f}')
axes.yaxis.set_major_formatter(mtick.PercentFormatter())
axes.legend()
axes.tick_params(axis='both', which='both', bottom=True, left=True, direction='out', length=6, width=1)

# Axis ranges and explicit tick labels
axes.set_xlim(-0, 10000)
axes.set_ylim(-0, 220)
axes.set_xticks([0, 5000, 10000])
axes.set_yticks([0, 100, 200])
axes.set_xticklabels(['$0', '$5K', '$10K'])
axes.set_yticklabels(['0%', '100%', '200%'])

# Save it
plt.savefig(figure_filename, bbox_inches='tight', dpi=dpi)
plt.show()
time: 138 ms (started: 2023-09-28 17:20:13 -07:00)
%watermark -a "Paul Jacob" -d -t -v -p numpy,pandas -g
Author: Paul Jacob Python implementation: CPython Python version : 3.9.12 IPython version : 8.2.0 numpy : 1.21.5 pandas: 1.4.2 Git hash: 964b5d89b7a257f3efd946cda8d59619eb1f33f2 time: 91.5 ms (started: 2023-09-28 17:20:13 -07:00)