from importlib import import_module
import json
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotnine as p
import bookgender.datatools as dt
from bookgender.config import data_dir
from bookgender.nbutils import *
# Set up figure output for this notebook; the cell output reports the figure
# directory as figures/AlgoPerf.
# NOTE(review): init_figs presumably comes from the bookgender.nbutils star
# import — confirm.
init_figs('AlgoPerf')
using figure dir figures/AlgoPerf
We need to collect all of the accuracy output files.
# Load the recommender accuracy results: one row per (DataSet, Algorithm) run
# with MRR, HR and NDCG columns, as the preview below shows.
perf_data = pd.read_csv('data/rec-perf.csv')
# Preview the first rows to check that the columns parsed as expected.
perf_data.head()
MRR | HR | NDCG | DataSet | Algorithm | |
---|---|---|---|---|---|
0 | 0.000301 | 0.0044 | 0.001089 | BX-E | user-user |
1 | 0.000556 | 0.0074 | 0.001804 | BX-E | item-item |
2 | 0.003905 | 0.0354 | 0.010269 | BX-E | als |
3 | 0.029221 | 0.1568 | 0.056760 | BX-I | user-user |
4 | 0.020828 | 0.0876 | 0.035883 | BX-I | item-item |
We would also like to bootstrap confidence intervals - this requires per-user statistics.
# Load the tuning results from a JSON-lines file (one record per line); each
# row carries the params, iters and MRR for a (DataSet, Algorithm) pair.
tune_data = pd.read_json('data/rec-tune.json', orient='records', lines=True)
# Preview the first rows.
tune_data.head()
params | iters | MRR | DataSet | Algorithm | |
---|---|---|---|---|---|
0 | [67, 0.069343245462767] | 20 | 0.000856 | BX-E | user-user |
1 | [232, 0.084880046234972] | 20 | 0.000540 | BX-E | item-item |
2 | [297, 4.36979166769093, 4.927362178355647, 24.... | 20 | 0.002965 | BX-E | als |
3 | [303, 0.052267596581313004] | 20 | 0.026312 | BX-I | user-user |
4 | [61, 0.03579462837616] | 20 | 0.015321 | BX-I | item-item |
# Load the accuracy results for the re-ranked recommendations: the same metrics
# as perf_data, plus a Strategy column naming the re-ranking strategy.
rerank_data = pd.read_csv('data/rerank-perf.csv')
# Preview the first rows.
rerank_data.head()
Strategy | MRR | HR | NDCG | DataSet | Algorithm | |
---|---|---|---|---|---|---|
0 | SingleEQ | 0.000316 | 0.0046 | 0.001133 | BX-E | user-user |
1 | GreedyEQ | 0.000269 | 0.0044 | 0.001051 | BX-E | user-user |
2 | GreedyReflect | 0.000344 | 0.0050 | 0.001241 | BX-E | user-user |
3 | SingleEQ | 0.000359 | 0.0060 | 0.001286 | BX-E | item-item |
4 | GreedyEQ | 0.000447 | 0.0076 | 0.001638 | BX-E | item-item |
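We have data sets, and we have implicit/explicit configurations. For some data sets, the configuration is encoded in the data set name (an `-I` or `-E` suffix); for others, it is encoded in the algorithm name (an `-imp` suffix).
We need to split that out into a separate column and strip the suffixes from the names.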
def normalize_runs(frame):
    """Split the implicit/explicit flag out of the run labels and tidy the names.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain string columns ``DataSet`` and ``Algorithm``. A run is
        implicit-feedback when its data set name ends in ``-I`` or its
        algorithm name ends in ``-imp``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) in which

        * ``Implicit`` is a boolean column flagging implicit-feedback runs;
        * ``DataSet`` has its ``-I`` / ``-E`` marker removed (``BX-E`` -> ``BX``);
        * ``Algorithm`` has ``-imp`` removed, ``wrls`` renamed to ``als``, is
          upper-cased, and hyphenated two-word names are abbreviated to their
          initials (``user-user`` -> ``UU``).
    """
    # Compute the flag from the raw labels before the markers are stripped.
    implicit = (
        frame['DataSet'].str.endswith('-I')
        | frame['Algorithm'].str.endswith('-imp')
    )

    # `regex` is passed explicitly on every replace: pandas 2.0 changed the
    # default from True to False, which would silently turn the two pattern
    # replacements below into literal (non-matching) substitutions.
    dataset = frame['DataSet'].str.replace('-[IE]', '', regex=True)

    algorithm = frame['Algorithm'].str.replace('-imp', '', regex=False)
    algorithm = algorithm.str.replace('wrls', 'als', regex=False).str.upper()
    # Abbreviate WORD-WORD names to the first letter of each word.
    algorithm = algorithm.str.replace(r'(\w)\w+-(\w)\w+', r'\1\2', regex=True)

    # Existing columns keep their positions; Implicit is appended at the end.
    return frame.assign(Implicit=implicit, Algorithm=algorithm, DataSet=dataset)
# Replace the raw accuracy table with its normalized form: an Implicit flag
# column, plus cleaned DataSet and Algorithm labels.
perf_data = normalize_runs(perf_data)
And look at that data:
# Display the whole normalized table (not just the head) to inspect every run.
perf_data
MRR | HR | NDCG | DataSet | Algorithm | Implicit | |
---|---|---|---|---|---|---|
0 | 0.000301 | 0.0044 | 0.001089 | BX | UU | False |
1 | 0.000556 | 0.0074 | 0.001804 | BX | II | False |
2 | 0.003905 | 0.0354 | 0.010269 | BX | ALS | False |
3 | 0.029221 | 0.1568 | 0.056760 | BX | UU | True |
4 | 0.020828 | 0.0876 | 0.035883 | BX | II | True |
5 | 0.030019 | 0.1522 | 0.057322 | BX | ALS | True |
6 | 0.017138 | 0.1004 | 0.034448 | BX | BPR | True |
7 | 0.000043 | 0.0008 | 0.000183 | AZ | UU | False |
8 | 0.003357 | 0.0246 | 0.007375 | AZ | II | False |
9 | 0.000221 | 0.0036 | 0.000879 | AZ | ALS | False |
10 | 0.054445 | 0.2106 | 0.093025 | AZ | UU | True |
11 | 0.037334 | 0.1200 | 0.059690 | AZ | II | True |
12 | 0.031726 | 0.1530 | 0.059956 | AZ | ALS | True |
13 | 0.003825 | 0.0456 | 0.011680 | AZ | BPR | True |
14 | 0.000062 | 0.0016 | 0.000325 | GR | UU | False |
15 | 0.010818 | 0.1482 | 0.036822 | GR | II | False |
16 | 0.096567 | 0.3664 | 0.159028 | GR | UU | True |
17 | 0.087541 | 0.3474 | 0.147663 | GR | II | True |
18 | 0.095640 | 0.3902 | 0.167903 | GR | ALS | True |
19 | 0.044103 | 0.1990 | 0.080554 | GR | BPR | True |
# Normalize the tuning results the same way as perf_data so the two tables
# share DataSet / Algorithm / Implicit labels.
tune_data = normalize_runs(tune_data)
# Display the whole normalized tuning table.
tune_data
params | iters | MRR | DataSet | Algorithm | Implicit | |
---|---|---|---|---|---|---|
0 | [67, 0.069343245462767] | 20 | 0.000856 | BX | UU | False |
1 | [232, 0.084880046234972] | 20 | 0.000540 | BX | II | False |
2 | [297, 4.36979166769093, 4.927362178355647, 24.... | 20 | 0.002965 | BX | ALS | False |
3 | [303, 0.052267596581313004] | 20 | 0.026312 | BX | UU | True |
4 | [61, 0.03579462837616] | 20 | 0.015321 | BX | II | True |
5 | [499, 0.151451427679924, 0.06685861681803601, ... | 20 | 0.026321 | BX | ALS | True |
6 | [249, 0.011980259542217, 0.09697898418796101] | 20 | 0.014692 | BX | BPR | True |
7 | [68, 0.070821028781701] | 20 | 0.000306 | AZ | UU | False |
8 | [5, 0.1] | 20 | 0.002919 | AZ | II | False |
9 | [299, 0.027209462059128, 0.09593544705902601, ... | 100 | 0.000533 | AZ | ALS | False |
10 | [7, 0.00087767799404] | 20 | 0.058791 | AZ | UU | True |
11 | [9, 2.9102495117394603e-05] | 20 | 0.034288 | AZ | II | True |
12 | [300, 1e-06, 1e-06, 50.0] | 20 | 0.031447 | AZ | ALS | True |
13 | [6, 0.00428451465746, 0.00346828460276] | 20 | 0.005907 | AZ | BPR | True |
14 | [7, 0.09988270936531501] | 20 | 0.000073 | GR | UU | False |
15 | [6, 0.099969938296841] | 20 | 0.013365 | GR | II | False |
16 | [32, 0.08878273357970401] | 20 | 0.099882 | GR | UU | True |
17 | [6, 0.097933753442365] | 16 | 0.095913 | GR | II | True |
18 | [487, 4.098540323995087, 0.36994560658956105, ... | 20 | 0.102185 | GR | ALS | True |
19 | [250, 0.0, 0.028168135291681003] | 20 | 0.042900 | GR | BPR | True |
Plot the MRR for each data set!
# Stack the tuning-time and evaluation-time MRR into one long table, tagging
# each row with the Stage it came from (tuning rows first, then evaluation).
merged_mrr = pd.concat(
    [
        stage_frame[['DataSet', 'Algorithm', 'Implicit', 'MRR']].assign(Stage=stage_name)
        for stage_name, stage_frame in (('Tune', tune_data), ('Eval', perf_data))
    ],
    ignore_index=True,
)
# Human-readable facet label derived from the boolean Implicit flag.
merged_mrr['Mode'] = merged_mrr['Implicit'].map({True: 'Implicit', False: 'Explicit'})
merged_mrr.head()
DataSet | Algorithm | Implicit | MRR | Stage | Mode | |
---|---|---|---|---|---|---|
0 | BX | UU | False | 0.000856 | Tune | Explicit |
1 | BX | II | False | 0.000540 | Tune | Explicit |
2 | BX | ALS | False | 0.002965 | Tune | Explicit |
3 | BX | UU | True | 0.026312 | Tune | Implicit |
4 | BX | II | True | 0.015321 | Tune | Implicit |
# Quick seaborn preview: MRR per algorithm, bars coloured by Stage, faceted by
# Mode (rows) and DataSet (columns). sharey=False gives each facet its own y
# axis, since MRR differs by orders of magnitude between configurations.
sns.catplot(x='Algorithm', y='MRR', row='Mode', col='DataSet', hue='Stage', data=merged_mrr, kind='bar',
sharey=False, margin_titles=True, aspect=1.5, height=2.2)
<seaborn.axisgrid.FacetGrid at 0x7f48143b2390>
# Plotnine version of the same chart: dodged bars of MRR per algorithm, filled
# by Stage, in a Mode x DataSet grid with free y scales. The warnings emitted
# by this cell show it is written to figures/AlgoPerf/rec-perf.pdf.
# NOTE(review): make_plot presumably comes from the bookgender.nbutils star
# import — confirm how it handles the file/width/height/legend_* keywords.
make_plot(merged_mrr, p.aes(x='Algorithm', y='MRR', fill='Stage'),
p.geom_bar(stat='identity', position='dodge'),
p.facet_grid('Mode ~ DataSet', scales='free_y'),
p.scale_fill_brewer('qual', 'Dark2'),
file='rec-perf.pdf', width=7, height=4, legend_position='top', legend_title=p.element_blank())
/home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:729: PlotnineWarning: Saving 7 x 4 in image. from_inches(height, units), units), PlotnineWarning) /home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:730: PlotnineWarning: Filename: figures/AlgoPerf/rec-perf.pdf warn('Filename: {}'.format(filename), PlotnineWarning)
<ggplot: (8746721886361)>
# Print the evaluation MRR as a LaTeX table: rows are (DataSet, Implicit),
# columns are the algorithms. Combinations with no run show up as NaN.
print(perf_data[['DataSet', 'Algorithm', 'Implicit', 'MRR']].set_index(['DataSet', 'Implicit', 'Algorithm']).unstack().to_latex())
\begin{tabular}{llrrrr} \toprule & {} & \multicolumn{4}{l}{MRR} \\ & Algorithm & ALS & BPR & II & UU \\ DataSet & Implicit & & & & \\ \midrule AZ & False & 0.000221 & NaN & 0.003357 & 0.000043 \\ & True & 0.031726 & 0.003825 & 0.037334 & 0.054445 \\ BX & False & 0.003905 & NaN & 0.000556 & 0.000301 \\ & True & 0.030019 & 0.017138 & 0.020828 & 0.029221 \\ GR & False & NaN & NaN & 0.010818 & 0.000062 \\ & True & 0.095640 & 0.044103 & 0.087541 & 0.096567 \\ \bottomrule \end{tabular}
And hit rate:
# Same preview for hit rate (HR): one bar per algorithm, faceted by the
# Implicit flag (rows) and DataSet (columns), each facet with its own y axis.
sns.catplot(x='Algorithm', y='HR', row='Implicit', col='DataSet', data=perf_data, kind='bar',
sharey=False, margin_titles=True)
<seaborn.axisgrid.FacetGrid at 0x7f4813fa4610>
# Normalize the re-ranking results so their DataSet / Algorithm / Implicit
# labels line up with perf_data.
rerank_data = normalize_runs(rerank_data)
# Preview the first rows.
rerank_data.head()
Strategy | MRR | HR | NDCG | DataSet | Algorithm | Implicit | |
---|---|---|---|---|---|---|---|
0 | SingleEQ | 0.000316 | 0.0046 | 0.001133 | BX | UU | False |
1 | GreedyEQ | 0.000269 | 0.0044 | 0.001051 | BX | UU | False |
2 | GreedyReflect | 0.000344 | 0.0050 | 0.001241 | BX | UU | False |
3 | SingleEQ | 0.000359 | 0.0060 | 0.001286 | BX | II | False |
4 | GreedyEQ | 0.000447 | 0.0076 | 0.001638 | BX | II | False |
# Combine the un-re-ranked results (labelled Strategy='Raw') with the results
# of each re-ranking strategy into one long table.
rr_mrr = pd.concat([
    perf_data[['DataSet', 'Algorithm', 'Implicit', 'MRR', 'HR']].assign(Strategy='Raw'),
    rerank_data[['DataSet', 'Algorithm', 'Implicit', 'MRR', 'HR', 'Strategy']],
], ignore_index=True)
# Human-readable facet label derived from the boolean Implicit flag.
rr_mrr['Mode'] = 'Explicit'
rr_mrr.loc[rr_mrr['Implicit'], 'Mode'] = 'Implicit'
# Fix the category order so plots list Raw first, then the strategies in this
# order. reorder_categories raises if the observed strategies differ from this
# list, which also guards against unexpected Strategy values.
rr_mrr['Strategy'] = rr_mrr['Strategy'].astype('category').cat.reorder_categories(
    ['Raw', 'SingleEQ', 'GreedyEQ', 'GreedyReflect']
)
# Only the final expression of a cell is displayed, so the table is previewed
# once, after every column has been added.
rr_mrr.head()
DataSet | Algorithm | Implicit | MRR | HR | Strategy | Mode | |
---|---|---|---|---|---|---|---|
0 | BX | UU | False | 0.000301 | 0.0044 | Raw | Explicit |
1 | BX | II | False | 0.000556 | 0.0074 | Raw | Explicit |
2 | BX | ALS | False | 0.003905 | 0.0354 | Raw | Explicit |
3 | BX | UU | True | 0.029221 | 0.1568 | Raw | Implicit |
4 | BX | II | True | 0.020828 | 0.0876 | Raw | Implicit |
# Quick seaborn preview: MRR per algorithm, bars coloured by re-ranking
# Strategy, faceted by Mode (rows) and DataSet (columns), each facet with its
# own y axis.
sns.catplot(x='Algorithm', y='MRR', row='Mode', col='DataSet', hue='Strategy', data=rr_mrr, kind='bar',
sharey=False, margin_titles=True, aspect=1.5, height=1.8)
# Saving the seaborn figure is disabled; the make_plot call further down in
# this file writes rerank-perf.pdf instead.
# plt.savefig(fig_dir / 'rerank-perf.pdf')
<seaborn.axisgrid.FacetGrid at 0x7f480f452310>
# Plotnine version of the re-ranking chart: dodged bars of MRR per algorithm,
# filled by Strategy, in a Mode x DataSet grid with free y scales. The warnings
# emitted by this cell show it is written to figures/AlgoPerf/rerank-perf.pdf.
# NOTE(review): make_plot presumably comes from the bookgender.nbutils star
# import — confirm how it handles the file/width/height/legend_* keywords.
make_plot(rr_mrr, p.aes(x='Algorithm', y='MRR', fill='Strategy'),
p.geom_bar(stat='identity', position='dodge'),
p.facet_grid('Mode ~ DataSet', scales='free_y'),
p.scale_fill_brewer('qual', 'Dark2'),
file='rerank-perf.pdf', width=7, height=4, legend_position='top', legend_title=p.element_blank())
/home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:729: PlotnineWarning: Saving 7 x 4 in image. from_inches(height, units), units), PlotnineWarning) /home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:730: PlotnineWarning: Filename: figures/AlgoPerf/rerank-perf.pdf warn('Filename: {}'.format(filename), PlotnineWarning)
<ggplot: (8746716406401)>
# Relative MRR cost of each re-ranking strategy: (raw - reranked) / raw.
# Positive values mean re-ranking lowered MRR; negative values mean it rose.
raw_mrr = perf_data.set_index(['DataSet', 'Implicit', 'Algorithm'])['MRR']
reranked_mrr = rerank_data.set_index(['DataSet', 'Implicit', 'Algorithm', 'Strategy'])['MRR']
# The raw series has no Strategy level, so pandas aligns it against every
# strategy that shares the same (DataSet, Implicit, Algorithm) key.
penalty = (raw_mrr - reranked_mrr) / raw_mrr
# One column per strategy, formatted as percentages for the paper's table.
print(penalty.unstack().to_latex(float_format=lambda f: '{:.2f}%'.format(f*100)))
\begin{tabular}{lllrrr} \toprule & & Strategy & GreedyEQ & GreedyReflect & SingleEQ \\ DataSet & Implicit & Algorithm & & & \\ \midrule AZ & False & ALS & 3.23\% & -0.57\% & -5.60\% \\ & & II & 3.65\% & -0.01\% & 3.72\% \\ & & UU & -10.23\% & 0.85\% & -10.23\% \\ & True & ALS & 8.11\% & 2.63\% & 13.09\% \\ & & BPR & 6.18\% & -0.98\% & 10.32\% \\ & & II & 5.08\% & 1.34\% & 7.60\% \\ & & UU & 4.65\% & 1.08\% & 8.69\% \\ BX & False & ALS & 0.82\% & -2.24\% & 1.72\% \\ & & II & 19.48\% & -10.85\% & 35.42\% \\ & & UU & 10.70\% & -14.50\% & -5.12\% \\ & True & ALS & 6.89\% & 3.59\% & 15.99\% \\ & & BPR & 8.09\% & 3.08\% & 16.76\% \\ & & II & 5.56\% & 2.46\% & 12.03\% \\ & & UU & 4.24\% & 1.48\% & 9.66\% \\ GR & False & II & 7.40\% & -0.50\% & 10.13\% \\ & & UU & 25.01\% & 17.91\% & 33.56\% \\ & True & ALS & 4.75\% & 3.08\% & 11.36\% \\ & & BPR & 7.08\% & 4.21\% & 13.52\% \\ & & II & 3.23\% & 1.42\% & 6.65\% \\ & & UU & 3.77\% & 2.17\% & 7.58\% \\ \bottomrule \end{tabular}