Algorithm Performance¶

Setup¶

In [1]:

from importlib import import_module
import json
from pathlib import Path

In [2]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotnine as p

In [3]:

import bookgender.datatools as dt
from bookgender.config import data_dir
from bookgender.nbutils import *

In [4]:

init_figs('AlgoPerf')

using figure dir figures/AlgoPerf

Load Performance Data¶

We need to collect all of the accuracy output files.

Algorithm Eval Performance¶

In [5]:

perf_data = pd.read_csv('data/rec-perf.csv')
perf_data.head()

Out[5]:

	MRR	HR	NDCG	DataSet	Algorithm
0	0.000301	0.0044	0.001089	BX-E	user-user
1	0.000556	0.0074	0.001804	BX-E	item-item
2	0.003905	0.0354	0.010269	BX-E	als
3	0.029221	0.1568	0.056760	BX-I	user-user
4	0.020828	0.0876	0.035883	BX-I	item-item

We would also like to bootstrap confidence intervals - this requires per-user statistics.

Algorithm Tune Performance¶

In [6]:

tune_data = pd.read_json('data/rec-tune.json', orient='records', lines=True)
tune_data.head()

Out[6]:

	params	iters	MRR	DataSet	Algorithm
0	[67, 0.069343245462767]	20	0.000856	BX-E	user-user
1	[232, 0.084880046234972]	20	0.000540	BX-E	item-item
2	[297, 4.36979166769093, 4.927362178355647, 24....	20	0.002965	BX-E	als
3	[303, 0.052267596581313004]	20	0.026312	BX-I	user-user
4	[61, 0.03579462837616]	20	0.015321	BX-I	item-item

Rerank Performance¶

In [7]:

rerank_data = pd.read_csv('data/rerank-perf.csv')
rerank_data.head()

Out[7]:

	Strategy	MRR	HR	NDCG	DataSet	Algorithm
0	SingleEQ	0.000316	0.0046	0.001133	BX-E	user-user
1	GreedyEQ	0.000269	0.0044	0.001051	BX-E	user-user
2	GreedyReflect	0.000344	0.0050	0.001241	BX-E	user-user
3	SingleEQ	0.000359	0.0060	0.001286	BX-E	item-item
4	GreedyEQ	0.000447	0.0076	0.001638	BX-E	item-item

Organize Runs¶

We have data sets, and we have implicit/explicit configurations. For some data sets, these are encoded in the data set; for others, in the algorithm.

We need to split that out.

In [8]:

def normalize_runs(frame):
    frame = frame.assign(Implicit = frame['DataSet'].str.endswith('-I') | frame['Algorithm'].str.endswith('-imp'))
    frame['Algorithm'] = frame['Algorithm'].str.replace('-imp', '')
    frame['DataSet'] = frame['DataSet'].str.replace('-[IE]', '')
    frame['Algorithm'] = frame['Algorithm'].str.replace('wrls', 'als').str.upper()
    frame['Algorithm'] = frame['Algorithm'].str.replace(r'(\w)\w+-(\w)\w+', r'\1\2')
    return frame

In [9]:

perf_data = normalize_runs(perf_data)

And look at that data:

In [10]:

perf_data

Out[10]:

	MRR	HR	NDCG	DataSet	Algorithm	Implicit
0	0.000301	0.0044	0.001089	BX	UU	False
1	0.000556	0.0074	0.001804	BX	II	False
2	0.003905	0.0354	0.010269	BX	ALS	False
3	0.029221	0.1568	0.056760	BX	UU	True
4	0.020828	0.0876	0.035883	BX	II	True
5	0.030019	0.1522	0.057322	BX	ALS	True
6	0.017138	0.1004	0.034448	BX	BPR	True
7	0.000043	0.0008	0.000183	AZ	UU	False
8	0.003357	0.0246	0.007375	AZ	II	False
9	0.000221	0.0036	0.000879	AZ	ALS	False
10	0.054445	0.2106	0.093025	AZ	UU	True
11	0.037334	0.1200	0.059690	AZ	II	True
12	0.031726	0.1530	0.059956	AZ	ALS	True
13	0.003825	0.0456	0.011680	AZ	BPR	True
14	0.000062	0.0016	0.000325	GR	UU	False
15	0.010818	0.1482	0.036822	GR	II	False
16	0.096567	0.3664	0.159028	GR	UU	True
17	0.087541	0.3474	0.147663	GR	II	True
18	0.095640	0.3902	0.167903	GR	ALS	True
19	0.044103	0.1990	0.080554	GR	BPR	True

In [11]:

tune_data = normalize_runs(tune_data)
tune_data

Out[11]:

	params	iters	MRR	DataSet	Algorithm	Implicit
0	[67, 0.069343245462767]	20	0.000856	BX	UU	False
1	[232, 0.084880046234972]	20	0.000540	BX	II	False
2	[297, 4.36979166769093, 4.927362178355647, 24....	20	0.002965	BX	ALS	False
3	[303, 0.052267596581313004]	20	0.026312	BX	UU	True
4	[61, 0.03579462837616]	20	0.015321	BX	II	True
5	[499, 0.151451427679924, 0.06685861681803601, ...	20	0.026321	BX	ALS	True
6	[249, 0.011980259542217, 0.09697898418796101]	20	0.014692	BX	BPR	True
7	[68, 0.070821028781701]	20	0.000306	AZ	UU	False
8	[5, 0.1]	20	0.002919	AZ	II	False
9	[299, 0.027209462059128, 0.09593544705902601, ...	100	0.000533	AZ	ALS	False
10	[7, 0.00087767799404]	20	0.058791	AZ	UU	True
11	[9, 2.9102495117394603e-05]	20	0.034288	AZ	II	True
12	[300, 1e-06, 1e-06, 50.0]	20	0.031447	AZ	ALS	True
13	[6, 0.00428451465746, 0.00346828460276]	20	0.005907	AZ	BPR	True
14	[7, 0.09988270936531501]	20	0.000073	GR	UU	False
15	[6, 0.099969938296841]	20	0.013365	GR	II	False
16	[32, 0.08878273357970401]	20	0.099882	GR	UU	True
17	[6, 0.097933753442365]	16	0.095913	GR	II	True
18	[487, 4.098540323995087, 0.36994560658956105, ...	20	0.102185	GR	ALS	True
19	[250, 0.0, 0.028168135291681003]	20	0.042900	GR	BPR	True

Accuracy Plots¶

Plot the MRR for each data set!

In [12]:

merged_mrr = pd.concat([
    tune_data[['DataSet', 'Algorithm', 'Implicit', 'MRR']].assign(Stage='Tune'),
    perf_data[['DataSet', 'Algorithm', 'Implicit', 'MRR']].assign(Stage='Eval')
], ignore_index=True)
merged_mrr['Mode'] = 'Explicit'
merged_mrr.loc[merged_mrr['Implicit'], 'Mode'] = 'Implicit'
merged_mrr.head()

Out[12]:

	DataSet	Algorithm	Implicit	MRR	Stage	Mode
0	BX	UU	False	0.000856	Tune	Explicit
1	BX	II	False	0.000540	Tune	Explicit
2	BX	ALS	False	0.002965	Tune	Explicit
3	BX	UU	True	0.026312	Tune	Implicit
4	BX	II	True	0.015321	Tune	Implicit

In [13]:

sns.catplot(x='Algorithm', y='MRR', row='Mode', col='DataSet', hue='Stage', data=merged_mrr, kind='bar',
            sharey=False, margin_titles=True, aspect=1.5, height=2.2)

Out[13]:

<seaborn.axisgrid.FacetGrid at 0x7f48143b2390>

In [14]:

make_plot(merged_mrr, p.aes(x='Algorithm', y='MRR', fill='Stage'),
          p.geom_bar(stat='identity', position='dodge'),
          p.facet_grid('Mode ~ DataSet', scales='free_y'),
          p.scale_fill_brewer('qual', 'Dark2'),
          file='rec-perf.pdf', width=7, height=4, legend_position='top', legend_title=p.element_blank())

/home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:729: PlotnineWarning: Saving 7 x 4 in image.
  from_inches(height, units), units), PlotnineWarning)
/home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:730: PlotnineWarning: Filename: figures/AlgoPerf/rec-perf.pdf
  warn('Filename: {}'.format(filename), PlotnineWarning)

Out[14]:

<ggplot: (8746721886361)>

In [15]:

print(perf_data[['DataSet', 'Algorithm', 'Implicit', 'MRR']].set_index(['DataSet', 'Implicit', 'Algorithm']).unstack().to_latex())

\begin{tabular}{llrrrr}
\toprule
   & {} & \multicolumn{4}{l}{MRR} \\
   & Algorithm &       ALS &       BPR &        II &        UU \\
DataSet & Implicit &           &           &           &           \\
\midrule
AZ & False &  0.000221 &       NaN &  0.003357 &  0.000043 \\
   & True  &  0.031726 &  0.003825 &  0.037334 &  0.054445 \\
BX & False &  0.003905 &       NaN &  0.000556 &  0.000301 \\
   & True  &  0.030019 &  0.017138 &  0.020828 &  0.029221 \\
GR & False &       NaN &       NaN &  0.010818 &  0.000062 \\
   & True  &  0.095640 &  0.044103 &  0.087541 &  0.096567 \\
\bottomrule
\end{tabular}

And hit rate:

In [16]:

sns.catplot(x='Algorithm', y='HR', row='Implicit', col='DataSet', data=perf_data, kind='bar', 
            sharey=False, margin_titles=True)

Out[16]:

<seaborn.axisgrid.FacetGrid at 0x7f4813fa4610>

Rerank Loss¶

In [17]:

rerank_data = normalize_runs(rerank_data)
rerank_data.head()

Out[17]:

	Strategy	MRR	HR	NDCG	DataSet	Algorithm	Implicit
0	SingleEQ	0.000316	0.0046	0.001133	BX	UU	False
1	GreedyEQ	0.000269	0.0044	0.001051	BX	UU	False
2	GreedyReflect	0.000344	0.0050	0.001241	BX	UU	False
3	SingleEQ	0.000359	0.0060	0.001286	BX	II	False
4	GreedyEQ	0.000447	0.0076	0.001638	BX	II	False

In [18]:

rr_mrr = pd.concat([
    perf_data[['DataSet', 'Algorithm', 'Implicit', 'MRR', 'HR']].assign(Strategy='Raw'),
    rerank_data[['DataSet', 'Algorithm', 'Implicit', 'MRR', 'HR', 'Strategy']]
], ignore_index=True)
rr_mrr.head()
rr_mrr['Mode'] = 'Explicit'
rr_mrr.loc[rr_mrr['Implicit'], 'Mode'] = 'Implicit'
rr_mrr['Strategy'] = rr_mrr['Strategy'].astype('category').cat.reorder_categories(['Raw', 'SingleEQ', 'GreedyEQ', 'GreedyReflect'])
rr_mrr.head()

Out[18]:

	DataSet	Algorithm	Implicit	MRR	HR	Strategy	Mode
0	BX	UU	False	0.000301	0.0044	Raw	Explicit
1	BX	II	False	0.000556	0.0074	Raw	Explicit
2	BX	ALS	False	0.003905	0.0354	Raw	Explicit
3	BX	UU	True	0.029221	0.1568	Raw	Implicit
4	BX	II	True	0.020828	0.0876	Raw	Implicit

In [19]:

sns.catplot(x='Algorithm', y='MRR', row='Mode', col='DataSet', hue='Strategy', data=rr_mrr, kind='bar',
            sharey=False, margin_titles=True, aspect=1.5, height=1.8)
# plt.savefig(fig_dir / 'rerank-perf.pdf')

Out[19]:

<seaborn.axisgrid.FacetGrid at 0x7f480f452310>

In [20]:

make_plot(rr_mrr, p.aes(x='Algorithm', y='MRR', fill='Strategy'),
          p.geom_bar(stat='identity', position='dodge'),
          p.facet_grid('Mode ~ DataSet', scales='free_y'),
          p.scale_fill_brewer('qual', 'Dark2'),
          file='rerank-perf.pdf', width=7, height=4, legend_position='top', legend_title=p.element_blank())

/home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:729: PlotnineWarning: Saving 7 x 4 in image.
  from_inches(height, units), units), PlotnineWarning)
/home/MICHAELEKSTRAND/anaconda3/envs/bookfair/lib/python3.7/site-packages/plotnine/ggplot.py:730: PlotnineWarning: Filename: figures/AlgoPerf/rerank-perf.pdf
  warn('Filename: {}'.format(filename), PlotnineWarning)

Out[20]:

<ggplot: (8746716406401)>

In [21]:

penalty = (perf_data.set_index(['DataSet', 'Implicit', 'Algorithm']).MRR - \
   rerank_data.set_index(['DataSet', 'Implicit', 'Algorithm', 'Strategy']).MRR) / perf_data.set_index(['DataSet', 'Implicit', 'Algorithm']).MRR

In [22]:

print(penalty.unstack().to_latex(float_format=lambda f: '{:.2f}%'.format(f*100)))

\begin{tabular}{lllrrr}
\toprule
   &       & Strategy &  GreedyEQ &  GreedyReflect &  SingleEQ \\
DataSet & Implicit & Algorithm &           &                &           \\
\midrule
AZ & False & ALS &     3.23\% &         -0.57\% &    -5.60\% \\
   &       & II &     3.65\% &         -0.01\% &     3.72\% \\
   &       & UU &   -10.23\% &          0.85\% &   -10.23\% \\
   & True  & ALS &     8.11\% &          2.63\% &    13.09\% \\
   &       & BPR &     6.18\% &         -0.98\% &    10.32\% \\
   &       & II &     5.08\% &          1.34\% &     7.60\% \\
   &       & UU &     4.65\% &          1.08\% &     8.69\% \\
BX & False & ALS &     0.82\% &         -2.24\% &     1.72\% \\
   &       & II &    19.48\% &        -10.85\% &    35.42\% \\
   &       & UU &    10.70\% &        -14.50\% &    -5.12\% \\
   & True  & ALS &     6.89\% &          3.59\% &    15.99\% \\
   &       & BPR &     8.09\% &          3.08\% &    16.76\% \\
   &       & II &     5.56\% &          2.46\% &    12.03\% \\
   &       & UU &     4.24\% &          1.48\% &     9.66\% \\
GR & False & II &     7.40\% &         -0.50\% &    10.13\% \\
   &       & UU &    25.01\% &         17.91\% &    33.56\% \\
   & True  & ALS &     4.75\% &          3.08\% &    11.36\% \\
   &       & BPR &     7.08\% &          4.21\% &    13.52\% \\
   &       & II &     3.23\% &          1.42\% &     6.65\% \\
   &       & UU &     3.77\% &          2.17\% &     7.58\% \\
\bottomrule
\end{tabular}

In [ ]: