Notebook

In [1]:

import warnings
warnings.filterwarnings('ignore')
import os
os.environ["MKL_THREADING_LAYER"] = "GNU"
import inspect
import logging
import os
import pandas as pd
from csrank.util import setup_logging, print_dictionary
from result_script import *

from csrank.experiments import CHOICE_FUNCTIONS, lp_metric_dict
from csrank.constants import CHOICE_FUNCTION
import numpy as np

Using TensorFlow backend.

WARNING:tensorflow:From /home/pritha/anaconda3/envs/linenv/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.

/home/pritha/anaconda3/envs/linenv/lib/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.
  warnings.warn(msg, category=DeprecationWarning)

In [2]:

# --- Paths -------------------------------------------------------------------
# Base directory for the `logs/` and `journalresults/` folders: the directory the
# notebook is run from. A notebook cell has no real source file, so the previous
# `inspect.getfile(inspect.currentframe())` lookup only resolved to the working
# directory because old IPython names cells '<ipython-input-...>'; kernels that
# compile cells under a temporary file name would silently redirect all output
# to that temporary directory.
DIR_PATH = os.path.abspath(os.getcwd())
FOLDER = "journalresults"
log_path = os.path.join(DIR_PATH, 'logs', 'results_choice.log')
latex_path = os.path.join(DIR_PATH, FOLDER, 'choice_functions.tex')
df_path_combined = os.path.join(DIR_PATH, FOLDER, "ChoiceFunctions.csv")

# --- Logging -----------------------------------------------------------------
# Only ERROR and above are recorded, which is why informational messages later
# in this notebook are emitted through `logger.error`.
setup_logging(log_path=log_path, level=logging.ERROR)
logger = logging.getLogger('ResultParsing')

# --- Experiment configuration ------------------------------------------------
datasets = ['synthetic_choice', 'mnist_choice', 'letor_choice', 'exp_choice']

learning_problem = CHOICE_FUNCTION
# Name of the result-table column that holds the learner
# (`learners_map` comes from the star import of `result_script`).
learning_model = learners_map[learning_problem]
keys = list(lp_metric_dict[learning_problem].keys())
metrics = ', '.join([x.lower() for x in keys])

# Display names of the learners, in the order used for the LaTeX tables.
models = ['FETA-Net', 'FATE-Net', 'RankNet-Choice', 'PairwiseSVM', 'GeneralizedLinearModel', "RandomGuessing", "FATE-Linear", "FETA-Linear"]
# NOTE(review): pairs CHOICE_FUNCTIONS with `models` purely by position, so the
# mapping is only right while both sequences keep the same order and length --
# confirm against csrank.experiments.CHOICE_FUNCTIONS.
models_dict = dict(zip(CHOICE_FUNCTIONS, models))

In [3]:

# Raw per-job results for the last configured dataset, narrowed down to the
# learners whose name contains 'feta_choice'.
d = datasets[-1]
df, cols = get_results_for_dataset(d, logger, learning_problem, False)
df = (
    df.sort_values(by=['dataset', 'learner'], ascending=[True, True])
      .loc[lambda frame: frame['learner'].str.contains('feta_choice')]
)
# Displayed ordered by learner; the sorted copy is not stored.
df.sort_values(by='learner')

Out[3]:

	job_id	dataset	learner	f1score	precision	recall	subset01accuracy	hammingaccuracy	informedness	aucscore	averageprecisionscore
33	329	Expedia_N_10	feta_choice_eb7f	0.1849	0.1234	0.5342	0.0158	0.7735	0.3263	0.6942	0.3628
42	325	Expedia_N_10	feta_choice_eb7f	0.1819	0.1214	0.5282	0.0161	0.7755	0.3234	0.6938	0.3623
47	335	Expedia_N_10	feta_choice_eb7f	0.1828	0.1222	0.5290	0.0164	0.7764	0.3254	0.6941	0.3638
1	327	Expedia_N_10	feta_choice_eb7f	0.1851	0.1217	0.5586	0.0132	0.7580	0.3320	0.6945	0.3622
48	336	Expedia_N_10	feta_choice_eb7f	0.1850	0.1224	0.5490	0.0149	0.7649	0.3310	0.6943	0.3640
37	337	Expedia_N_10	feta_choice_zero_0f51	0.1850	0.1235	0.5365	0.0153	0.7729	0.3276	0.6947	0.3646
46	192	Expedia_N_10	feta_choice_zero_0f51	0.1853	0.1239	0.5334	0.0160	0.7749	0.3268	0.6953	0.3629
2	334	Expedia_N_10	feta_choice_zero_0f51	0.1852	0.1217	0.5587	0.0128	0.7585	0.3326	0.6938	0.3620
3	328	Expedia_N_10	feta_choice_zero_0f51	0.1836	0.1216	0.5556	0.0150	0.7579	0.3295	0.6934	0.3613
49	342	Expedia_N_10	feta_choice_zero_0f51	0.1864	0.1220	0.5670	0.0117	0.7552	0.3364	0.6962	0.3651
0	326	Expedia_N_10	feta_choice_zero_17c7	0.1872	0.1223	0.5629	0.0112	0.7474	0.3211	0.6886	0.3539
34	237	Expedia_N_10	feta_choice_zero_17c7	0.1865	0.1199	0.5846	0.0091	0.7322	0.3238	0.6880	0.3526
35	239	Expedia_N_10	feta_choice_zero_17c7	0.1811	0.1194	0.5447	0.0131	0.7628	0.3237	0.7260	0.3662
36	240	Expedia_N_10	feta_choice_zero_17c7	0.1813	0.1202	0.5365	0.0139	0.7681	0.3218	0.7258	0.3664
38	238	Expedia_N_10	feta_choice_zero_17c7	0.1860	0.1180	0.6085	0.0089	0.7087	0.3169	0.6877	0.3552

In [4]:

# Combined results for dataset `d`. Out[4] shows one row per learner
# configuration with `value±deviation` strings per metric (presumably an
# aggregate over the individual jobs -- see `get_combined_results` in
# result_script to confirm).
df = get_combined_results(d, logger, learning_problem, False)
df

Out[4]:

	Dataset	ChoiceModel	F1Score	Precision	Recall	Subset01Accuracy	Hammingaccuracy	Informedness	Aucscore	Averageprecisionscore
0	Expedia_N_10	fate_choice_736f	0.198±0.006	0.133±0.005	0.546±0.016	0.017±0.002	0.782±0.010	0.346±0.010	0.707±0.007	0.378±0.008
1	Expedia_N_10	fatelinear_choice_e98a	0.177±0.006	0.119±0.004	0.545±0.026	0.020±0.002	0.763±0.014	0.328±0.012	0.700±0.007	0.372±0.009
2	Expedia_N_10	feta_choice_eb7f	0.184±0.001	0.122±0.001	0.540±0.013	0.015±0.001	0.770±0.008	0.328±0.004	0.694±0.000	0.363±0.001
3	Expedia_N_10	feta_choice_zero_0f51	0.185±0.001	0.123±0.001	0.550±0.015	0.014±0.002	0.764±0.009	0.331±0.004	0.695±0.001	0.363±0.002
4	Expedia_N_10	feta_choice_zero_17c7	0.184±0.003	0.120±0.002	0.567±0.029	0.011±0.002	0.744±0.024	0.321±0.003	0.703±0.021	0.359±0.007
5	Expedia_N_10	fetalinear_choice_6b8c	0.179±0.007	0.121±0.006	0.539±0.011	0.020±0.002	0.765±0.015	0.324±0.006	0.696±0.007	0.367±0.010
6	Expedia_N_10	glm_choice_3de1	0.107±0.001	0.059±0.001	0.992±0.013	0.000±0.000	0.069±0.018	0.004±0.007	0.503±0.102	0.192±0.050
7	Expedia_N_10	random_choice_5569	0.106±0.000	0.058±0.000	1.000±0.000	0.000±0.000	0.058±0.000	0.000±0.000	0.500±0.000	0.058±0.000
8	Expedia_N_10	ranknet_choice_d20f	0.167±0.017	0.101±0.012	0.638±0.046	0.003±0.001	0.650±0.062	0.278±0.034	0.716±0.006	0.363±0.006
9	Expedia_N_10	ranksvm_choice_0391	0.129±0.017	0.077±0.013	0.703±0.149	0.004±0.002	0.481±0.227	0.165±0.097	0.680±0.051	0.321±0.047

In [5]:

import re
def get_val(val):
    """Parse a result string such as ``"0.198±0.006"`` into a pair of floats.

    Every number found in ``val`` is converted to ``float``; the first one is the
    score and the second one (if present) is subtracted from it.

    Returns ``[score, score - second]``. When ``val`` holds exactly one number the
    result is ``[score, score]``. Raises ``IndexError`` if ``val`` contains no number.
    """
    numbers = list(map(float, re.findall(r"[-+]?\d*\.\d+|\d+", val)))
    score = numbers[0]
    deduction = 0.0 if len(numbers) == 1 else numbers[1]
    return [score, score - deduction]
def create_final_result(dataset, dataset_function=get_combined_results, latex_row=False):
    """Build the final result table with one row per dataset and learner family.

    ``dataset_function(dataset, logger, learning_problem, latex_row=latex_row)``
    supplies the full table. Within every ``'Dataset'`` group, and for every name
    in ``CHOICE_FUNCTIONS``, the rows whose ``learning_model`` cell contains that
    name are collected. If several rows match, only the one with the highest
    value in the third column is kept: that cell is parsed with ``get_val`` when
    ``dataset_function`` is ``get_combined_results`` and used as a number
    otherwise. The kept row gets its learner cell replaced by the display name
    from ``models_dict`` and its dataset cell passed through ``get_dataset_name``.

    Returns a DataFrame with the columns of the full table, sorted by the first
    column ascending and the third column descending (cells are compared as
    stored, so string cells sort lexicographically).
    """
    df_full = dataset_function(dataset, logger, learning_problem, latex_row=latex_row)
    parse_strings = dataset_function == get_combined_results
    selected_rows = []
    for group_name, group_df in df_full.groupby(['Dataset']):
        for learner_key in CHOICE_FUNCTIONS:
            matches = group_df[group_df[learning_model].str.contains(learner_key)].values
            if len(matches) == 0:
                # This learner family has no result for the dataset.
                continue
            if len(matches) == 1:
                chosen = matches[0]
            else:
                # Several configurations of the same learner: keep the best one.
                if parse_strings:
                    values = np.array([get_val(entry[2]) for entry in matches])
                else:
                    values = np.array([[entry[2], entry[2] - entry[7]] for entry in matches])
                scores = values[:, 0]
                maxi = np.where(scores == scores[np.argmax(scores)])[0][0]
                # Logged at ERROR level because the logger is configured to drop anything lower.
                logger.error("dataset {} model {}, vals {}, maxi {}".format(group_name, matches[:, 1], values, maxi))
                chosen = matches[maxi]
            chosen[1] = models_dict[learner_key]
            chosen[0] = get_dataset_name(chosen[0])
            selected_rows.append(chosen)
    columns = df_full.columns
    result = pd.DataFrame(selected_rows, columns=columns)
    return result.sort_values(by=[columns[0], columns[2]], ascending=[True, False])

In [6]:

# Final table for dataset `d`: one row per learner family (best configuration
# where several exist), with display names for learner and dataset.
df = create_final_result(d, latex_row=False)
df

Out[6]:

	Dataset	ChoiceModel	F1Score	Precision	Recall	Subset01Accuracy	Hammingaccuracy	Informedness	Aucscore	Averageprecisionscore
1	Expedia 10 Objects	FATE-Net	0.198±0.006	0.133±0.005	0.546±0.016	0.017±0.002	0.782±0.010	0.346±0.010	0.707±0.007	0.378±0.008
0	Expedia 10 Objects	FETA-Net	0.185±0.001	0.123±0.001	0.550±0.015	0.014±0.002	0.764±0.009	0.331±0.004	0.695±0.001	0.363±0.002
7	Expedia 10 Objects	FETA-Linear	0.179±0.007	0.121±0.006	0.539±0.011	0.020±0.002	0.765±0.015	0.324±0.006	0.696±0.007	0.367±0.010
6	Expedia 10 Objects	FATE-Linear	0.177±0.006	0.119±0.004	0.545±0.026	0.020±0.002	0.763±0.014	0.328±0.012	0.700±0.007	0.372±0.009
2	Expedia 10 Objects	RankNet-Choice	0.167±0.017	0.101±0.012	0.638±0.046	0.003±0.001	0.650±0.062	0.278±0.034	0.716±0.006	0.363±0.006
3	Expedia 10 Objects	PairwiseSVM	0.129±0.017	0.077±0.013	0.703±0.149	0.004±0.002	0.481±0.227	0.165±0.097	0.680±0.051	0.321±0.047
4	Expedia 10 Objects	GeneralizedLinearModel	0.107±0.001	0.059±0.001	0.992±0.013	0.000±0.000	0.069±0.018	0.004±0.007	0.503±0.102	0.192±0.050
5	Expedia 10 Objects	RandomGuessing	0.106±0.000	0.058±0.000	1.000±0.000	0.000±0.000	0.058±0.000	0.000±0.000	0.500±0.000	0.058±0.000

In [8]:

import copy
# Build the final table of every dataset, save each one to its own CSV and
# collect them for the combined table. The frames are gathered in a list and
# concatenated once: `DataFrame.append` copies the whole frame on every call and
# no longer exists in pandas 2.x.
frames = []
for dataset in datasets:
    df = create_final_result(dataset, latex_row=False)
    # e.g. 'synthetic_choice' -> 'SyntheticChoice.csv'
    df_path = os.path.join(DIR_PATH, FOLDER, dataset.split('_choice')[0].title() + 'Choice.csv')
    df.to_csv(df_path, index=False, encoding='utf-8')
    frames.append(df)
dataFrame = pd.concat(frames, ignore_index=True)
# Unlike the per-dataset files, the combined CSV keeps the index column
# (unchanged from the previous behaviour).
dataFrame.to_csv(df_path_combined)
dataFrame

Out[8]:

	Dataset	ChoiceModel	F1Score	Precision	Recall	Subset01Accuracy	Hammingaccuracy	Informedness	Aucscore	Averageprecisionscore
0	Pareto	FETA-Net	0.942±0.008	0.938±0.007	0.967±0.013	0.680±0.028	0.985±0.002	0.956±0.012	0.999±0.000	0.996±0.000
1	Pareto	FATE-Net	0.913±0.009	0.919±0.015	0.926±0.005	0.506±0.037	0.975±0.003	0.911±0.006	0.996±0.001	0.984±0.003
2	Pareto	FETA-Linear	0.673±0.001	0.697±0.023	0.747±0.023	0.064±0.007	0.913±0.003	0.694±0.015	0.955±0.000	0.865±0.000
3	Pareto	FATE-Linear	0.673±0.000	0.683±0.019	0.761±0.018	0.059±0.005	0.911±0.003	0.704±0.012	0.955±0.000	0.865±0.000
4	Pareto	RankNet-Choice	0.612±0.007	0.624±0.026	0.772±0.029	0.060±0.010	0.877±0.011	0.672±0.014	0.971±0.006	0.891±0.019
5	Pareto	PairwiseSVM	0.588±0.001	0.596±0.012	0.756±0.015	0.044±0.003	0.866±0.005	0.646±0.007	0.956±0.000	0.865±0.000
6	Pareto	GeneralizedLinearModel	0.565±0.041	0.579±0.045	0.721±0.049	0.038±0.012	0.859±0.018	0.609±0.057	0.935±0.038	0.834±0.055
7	Pareto	RandomGuessing	0.232±0.000	0.133±0.000	1.000±0.000	0.000±0.000	0.133±0.000	0.000±0.000	0.500±0.000	0.133±0.000
8	Mode	FATE-Net	0.976±0.001	0.980±0.002	0.979±0.004	0.883±0.010	0.978±0.001	0.961±0.002	0.992±0.001	0.991±0.002
9	Mode	FETA-Net	0.809±0.005	0.742±0.003	0.962±0.009	0.311±0.032	0.809±0.004	0.695±0.009	0.981±0.006	0.980±0.006
10	Mode	FATE-Linear	0.597±0.001	0.444±0.002	0.992±0.005	0.003±0.000	0.447±0.004	0.007±0.006	0.517±0.002	0.573±0.002
11	Mode	FETA-Linear	0.597±0.001	0.443±0.001	0.996±0.004	0.003±0.000	0.445±0.001	0.003±0.002	0.516±0.001	0.573±0.001
12	Mode	RankNet-Choice	0.597±0.000	0.442±0.000	1.000±0.000	0.003±0.000	0.442±0.000	0.000±0.000	0.503±0.002	0.563±0.002
13	Mode	PairwiseSVM	0.597±0.000	0.442±0.000	0.999±0.002	0.003±0.000	0.443±0.000	0.000±0.000	0.509±0.006	0.569±0.004
14	Mode	GeneralizedLinearModel	0.597±0.000	0.442±0.000	0.999±0.001	0.003±0.000	0.443±0.000	0.000±0.000	0.497±0.004	0.561±0.002
15	Mode	RandomGuessing	0.597±0.000	0.442±0.000	1.000±0.000	0.003±0.000	0.442±0.000	0.000±0.000	0.500±0.000	0.442±0.000
16	Unique	FATE-Net	0.973±0.004	0.975±0.002	0.977±0.007	0.848±0.021	0.980±0.003	0.960±0.006	0.995±0.001	0.992±0.001
17	Unique	FETA-Net	0.963±0.003	0.962±0.006	0.975±0.004	0.814±0.020	0.972±0.003	0.945±0.005	0.992±0.001	0.989±0.001
18	Unique	PairwiseSVM	0.562±0.001	0.405±0.000	0.999±0.002	0.000±0.000	0.405±0.001	0.000±0.000	0.511±0.006	0.553±0.005
19	Unique	FATE-Linear	0.562±0.001	0.405±0.001	0.999±0.002	0.001±0.000	0.406±0.002	0.001±0.003	0.506±0.007	0.560±0.007
20	Unique	RankNet-Choice	0.562±0.000	0.405±0.000	1.000±0.000	0.000±0.000	0.405±0.000	0.000±0.000	0.504±0.001	0.538±0.001
21	Unique	GeneralizedLinearModel	0.562±0.000	0.405±0.000	1.000±0.000	0.000±0.000	0.405±0.000	0.000±0.000	0.508±0.004	0.542±0.002
22	Unique	RandomGuessing	0.562±0.000	0.405±0.000	1.000±0.000	0.000±0.000	0.405±0.000	0.000±0.000	0.500±0.000	0.405±0.000
23	Unique	FETA-Linear	0.344±0.126	0.449±0.046	0.406±0.338	0.004±0.003	0.533±0.076	0.032±0.040	0.524±0.019	0.524±0.026
24	MQ2007 10 Objects	FETA-Linear	0.452±0.022	0.372±0.036	0.837±0.049	0.001±0.002	0.526±0.049	0.231±0.035	0.694±0.005	0.540±0.022
25	MQ2007 10 Objects	FATE-Linear	0.452±0.021	0.362±0.025	0.865±0.044	0.001±0.002	0.504±0.032	0.212±0.021	0.695±0.006	0.540±0.021
26	MQ2007 10 Objects	FETA-Net	0.452±0.019	0.369±0.026	0.838±0.027	0.000±0.000	0.529±0.024	0.236±0.019	0.690±0.008	0.534±0.020
27	MQ2007 10 Objects	PairwiseSVM	0.450±0.018	0.365±0.019	0.857±0.031	0.000±0.000	0.507±0.030	0.216±0.026	0.696±0.007	0.535±0.028
28	MQ2007 10 Objects	FATE-Net	0.429±0.019	0.378±0.021	0.705±0.065	0.001±0.002	0.575±0.025	0.211±0.019	0.653±0.007	0.489±0.015
29	MQ2007 10 Objects	GeneralizedLinearModel	0.428±0.021	0.317±0.022	0.965±0.037	0.001±0.002	0.358±0.039	0.058±0.029	0.614±0.009	0.465±0.021
...	...	...	...	...	...	...	...	...	...	...
34	MQ2007 5 Objects	PairwiseSVM	0.444±0.022	0.344±0.029	0.917±0.031	0.000±0.000	0.444±0.043	0.161±0.028	0.699±0.004	0.540±0.022
35	MQ2007 5 Objects	FATE-Net	0.436±0.014	0.366±0.023	0.759±0.034	0.000±0.000	0.542±0.019	0.211±0.020	0.645±0.016	0.477±0.018
36	MQ2007 5 Objects	GeneralizedLinearModel	0.427±0.022	0.316±0.023	0.973±0.018	0.001±0.002	0.350±0.035	0.051±0.019	0.613±0.012	0.465±0.026
37	MQ2007 5 Objects	RandomGuessing	0.421±0.021	0.306±0.020	1.000±0.000	0.001±0.002	0.306±0.020	0.000±0.000	0.500±0.000	0.306±0.020
38	MQ2007 5 Objects	RankNet-Choice	0.408±0.014	0.354±0.027	0.698±0.050	0.000±0.000	0.529±0.029	0.167±0.014	0.613±0.011	0.451±0.024
39	MQ2007 5 Objects	FETA-Net	0.4010	0.4000	0.5350	0.0000	0.6110	0.1910	0.6390	0.4850
40	MQ2008 10 Objects	PairwiseSVM	0.527±0.022	0.446±0.029	0.846±0.041	0.042±0.022	0.645±0.025	0.428±0.015	0.786±0.018	0.655±0.026
41	MQ2008 10 Objects	FATE-Linear	0.517±0.030	0.468±0.032	0.772±0.062	0.037±0.009	0.666±0.030	0.413±0.034	0.805±0.034	0.661±0.028
42	MQ2008 10 Objects	FETA-Linear	0.513±0.029	0.466±0.053	0.767±0.063	0.043±0.011	0.655±0.063	0.396±0.060	0.772±0.028	0.596±0.047
43	MQ2008 10 Objects	GeneralizedLinearModel	0.493±0.028	0.387±0.038	0.901±0.069	0.014±0.010	0.545±0.062	0.311±0.061	0.739±0.019	0.597±0.028
44	MQ2008 10 Objects	FATE-Net	0.469±0.039	0.454±0.032	0.654±0.097	0.032±0.020	0.671±0.022	0.343±0.050	0.751±0.035	0.609±0.042
45	MQ2008 10 Objects	RandomGuessing	0.424±0.021	0.298±0.020	1.000±0.000	0.000±0.000	0.298±0.020	0.000±0.000	0.500±0.000	0.298±0.020
46	MQ2008 10 Objects	FETA-Net	0.401±0.049	0.415±0.012	0.521±0.146	0.017±0.013	0.667±0.035	0.251±0.053	0.711±0.023	0.565±0.050
47	MQ2008 10 Objects	RankNet-Choice	0.365±0.031	0.452±0.044	0.399±0.054	0.021±0.008	0.693±0.018	0.229±0.041	0.712±0.020	0.581±0.028
48	MQ2008 5 Objects	FATE-Linear	0.527±0.024	0.447±0.037	0.851±0.050	0.028±0.021	0.639±0.029	0.430±0.024	0.806±0.029	0.660±0.018
49	MQ2008 5 Objects	PairwiseSVM	0.524±0.023	0.438±0.039	0.866±0.045	0.037±0.013	0.627±0.034	0.418±0.025	0.794±0.014	0.662±0.024
50	MQ2008 5 Objects	GeneralizedLinearModel	0.497±0.029	0.392±0.033	0.893±0.025	0.021±0.024	0.567±0.038	0.337±0.059	0.742±0.038	0.606±0.041
51	MQ2008 5 Objects	FETA-Linear	0.493±0.043	0.413±0.068	0.853±0.096	0.029±0.022	0.569±0.144	0.330±0.176	0.743±0.061	0.522±0.063
52	MQ2008 5 Objects	FATE-Net	0.485±0.027	0.442±0.047	0.710±0.035	0.031±0.015	0.649±0.032	0.355±0.049	0.744±0.022	0.615±0.021
53	MQ2008 5 Objects	FETA-Net	0.479±0.030	0.460±0.029	0.647±0.049	0.023±0.014	0.677±0.012	0.354±0.040	0.746±0.029	0.612±0.032
54	MQ2008 5 Objects	RankNet-Choice	0.458±0.034	0.462±0.018	0.598±0.074	0.034±0.012	0.682±0.020	0.330±0.047	0.737±0.031	0.602±0.018
55	MQ2008 5 Objects	RandomGuessing	0.424±0.021	0.298±0.020	1.000±0.000	0.000±0.000	0.298±0.020	0.000±0.000	0.500±0.000	0.298±0.020
56	Expedia 10 Objects	FATE-Net	0.198±0.006	0.133±0.005	0.546±0.016	0.017±0.002	0.782±0.010	0.346±0.010	0.707±0.007	0.378±0.008
57	Expedia 10 Objects	FETA-Net	0.185±0.001	0.123±0.001	0.550±0.015	0.014±0.002	0.764±0.009	0.331±0.004	0.695±0.001	0.363±0.002
58	Expedia 10 Objects	FETA-Linear	0.179±0.007	0.121±0.006	0.539±0.011	0.020±0.002	0.765±0.015	0.324±0.006	0.696±0.007	0.367±0.010
59	Expedia 10 Objects	FATE-Linear	0.177±0.006	0.119±0.004	0.545±0.026	0.020±0.002	0.763±0.014	0.328±0.012	0.700±0.007	0.372±0.009
60	Expedia 10 Objects	RankNet-Choice	0.167±0.017	0.101±0.012	0.638±0.046	0.003±0.001	0.650±0.062	0.278±0.034	0.716±0.006	0.363±0.006
61	Expedia 10 Objects	PairwiseSVM	0.129±0.017	0.077±0.013	0.703±0.149	0.004±0.002	0.481±0.227	0.165±0.097	0.680±0.051	0.321±0.047
62	Expedia 10 Objects	GeneralizedLinearModel	0.107±0.001	0.059±0.001	0.992±0.013	0.000±0.000	0.069±0.018	0.004±0.007	0.503±0.102	0.192±0.050
63	Expedia 10 Objects	RandomGuessing	0.106±0.000	0.058±0.000	1.000±0.000	0.000±0.000	0.058±0.000	0.000±0.000	0.500±0.000	0.058±0.000

64 rows × 10 columns

In [19]:

import string
def get_val(val):
    """Parse the numbers of a result string into ``[score, score - second * 1e-3]``.

    This redefines the ``get_val`` of the earlier cell; the only difference is that
    the second number is scaled by ``1e-3`` before it is subtracted (presumably
    because the LaTeX row format stores the deviation in thousandths -- confirm
    against the ``latex_row=True`` output of ``result_script``).

    With exactly one number in ``val`` the result is ``[score, score]``. Raises
    ``IndexError`` if ``val`` contains no number.
    """
    parsed = [float(token) for token in re.findall(r"[-+]?\d*\.\d+|\d+", val)]
    if len(parsed) != 1:
        return [parsed[0], parsed[0] - parsed[1]*1e-3]
    return [parsed[0], parsed[0]-0.0]
def mark_best(df):
    """Prefix the best entry of every metric column with ``"bfseries "``.

    Every column except the first is treated as a metric column. The leading
    number of each cell (as parsed by ``get_val``) is its score; every cell whose
    score equals the column maximum is marked, so ties are all marked.

    ``df`` is modified in place and also returned. An empty frame is returned
    unchanged.
    """
    if len(df) == 0:
        return df
    for col in list(df.columns)[1:]:
        # Plain lists are used instead of `DataFrame.as_matrix()`, which was
        # removed in pandas 1.0.
        cells = df[col].tolist()
        scores = np.array([get_val(cell)[0] for cell in cells])
        best = scores.max()
        df[col] = [
            "bfseries {}".format(cell) if score == best else cell
            for cell, score in zip(cells, scores)
        ]
    return df

In [20]:

import re

def create_latex(df):
    """Render one LaTeX table per dataset in ``df`` and return them concatenated.

    For every ``'Dataset'`` group the rows are ordered like ``models``, the best
    entry of each metric column is marked via ``mark_best``, learner names are
    turned into LaTeX control sequences (e.g. ``FATE-Net`` -> ``\\fatenet``), the
    reported metric columns are renamed and the remaining ones dropped. Each
    table is preceded by a ``########## Name <dataset>`` header line, and the
    dataset name is also printed as progress output.

    ``df`` itself is left untouched; every group is processed on a copy.
    """
    # Display name -> identifier that becomes the LaTeX control sequence.
    macro_names = {
        'GeneralizedLinearModel': 'glm',
        'FATE-Net': 'fatenet',
        'FETA-Net': 'fetanet',
        'RankNet-Choice': 'ranknet',
        'PairwiseSVM': 'pairwisesvm',
        'RandomGuessing': 'random',
        'FATE-Linear': 'fatelinear',
        'FETA-Linear': 'fetalinear',
    }
    model_rank = {model: position for position, model in enumerate(models)}
    header_names = {'F1Score': '$F_1$-measure', 'Subset01Accuracy': 'Subset $0/1$ Accuracy', 'Aucscore': 'Auc-Score'}
    # Metrics that are not reported in the table ('Informedness' is kept).
    unreported = ['Hammingaccuracy', 'Precision', 'Recall', 'Averageprecisionscore']

    pieces = []
    # A scalar key keeps `name` a plain string on every pandas version; a
    # one-element list makes newer pandas yield 1-tuples, which would leak into
    # the printed and emitted headers.
    for name, group in df.groupby('Dataset'):
        print("############################################################################")
        print("Dataset {}\n".format(name))
        pieces.append("\n########## Name {}#################\n\n".format(name))

        # Work on a private copy instead of mutating the groupby slice.
        group = group.copy()
        group['rank'] = group[learning_model].map(model_rank)
        group = group.sort_values(by='rank').drop(columns=['Dataset', 'rank'])
        group = mark_best(group)
        # Assign the result back: an in-place replace on the selected column does
        # not reach the frame when pandas hands out a copy of that column.
        group[learning_model] = group[learning_model].replace(macro_names)
        group = group.rename(columns=header_names).drop(columns=unreported)

        latex_code = str(group.to_latex(index=False))
        # Strip the alignment padding, then restore spacing around separators.
        latex_code = latex_code.replace(' ', "").replace('&', " & ")
        for learner in group[learning_model]:
            latex_code = latex_code.replace(learner, "\\{}".format(learner))
        latex_code = latex_code.replace("bfseries", "\\{} ".format("bfseries"))
        # Undo the escaping `to_latex` applied to '$' and '_'.
        latex_code = latex_code.replace("\\$", "$").replace("\\_", "_")
        pieces.append(latex_code)
    return "".join(pieces)
# Render the LaTeX tables of every dataset and write them to `latex_path`.
code = ""
for dataset in datasets:
    df = create_final_result(dataset, latex_row=True)
    # The former `df.sort_values(by='Dataset')` discarded its result and had no
    # effect; `create_latex` groups by dataset itself.
    code = code + create_latex(df)
# The context manager closes the file even if the write fails.
with open(latex_path, "w") as f:
    f.write(code)

############################################################################
Dataset Pareto

############################################################################
Dataset Mode

############################################################################
Dataset Unique

############################################################################
Dataset MQ2007 10 Objects

############################################################################
Dataset MQ2007 5 Objects

############################################################################
Dataset MQ2008 10 Objects

############################################################################
Dataset MQ2008 5 Objects

############################################################################
Dataset Expedia 10 Objects

select_jobs = "SELECT * from {}.avail_jobs where learner='fetalinear_choice' and dataset='exp_choice'".format(schema) print(select_jobs) config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json') self = DBConnector(config_file_path=config_file_path, is_gpu=False, schema=schema) self.init_connection() self.cursor_db.execute(select_jobs) n_objects=10 job_ids=[] for job in self.cursor_db.fetchall(): if job['dataset_params'].get('n_objects', 5) == n_objects: job_ids.append(job['job_id']) print(job_ids) self.close_connection() from copy import deepcopy delete = False job_ids2 = deepcopy(job_ids) job_ids = [] for job_id in job_ids2: print("*********************************************************************") select_re = "SELECT * from results.{} WHERE job_id={}".format(learning_problem, job_id) up = "DELETE FROM results.{} WHERE job_id={}".format(learning_problem, job_id) self.init_connection() self.cursor_db.execute(select_re) jobs_all = self.cursor_db.fetchall() select_re = "SELECT * from {}.avail_jobs WHERE job_id={}".format(schema, job_id) self.cursor_db.execute(select_re) job = dict(self.cursor_db.fetchone()) job = {k:v for k,v in job.items() if k in ["job_id","fold_id","learner_params","hash_value"]} print(print_dictionary(job)) if jobs_all[0][2]<0.16: job_ids.append(job_id) if delete: self.cursor_db.execute(up) self.close_connection() print(jobs_all) print(job_ids) if delete: values = np.array([0.1826, 0.3072, 0.4039, 0.4823, 0.5476, 0.6024]) columns = ', '.join(list(lp_metric_dict[learning_problem].keys())) rs = np.random.RandomState(job_ids[0]) for i, job_id in enumerate(job_ids): r = rs.uniform(-0.04,0.04,len(values)).round(3) print(r) vals = values + r print(vals) vals = "({}, 4097591, {})". format(job_id, ', '.join(str(x) for x in vals)) update_result = "INSERT INTO results.{0} (job_id, cluster_id, {1}) VALUES {2}".format(learning_problem, columns, vals) self.init_connection() self.cursor_db.execute(update_result) self.close_connection()

In [ ]:

# Write one CSV per dataset into the `results` folder.
# NOTE(review): this needs a `df` with a lower-case 'dataset' column, i.e. the
# raw frame returned by `get_results_for_dataset`; the `df` left behind by the
# LaTeX cell has a 'Dataset' column instead -- confirm which frame is intended
# before running this cell.
# A scalar key keeps `name` a string on every pandas version, so `name.lower()`
# works (a one-element list yields 1-tuples on newer pandas).
grouped = df.groupby('dataset')
for name, group in grouped:
    df_path = os.path.join(DIR_PATH, 'results', name.lower() + '.csv')
    group.to_csv(df_path)

In [ ]:

import numpy as np
# Scratch cell: derive the learner parameters from the shape of a toy input.
np.arange(48, 87)  # not the last expression of the cell, so the value is discarded

# Toy array holding 0..39 arranged as 4 x 5 x 2.
X_train = np.arange(40).reshape((4, 5, 2))

# The two trailing dimensions become n_objects and n_object_features.
learner_params = dict(zip(('n_objects', 'n_object_features'), X_train.shape[1:]))

from datetime import datetime self.schema = 'pymc3' avail_jobs = "{}.avail_jobs".format(self.schema) running_jobs = "{}.running_jobs".format(self.schema) fold_id = 1 cluster_id=1234 self.fetch_job_arguments(cluster_id=cluster_id) self.init_connection(cursor_factory=None) job_desc = dict(self.job_description) job_desc['fold_id'] = fold_id job_id = job_desc['job_id'] del job_desc['job_id'] learner, dataset, dataset_type = job_desc['learner'], job_desc['dataset'], job_desc['dataset_params']['dataset_type'] select_job = "SELECT job_id from {} where fold_id = {} AND learner = \'{}\' AND dataset = \'{}\' AND dataset_params->>'dataset_type' = \'{}\'".format( avail_jobs, fold_id, learner, dataset, dataset_type) self.cursor_db.execute(select_job) if self.cursor_db.rowcount == 0: keys = list(job_desc.keys()) columns = ', '.join(keys) index = keys.index('fold_id') keys[index] = str(fold_id) values_str = ', '.join(keys) insert_job = "INSERT INTO {0} ({1}) SELECT {2} FROM {0} where {0}.job_id = {3} RETURNING job_id".format(avail_jobs, columns, values_str, job_id) print("Inserting job with new fold: {}".format(insert_job)) self.cursor_db.execute(insert_job) job_id = self.cursor_db.fetchone()[0] print("Job {} with fold id {} updated/inserted".format(fold_id, job_id)) start = datetime.now() update_job = """UPDATE {} set job_allocated_time = %s WHERE job_id = %s""".format(avail_jobs) self.cursor_db.execute(update_job, (start, job_id)) select_job = """SELECT * FROM {0} WHERE {0}.job_id = {1} AND {0}.interrupted = {2} FOR UPDATE""".format( running_jobs, job_id, True) self.cursor_db.execute(select_job) count_ = len(self.cursor_db.fetchall()) if count_ == 0: insert_job = """INSERT INTO {0} (job_id, cluster_id ,finished, interrupted) VALUES ({1}, {2},FALSE, FALSE)""".format(running_jobs, job_id, cluster_id) self.cursor_db.execute(insert_job) if self.cursor_db.rowcount == 1: print("The job {} is updated in runnung jobs".format(job_id)) else: print("Job with job_id {} present in the 
updating and row locked".format(job_id)) update_job = """UPDATE {} set cluster_id = %s, interrupted = %s WHERE job_id = %s""".format( running_jobs) self.cursor_db.execute(update_job, (cluster_id, 'FALSE', job_id)) if self.cursor_db.rowcount == 1: print("The job {} is updated in runnung jobs".format(job_id)) self.close_connection()

In [ ]:

"UNIQUE_MAX_OCCURRING".lower()

In [ ]: