#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# In[4]:


# This cell sits before the main import cell in the file, so openbm has to be
# imported here as well; otherwise a top-to-bottom run fails with a NameError.
import openbenchmarking as openbm

# xml.user_cols is used further down to drop columns from the result table
xml = openbm.xml2df()
# search term for the download; also used to build the name of the saved file
search_string = 'RX 470'


# In[1]:


from os.path import join as pjoin
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns # seaborn is only used for its plotting layout configuration
get_ipython().run_line_magic('matplotlib', 'inline')
import openbenchmarking as openbm


# If you are new to Python or to some of the packages imported above: they are part of the standard stack often used in data science and science/engineering environments. Some of these dependencies are Python packages with compiled extensions, but there are Python distributions with binary packages available which make installing the SciPy stack a breeze. Alternatively, Linux distributions usually contain most of these dependencies.
# * [SciPy Stack](http://scipy.org/)
# * [Various installation procedures](http://scipy.org/install.html)
# 
# Dependencies:
# * lxml, numpy, pandas, matplotlib, seaborn (optional), openpyxl (optional, for saving a DataFrame to an Excel sheet)
# 
# The plot layout is quite basic for this example. With a few more tweaks anything is possible thanks to the extensibility of the underlying plotting library Matplotlib.
# 
# Another advantage of using Python: availability of interactive HTML5 plotting tools, such as:
# * [Matplotlib + D3](http://mpld3.github.io/)
# * [plotly](https://plot.ly/python/)
# * [bokeh](http://bokeh.pydata.org/en/latest/)
# * [holoviews](http://holoviews.org/)
# * [pygal](http://www.pygal.org/en/stable/)
# 
# I have not used these packages yet, so an example with interactive plot might come at some other point.

# In[ ]:


# downloading all search results might take a minute...
df = openbm.download_from_openbm(search_string)

# for this example the DataFrame is simply stored as a text based csv file,
# named after the search term
csv_path = 'search_{}.csv'.format(search_string)
df.to_csv(csv_path)

# alternative storage formats (hdf5 is a fast storage format that can handle
# very large data sets):
#df.to_hdf('search_{}.h5'.format(search_string), 'table')
#df.to_excel('search_{}.xlsx'.format(search_string))


# First we will use the search function on the OpenBenchMarking.org webpage. The ```download_from_openbm``` function simply gets all the test IDs from the search result as you can find here: [http://openbenchmarking.org/s/RX%20470&show_more](http://openbenchmarking.org/s/RX%20470&show_more), and downloads all the XML result files, converts them into a ```pandas.DataFrame```, and merges them into one big table.

# In[5]:


# load previously downloaded data
# The csv file was written together with the DataFrame index, so the first
# column is read back as the index. Without index_col=0 it would turn into an
# extra "Unnamed: 0" data column, which can keep otherwise identical rows from
# being recognised as duplicates below.
df = pd.read_csv('search_{}.csv'.format(search_string), index_col=0)
# optionally, in either Excel or hdf5 format this would be:
#df = pd.read_hdf('search_{}.h5'.format(search_string), 'table')
#df = pd.read_excel('search_{}.xlsx'.format(search_string))

# remove the columns listed in xml.user_cols, then drop rows that are exact
# duplicates of each other
df.drop(xml.user_cols, inplace=True, axis=1)
df.drop_duplicates(inplace=True)

# only keep graphics cards matching the search term (case-insensitive)
res_find = df['Graphics'].str.lower().str.find(search_string.lower())
# res_find holds -1 for entries that do not contain the search string.
# We are only interested in the entries that do contain our search term,
# so anything above -1.
df_find = df.loc[(res_find > -1).values]


# In[6]:


# overview of the filtered data, to see for which tests we have sufficient data
overview_cols = ('ResultIdentifier', 'ResultDescription', 'Processor')
openbm.explore_dataset(df_find, *overview_cols)


# In[7]:


test = 'pts/unigine-valley-1.1.4'
test_detail = 'Resolution: 1920 x 1080 - Mode: Fullscreen'

# keep only the rows of this test that were run with exactly these settings
same_test = df_find['ResultIdentifier'] == test
same_detail = df_find['ResultDescription'] == test_detail
sel = df_find[same_test & same_detail].copy()
# make sure Value is a float64
sel['Value'] = sel['Value'].astype(np.float64)
# optional filter (disabled): remove close to zero measurements
#sel = sel[(sel['Display Driver']!='None') & (sel['Value']>0.5)]
fig, ax = openbm.plot_barh_groups(sel, 'Graphics', 'Processor', label_xval='Value')
# title: test name on the first line, test settings on the second
ax.set_title('\n'.join([test, test_detail]))


# We can also inspect the DataFrame directly to see all the available details of the corresponding tests

# In[8]:


# bare expression: shown as the cell output in a notebook, it produces no
# output when this file is run as a plain script
sel


# In[10]:


test = 'pts/xonotic-1.4.0'
test_detail = 'Resolution: 1920 x 1080 - Effects Quality: Ultimate'

# keep only the rows of this test that were run with exactly these settings
same_test = df_find['ResultIdentifier'] == test
same_detail = df_find['ResultDescription'] == test_detail
sel = df_find[same_test & same_detail].copy()
# make sure Value is a float64
sel['Value'] = sel['Value'].astype(np.float64)
# optional filter (disabled): remove close to zero measurements
#sel = sel[(sel['Display Driver']!='None') & (sel['Value']>0.5)]
fig, ax = openbm.plot_barh_groups(sel, 'Graphics', 'Processor', label_xval='Value')
# title: test name on the first line, test settings on the second
ax.set_title('\n'.join([test, test_detail]))


# Or show only the Processor and Graphics columns (not truncated):

# In[9]:


# print just the hardware columns of the current selection
hardware_cols = ['Processor', 'Graphics']
print(sel[hardware_cols])


# In[11]:


test = 'pts/xonotic-1.4.0'
test_detail = 'Resolution: 3840 x 2160 - Effects Quality: Ultimate'

# keep only the rows of this test that were run with exactly these settings
same_test = df_find['ResultIdentifier'] == test
same_detail = df_find['ResultDescription'] == test_detail
sel = df_find[same_test & same_detail].copy()
# make sure Value is a float64
sel['Value'] = sel['Value'].astype(np.float64)
# optional filter (disabled): remove close to zero measurements
#sel = sel[(sel['Display Driver']!='None') & (sel['Value']>0.5)]
fig, ax = openbm.plot_barh_groups(sel, 'Graphics', 'Processor', label_xval='Value')
# title: test name on the first line, test settings on the second
ax.set_title('\n'.join([test, test_detail]))


# 

# In[ ]: