#!/usr/bin/env python
# coding: utf-8
"""Explore OpenBenchmarking.org results for one search term.

Script form of a Jupyter notebook: it downloads every result matching
``search_string``, caches the merged table as CSV, keeps only the rows whose
``Graphics`` column mentions the RX 470, and draws grouped horizontal bar
charts for a few selected tests.
"""

from os.path import join as pjoin

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# seaborn is only used for its plotting layout configuration
import seaborn as sns

import openbenchmarking as openbm

# The inline backend only exists inside IPython/Jupyter; when this file is
# run with a plain Python interpreter ``get_ipython`` is undefined, so skip it.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# If you are new to Python or to some of the packages that are imported
# above: they are part of the standard stack often used in data science and
# science/engineering environments. Some of these dependencies are Python
# packages with compiled extensions, but there are Python distributions with
# binary packages available which make installing the SciPy stack a breeze.
# Alternatively, Linux distributions usually contain most of these
# dependencies.
# * [SciPy Stack](http://scipy.org/)
# * [Various installation procedures](http://scipy.org/install.html)
#
# Dependencies:
# * lxml, numpy, pandas, matplotlib, seaborn (optionally), openpyxl
#   (optionally, for saving a DataFrame to an Excel sheet)
#
# The plot layout is quite basic for this example. With a few more tweaks
# anything is possible thanks to the extensibility of the underlying
# plotting library Matplotlib.
#
# Another advantage of using Python: availability of interactive HTML5
# plotting tools, such as:
# * [Matplotlib + D3](http://mpld3.github.io/)
# * [plotly](https://plot.ly/python/)
# * [bokeh](http://bokeh.pydata.org/en/latest/)
# * [holoviews](http://holoviews.org/)
# * [pygal](http://www.pygal.org/en/stable/)
#
# I have not used these packages yet, so an example with an interactive plot
# might come at some other point.

# These must come after the imports: ``openbm`` is needed to build ``xml``.
xml = openbm.xml2df()
search_string = 'RX 470'


def _select_and_plot(data, test, test_detail):
    """Select one benchmark configuration from *data* and plot it.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns ``ResultIdentifier``,
        ``ResultDescription``, ``Value``, ``Graphics`` and ``Processor``.
    test : str
        Value that ``ResultIdentifier`` has to equal.
    test_detail : str
        Value that ``ResultDescription`` has to equal (the same
        version/resolution of said test).

    Returns
    -------
    tuple
        ``(sel, fig, ax)``: the selected rows (a copy, with ``Value`` cast to
        float64) and the figure/axes pair returned by
        ``openbm.plot_barh_groups``.
    """
    # select only a certain test ...
    sel = data[data['ResultIdentifier'] == test]
    # ... and the same version/resolution of said test; copy so that the cast
    # below does not write into a view of ``data``
    sel = sel[sel['ResultDescription'] == test_detail].copy()
    # cast Value to a float64
    sel['Value'] = sel['Value'].astype(np.float64)
    # optionally, remove close to zero measurements:
    #sel = sel[(sel['Display Driver']!='None') & (sel['Value']>0.5)]
    fig, ax = openbm.plot_barh_groups(sel, 'Graphics', 'Processor',
                                      label_xval='Value')
    ax.set_title(test + '\n' + test_detail)
    return sel, fig, ax


# First we will use the search function on the OpenBenchMarking.org webpage.
# ``download_from_openbm`` simply gets all the test ids from the search
# result, as you can find here:
# [http://openbenchmarking.org/s/RX%20470&show_more](http://openbenchmarking.org/s/RX%20470&show_more),
# downloads all the XML result files, converts them into a
# ``pandas.DataFrame``, and merges them into one big table.

# this might take a minute...
df = openbm.download_from_openbm(search_string)
# for this example we'll just save the DataFrame in a text based csv file
df.to_csv('search_{}.csv'.format(search_string))
# hdf5 is a fast storage format that can handle very large data sets
#df.to_hdf('search_{}.h5'.format(search_string), 'table')
#df.to_excel('search_{}.xlsx'.format(search_string))

# load previously downloaded data
# NOTE(review): ``to_csv`` above also writes the index, which ``read_csv``
# reads back as an extra unnamed column; confirm that this does not keep
# ``drop_duplicates`` below from recognising duplicate rows.
df = pd.read_csv('search_{}.csv'.format(search_string))
# optionally, in either Excel or hdf5 format this would be:
#df = pd.read_hdf('search_{}.h5'.format(search_string), 'table')
#df = pd.read_excel('search_{}.xlsx'.format(search_string))
df.drop(xml.user_cols, inplace=True, axis=1)
df.drop_duplicates(inplace=True)

# only RX 470 graphic cards
res_find = df['Graphics'].str.lower().str.find('rx 470')
# res_find holds -1 for entries that do not contain the search string.
# We are only interested in taking the indices of those entries that do
# contain our search term, so anything above -1.
df_find = df.loc[(res_find > -1).values]

# now see for which tests we have sufficient data
openbm.explore_dataset(df_find, 'ResultIdentifier', 'ResultDescription',
                       'Processor')

test = 'pts/unigine-valley-1.1.4'
test_detail = 'Resolution: 1920 x 1080 - Mode: Fullscreen'
sel, fig, ax = _select_and_plot(df_find, test, test_detail)

# We can also inspect the DataFrame directly to see all the available details
# of the corresponding tests (displayed only when run as a notebook cell).
sel

test = 'pts/xonotic-1.4.0'
test_detail = 'Resolution: 1920 x 1080 - Effects Quality: Ultimate'
sel, fig, ax = _select_and_plot(df_find, test, test_detail)

# Or show only the Processor and Graphics columns (not truncated):
print(sel[['Processor', 'Graphics']])

test = 'pts/xonotic-1.4.0'
test_detail = 'Resolution: 3840 x 2160 - Effects Quality: Ultimate'
sel, fig, ax = _select_and_plot(df_find, test, test_detail)