#!/usr/bin/env python
# coding: utf-8
"""Job Time Statistics.

Query Slurm's ``sacct`` for every job with a given name since a given start
date, then report the total CPU time and total wall clock time of those jobs.
"""

import datetime as dt
import subprocess
import sys
from io import StringIO
from typing import Sequence

import pandas as pd

job_name = 'umui-rerun'
start_date = '2020-02-01'

# The `sacct` command will let us get info for job names.
# `-P` asks for '|'-delimited output, which is what `parse_accounting` reads.
cmd = [
    'sacct',
    '-P',
    '-o', 'JobID,Account,State,AllocCPUS,CpuTime,CpuTimeRAW,Elapsed,ElapsedRAW',
    '--name', job_name,
    '-S', start_date,
]


def run_sacct(command: Sequence[str]) -> str:
    """Run *command* and return its standard output as text.

    Raises:
        ValueError: if the child process exits with a non-zero return code.
            The child's stderr is echoed to our stderr first.
    """
    child = subprocess.run(command, capture_output=True, encoding='utf8')
    if child.returncode:
        print(child.stderr, file=sys.stderr)
        raise ValueError('child failed with code ' + str(child.returncode))
    return child.stdout


def parse_accounting(text: str) -> pd.DataFrame:
    """Parse '|'-delimited accounting text into a DataFrame.

    ``JobID`` is always read as a string: when no step rows are present every
    ID is purely numeric, and letting pandas infer an integer dtype would
    break the string-based filtering in `drop_job_steps`.
    """
    return pd.read_csv(StringIO(text), sep='|', dtype={'JobID': str})


def drop_job_steps(acct: pd.DataFrame) -> pd.DataFrame:
    """Return only the rows of *acct* whose ``JobID`` has no step suffix.

    Step rows carry a ``.<step>`` suffix on the job ID (``.batch``,
    ``.extern``, ``.0``, ...). They are treated as duplicates of the parent
    job row, so keeping any of them would count that time more than once.
    """
    is_step = acct['JobID'].str.contains('.', regex=False, na=False)
    return acct[~is_step]


def total_time(jobs: pd.DataFrame, column: str) -> dt.timedelta:
    """Sum a column of per-job seconds in *jobs* into a single timedelta."""
    return dt.timedelta(seconds=int(jobs[column].sum()))


def main() -> None:
    """Fetch the accounting records and print the CPU and wall clock totals."""
    acct = parse_accounting(run_sacct(cmd))
    jobs = drop_job_steps(acct)
    print(jobs)

    # What is the CPU time, and how many hours is that?
    cpu = total_time(jobs, 'CPUTimeRAW')
    print('CPU time:', cpu)
    print('CPU hours:', cpu.total_seconds() / 3600)

    # What is the wall clock time, and how many hours is that?
    wall = total_time(jobs, 'ElapsedRaw')
    print('Wall clock time:', wall)
    print('Wall clock hours:', wall.total_seconds() / 3600)


if __name__ == '__main__':
    main()