import pandas as pd
import subprocess
import sys
import datetime as dt
from io import StringIO
# Job name to look up; passed to sacct's --name option below.
job_name = 'umui-rerun'
# Start date of the query window; passed to sacct's -S option below.
start_date = '2020-02-01'
The `sacct` command lets us pull accounting records for all jobs with a given name:
# Assemble the sacct query piece by piece: -P requests parsable output (read
# below with sep='|'), -o lists the accounting fields to report, --name
# filters by job name, and -S gives the start of the time window.
cmd = ['sacct', '-P']
cmd += ['-o', 'JobID,Account,State,AllocCPUS,CpuTime,CpuTimeRAW,Elapsed,ElapsedRAW']
cmd += ['--name', job_name]
cmd += ['-S', start_date]

# Run the query, capturing stdout and stderr as UTF-8 text.
child = subprocess.run(cmd, capture_output=True, encoding='utf8')
if child.returncode != 0:
    # Relay sacct's own error output before aborting.
    print(child.stderr, file=sys.stderr)
    raise ValueError(f'child failed with code {child.returncode}')

# Parse the '|'-delimited report into a DataFrame and display it.
acct_text = child.stdout
acct = pd.read_csv(StringIO(acct_text), sep='|')
acct
JobID | Account | State | AllocCPUS | CPUTime | CPUTimeRAW | Elapsed | ElapsedRaw | |
---|---|---|---|---|---|---|---|---|
0 | 210506 | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
1 | 210506.batch | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
2 | 210507 | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
3 | 210507.batch | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
4 | 210508 | mekstrand | FAILED | 28 | 11-20:35:48 | 1024548 | 10:09:51 | 36591 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
62 | 210930 | mekstrand | FAILED | 28 | 00:07:00 | 420 | 00:00:15 | 15 |
63 | 210930.batch | mekstrand | FAILED | 28 | 00:07:00 | 420 | 00:00:15 | 15 |
64 | 210931 | mekstrand | FAILED | 28 | 00:10:44 | 644 | 00:00:23 | 23 |
65 | 210975 | mekstrand | COMPLETED | 28 | 25-06:19:28 | 2182768 | 21:39:16 | 77956 |
66 | 210975.batch | mekstrand | COMPLETED | 28 | 25-06:19:28 | 2182768 | 21:39:16 | 77956 |
67 rows × 8 columns
The rows whose JobID ends in `.batch` duplicate their parent job's figures, so remove them:
# Drop the '<jobid>.batch' rows and keep only the job rows, so the totals
# computed below do not double-count.
# astype(str) guards against a result with no step rows at all: pandas would
# then infer JobID as an integer column and the .str accessor would raise.
is_batch_step = acct['JobID'].astype(str).str.endswith('.batch')
jobs = acct[~is_batch_step]
jobs
JobID | Account | State | AllocCPUS | CPUTime | CPUTimeRAW | Elapsed | ElapsedRaw | |
---|---|---|---|---|---|---|---|---|
0 | 210506 | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
2 | 210507 | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
4 | 210508 | mekstrand | FAILED | 28 | 11-20:35:48 | 1024548 | 10:09:51 | 36591 |
6 | 210564 | mekstrand | FAILED | 28 | 04:03:36 | 14616 | 00:08:42 | 522 |
8 | 210594 | mekstrand | FAILED | 28 | 00:04:40 | 280 | 00:00:10 | 10 |
10 | 210595 | mekstrand | FAILED | 28 | 11-16:12:36 | 1008756 | 10:00:27 | 36027 |
12 | 210701 | mekstrand | FAILED | 28 | 00:04:40 | 280 | 00:00:10 | 10 |
14 | 210704 | mekstrand | FAILED | 28 | 00:09:48 | 588 | 00:00:21 | 21 |
16 | 210705 | mekstrand | FAILED | 28 | 00:04:12 | 252 | 00:00:09 | 9 |
18 | 210712 | mekstrand | CANCELLED by 1051 | 28 | 06:39:00 | 23940 | 00:14:15 | 855 |
20 | 210713 | mekstrand | FAILED | 28 | 00:02:20 | 140 | 00:00:05 | 5 |
22 | 210714 | mekstrand | FAILED | 28 | 00:02:20 | 140 | 00:00:05 | 5 |
24 | 210715 | mekstrand | CANCELLED by 1051 | 28 | 00:33:36 | 2016 | 00:01:12 | 72 |
26 | 210716 | mekstrand | FAILED | 28 | 00:02:48 | 168 | 00:00:06 | 6 |
28 | 210717 | mekstrand | FAILED | 28 | 1-10:20:20 | 123620 | 01:13:35 | 4415 |
30 | 210789 | mekstrand | FAILED | 28 | 00:57:24 | 3444 | 00:02:03 | 123 |
32 | 210790 | mekstrand | FAILED | 28 | 00:03:44 | 224 | 00:00:08 | 8 |
33 | 210791 | mekstrand | FAILED | 28 | 00:03:16 | 196 | 00:00:07 | 7 |
34 | 210792 | mekstrand | CANCELLED by 1051 | 28 | 00:01:52 | 112 | 00:00:04 | 4 |
35 | 210793 | mekstrand | FAILED | 28 | 11:15:44 | 40544 | 00:24:08 | 1448 |
37 | 210883 | mekstrand | FAILED | 28 | 00:22:52 | 1372 | 00:00:49 | 49 |
39 | 210884 | mekstrand | CANCELLED by 1051 | 28 | 02:26:32 | 8792 | 00:05:14 | 314 |
41 | 210886 | mekstrand | FAILED | 28 | 00:12:36 | 756 | 00:00:27 | 27 |
43 | 210888 | mekstrand | FAILED | 28 | 00:07:28 | 448 | 00:00:16 | 16 |
45 | 210891 | mekstrand | FAILED | 28 | 00:06:32 | 392 | 00:00:14 | 14 |
47 | 210892 | mekstrand | FAILED | 28 | 00:07:56 | 476 | 00:00:17 | 17 |
49 | 210893 | mekstrand | FAILED | 28 | 00:03:44 | 224 | 00:00:08 | 8 |
51 | 210894 | mekstrand | FAILED | 28 | 1-06:56:24 | 111384 | 01:06:18 | 3978 |
53 | 210923 | mekstrand | CANCELLED by 1051 | 28 | 00:00:28 | 28 | 00:00:01 | 1 |
54 | 210924 | mekstrand | COMPLETED | 28 | 00:18:40 | 1120 | 00:00:40 | 40 |
55 | 210925 | mekstrand | COMPLETED | 28 | 00:52:44 | 3164 | 00:01:53 | 113 |
56 | 210926 | mekstrand | FAILED | 28 | 00:03:44 | 224 | 00:00:08 | 8 |
58 | 210927 | mekstrand | FAILED | 28 | 00:04:12 | 252 | 00:00:09 | 9 |
60 | 210928 | mekstrand | FAILED | 28 | 3-22:59:24 | 341964 | 03:23:33 | 12213 |
62 | 210930 | mekstrand | FAILED | 28 | 00:07:00 | 420 | 00:00:15 | 15 |
64 | 210931 | mekstrand | FAILED | 28 | 00:10:44 | 644 | 00:00:23 | 23 |
65 | 210975 | mekstrand | COMPLETED | 28 | 25-06:19:28 | 2182768 | 21:39:16 | 77956 |
What is the CPU time?
# Add up the per-job raw CPU seconds, then express the total as a timedelta.
cpu_seconds = int(jobs['CPUTimeRAW'].sum())
cpu = dt.timedelta(seconds=cpu_seconds)
cpu
datetime.timedelta(days=56, seconds=60284)
How many hours?
# Express the total CPU time in hours (3600 seconds per hour).
cpu.total_seconds() / 3600
1360.7455555555555
What is the wall clock time?
# Add up the per-job raw elapsed seconds, then express the total as a timedelta.
wall_seconds = int(jobs['ElapsedRaw'].sum())
wall = dt.timedelta(seconds=wall_seconds)
wall
datetime.timedelta(days=2, seconds=2153)
How many hours?
# Express the total wall-clock time in hours (3600 seconds per hour).
wall.total_seconds() / 3600
48.598055555555554