from subprocess import run
# Best-effort: reuse the token of a logged-in GitHub CLI (`gh auth token`).
# Any failure (CLI missing, non-zero exit because of check=True, ...) is
# deliberately swallowed and leaves `token` as None.
try:
    token = (
        run(["gh", "auth", "token"], check=True, text=True, capture_output=True)
        .stdout.strip()
        or None  # an empty answer is no token at all
    )
except Exception:
    token = None
from github import Github as GitHub
# API client built from the token obtained above (None when no token was found).
gh = GitHub(token)
# Feedstock repository whose pull-request check runs are analysed first.
repo = gh.get_repo("conda-forge/fenics-dolfinx-feedstock")
import pandas as pd
import re
# Matches "(Build <word> <config>" with an optional closing parenthesis and
# captures <config> (word characters and dots).
name_pat = re.compile(r"\(Build\s+\w+\s+([\w\.]+)\)?")


def extract_name(name: str) -> str:
    """Return the build-config part of a check-run name.

    If *name* contains "(Build <word> <config>)" the ``<config>`` part is
    returned; otherwise *name* is returned unchanged.
    """
    m = name_pat.search(name)
    return m.group(1) if m else name
def get_checks_df(repo, pr) -> pd.DataFrame:
    """Collect the "Build" check runs of a pull request into a DataFrame.

    Parameters
    ----------
    repo
        Repository object providing ``get_pull`` and ``get_commit``.
    pr
        Pull-request number, or an already-fetched pull-request object
        exposing ``head.sha``.

    Returns
    -------
    pandas.DataFrame
        One row per check run whose name contains "Build", with the columns
        ``name``, ``started``, ``completed``, ``duration`` and ``status``,
        sorted by ``started`` and re-indexed from 0.  Checks missing either
        timestamp get ``NaT`` as duration.  When there is no matching check
        the frame is empty but still has these columns.
    """
    columns = ["name", "started", "completed", "duration", "status"]
    if isinstance(pr, int):
        pr = repo.get_pull(pr)
    records = []
    for check in repo.get_commit(pr.head.sha).get_check_runs():
        # exclude meta tasks, like the overall job and Check Skip
        if "Build" not in check.name:
            continue
        started = check.started_at
        completed = check.completed_at
        if started is None or completed is None:
            # no duration can be computed without both timestamps
            duration = pd.NaT
        else:
            duration = completed - started
        records.append(
            {
                "name": extract_name(check.name),
                "started": started,
                "completed": completed,
                "duration": duration,
                "status": check.conclusion,
            }
        )
    # Passing the columns explicitly keeps sort_values("started") from raising
    # KeyError when `records` is empty.
    df = pd.DataFrame.from_records(records, columns=columns)
    return df.sort_values("started").reset_index(drop=True)
# Check-run tables for three pull requests of the feedstock, fetched in the
# order 99 (flat), 100 (split), 98 (before).
flat_df, split_df, before_df = (
    get_checks_df(repo, pr_number) for pr_number in (99, 100, 98)
)
# Total job time summed over all build jobs of PR 98.
before_df["duration"].sum()
Timedelta('1 days 23:11:17')
# Second feedstock to analyse.
petsc4py = gh.get_repo("conda-forge/petsc4py-feedstock")
# NOTE: rebinds before_df; from here on it holds the checks of petsc4py PR 111.
before_df = get_checks_df(petsc4py, 111)
# Wall-clock span from the earliest job start to the latest job completion
# (the next line is the recorded notebook output).
before_df.completed.max() - before_df.started.min()
Timedelta('0 days 00:51:03')
# Job time summed over all build jobs (followed by its recorded output).
before_df.duration.sum()
Timedelta('1 days 12:24:58')
after_df = get_checks_df(petsc4py, 112)
# artificial: the run had one retry, which skews things
# set start of last job to start of second to last job
# NOTE(review): the original comment named split_df, but this cell adjusts
# after_df - confirm that the retry was in this run.
# .copy() makes the row an independent Series, so editing it no longer
# raises SettingWithCopyWarning; it is written back explicitly below.
last_split_df = after_df.iloc[-1].copy()
next_to_last = after_df.iloc[-2]
offset = last_split_df["started"] - next_to_last["started"]
# started and completed move by the same offset, so duration stays valid
last_split_df["started"] -= offset
last_split_df["completed"] -= offset
after_df.iloc[-1] = last_split_df
/var/folders/qr/3vxfnp1x2t1fw55dr288mphc0000gn/T/ipykernel_76070/2980723383.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy last_split_df.started -= offset /var/folders/qr/3vxfnp1x2t1fw55dr288mphc0000gn/T/ipykernel_76070/2980723383.py:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy last_split_df.completed -= offset
# Tag each run with the build tool it used, then stack both runs into one
# long frame.
before_df["tool"] = "conda-build"
after_df["tool"] = "rattler-build"
df = pd.concat([after_df, before_df])
# The combined frame carries durations as float seconds; before_df and
# after_df keep their timedelta columns.
df = df.assign(duration=df["duration"].dt.total_seconds())
# Summary statistics for the two petsc4py runs.  Every expression below is
# followed by its recorded notebook output.

# Number of build jobs per run.
len(before_df)
220
len(after_df)
220
# Summed job time in hours.
before_df.duration.sum().total_seconds() / 3600
36.416111111111114
after_df.duration.sum().total_seconds() / 3600
24.595
# Shortest, median and longest single job.
before_df.duration.min(), before_df.duration.median(), before_df.duration.max()
(Timedelta('0 days 00:06:13'), Timedelta('0 days 00:09:53'), Timedelta('0 days 00:16:13'))
after_df.duration.min(), after_df.duration.median(), after_df.duration.max()
(Timedelta('0 days 00:03:42'), Timedelta('0 days 00:06:58'), Timedelta('0 days 00:10:59'))
# Wall-clock span from earliest start to latest completion.
before_df.completed.max() - before_df.started.min()
Timedelta('0 days 00:51:03')
after_df.completed.max() - after_df.started.min()
Timedelta('0 days 00:36:40')
import altair as alt
# Binned histogram of job durations (seconds, from the combined frame),
# coloured and faceted by build tool.
alt.Chart(df).mark_bar().encode(
    x=alt.X("duration:Q", bin=True),
    y=alt.Y("count()"),
    color=alt.Color("tool"),
    facet=alt.Facet("tool"),
)
import altair as alt
# Add a float "seconds" column to before_df and plot its binned histogram.
before_df["seconds"] = before_df["duration"].dt.total_seconds()
alt.Chart(before_df[["seconds"]]).mark_bar().encode(
    x=alt.X("seconds:Q", bin=True),
    y=alt.Y("count()"),
)
# Same distribution in minutes, drawn with the pandas plotting backend.
minutes = before_df["duration"].dt.total_seconds() / 60
minutes.plot(kind="hist")
<Axes: ylabel='Frequency'>
# Inspect the build-config names of the split run.
split_df["name"]
0 osx_64_mpimpichpython3.9.____cpythonscalarreal 1 osx_64_mpiopenmpipython3.11.____cpythonscalarreal 2 osx_64_mpiopenmpipython3.10.____cpythonscalarc... 3 osx_64_mpiopenmpipython3.12.____cpythonscalarc... 4 osx_64_mpiopenmpipython3.11.____cpythonscalarc... ... 95 linux_ppc64le_mpiopenmpipython3.13.____cp313sc... 96 linux_ppc64le_mpiopenmpipython3.13.____cp313sc... 97 linux_ppc64le_mpiopenmpipython3.9.____cpythons... 98 linux_ppc64le_mpiopenmpipython3.9.____cpythons... 99 osx_arm64_mpiopenmpipython3.10.____cpythonscal... Name: name, Length: 100, dtype: object
# artificial: split_df had one retry, which skews things
# set start of last job to start of second to last job
# .copy() makes the row an independent Series, so editing it does not raise
# SettingWithCopyWarning; it is written back explicitly below.
last_split_df = split_df.iloc[-1].copy()
next_to_last = split_df.iloc[-2]
offset = last_split_df["started"] - next_to_last["started"]
# started and completed move by the same offset, so duration stays valid
last_split_df["started"] -= offset
last_split_df["completed"] -= offset
split_df.iloc[-1] = last_split_df
# Wall-clock span (earliest start to latest completion) of the flat and the
# split run; each expression is followed by its recorded notebook output.
flat_df.completed.max() - flat_df.started.min()
Timedelta('0 days 01:33:20')
split_df.completed.max() - split_df.started.min()
Timedelta('0 days 01:03:48')
# Label both runs, stack them, and sum the job time per label.
split_df["label"] = "split"
flat_df["label"] = "flat"
merged = pd.concat((flat_df, split_df))
merged.groupby("label")["duration"].sum()
label flat 0 days 18:14:53 split 1 days 08:19:22 Name: duration, dtype: timedelta64[ns]
# Reusable per-label grouping; first look at the shortest job of each run.
gb = merged.groupby(by="label")
gb["duration"].min()
label flat 0 days 00:18:39 split 0 days 00:06:32 Name: duration, dtype: timedelta64[ns]
# Longest single job of each run.
gb["duration"].max()
label flat 0 days 01:33:13 split 0 days 00:41:30 Name: duration, dtype: timedelta64[ns]
# Preview: the first two "_"-separated fields of the name, re-joined
# (e.g. "linux_aarch64").
flat_df["name"].str.split("_", n=2).str[:2].str.join("_")
0 osx_64 1 osx_arm64 2 osx_64 3 osx_64 4 linux_64 5 linux_64 6 osx_arm64 7 osx_arm64 8 linux_aarch64 9 linux_aarch64 10 linux_aarch64 11 linux_64 12 linux_aarch64 13 linux_ppc64le 14 osx_64 15 linux_ppc64le 16 linux_ppc64le 17 osx_arm64 18 linux_ppc64le 19 linux_64 Name: name, dtype: object
# Store the first two "_"-separated name fields as a "plat" column on both
# frames, then sum the flat run's job time per platform.
split_df["plat"] = split_df["name"].str.split("_", n=2).str[:2].str.join("_")
flat_df["plat"] = flat_df["name"].str.split("_", n=2).str[:2].str.join("_")
flat_df.groupby("plat")["duration"].sum()
plat linux_64 0 days 03:04:59 linux_aarch64 0 days 05:05:18 linux_ppc64le 0 days 05:14:42 osx_64 0 days 03:27:21 osx_arm64 0 days 01:22:33 Name: duration, dtype: timedelta64[ns]
# Job time per platform for the split run.
split_df.groupby("plat")["duration"].sum()
plat linux_64 0 days 04:24:43 linux_aarch64 0 days 09:33:57 linux_ppc64le 0 days 10:01:46 osx_64 0 days 05:16:08 osx_arm64 0 days 03:02:48 Name: duration, dtype: timedelta64[ns]
# Scratch arithmetic; each expression is followed by its recorded output.
# NOTE(review): presumably the linux_ppc64le share of total job time in the
# flat run (~5.25 h of ~18 h) and in the split run (~10 h of ~32 h) - confirm.
5.25 / 18
0.2916666666666667
10 / 32
0.3125
# Summed job time of the flat run.
flat_df.duration.sum()
Timedelta('0 days 18:14:53')
# Number of build jobs in the split run.
len(split_df)
100
from matplotlib import pyplot as plt
# Inspect the start timestamps of the split run.
split_df["started"]
0 2025-01-09 14:18:33+00:00 1 2025-01-09 14:18:34+00:00 2 2025-01-09 14:18:34+00:00 3 2025-01-09 14:18:34+00:00 4 2025-01-09 14:18:34+00:00 ... 95 2025-01-09 14:46:07+00:00 96 2025-01-09 14:46:09+00:00 97 2025-01-09 14:49:11+00:00 98 2025-01-09 14:54:42+00:00 99 2025-01-09 14:54:42+00:00 Name: started, Length: 100, dtype: datetime64[ns, UTC]
# Start timestamps of the flat run cast to integers.
flat_df["started"].astype(int)
0 1736441406000000000 1 1736441407000000000 2 1736441408000000000 3 1736441409000000000 4 1736441410000000000 5 1736441410000000000 6 1736441410000000000 7 1736441410000000000 8 1736441410000000000 9 1736441410000000000 10 1736441411000000000 11 1736441411000000000 12 1736441411000000000 13 1736441411000000000 14 1736441412000000000 15 1736441412000000000 16 1736441412000000000 17 1736441413000000000 18 1736441413000000000 19 1736441422000000000 Name: started, dtype: int64
# Start offsets relative to the earliest job, scaled by 1e-9 (seconds, given
# the nanosecond integers shown in the previous output).
# NOTE: the trailing comma turns this expression into a 1-tuple.
(flat_df.started.astype(int).astype(float) - flat_df.started.astype(int).astype(float).min()) * 1e-9,
(0 0.0 1 1.0 2 2.0 3 3.0 4 4.0 5 4.0 6 4.0 7 4.0 8 4.0 9 4.0 10 5.0 11 5.0 12 5.0 13 5.0 14 6.0 15 6.0 16 6.0 17 7.0 18 7.0 19 16.0 Name: started, dtype: float64,)
# Timeline of the flat run: one bar per job, starting at its offset from the
# earliest job start and as tall as the job lasted (integer timestamps
# scaled by 1e-9).
plt.bar(
    x=flat_df["name"],
    height=flat_df["duration"].astype(int).astype(float) * 1e-9,
    bottom=flat_df["started"]
    .astype(int)
    .astype(float)
    .pipe(lambda ns: ns - ns.min())
    * 1e-9,
)
plt.grid(False)
# the job names are too long to be useful as tick labels
plt.tick_params(labelbottom=False)
# Timeline of the split run: one bar per job, starting at its offset from
# the earliest job start and as tall as the job lasted (integer timestamps
# scaled by 1e-9).
plt.bar(
    x=split_df["name"],
    height=split_df["duration"].astype(int).astype(float) * 1e-9,
    bottom=split_df["started"]
    .astype(int)
    .astype(float)
    .pipe(lambda ns: ns - ns.min())
    * 1e-9,
)
# horizontal grid lines only
plt.grid(True, axis="y")
plt.grid(False, axis="x")
# the job names are too long to be useful as tick labels
plt.tick_params(labelbottom=False)
# Timeline of the rattler-build run (after_df): one bar per job, starting at
# its offset from the earliest job start and as tall as the job lasted
# (integer timestamps scaled by 1e-9).
plt.bar(
    x=after_df["name"],
    height=after_df["duration"].astype(int).astype(float) * 1e-9,
    bottom=after_df["started"]
    .astype(int)
    .astype(float)
    .pipe(lambda ns: ns - ns.min())
    * 1e-9,
)
plt.grid(False)
# the job names are too long to be useful as tick labels
plt.tick_params(labelbottom=False)
# Timeline of the conda-build run (before_df): one bar per job, starting at
# its offset from the earliest job start and as tall as the job lasted
# (integer timestamps scaled by 1e-9).
plt.bar(
    x=before_df["name"],
    height=before_df["duration"].astype(int).astype(float) * 1e-9,
    bottom=before_df["started"]
    .astype(int)
    .astype(float)
    .pipe(lambda ns: ns - ns.min())
    * 1e-9,
)
plt.grid(False)
# the job names are too long to be useful as tick labels
plt.tick_params(labelbottom=False)