from functools import partial
import ipyparallel as ipp
def crash():
    """Deliberately crash the calling process.

    Loads the current process's C symbols via ``ctypes`` and calls
    ``time`` with ``-1`` as its argument, i.e. an invalid pointer.
    The import is kept local so the function is self-contained when
    it is shipped to a remote engine.
    """
    from ctypes import CDLL

    # CDLL(None) gives a handle onto the already-loaded process image.
    libc = CDLL(None)
    # time() is handed a bogus address; this is the intended crash.
    libc.time(-1)
def notice_mpiexec_stopped(stop_info, launcher):
    """Report that mpiexec stopped and echo mpiexec's own diagnostics.

    Parameters
    ----------
    stop_info : dict
        Stop notification data; must provide the ``pid`` and
        ``exit_code`` keys used in the summary line (extra keys are
        ignored by the format call).
    launcher
        Object whose ``get_output()`` returns the launcher's captured
        output as a single string.
    """
    # Imported here because the module's top-level imports do not
    # provide ``sys``; without it the write below raises NameError.
    import sys

    output = launcher.get_output()
    print("mpiexec stopped: pid={pid}, exit_code={exit_code}".format(**stop_info))
    # extract mpiexec output, which starts with =
    # could also dump the full thing, which is a lot,
    # or tail a number of lines
    for line in output.splitlines(True):  # True keeps the line endings
        if line.startswith("="):
            sys.stdout.write(line)
# Start a 4-engine cluster using the "mpi" engine launcher; the context
# manager yields a client (rc) and tears the cluster down on exit.
with ipp.Cluster(engine_launcher_class="mpi", n=4) as rc:
    cluster = rc.cluster

    # Attach a stop callback to every engine launcher, binding the
    # launcher itself so the callback can read its captured output.
    for launcher in cluster.engines.values():
        stop_callback = partial(notice_mpiexec_stopped, launcher=launcher)
        launcher.on_stop(stop_callback)

    # Submit the crashing task to the last engine only.
    last_engine = rc[-1]
    ar = last_engine.apply_async(crash)

    # Waiting on the result is expected to fail because the engine dies.
    try:
        ar.get()
    except Exception as e:
        print(f"error in task (expected): {e}")
Using existing profile dir: '/Users/minrk/.ipython/profile_default' waiting False running Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'> waiting False running
0%| | 0/4 [00:00<?, ?engine/s]
error in task (expected): EngineError(Engine 3 died while running task 'e9d2eb18-6099acff559d987377320c1d_1') Stopping engine(s): 1630335261 in stop <ipyparallel.cluster.launcher.MPIEngineSetLauncher object at 0x10b098af0> engine set stopped 1630335261: {'exit_code': 11, 'pid': 54331, 'identifier': 'ipengine-1630335261-y7tt-1630335261-54070'} mpiexec stopped: pid=54331, exit_code=11 =================================================================================== = BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES = PID 54333 RUNNING AT touchy = EXIT CODE: 9 = CLEANING UP REMAINING PROCESSES = YOU CAN IGNORE THE BELOW CLEANUP MESSAGES =================================================================================== Stopping controller in stop <ipyparallel.cluster.launcher.LocalControllerLauncher object at 0x10b098b80> Controller stopped: {'exit_code': 0, 'pid': 54330, 'identifier': 'ipcontroller-1630335261-y7tt-54070'}