From e99b14dc80485cabbd2353d577a79c49ff8877bd Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 13 Jun 2025 14:44:05 +0100 Subject: [PATCH] Wait for concurrent jobs on error Exiting immediately when one of the jobs fails can leave source and build directories in a bad state. For example, interrupting the kernel link process leaves an empty file, which causes the build to fail on the next attempt. Killing git clone can leave index.lock files, which cause an error on the next attempt. When a process returns an error, mark the build as failed (by setting exec_error) and clear the queue but keep running until the concurrent processes complete. Signed-off-by: Jean-Philippe Brucker --- shrinkwrap/utils/graph.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/shrinkwrap/utils/graph.py b/shrinkwrap/utils/graph.py index bf843b9..ba54dd8 100644 --- a/shrinkwrap/utils/graph.py +++ b/shrinkwrap/utils/graph.py @@ -116,6 +116,7 @@ def execute(graph, tasks, verbose=False, colorize=True): log = logger.Logger(27) ts = graphlib.TopologicalSorter(graph) lognum = {} + exec_error = None def _pump(pm): nonlocal queue @@ -165,6 +166,7 @@ def execute(graph, tasks, verbose=False, colorize=True): nonlocal queue nonlocal active nonlocal ts + nonlocal exec_error data = proc.data[0] err = proc.data[1] @@ -179,14 +181,19 @@ def execute(graph, tasks, verbose=False, colorize=True): # to do anything further. return + if retcode: - if not verbose: - print('\n== error start ' + ('=' * 65)) - print(''.join(err)) - print('== error end ' + ('=' * 67) + '\n') - raise Exception(f"Failed to execute '{frag}'") + # Fatal error. Do not execute any new fragment, only wait for those + # that are currently running. + exec_error = { + "exception": Exception(f"Failed to execute '{frag}'"), + "error": ''.join(err), + } + queue = [] + state = 'Error' + else: + state = 'Done' if frag.final else 'Waiting...' - state = 'Done' if frag.final else 'Waiting...' 
_update_labels(labels, mask, frag.config, @@ -195,9 +202,10 @@ def execute(graph, tasks, verbose=False, colorize=True): if frag.final: mask[frag.config][frag.component] = False - ts.done(frag) active -= 1 - queue.extend(ts.get_ready()) + if not exec_error: + ts.done(frag) + queue.extend(ts.get_ready()) _pump(pm) lc.update() @@ -222,6 +230,13 @@ def execute(graph, tasks, verbose=False, colorize=True): lc.update() pm.run() + if exec_error: + if not verbose: + print('\n== error start ' + ('=' * 65)) + print(exec_error['error']) + print('== error end ' + ('=' * 67) + '\n') + raise exec_error['exception'] + # Mark all components as done. This should be a nop since the script # should have indicated if it was the last step for a given # config/component and we would have already set it to done. But this -- GitLab