From 4046e06c3459e9c1e4d02dcfc6afa3665a5a0651 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 25 Sep 2024 15:37:49 +0100 Subject: [PATCH 1/2] run: Option to override default 1 day timeout Tuxmake spawns all of its containers with the command `sleep 1d`. This is useful as it means that if the container becomes dangling somehow, it automatically shuts down after 1 day. This timeout is sufficient for all imaginable build configs. However there are cases where we want to run the FVP for longer than this; the glibc test suite takes 3-4 days to complete. So let's add a command line option, --timeout, to allow the user to specify the number of days after which the container should auto shutdown. When not provided, we stick with tuxmake's default. Unfortunately, tuxmake has no API to set this, so we achieve it by hooking its spawn_container() method and modifying the argument. Yuk; this is fragile. Signed-off-by: Ryan Roberts --- shrinkwrap/commands/run.py | 9 ++++++++- shrinkwrap/utils/runtime.py | 24 ++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/shrinkwrap/commands/run.py b/shrinkwrap/commands/run.py index 3fa9d40..699d8de 100644 --- a/shrinkwrap/commands/run.py +++ b/shrinkwrap/commands/run.py @@ -61,6 +61,12 @@ def add_parser(parser, formatter): required=False, default=False, action='store_true', help="""If specified, logs will not be colorized.""") + cmdp.add_argument('-t', '--timeout', + required=False, default=None, type=int, + help="""Number of days after which to automatically shutdown the + container, if using a container runtime. Defaults to 1 day. + """) + return cmd_name @@ -229,7 +235,8 @@ def dispatch(args): # on the host or may execute commands in a container, depending on what # the user specified. 
with runtime.Runtime(name=args.runtime, image=config.get_image([resolveb], args), - ssh_agent_keys=args.ssh_agent_keys) as rt: + ssh_agent_keys=args.ssh_agent_keys, + timeout=args.timeout) as rt: for rtvar in resolver['run']['rtvars'].values(): if rtvar['type'] == 'path': rt.add_volume(rtvar['value']) diff --git a/shrinkwrap/utils/runtime.py b/shrinkwrap/utils/runtime.py index 52be694..160f87b 100644 --- a/shrinkwrap/utils/runtime.py +++ b/shrinkwrap/utils/runtime.py @@ -25,9 +25,11 @@ class Runtime: an abstracted runtime. Multiple runtimes are supported, identified by a `name`. The 'null' runtime simply executes the commands on the native host. The 'docker', 'docker-local', 'podman' and 'podman-local' runtimes - execute the commands in a container. + execute the commands in a container. `timeout`, if provided, is an + integer number of days after which the container will be shutdown if + still running. If None, the default timeout is used. """ - def __init__(self, *, name, image=None, ssh_agent_keys=None): + def __init__(self, *, name, image=None, ssh_agent_keys=None, timeout=None): self._rt = None self._mountpoints = set() @@ -36,6 +38,7 @@ class Runtime: is_mac = sys.platform.startswith('darwin') is_docker = name.startswith('docker') + is_container = is_docker or name.startswith('podman') # MacOS uses GIDs that overlap with already defined GIDs in the # container so we can't just bind the macos host UID/GID to the @@ -57,6 +60,23 @@ class Runtime: self._rt.set_user('shrinkwrap') self._rt.set_group('shrinkwrap') + # Tuxmake always starts the container with "sleep 1d" so that it + # automatically shuts down after 1 day if it ends up dangling. + # This is problematic for some long running FVP test cases, so + # we allow overriding it on the command line. We add a filter + # for spawn_container() which modifies the sleep argument which + # is the last element in the cmd list. 
Avoid hooking unless the + # user explicitly specified a timeout that is different from the + # expected default. + if is_container and timeout and timeout != 1: + spawn_container_orig = self._rt.spawn_container + def spawn_container_new(self, cmd): + if (cmd[-1] == '1d'): + cmd[-1] = f'{timeout}d' + return spawn_container_orig(cmd) + self._rt.spawn_container = \ + types.MethodType(spawn_container_new, self._rt) + for key in ssh_agent_keys: ssh_agent_lib.add(key) -- GitLab From 7454996c2a2652aee5bbab796a3f37a318529c61 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 25 Sep 2024 16:00:49 +0100 Subject: [PATCH 2/2] build, clean, run: Speedup container cleanup Horrible hack alert; I've observed that docker can take over 10 seconds to stop the container on at least 1 system running Ubuntu 24.04. Let's cleanup the container asynchronously in a child process. Signed-off-by: Ryan Roberts --- shrinkwrap/utils/runtime.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/shrinkwrap/utils/runtime.py b/shrinkwrap/utils/runtime.py index 160f87b..e8c5342 100644 --- a/shrinkwrap/utils/runtime.py +++ b/shrinkwrap/utils/runtime.py @@ -163,7 +163,13 @@ print(ip) assert _instance == self _instance = None if self._rt: - self._rt.cleanup() + # Horrible hack alert; I've observed that docker can + # take over 10 seconds to stop the container on at least + # 1 system running Ubuntu 24.04. Let's cleanup the + # container asynchronously in a child process. + if os.fork() == 0: + self._rt.cleanup() + exit() self._rt = None -- GitLab