From 5d63966752ccd659c036aea4b7fcbfaeb92bd791 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Wed, 24 Apr 2024 16:35:35 +0100 Subject: [PATCH] lisa.trace: Fix parquet loading from cache FIX Ensure that we check the backing file is indeed parquet, rather than waiting for the first df.collect() to explode. --- lisa/trace.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lisa/trace.py b/lisa/trace.py index 90828baa4..e3b44d5f7 100644 --- a/lisa/trace.py +++ b/lisa/trace.py @@ -4242,6 +4242,10 @@ class _TraceCache(Loggable): pass raise else: + # Ensure we actually trigger a file read, in case we are trying + # to interpret as parquet something that is not parquet + df.clear().collect() + df = _LazyFrameOnDelete.attach_file_cleanup(df, [hardlink_base]) return df -- GitLab