From 921e55cf60d29c09748b7a93eba7f78b28f3c1c7 Mon Sep 17 00:00:00 2001
From: Douglas Raillard <douglas.raillard@arm.com>
Date: Wed, 18 Jun 2025 20:42:03 +0100
Subject: [PATCH] lisa: Fix pl.LazyFrame.rename() uses

FIX

The new column names passed to rename() must not already exist in the
LazyFrame, otherwise a duplicate column error will be raised later on.

Fix that issue by dropping, prior to the rename, any existing column
whose name is about to be created by the rename operation.
---
 lisa/analysis/latency.py | 14 ++++++++++---
 lisa/analysis/tasks.py   | 44 +++++++++++++++++++++++++++-------------
 lisa/trace.py            | 26 +++++++++++++++++-------
 3 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/lisa/analysis/latency.py b/lisa/analysis/latency.py
index 69a32f800..20296cf1b 100644
--- a/lisa/analysis/latency.py
+++ b/lisa/analysis/latency.py
@@ -56,7 +56,7 @@ class LatencyAnalysis(TraceAnalysisBase):
             (pl.col('next_state') == next_state)
         )
         df = df.select(['Time', 'delta', 'cpu', 'target_cpu'])
-        df = df.rename({'delta': name})
+        df = df.drop(name, strict=False).rename({'delta': name})
         return df
 
     @TraceAnalysisBase.df_method
@@ -290,11 +290,19 @@ class LatencyAnalysis(TraceAnalysisBase):
 
         if wakeup:
             wkp_df = ana.df_latency_wakeup(task)
-            wkp_df = wkp_df.rename({'wakeup_latency': 'latency'})
+            wkp_df = (
+                wkp_df
+                .drop('latency', strict=False)
+                .rename({'wakeup_latency': 'latency'})
+            )
 
         if preempt:
             prt_df = ana.df_latency_preemption(task)
-            prt_df = prt_df.rename({'preempt_latency': 'latency'})
+            prt_df = (
+                prt_df
+                .drop('latency', strict=False)
+                .rename({'preempt_latency': 'latency'})
+            )
 
         if wakeup and preempt:
             return pl.concat([wkp_df, prt_df], how='diagonal_relaxed')
diff --git a/lisa/analysis/tasks.py b/lisa/analysis/tasks.py
index f6481763f..6d6a11936 100644
--- a/lisa/analysis/tasks.py
+++ b/lisa/analysis/tasks.py
@@ -696,26 +696,38 @@ class TasksAnalysis(TraceAnalysisBase):
         prev_sw_df = sw_df.select(["Time", "__cpu", "prev_pid", "prev_state", "prev_comm"])
         next_sw_df = sw_df.select(["Time", "__cpu", "next_pid", "next_comm"])
 
-        prev_sw_df = prev_sw_df.rename({
-            "prev_pid": "pid",
-            "prev_state": "curr_state",
-            "prev_comm": "comm",
-        })
+        prev_sw_df = (
+            prev_sw_df
+            .drop('pid', 'curr_state', 'comm', strict=False)
+            .rename({
+                "prev_pid": "pid",
+                "prev_state": "curr_state",
+                "prev_comm": "comm",
+            })
+        )
 
         next_sw_df = next_sw_df.with_columns(
             curr_state=state(TaskState.TASK_ACTIVE)
         )
-        next_sw_df = next_sw_df.rename({
-            'next_pid': 'pid',
-            'next_comm': 'comm'
-        })
+        next_sw_df = (
+            next_sw_df
+            .drop('pid', 'comm', strict=False)
+            .rename({
+                'next_pid': 'pid',
+                'next_comm': 'comm'
+            })
+        )
         all_sw_df = pl.concat([prev_sw_df, next_sw_df], how='diagonal_relaxed')
 
         if add_rename:
-            rename_df = trace.df_event('task_rename').rename({
-                'oldcomm': 'comm',
-                '__pid': 'pid',
-            })
+            rename_df = (
+                trace.df_event('task_rename')
+                .drop('pid', 'comm', strict=False)
+                .rename({
+                    'oldcomm': 'comm',
+                    '__pid': 'pid',
+                })
+            )
             rename_df = rename_df.select(['Time', 'pid', 'comm'])
             rename_df = rename_df.with_columns(
                 curr_state=state(TaskState.TASK_RENAMED),
@@ -729,7 +741,11 @@ class TasksAnalysis(TraceAnalysisBase):
 
         df = pl.concat([all_sw_df, wk_df], how='diagonal_relaxed')
         df = df.sort('Time')
-        df = df.rename({'__cpu': 'cpu'})
+        df = (
+            df
+            .drop('cpu', strict=False)
+            .rename({'__cpu': 'cpu'})
+        )
 
         # Restrict the set of data we will process to a given set of tasks
         if tasks is not None:
diff --git a/lisa/trace.py b/lisa/trace.py
index 4ce6211df..7e6cbb5a5 100644
--- a/lisa/trace.py
+++ b/lisa/trace.py
@@ -1082,11 +1082,15 @@ class TraceDumpTraceParser(TraceParserBase):
                     )
 
     def _fixup_df(self, event, df, pid_comms):
-        df = df.rename({
-            'common_ts': 'Time',
-            'common_pid': '__pid',
-            'common_cpu': '__cpu',
-        })
+        df = (
+            df
+            .drop('Time', '__pid', '__cpu', strict=False)
+            .rename({
+                'common_ts': 'Time',
+                'common_pid': '__pid',
+                'common_cpu': '__cpu',
+            })
+        )
         df = df.with_columns([
             pl.col('Time').cast(pl.Duration("ns")),
             pl.col('__pid').replace_strict(pid_comms, default=None).alias('__comm')
@@ -1998,7 +2002,11 @@ class TxtTraceParserBase(TraceParserBase):
             pl.col(name).cast(dtype)
             for name, dtype in infer_schema(df).items()
         )
-        df = df.rename({'__timestamp': 'Time'})
+        df = (
+            df
+            .drop('Time', strict=False)
+            .rename({'__timestamp': 'Time'})
+        )
 
         schema = df.collect_schema()
         if event == 'sched_switch':
@@ -2032,7 +2040,11 @@ class TxtTraceParserBase(TraceParserBase):
             # In-kernel name is "cpumask", "cpus" is just an artifact of the pretty
             # printing format string of ftrace, that happens to be used by a
             # specific parser.
-            df = df.rename({'cpus': 'cpumask'})
+            df = (
+                df
+                .drop('cpumask', strict=False)
+                .rename({'cpus': 'cpumask'})
+            )
 
             if event == 'thermal_power_cpu_get_power':
                 if isinstance(schema['load'], (pl.String, pl.Binary, pl.Categorical)):
-- 
GitLab