From a438753c077347753b0c05c929e91b58fb6b5f20 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 4 Feb 2025 18:16:44 +0000 Subject: [PATCH] lisa.datautils: Expose polars_to_pandas() FEATURE Add a helper to allow converting polars objects to pandas object so they can be consumed by third parties such as bokeh. --- lisa/datautils.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/lisa/datautils.py b/lisa/datautils.py index 9dd6d163a..1b25aaab3 100644 --- a/lisa/datautils.py +++ b/lisa/datautils.py @@ -245,6 +245,10 @@ class _NoIndex: NO_INDEX = _NoIndex() +""" +Indicates there is no index to be used when passed as the index name to various +functions. +""" def _polars_index_col(df, index=None): @@ -307,7 +311,9 @@ def _df_to_pandas(df, index): if isinstance(df, pd.DataFrame): return df else: - assert isinstance(df, pl.LazyFrame) + assert isinstance(df, (pl.LazyFrame, pl.DataFrame)) + df = df.lazy() + index = _polars_index_col(df, index) schema = df.collect_schema() has_time_index = index == 'Time' and schema[index].is_temporal() @@ -387,6 +393,28 @@ def _df_to(df, fmt, index=None): raise ValueError(f'Unknown format {fmt}') +def polars_to_pandas(data, index=None): + """ + Convert the given :mod:`polars` data object to :mod:`pandas`, for cases + where an external consumer requires a pandas object. + + :param data: Polars object to convert. + :type data: polars.LazyFrame or polars.DataFrame + + :param index: Name of the column to use as index for the pandas object. If + ``None``, the first column is selected. If + :data:`~lisa.datautils.NO_INDEX`, then the pandas object will not have + any particular index (default :class:`pandas.RangeIndex`). + :type index: str or None + + .. note:: This helper will eventually be retired when the data science + ecosystem becomes more compatible with polars, e.g. by exploiting the + dataframe protocol. 
+ """ + assert isinstance(data, (pl.LazyFrame, pl.DataFrame)) + return _df_to(data, fmt='pandas', index=index) + + def _pandas_cleanup_df(df): assert isinstance(df, pd.DataFrame) -- GitLab