diff --git a/lisa/datautils.py b/lisa/datautils.py index 9dd6d163a867b60c131c21b7d753c0ae9e5f1c8f..1b25aaab327984272d54df73c506169ac92600ae 100644 --- a/lisa/datautils.py +++ b/lisa/datautils.py @@ -245,6 +245,10 @@ class _NoIndex: NO_INDEX = _NoIndex() +""" +Indicates there is no index to be used when passed as the index name to various +functions. +""" def _polars_index_col(df, index=None): @@ -307,7 +311,9 @@ def _df_to_pandas(df, index): if isinstance(df, pd.DataFrame): return df else: - assert isinstance(df, pl.LazyFrame) + assert isinstance(df, (pl.LazyFrame, pl.DataFrame)) + df = df.lazy() + index = _polars_index_col(df, index) schema = df.collect_schema() has_time_index = index == 'Time' and schema[index].is_temporal() @@ -387,6 +393,28 @@ def _df_to(df, fmt, index=None): raise ValueError(f'Unknown format {fmt}') +def polars_to_pandas(data, index=None): + """ + Convert the given :mod:`polars` data object to :mod:`pandas`, for cases + where an external consumer requires a pandas object. + + :param data: Polars object to convert. + :type data: polars.LazyFrame or polars.DataFrame + + :param index: Name of the column to use as index for the pandas object. If + ``None``, the first column is selected. If + :data:`~lisa.datautils.NO_INDEX`, then the pandas object will not have + any particular index (default :class:`pandas.RangeIndex`). + :type index: str or None + + .. note:: This helper will eventually be retired when the data science + ecosystem becomes more compatible with polars, e.g. by exploiting the + dataframe protocol. + """ + assert isinstance(data, (pl.LazyFrame, pl.DataFrame)) + return _df_to(data, fmt='pandas', index=index) + + def _pandas_cleanup_df(df): assert isinstance(df, pd.DataFrame)