1616
1717from __future__ import annotations
1818
19+ import dataclasses
1920from typing import cast
2021
2122import pandas as pd
2223import pyarrow as pa
2324
2425
@dataclasses.dataclass(frozen=True)
class FlattenResult:
    """The result of flattening a DataFrame.

    Instances are frozen: a field cannot be rebound after construction.
    Freezing does not make the held containers read-only; the dict, list,
    set and DataFrame referenced by the fields remain ordinary mutable
    objects.
    """

    dataframe: pd.DataFrame
    """The flattened DataFrame."""

    row_groups: dict[str, list[int]]
    """
    A mapping from original row index to the new row indices that were created
    from it.
    """

    cleared_on_continuation: list[str]
    """A list of column names that should be cleared on continuation rows."""

    nested_columns: set[str]
    """A set of column names that were created from nested data."""
45+
2546def flatten_nested_data (
2647 dataframe : pd .DataFrame ,
27- ) -> tuple [ pd . DataFrame , dict [ str , list [ int ]], list [ str ], set [ str ]] :
48+ ) -> FlattenResult :
2849 """Flatten nested STRUCT and ARRAY columns for display."""
2950 if dataframe .empty :
30- return dataframe .copy (), {}, [], set ()
51+ return FlattenResult (
52+ dataframe = dataframe .copy (),
53+ row_groups = {},
54+ cleared_on_continuation = [],
55+ nested_columns = set (),
56+ )
3157
3258 result_df = dataframe .copy ()
3359
@@ -49,19 +75,19 @@ def flatten_nested_data(
4975
5076 # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
5177 if not array_columns :
52- return (
53- result_df ,
54- {},
55- clear_on_continuation_cols ,
56- nested_originated_columns ,
78+ return FlattenResult (
79+ dataframe = result_df ,
80+ row_groups = {},
81+ cleared_on_continuation = clear_on_continuation_cols ,
82+ nested_columns = nested_originated_columns ,
5783 )
5884
5985 result_df , array_row_groups = _explode_array_columns (result_df , array_columns )
60- return (
61- result_df ,
62- array_row_groups ,
63- clear_on_continuation_cols ,
64- nested_originated_columns ,
86+ return FlattenResult (
87+ dataframe = result_df ,
88+ row_groups = array_row_groups ,
89+ cleared_on_continuation = clear_on_continuation_cols ,
90+ nested_columns = nested_originated_columns ,
6591 )
6692
6793
0 commit comments