From 1e5379437b02182a93f286d015a4b8f50323acaa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 2 Apr 2024 19:36:21 +0000 Subject: [PATCH 1/3] fix: include all names in MultiIndex repr docs: include Index in table-of-contents --- bigframes/core/blocks.py | 4 +- bigframes/core/indexes/__init__.py | 2 +- bigframes/core/indexes/{index.py => base.py} | 15 +++---- docs/reference/bigframes.pandas/indexing.rst | 2 +- docs/templates/toc.yml | 2 + scripts/publish_api_coverage.py | 4 +- tests/system/small/test_index.py | 39 +++++++++++++++++++ tests/system/small/test_session.py | 2 +- .../pandas/core/indexes/base.py | 12 ++++++ 9 files changed, 67 insertions(+), 15 deletions(-) rename bigframes/core/indexes/{index.py => base.py} (98%) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 11899eef11..52ef77b51e 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1314,8 +1314,8 @@ def retrieve_repr_request_results( head_block = self computed_df, query_job = head_block.to_pandas() formatted_df = computed_df.set_axis(self.column_labels, axis=1) - # we reset the axis and substitute the bf index name for the default - formatted_df.index.name = self.index.name + # we reset the axis and substitute the bf index name(s) for the default + formatted_df.index.names = self.index.names return formatted_df, count, query_job def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]: diff --git a/bigframes/core/indexes/__init__.py b/bigframes/core/indexes/__init__.py index 6419d0985a..ae6011ffa5 100644 --- a/bigframes/core/indexes/__init__.py +++ b/bigframes/core/indexes/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from bigframes.core.indexes.index import Index +from bigframes.core.indexes.base import Index __all__ = [ "Index", diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/base.py similarity index 98% rename from bigframes/core/indexes/index.py rename to bigframes/core/indexes/base.py index c818b68711..daa52a02b9 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/base.py @@ -88,7 +88,12 @@ def from_frame( @property def name(self) -> blocks.Label: - return self.names[0] + names = self.names + if len(names) == 1: + return self.names[0] + else: + # pandas returns None for MultiIndex.name. + return None @name.setter def name(self, value: blocks.Label): @@ -460,14 +465,6 @@ def __init__( super().__init__(series_or_dataframe._block) self._whole_frame = series_or_dataframe - @property - def name(self) -> blocks.Label: - return self.names[0] - - @name.setter - def name(self, value: blocks.Label): - self.names = [value] - @property def names(self) -> typing.Sequence[blocks.Label]: """Returns the names of the Index.""" diff --git a/docs/reference/bigframes.pandas/indexing.rst b/docs/reference/bigframes.pandas/indexing.rst index 8f7f194740..2cc1acfabf 100644 --- a/docs/reference/bigframes.pandas/indexing.rst +++ b/docs/reference/bigframes.pandas/indexing.rst @@ -3,7 +3,7 @@ Index objects ============= -.. autoclass:: bigframes.core.indexes.index.Index +.. autoclass:: bigframes.core.indexes.base.Index :members: :inherited-members: :undoc-members: diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 1898655535..3c2c688d78 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -40,6 +40,8 @@ - name: SeriesGroupBy uid: bigframes.core.groupby.SeriesGroupBy name: Groupby + - name: Index + uid: bigframes.core.indexes.base.Index - items: - name: AtDataFrameIndexer uid: bigframes.core.indexers.AtDataFrameIndexer diff --git a/scripts/publish_api_coverage.py b/scripts/publish_api_coverage.py index 4a35ade9ef..25fbfbf988 100644 --- a/scripts/publish_api_coverage.py +++ b/scripts/publish_api_coverage.py @@ -44,6 +44,9 @@ "dataframegroupby": ( "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.groupby.DataFrameGroupBy#bigframes_core_groupby_DataFrameGroupBy_" ), + "index": ( + "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.indexes.base.Index#bigframes_core_indexes_base_Index_" + ), "series": ( "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.series.Series#bigframes_series_Series_" ), @@ -59,7 +62,6 @@ "window": ( "https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.core.window.Window#bigframes_core_window_Window_" ), - # TODO: Index not documented. } diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index 1f39ba25fe..c419dc4907 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -370,3 +370,42 @@ def test_index_isin(scalars_df_index, scalars_pandas_df_index): bf_series, check_names=False, ) + + +def test_multiindex_name_is_none(session): + df = pd.DataFrame( + { + "A": [0, 0, 0, 1, 1, 1], + "B": ["x", "y", "z", "x", "y", "z"], + "C": [123, 345, 789, -123, -345, -789], + "D": ["a", "b", "c", "d", "e", "f"], + }, + ) + index = session.read_pandas(df).set_index(["A", "B"]).index + assert index.name is None + + +def test_multiindex_names_not_none(session): + df = pd.DataFrame( + { + "A": [0, 0, 0, 1, 1, 1], + "B": ["x", "y", "z", "x", "y", "z"], + "C": [123, 345, 789, -123, -345, -789], + "D": ["a", "b", "c", "d", "e", "f"], + }, + ) + index = session.read_pandas(df).set_index(["A", "B"]).index + assert tuple(index.names) == ("A", "B") + + +def test_multiindex_repr_includes_all_names(session): + df = pd.DataFrame( + { + "A": [0, 0, 0, 1, 1, 1], + "B": ["x", "y", "z", "x", "y", "z"], + "C": [123, 345, 789, -123, -345, -789], + "D": ["a", "b", "c", "d", "e", "f"], + }, + ) + index = session.read_pandas(df).set_index(["A", "B"]).index + assert "names=['A', 'B']" in repr(index) diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index d0c20f3839..28a3f03860 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -27,7 +27,7 @@ import pytest import bigframes -import bigframes.core.indexes.index +import bigframes.core.indexes.base import bigframes.dataframe import bigframes.dtypes import bigframes.ml.linear_model diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 3ad8729271..7f5761e45b 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -8,6 +8,18 @@ class Index: """Immutable sequence used for indexing and alignment. The basic object storing axis labels for all objects. + + Args: + data (pandas.Series | pandas.Index | bigframes.series.Series | bigframes.core.indexes.base.Index): + Labels (1-dimensional). + dtype: + Data type for the output Index. If not specified, this will be + inferred from `data`. + name: + Name to be stored in the index. + session (Optional[bigframes.session.Session]): + BigQuery DataFrames session where queries are run. If not set, + a default session is used. """ @property From a8cc934e9db1d37101f51071995ce24f4f17404a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 2 Apr 2024 19:53:59 +0000 Subject: [PATCH 2/3] address type error --- bigframes/core/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 52ef77b51e..04a98ac9a4 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1315,7 +1315,7 @@ def retrieve_repr_request_results( computed_df, query_job = head_block.to_pandas() formatted_df = computed_df.set_axis(self.column_labels, axis=1) # we reset the axis and substitute the bf index name(s) for the default - formatted_df.index.names = self.index.names + formatted_df.index.names = self.index.names # type: ignore return formatted_df, count, query_job def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]: From 340c903275a8938b6c3521b1016a45c46b2ada6c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 2 Apr 2024 21:57:58 +0000 Subject: [PATCH 3/3] fix doctest --- third_party/bigframes_vendored/pandas/core/frame.py | 2 +- third_party/bigframes_vendored/pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 50cce1eeab..3ae5b0db2a 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4797,7 +4797,7 @@ def index(self): MultiIndex([( 'Alice', 'Seattle'), ( 'Bob', 'New York'), ('Aritra', 'Kona')], - name='Name') + names=['Name', 'Location']) >>> df1.index.values array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')], dtype=object) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 0aebd0660f..89b39cf8a0 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -87,7 +87,7 @@ def index(self): MultiIndex([( 'Alice', 'Seattle'), ( 'Bob', 'New York'), ('Aritra', 'Kona')], - name='Name') + names=['Name', 'Location']) >>> s1.index.values array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')], dtype=object)