diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index da6f3f3740..bc03bd1df0 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -192,7 +192,15 @@ def __setitem__( and isinstance(key[0], bigframes.series.Series) and key[0].dtype == "boolean" ) and pd.api.types.is_scalar(value): - new_column = key[0].map({True: value, False: None}) + # When an integer scalar is assigned to a new column, the resulting dtype defaults to float. + # When it is assigned to an existing Int64 column, the dtype stays integer. + # So fill unmatched rows with pd.NA for an existing column and None for a new one to match this behavior. + new_column = key[0].map( + { + True: value, + False: pd.NA if key[1] in self._dataframe.columns else None, + } + ) try: original_column = self._dataframe[key[1]] except KeyError: diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index ba205078ed..e70764fcc0 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2918,15 +2918,23 @@ def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): ) -def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs): +@pytest.mark.parametrize( + ("col", "value"), + [ + ("string_col", "hello"), + ("int64_col", 3), + ("float64_col", 3.5), + ], +) +def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value): if pd.__version__.startswith("1."): pytest.skip("this loc overload not supported in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs bf_df = scalars_df.copy() pd_df = scalars_pandas_df.copy() - bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = "hello" - pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = "hello" + bf_df.loc[bf_df["int64_too"] == 1, col] = value + pd_df.loc[pd_df["int64_too"] == 1, col] = value pd.testing.assert_frame_equal( bf_df.to_pandas(), diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 
61bc39bb12..baa9534a0e 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -662,9 +662,9 @@ def copy(self): >>> df.loc[df["b"] == 2, "b"] = 22 >>> df - a b - 0 1 22.0 - 1 3 4.0 + a b + 0 1 22 + 1 3 4 [2 rows x 2 columns] >>> df_copy