diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9a99dbad30708..4d4dc6f705027 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -951,6 +951,10 @@ Indexing - Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`) - +Copy on write +^^^^^^^^^^^^^ +- Bug in the :class:`DataFrame` constructor not tracking references when called with another :class:`DataFrame` (:issue:`50499`) + Missing ^^^^^^^ - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e1d5af1e8a4a..9990904ee1fb3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -205,6 +205,7 @@ to_arrays, treat_as_nested, ) +from pandas.core.internals.managers import using_copy_on_write from pandas.core.reshape.melt import melt from pandas.core.series import Series from pandas.core.shared_docs import _shared_docs @@ -637,6 +638,8 @@ def __init__( if isinstance(data, DataFrame): data = data._mgr + if not copy and using_copy_on_write(): + data = data.copy(deep=False) if isinstance(data, (BlockManager, ArrayManager)): # first check if a Manager is passed without any other arguments diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8980fe0249193..7724fb286647c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5257,7 +5257,12 @@ def _reindex_with_indexers( # If we've made a copy once, no need to make another one copy = False - if (copy or copy is None) and new_data is self._mgr: + if ( + (copy or copy is None) + and new_data is self._mgr + or not copy + and using_copy_on_write() + ): new_data = new_data.copy(deep=copy) return self._constructor(new_data).__finalize__(self) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py new file mode 100644 
index 0000000000000..bc4c9d91aee18 --- /dev/null +++ b/pandas/tests/copy_view/test_constructors.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm +from pandas.tests.copy_view.util import get_array + + +@pytest.mark.parametrize("columns", [None, ["a"]]) +def test_dataframe_constructor_mgr(using_copy_on_write, columns): + # GH#50499: DataFrame(df) must share data with ``df`` yet leave it untouched + # under Copy-on-Write; ``columns`` exercises the call with an extra argument. + df = DataFrame({"a": [1, 2, 3]}) + df_orig = df.copy() + + new_df = DataFrame(df, columns=columns) + + assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) + new_df.iloc[0] = 100 + + if using_copy_on_write: + assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) + tm.assert_frame_equal(df, df_orig) + else: + assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) + tm.assert_frame_equal(df, new_df) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 88963dcc4b0f7..30b7ed963e792 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -11,6 +11,7 @@ date_range, ) import pandas._testing as tm +from pandas.core.internals.managers import using_copy_on_write class TestDataFrameAlign: @@ -45,7 +46,10 @@ def test_align_float(self, float_frame): assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - assert af._mgr is float_frame._mgr + if using_copy_on_write(): + assert af._mgr is not float_frame._mgr + else: + assert af._mgr is float_frame._mgr # axis = 0 other = float_frame.iloc[:-5, :3] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 0f85cb4515e13..91f9ad3244f20 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -76,7 +76,9 @@ class TestiLocBaseIndependent: ], ) @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): + def test_iloc_setitem_fullcol_categorical( 
+ self, indexer, key, using_array_manager, using_copy_on_write + ): frame = DataFrame({0: range(3)}, dtype=object) cat = Categorical(["alpha", "beta", "gamma"]) @@ -90,7 +92,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage indexer(df)[key, 0] = cat expected = DataFrame({0: cat}).astype(object) - if not using_array_manager: + if not using_array_manager and not using_copy_on_write: assert np.shares_memory(df[0].values, orig_vals) tm.assert_frame_equal(df, expected)