diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 7693caf3b31d2..b99f0336fa4c5 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -26,13 +26,22 @@ from pandas.io.formats.excel import ExcelFormatter from pandas.io.parsers import read_csv -_seriesd = tm.getSeriesData() -_tsd = tm.getTimeSeriesData() -_frame = DataFrame(_seriesd)[:10] -_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10] -_tsframe = tm.makeTimeDataFrame()[:5] -_mixed_frame = _frame.copy() -_mixed_frame['foo'] = 'bar' + +@pytest.fixture +def frame(float_frame): + return float_frame[:10] + + +@pytest.fixture +def frame2(float_frame): + float_frame = float_frame.copy() + float_frame.columns = ['D', 'C', 'B', 'A'] + return float_frame[:10] + + +@pytest.fixture +def tsframe(): + return tm.makeTimeDataFrame()[:5] @contextlib.contextmanager @@ -49,18 +58,8 @@ def ignore_xlrd_time_clock_warning(): yield -class SharedItems: - - @pytest.fixture(autouse=True) - def setup_method(self, datapath): - self.frame = _frame.copy() - self.frame2 = _frame2.copy() - self.tsframe = _tsframe.copy() - self.mixed_frame = _mixed_frame.copy() - - @td.skip_if_no('xlrd', '1.0.0') -class ReadingTestsBase(SharedItems): +class ReadingTestsBase: # This is based on ExcelWriterBase @pytest.fixture(autouse=True, params=['xlrd', None]) @@ -1055,9 +1054,9 @@ class TestXlrdReader(ReadingTestsBase): """ @td.skip_if_no("xlwt") - def test_read_xlrd_book(self, ext): + def test_read_xlrd_book(self, ext, frame): import xlrd - df = self.frame + df = frame engine = "xlrd" sheet_name = "SheetA" @@ -1075,7 +1074,7 @@ def test_read_xlrd_book(self, ext): tm.assert_frame_equal(df, result) -class _WriterBase(SharedItems): +class _WriterBase: @pytest.fixture(autouse=True) def set_engine_and_path(self, request, merge_cells, engine, ext): @@ -1150,75 +1149,79 @@ def test_excel_sheet_by_name_raise(self, *_): with pytest.raises(xlrd.XLRDError): pd.read_excel(xl, "0") - def test_excel_writer_context_manager(self, *_): + def test_excel_writer_context_manager(self, frame, frame2, *_): with ExcelWriter(self.path) as writer: - self.frame.to_excel(writer, "Data1") - self.frame2.to_excel(writer, "Data2") + frame.to_excel(writer, "Data1") + frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: found_df = pd.read_excel(reader, "Data1", index_col=0) found_df2 = pd.read_excel(reader, "Data2", index_col=0) - tm.assert_frame_equal(found_df, self.frame) - tm.assert_frame_equal(found_df2, self.frame2) + tm.assert_frame_equal(found_df, frame) + tm.assert_frame_equal(found_df2, frame2) - def test_roundtrip(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_roundtrip(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # test roundtrip - self.frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1') recons = pd.read_excel(self.path, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1', index=False) recons = pd.read_excel(self.path, 'test1', index_col=None) - recons.index = self.frame.index - tm.assert_frame_equal(self.frame, recons) + recons.index = frame.index + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', na_rep='NA') + frame.to_excel(self.path, 'test1', na_rep='NA') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=['NA']) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 3611 - self.frame.to_excel(self.path, 'test1', na_rep='88') + frame.to_excel(self.path, 'test1', na_rep='88') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=['88']) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, 'test1', na_rep='88') + frame.to_excel(self.path, 'test1', na_rep='88') recons = pd.read_excel( self.path, 'test1', index_col=0, na_values=[88, 88.0]) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 6573 - self.frame.to_excel(self.path, 'Sheet1') + frame.to_excel(self.path, 'Sheet1') recons = pd.read_excel(self.path, index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) - self.frame.to_excel(self.path, '0') + frame.to_excel(self.path, '0') recons = pd.read_excel(self.path, index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) # GH 8825 Pandas Series should provide to_excel method - s = self.frame["A"] + s = frame["A"] s.to_excel(self.path) recons = pd.read_excel(self.path, index_col=0) tm.assert_frame_equal(s.to_frame(), recons) - def test_mixed(self, merge_cells, engine, ext): - self.mixed_frame.to_excel(self.path, 'test1') + def test_mixed(self, merge_cells, engine, ext, frame): + mixed_frame = frame.copy() + mixed_frame['foo'] = 'bar' + + mixed_frame.to_excel(self.path, 'test1') reader = ExcelFile(self.path) recons = pd.read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.mixed_frame, recons) + tm.assert_frame_equal(mixed_frame, recons) - def test_ts_frame(self, *_): - df = tm.makeTimeDataFrame()[:5] + def test_ts_frame(self, tsframe, *_): + df = tsframe df.to_excel(self.path, "test1") reader = ExcelFile(self.path) @@ -1226,33 +1229,34 @@ def test_ts_frame(self, *_): recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) - def test_basics_with_nan(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + def test_basics_with_nan(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) @pytest.mark.parametrize("np_type", [ np.int8, np.int16, np.int32, np.int64]) def test_int_types(self, merge_cells, engine, ext, np_type): # Test np.int values read come back as int # (rather than float which is Excel's format). - frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), - dtype=np_type) - frame.to_excel(self.path, "test1") + df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), + dtype=np_type) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - int_frame = frame.astype(np.int64) + int_frame = df.astype(np.int64) tm.assert_frame_equal(int_frame, recons) recons2 = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. - float_frame = frame.astype(float) + float_frame = df.astype(float) recons = pd.read_excel(self.path, "test1", convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, @@ -1263,120 +1267,123 @@ def test_int_types(self, merge_cells, engine, ext, np_type): np.float16, np.float32, np.float64]) def test_float_types(self, merge_cells, engine, ext, np_type): # Test np.float values read come back as float. - frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(self.path, "test1") + df = DataFrame(np.random.random_sample(10), dtype=np_type) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(frame, recons, check_dtype=False) + tm.assert_frame_equal(df, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, merge_cells, engine, ext, np_type): # Test np.bool values read come back as float. - frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(self.path, "test1") + df = (DataFrame([1, 0, True, False], dtype=np_type)) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) - tm.assert_frame_equal(frame, recons) + tm.assert_frame_equal(df, recons) def test_inf_roundtrip(self, *_): - frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - frame.to_excel(self.path, "test1") + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - tm.assert_frame_equal(frame, recons) + tm.assert_frame_equal(df, recons) - def test_sheets(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_sheets(self, merge_cells, engine, ext, frame, tsframe): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # Test writing to separate sheets writer = ExcelWriter(self.path) - self.frame.to_excel(writer, 'test1') - self.tsframe.to_excel(writer, 'test2') + frame.to_excel(writer, 'test1') + tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(self.path) recons = pd.read_excel(reader, 'test1', index_col=0) - tm.assert_frame_equal(self.frame, recons) + tm.assert_frame_equal(frame, recons) recons = pd.read_excel(reader, 'test2', index_col=0) - tm.assert_frame_equal(self.tsframe, recons) + tm.assert_frame_equal(tsframe, recons) assert 2 == len(reader.sheet_names) assert 'test1' == reader.sheet_names[0] assert 'test2' == reader.sheet_names[1] - def test_colaliases(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_colaliases(self, merge_cells, engine, ext, frame, frame2): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) - self.frame2.to_excel(self.path, 'test1', header=col_aliases) + frame2.to_excel(self.path, 'test1', header=col_aliases) reader = ExcelFile(self.path) rs = pd.read_excel(reader, 'test1', index_col=0) - xp = self.frame2.copy() + xp = frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) - def test_roundtrip_indexlabels(self, merge_cells, engine, ext): - self.frame['A'][:5] = nan + def test_roundtrip_indexlabels(self, merge_cells, engine, ext, frame): + frame = frame.copy() + frame['A'][:5] = nan - self.frame.to_excel(self.path, 'test1') - self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) - self.frame.to_excel(self.path, 'test1', header=False) - self.frame.to_excel(self.path, 'test1', index=False) + frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, 'test1', columns=['A', 'B']) + frame.to_excel(self.path, 'test1', header=False) + frame.to_excel(self.path, 'test1', index=False) # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, 'test1', - index_label=['test'], - merge_cells=merge_cells) + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, 'test1', + index_label=['test'], + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, - 'test1', - index_label=['test', 'dummy', 'dummy2'], - merge_cells=merge_cells) + df.index.names = ['test'] + assert df.index.names == recons.index.names + + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - assert frame.index.names == recons.index.names - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(self.path, - 'test1', - index_label='test', - merge_cells=merge_cells) + df.index.names = ['test'] + assert df.index.names == recons.index.names + + df = (DataFrame(np.random.randn(10, 2)) >= 0) + df.to_excel(self.path, + 'test1', + index_label='test', + merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel( reader, 'test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - tm.assert_frame_equal(frame, recons.astype(bool)) + df.index.names = ['test'] + tm.assert_frame_equal(df, recons.astype(bool)) - self.frame.to_excel(self.path, - 'test1', - columns=['A', 'B', 'C', 'D'], - index=False, merge_cells=merge_cells) + frame.to_excel(self.path, + 'test1', + columns=['A', 'B', 'C', 'D'], + index=False, merge_cells=merge_cells) # take 'A' and 'B' as indexes (same row as cols 'C', 'D') - df = self.frame.copy() + df = frame.copy() df = df.set_index(['A', 'B']) reader = ExcelFile(self.path) @@ -1395,17 +1402,17 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): tm.assert_frame_equal(result, df) assert result.index.name == 'foo' - def test_excel_roundtrip_datetime(self, merge_cells, *_): + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, *_): # datetime.date, not sure what to test here exactly - tsf = self.tsframe.copy() + tsf = tsframe.copy() - tsf.index = [x.date() for x in self.tsframe.index] + tsf.index = [x.date() for x in tsframe.index] tsf.to_excel(self.path, "test1", merge_cells=merge_cells) reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) - tm.assert_frame_equal(self.tsframe, recons) + tm.assert_frame_equal(tsframe, recons) def test_excel_date_datetime_format(self, merge_cells, engine, ext): # see gh-4133 @@ -1450,14 +1457,14 @@ def test_to_excel_interval_no_labels(self, *_): # see gh-19242 # # Test writing Interval without labels. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = df.copy() - frame["new"] = pd.cut(frame[0], 10) + df["new"] = pd.cut(df[0], 10) expected["new"] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) @@ -1467,15 +1474,15 @@ def test_to_excel_interval_labels(self, *_): # see gh-19242 # # Test writing Interval with labels. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - dtype=np.int64) - expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E", - "F", "G", "H", "I", "J"]) - frame["new"] = intervals + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + dtype=np.int64) + expected = df.copy() + intervals = pd.cut(df[0], 10, labels=["A", "B", "C", "D", "E", + "F", "G", "H", "I", "J"]) + df["new"] = intervals expected["new"] = pd.Series(list(intervals)) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) @@ -1485,23 +1492,23 @@ def test_to_excel_timedelta(self, *_): # see gh-19242, gh-9155 # # Test writing timedelta to xls. - frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=["A"], dtype=np.int64) - expected = frame.copy() + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), + columns=["A"], dtype=np.int64) + expected = df.copy() - frame["new"] = frame["A"].apply(lambda x: timedelta(seconds=x)) + df["new"] = df["A"].apply(lambda x: timedelta(seconds=x)) expected["new"] = expected["A"].apply( lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(self.path, "test1") + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = pd.read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_periodindex(self, merge_cells, engine, ext): - frame = self.tsframe - xp = frame.resample('M', kind='period').mean() + def test_to_excel_periodindex( + self, merge_cells, engine, ext, tsframe): + xp = tsframe.resample('M', kind='period').mean() xp.to_excel(self.path, 'sht1') @@ -1509,8 +1516,7 @@ def test_to_excel_periodindex(self, merge_cells, engine, ext): rs = pd.read_excel(reader, 'sht1', index_col=0) tm.assert_frame_equal(xp, rs.to_period('M')) - def test_to_excel_multiindex(self, merge_cells, engine, ext): - frame = self.frame + def test_to_excel_multiindex(self, merge_cells, engine, ext, frame): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) @@ -1526,21 +1532,21 @@ def test_to_excel_multiindex(self, merge_cells, engine, ext): tm.assert_frame_equal(frame, df) # GH13511 - def test_to_excel_multiindex_nan_label(self, merge_cells, engine, ext): - frame = pd.DataFrame({'A': [None, 2, 3], - 'B': [10, 20, 30], - 'C': np.random.sample(3)}) - frame = frame.set_index(['A', 'B']) - - frame.to_excel(self.path, merge_cells=merge_cells) - df = pd.read_excel(self.path, index_col=[0, 1]) - tm.assert_frame_equal(frame, df) + def test_to_excel_multiindex_nan_label( + self, merge_cells, engine, ext): + df = pd.DataFrame({'A': [None, 2, 3], + 'B': [10, 20, 30], + 'C': np.random.sample(3)}) + df = df.set_index(['A', 'B']) + + df.to_excel(self.path, merge_cells=merge_cells) + df1 = pd.read_excel(self.path, index_col=[0, 1]) + tm.assert_frame_equal(df, df1) # Test for Issue 11328. If column indices are integers, make # sure they are handled correctly for either setting of # merge_cells - def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): - frame = self.frame + def test_to_excel_multiindex_cols(self, merge_cells, engine, ext, frame): arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) @@ -1563,9 +1569,9 @@ def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df) - def test_to_excel_multiindex_dates(self, merge_cells, engine, ext): + def test_to_excel_multiindex_dates( + self, merge_cells, engine, ext, tsframe): # try multiindex with dates - tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index)