From e9fd189b939699ee69141e089ac20f86d39c4afb Mon Sep 17 00:00:00 2001 From: josham Date: Wed, 31 Oct 2018 16:08:36 -0400 Subject: [PATCH 1/2] BUG: fix HDFStore.append with all empty strings error (GH12242) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/pytables.py | 2 +- pandas/tests/io/test_pytables.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index de111072bef02..6d5a88d09b0e1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1216,6 +1216,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and min_itemsize < 8 (:issue:`12242`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 56b63fddd96ad..41e14e482d061 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4637,7 +4637,7 @@ def _convert_string_array(data, encoding, errors, itemsize=None): # create the sized dtype if itemsize is None: ensured = ensure_object(data.ravel()) - itemsize = libwriters.max_len_string_array(ensured) + itemsize = max(1, libwriters.max_len_string_array(ensured)) data = np.asarray(data, dtype="S%d" % itemsize) return data diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 337eb74b3b51a..7717dc0d80358 100644 --- 
a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1441,6 +1441,15 @@ def check_col(key, name, size): result = store.select('df') tm.assert_frame_equal(result, df) + # with all empty strings (GH 12242) + _maybe_remove(store, 'df') + df1 = DataFrame({'x': list('abcdef')}) + df2 = DataFrame({'x': ['']}) + store.append('df', df1, min_itemsize={'x': 1}) + store.append('df', df2, min_itemsize={'x': 1}) + tm.assert_frame_equal(store.select('df'), + pd.concat([df1, df2])) + with ensure_clean_store(self.path) as store: def check_col(key, name, size): From b68f1b7329ca7f3a13227b2c350fb5800402a80d Mon Sep 17 00:00:00 2001 From: josham Date: Wed, 31 Oct 2018 20:53:07 -0400 Subject: [PATCH 2/2] updates for PR comments --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/tests/io/test_pytables.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 6d5a88d09b0e1..4f3548193bed6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1216,7 +1216,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. 
(:issue:`21552`) - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) -- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and min_itemsize < 8 (:issue:`12242`) +- Bug in :meth:`HDFStore.append` raising an error when appending a :class:`DataFrame` whose string column contains only empty strings and ``min_itemsize`` < 8 (:issue:`12242`) Plotting ^^^^^^^^ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 7717dc0d80358..b6cf660cf171e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1441,15 +1441,6 @@ def check_col(key, name, size): result = store.select('df') tm.assert_frame_equal(result, df) - # with all empty strings (GH 12242) - _maybe_remove(store, 'df') - df1 = DataFrame({'x': list('abcdef')}) - df2 = DataFrame({'x': ['']}) - store.append('df', df1, min_itemsize={'x': 1}) - store.append('df', df2, min_itemsize={'x': 1}) - tm.assert_frame_equal(store.select('df'), - pd.concat([df1, df2])) - with ensure_clean_store(self.path) as store: def check_col(key, name, size): @@ -1491,6 +1482,16 @@ def check_col(key, name, size): pytest.raises(ValueError, store.append, 'df', df, min_itemsize={'foo': 20, 'foobar': 20}) + def test_append_with_empty_string(self): + + with ensure_clean_store(self.path) as store: + + # with all empty strings (GH 12242) + df = DataFrame({'x': ['a', 'b', 'c', 'd', 'e', 'f', '']}) + store.append('df', df[:-1], min_itemsize={'x': 1}) + store.append('df', df[-1:], min_itemsize={'x': 1}) + tm.assert_frame_equal(store.select('df'), df) + def test_to_hdf_with_min_itemsize(self): with ensure_clean_path(self.path) as path: