Skip to content

Commit 0db5ab2

Browse files
committed
Fixed issue GH7777, HDFStore.read_column did not preserve UTC tzinfo
1 parent 43ba74e commit 0db5ab2

File tree

3 files changed

+51
-7
lines changed

3 files changed

+51
-7
lines changed

doc/source/v0.15.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ Bug Fixes
196196
- Bug in Series 0-division with a float and integer operand dtypes (:issue:`7785`)
197197
- Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`)
198198
- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`)
199+
- Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a DatetimeIndex (:issue:`7777`)
199200

200201

201202
- Bug where pickles containing ``DateOffset`` may raise ``AttributeError`` when the ``normalize`` attribute is referred to internally (:issue:`7748`)

pandas/io/pytables.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ def _ensure_encoding(encoding):
6262
encoding = _default_encoding
6363
return encoding
6464

65+
def _set_tz(values, tz, preserve_UTC=False):
66+
""" set the timezone if values are an Index """
67+
# Only acts when a timezone was given AND values is an Index; any other
# input falls through to the final return unchanged.
if tz is not None and isinstance(values, Index):
68+
# normalise the stored tz through _ensure_decoded before using it
tz = _ensure_decoded(tz)
69+
# values without a tz are treated as stored in UTC: localize to UTC,
# then convert to the requested timezone
if values.tz is None:
70+
values = values.tz_localize('UTC').tz_convert(tz)
71+
# NOTE(review): indentation is not visible in this view; this check
# presumably sits inside the outer `tz is not None` branch -- confirm.
if preserve_UTC:
72+
# when the requested tz is UTC, hand back a plain list of the elements
# instead of the Index -- presumably so the UTC tzinfo survives the
# Series(...) construction in read_column; confirm against the caller
if tslib.get_timezone(tz) == 'UTC':
73+
values = list(values)
74+
75+
return values
76+
6577

6678
Term = Expr
6779

@@ -1464,11 +1476,7 @@ def convert(self, values, nan_rep, encoding):
14641476
kwargs['freq'] = None
14651477
self.values = Index(values, **kwargs)
14661478

1467-
# set the timezone if indicated
1468-
# we stored in utc, so reverse to local timezone
1469-
if self.tz is not None:
1470-
self.values = self.values.tz_localize(
1471-
'UTC').tz_convert(_ensure_decoded(self.tz))
1479+
self.values = _set_tz(self.values, self.tz)
14721480

14731481
return self
14741482

@@ -3443,8 +3451,11 @@ def read_column(self, column, where=None, start=None, stop=None, **kwargs):
34433451
# column must be an indexable or a data column
34443452
c = getattr(self.table.cols, column)
34453453
a.set_info(self.info)
3446-
return Series(a.convert(c[start:stop], nan_rep=self.nan_rep,
3447-
encoding=self.encoding).take_data())
3454+
return Series(_set_tz(a.convert(c[start:stop],
3455+
nan_rep=self.nan_rep,
3456+
encoding=self.encoding
3457+
).take_data(),
3458+
a.tz, True))
34483459

34493460
raise KeyError("column [%s] not found in the table" % column)
34503461

pandas/io/tests/test_pytables.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4299,6 +4299,38 @@ def test_tseries_indices_frame(self):
42994299
self.assertEqual(type(result.index), type(df.index))
43004300
self.assertEqual(result.index.freq, df.index.freq)
43014301

4302+
def test_tseries_select_index_column(self):
4303+
# GH7777
4304+
# selecting a UTC datetimeindex column did
4305+
# not preserve UTC tzinfo set before storing
4306+
#
# Each case below appends a frame indexed by `rng` to a clean store,
# reads the 'index' column back with select_column, and asserts that the
# tz of a DatetimeIndex built from the result equals rng.tz.
4307+
# check that no tz still works
4308+
rng = date_range('1/1/2000', '1/30/2000')
4309+
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
4310+
4311+
with ensure_clean_store(self.path) as store:
4312+
store.append('frame', frame)
4313+
result = store.select_column('frame', 'index')
4314+
self.assertEqual(rng.tz, DatetimeIndex(result.values).tz)
4315+
4316+
# check utc
# (the case reported in GH7777)
4317+
rng = date_range('1/1/2000', '1/30/2000', tz='UTC')
4318+
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
4319+
4320+
with ensure_clean_store(self.path) as store:
4321+
store.append('frame', frame)
4322+
result = store.select_column('frame', 'index')
4323+
self.assertEqual(rng.tz, DatetimeIndex(result.values).tz)
4324+
4325+
# double check non-utc
# (same round trip with a US/Eastern range)
4326+
rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
4327+
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
4328+
4329+
with ensure_clean_store(self.path) as store:
4330+
store.append('frame', frame)
4331+
result = store.select_column('frame', 'index')
4332+
self.assertEqual(rng.tz, DatetimeIndex(result.values).tz)
4333+
43024334
def test_unicode_index(self):
43034335

43044336
unicode_values = [u('\u03c3'), u('\u03c3\u03c3')]

0 commit comments

Comments
 (0)