Skip to content

Commit 12b6012

Browse files
committed
ENH: simple patch for read_json compression
Addresses GH15644
1 parent 8a98f5e commit 12b6012

File tree

2 files changed

+15
-5
lines changed

2 files changed

+15
-5
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ New features
2424
<https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
2525
- Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`,
2626
and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
27+
- The ``read_json`` function now accepts a ``compression`` keyword, which allows compressed JSON to be read directly. It behaves identically to the ``compression`` keyword of ``read_csv`` and defaults to ``'infer'``. (:issue:`15644`)
2728

2829
.. _whatsnew_0210.enhancements.other:
2930

pandas/io/json/json.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas import compat, isnull
99
from pandas import Series, DataFrame, to_datetime, MultiIndex
1010
from pandas.io.common import (get_filepath_or_buffer, _get_handle,
11-
_stringify_path)
11+
_stringify_path, _infer_compression)
1212
from pandas.core.common import AbstractMethodError
1313
from pandas.io.formats.printing import pprint_thing
1414
from .normalize import _convert_to_line_delimits
@@ -174,7 +174,7 @@ def write(self):
174174
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
175175
convert_axes=True, convert_dates=True, keep_default_dates=True,
176176
numpy=False, precise_float=False, date_unit=None, encoding=None,
177-
lines=False):
177+
lines=False, compression='infer'):
178178
"""
179179
Convert a JSON string to pandas object
180180
@@ -258,6 +258,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
258258
259259
.. versionadded:: 0.19.0
260260
261+
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
262+
For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
263+
bz2, zip or xz if path_or_buf is a string ending in '.gz', '.bz2',
264+
'.zip', or '.xz', respectively, and no decompression otherwise. If using
265+
'zip', the ZIP file must contain only one data file to be read in.
266+
Set to None for no decompression.
267+
261268
encoding : str, default is 'utf-8'
262269
The encoding to use to decode py3 bytes.
263270
@@ -319,9 +326,10 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
319326
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
320327
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
321328
"""
329+
compression = _infer_compression(path_or_buf, compression)
330+
filepath_or_buffer, _, compression = get_filepath_or_buffer(
331+
path_or_buf, encoding=encoding, compression=compression)
322332

323-
filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
324-
encoding=encoding)
325333
if isinstance(filepath_or_buffer, compat.string_types):
326334
try:
327335
exists = os.path.exists(filepath_or_buffer)
@@ -333,7 +341,8 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
333341

334342
if exists:
335343
fh, handles = _get_handle(filepath_or_buffer, 'r',
336-
encoding=encoding)
344+
encoding=encoding,
345+
compression=compression)
337346
json = fh.read()
338347
fh.close()
339348
else:

0 commit comments

Comments
 (0)