Skip to content

Commit 06aafa6

Browse files
committed
ENH: simple patch for read_json compression
Addresses GH15644
1 parent 8a98f5e commit 06aafa6

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

pandas/io/json/json.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas import compat, isnull
99
from pandas import Series, DataFrame, to_datetime, MultiIndex
1010
from pandas.io.common import (get_filepath_or_buffer, _get_handle,
11-
_stringify_path)
11+
_stringify_path, _infer_compression)
1212
from pandas.core.common import AbstractMethodError
1313
from pandas.io.formats.printing import pprint_thing
1414
from .normalize import _convert_to_line_delimits
@@ -171,10 +171,10 @@ def write(self):
171171
return serialized
172172

173173

174-
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
174+
def read_json(filepath_or_buffer=None, orient=None, typ='frame', dtype=True,
175175
convert_axes=True, convert_dates=True, keep_default_dates=True,
176176
numpy=False, precise_float=False, date_unit=None, encoding=None,
177-
lines=False):
177+
lines=False, compression='infer'):
178178
"""
179179
Convert a JSON string to pandas object
180180
@@ -257,6 +257,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
257257
Read the file as a json object per line.
258258
259259
.. versionadded:: 0.19.0
260+
261+
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
262+
For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
263+
bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2',
264+
'.zip', or '.xz', respectively, and no decompression otherwise. If using
265+
'zip', the ZIP file must contain only one data file to be read in.
266+
Set to None for no decompression.
260267
261268
encoding : str, default is 'utf-8'
262269
The encoding to use to decode py3 bytes.
@@ -319,9 +326,10 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
319326
"data": [{"index": "row 1", "col 1": "a", "col 2": "b"},
320327
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
321328
"""
322-
323-
filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
324-
encoding=encoding)
329+
compression = _infer_compression(filepath_or_buffer, compression)
330+
filepath_or_buffer, _, compression = get_filepath_or_buffer(
331+
filepath_or_buffer, encoding=encoding, compression=compression)
332+
325333
if isinstance(filepath_or_buffer, compat.string_types):
326334
try:
327335
exists = os.path.exists(filepath_or_buffer)
@@ -333,7 +341,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
333341

334342
if exists:
335343
fh, handles = _get_handle(filepath_or_buffer, 'r',
336-
encoding=encoding)
344+
encoding=encoding, compression=compression)
337345
json = fh.read()
338346
fh.close()
339347
else:

0 commit comments

Comments
 (0)