diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index bb1493280dfd2..1b566af7a5437 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1367,7 +1367,26 @@ def _ensure_encoded(list lst): # common NA values # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES)) +STR_NA_VALUES = { + "-1.#IND", + "1.#QNAN", + "1.#IND", + "-1.#QNAN", + "#N/A N/A", + "#N/A", + "N/A", + "n/a", + "NA", + "#NA", + "NULL", + "null", + "NaN", + "-NaN", + "nan", + "-nan", + "", +} +_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES)) def _maybe_upcast(arr): diff --git a/pandas/io/common.py b/pandas/io/common.py index a01011cd7d4e4..0159716248b11 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -47,29 +47,6 @@ lzma = _import_lzma() -# common NA values -# no longer excluding inf representations -# '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = { - "-1.#IND", - "1.#QNAN", - "1.#IND", - "-1.#QNAN", - "#N/A N/A", - "#N/A", - "N/A", - "n/a", - "NA", - "#NA", - "NULL", - "null", - "NaN", - "-NaN", - "nan", - "-nan", - "", -} - _VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS.discard("") diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 81d3d46f78bdb..8368142c3633a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -6,6 +6,7 @@ from pandas._config import config +from pandas._libs.parsers import STR_NA_VALUES from pandas.errors import EmptyDataError from pandas.util._decorators import Appender @@ -14,7 +15,6 @@ from pandas.core.frame import DataFrame from pandas.io.common import ( - _NA_VALUES, _is_url, _stringify_path, _validate_header_arg, @@ -124,7 +124,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a887a537a2201..32d812637a067 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -17,6 +17,7 @@ import pandas._libs.lib as lib import pandas._libs.ops as libops import pandas._libs.parsers as parsers +from pandas._libs.parsers import STR_NA_VALUES from pandas._libs.tslibs import parsing from pandas.errors import ( AbstractMethodError, @@ -60,7 +61,6 @@ from pandas.core.tools import datetimes as tools from pandas.io.common import ( - _NA_VALUES, BaseIterator, UnicodeReader, UTF8Recoder, @@ -195,7 +195,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. @@ -3398,7 +3398,7 @@ def _clean_na_values(na_values, keep_default_na=True): if na_values is None: if keep_default_na: - na_values = _NA_VALUES + na_values = STR_NA_VALUES else: na_values = set() na_fvalues = set() @@ -3415,7 +3415,7 @@ def _clean_na_values(na_values, keep_default_na=True): v = [v] if keep_default_na: - v = set(v) | _NA_VALUES + v = set(v) | STR_NA_VALUES na_values[k] = v na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} @@ -3424,7 +3424,7 @@ def _clean_na_values(na_values, keep_default_na=True): na_values = [na_values] na_values = _stringify_na_values(na_values) if keep_default_na: - na_values = na_values | _NA_VALUES + na_values = na_values | STR_NA_VALUES na_fvalues = _floatify_na_values(na_values) @@ -3575,7 +3575,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na): return na_values[col], na_fvalues[col] else: if keep_default_na: - return _NA_VALUES, set() + return STR_NA_VALUES, set() return set(), set() else: diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index f52c6b8858fd3..353d309a84823 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -7,11 +7,11 @@ import numpy as np import pytest +from pandas._libs.parsers import STR_NA_VALUES + from pandas import DataFrame, Index, MultiIndex import pandas.util.testing as tm -import pandas.io.common as com - def test_string_nas(all_parsers): parser = all_parsers @@ -99,7 +99,7 @@ def test_default_na_values(all_parsers): "#N/A N/A", "", } - assert _NA_VALUES == com._NA_VALUES + assert _NA_VALUES == STR_NA_VALUES parser = all_parsers nv = len(_NA_VALUES)