From acffef2ee184251f25cbe05a1ecd1f8c184fb3ca Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Tue, 8 Sep 2020 13:32:44 -0400
Subject: [PATCH 1/7] change read_csv and read_table to use high precision by
 default

---
 pandas/io/parsers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index a0466c5ac6b57..90a2105eb93d5 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -589,7 +589,7 @@ def read_csv(
     delim_whitespace=False,
     low_memory=_c_parser_defaults["low_memory"],
     memory_map=False,
-    float_precision=None,
+    float_precision="high",
     storage_options: StorageOptions = None,
 ):
     # gh-23761
@@ -747,7 +747,7 @@ def read_table(
     delim_whitespace=False,
     low_memory=_c_parser_defaults["low_memory"],
     memory_map=False,
-    float_precision=None,
+    float_precision="high",
 ):
     return read_csv(**locals())
 

From 68ecda33e75768a168cf13860985f031f7933ffb Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Tue, 8 Sep 2020 15:33:19 -0400
Subject: [PATCH 2/7] Modify test, whatsnew

---
 doc/source/whatsnew/v1.2.0.rst               | 11 +++++++++++
 pandas/tests/io/parser/test_c_parser_only.py |  4 +++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 2aac2596c18cb..797341877bd54 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -96,6 +96,17 @@ For example:
    buffer = io.BytesIO()
    data.to_csv(buffer, mode="w+b", encoding="utf-8", compression="gzip")
 
+.. _whatsnew_read_csv_table_precision_default:
+
+Change in default floating precision for ``read_csv`` and ``read_table``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For the C parsing engine, the methods :meth:`read_csv` and :meth:`read_table` previously defaulted to a parser that
+could read floating point numbers slightly incorrectly with respect to the last bit in precision.
+The option ``floating_precision="high"`` has always been available to avoid this issue.
+Beginning with this version, the default is now to use the more accurate parser by making
+``floating_precision="high"`` the default, with no impact on performance. (:issue:`17154`)
+
 .. _whatsnew_120.enhancements.other:
 
 Other enhancements
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 50d5fb3e49c2a..042c2baf90450 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -160,7 +160,9 @@ def test_precise_conversion(c_parser_only):
         # 25 decimal digits of precision
         text = f"a\n{num:.25}"
 
-        normal_val = float(parser.read_csv(StringIO(text))["a"][0])
+        normal_val = float(
+            parser.read_csv(StringIO(text), float_precision=None)["a"][0]
+        )
         precise_val = float(
             parser.read_csv(StringIO(text), float_precision="high")["a"][0]
         )

From 9aa25daea343fdb13d576e5ddc3a63dbad1bab56 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Tue, 8 Sep 2020 16:43:24 -0400
Subject: [PATCH 3/7] add legacy option for float_precision for C parser

---
 doc/source/whatsnew/v1.2.0.rst               |  4 +++-
 pandas/_libs/parsers.pyx                     |  6 +++---
 pandas/io/parsers.py                         | 10 +++++-----
 pandas/tests/io/parser/test_c_parser_only.py | 21 +++++++++++++++++---
 4 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 797341877bd54..8fecf4f864b97 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -105,7 +105,9 @@ For the C parsing engine, the methods :meth:`read_csv` and :meth:`read_table` pr
 could read floating point numbers slightly incorrectly with respect to the last bit in precision.
 The option ``floating_precision="high"`` has always been available to avoid this issue.
 Beginning with this version, the default is now to use the more accurate parser by making
-``floating_precision="high"`` the default, with no impact on performance. (:issue:`17154`)
+``float_precision=None`` correspond to the high precision parser, and the new option
+``float_precision="legacy"`` to use the legacy parser. The change to using the higher precision
+parser by default should have no impact on performance. (:issue:`17154`)
 
 .. _whatsnew_120.enhancements.other:
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 811e28b830921..efb58e33777d7 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -476,10 +476,10 @@ cdef class TextReader:
         if float_precision == "round_trip":
             # see gh-15140
             self.parser.double_converter = round_trip
-        elif float_precision == "high":
-            self.parser.double_converter = precise_xstrtod
-        else:
+        elif float_precision == "legacy":
             self.parser.double_converter = xstrtod
+        else: # float_precision == "high" or float_precision is None:
+            self.parser.double_converter = precise_xstrtod
 
         if isinstance(dtype, dict):
             dtype = {k: pandas_dtype(dtype[k])
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 90a2105eb93d5..1c177c88b0052 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -338,9 +338,9 @@
     option can improve performance because there is no longer any I/O overhead.
 float_precision : str, optional
     Specifies which converter the C engine should use for floating-point
-    values. The options are `None` for the ordinary converter,
-    `high` for the high-precision converter, and `round_trip` for the
-    round-trip converter.
+    values. The options are `None` or `high` for the high-precision converter,
+    `legacy` for the original lower precision pandas converter, and
+    `round_trip` for the round-trip converter.
 
 Returns
 -------
@@ -589,7 +589,7 @@ def read_csv(
     delim_whitespace=False,
     low_memory=_c_parser_defaults["low_memory"],
     memory_map=False,
-    float_precision="high",
+    float_precision=None,
     storage_options: StorageOptions = None,
 ):
     # gh-23761
@@ -747,7 +747,7 @@ def read_table(
     delim_whitespace=False,
     low_memory=_c_parser_defaults["low_memory"],
     memory_map=False,
-    float_precision="high",
+    float_precision=None,
 ):
     return read_csv(**locals())
 
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 042c2baf90450..22d5ef22b2059 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -161,7 +161,7 @@ def test_precise_conversion(c_parser_only):
         text = f"a\n{num:.25}"
 
         normal_val = float(
-            parser.read_csv(StringIO(text), float_precision=None)["a"][0]
+            parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
         )
         precise_val = float(
             parser.read_csv(StringIO(text), float_precision="high")["a"][0]
@@ -610,7 +610,7 @@ def test_unix_style_breaks(c_parser_only):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("float_precision", [None, "high", "round_trip"])
+@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
 @pytest.mark.parametrize(
     "data,thousands,decimal",
     [
@@ -648,7 +648,7 @@ def test_1000_sep_with_decimal(
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("float_precision", [None, "high", "round_trip"])
+@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
 @pytest.mark.parametrize(
     "value,expected",
     [
@@ -704,3 +704,18 @@ def test_1000_sep_decimal_float_precision(
     )
     val = df.iloc[0, 0]
     assert val == expected
+
+
+def test_high_is_default(c_parser_only):
+    # GH 17154, 36228
+    parser = c_parser_only
+    s = "foo\n243.164\n"
+    df = parser.read_csv(StringIO(s))
+    df2 = parser.read_csv(StringIO(s), float_precision="high")
+
+    tm.assert_frame_equal(df, df2)
+
+    df3 = parser.read_csv(StringIO(s), float_precision="legacy")
+
+    assert not df.iloc[0, 0] == df3.iloc[0, 0]
+

From afaf031d7f19929302ab53d0e7b2df7bdb9f5a23 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Tue, 8 Sep 2020 16:45:20 -0400
Subject: [PATCH 4/7] remove blank line in tst file

---
 pandas/tests/io/parser/test_c_parser_only.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 22d5ef22b2059..953b936426602 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -718,4 +718,3 @@ def test_high_is_default(c_parser_only):
     df3 = parser.read_csv(StringIO(s), float_precision="legacy")
 
     assert not df.iloc[0, 0] == df3.iloc[0, 0]
-

From fa97aabb0d7d756316dfea5d40835e93baf99513 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Tue, 8 Sep 2020 17:16:44 -0400
Subject: [PATCH 5/7] two spaces before inline comment

---
 pandas/_libs/parsers.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index efb58e33777d7..eb0aa155d1c4a 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -478,7 +478,7 @@ cdef class TextReader:
             self.parser.double_converter = round_trip
         elif float_precision == "legacy":
             self.parser.double_converter = xstrtod
-        else: # float_precision == "high" or float_precision is None:
+        else:  # float_precision == "high" or float_precision is None:
             self.parser.double_converter = precise_xstrtod
 
         if isinstance(dtype, dict):

From 7f4cf45066f259b29d39267d25431f535dc9ad20 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Fri, 11 Sep 2020 18:08:09 -0400
Subject: [PATCH 6/7] add test for invalid float_precision option

---
 pandas/_libs/parsers.pyx                     | 5 ++++-
 pandas/io/parsers.py                         | 1 +
 pandas/tests/io/parser/test_c_parser_only.py | 7 ++++++-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index eb0aa155d1c4a..b87e46f9b6648 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -478,8 +478,11 @@ cdef class TextReader:
             self.parser.double_converter = round_trip
         elif float_precision == "legacy":
             self.parser.double_converter = xstrtod
-        else:  # float_precision == "high" or float_precision is None:
+        elif float_precision == "high" or float_precision is None:
             self.parser.double_converter = precise_xstrtod
+        else:
+            raise ValueError(f'Unrecognized float_precision option: '
+                             f'{float_precision}')
 
         if isinstance(dtype, dict):
             dtype = {k: pandas_dtype(dtype[k])
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 1c177c88b0052..7f6739d6d0022 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2299,6 +2299,7 @@ def TextParser(*args, **kwds):
         values. The options are None for the ordinary converter,
         'high' for the high-precision converter, and 'round_trip' for the
         round-trip converter.
+        .. versionchanged:: 1.1.2
     """
     kwds["engine"] = "python"
     return TextFileReader(*args, **kwds)
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index 953b936426602..7c58afe867440 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -706,7 +706,7 @@ def test_1000_sep_decimal_float_precision(
     assert val == expected
 
 
-def test_high_is_default(c_parser_only):
+def test_float_precision_options(c_parser_only):
     # GH 17154, 36228
     parser = c_parser_only
     s = "foo\n243.164\n"
@@ -718,3 +718,8 @@ def test_high_is_default(c_parser_only):
     df3 = parser.read_csv(StringIO(s), float_precision="legacy")
 
     assert not df.iloc[0, 0] == df3.iloc[0, 0]
+
+    msg = "Unrecognized float_precision option: junk"
+
+    with pytest.raises(ValueError, match=msg):
+        parser.read_csv(StringIO(s), float_precision="junk")

From be5910dcdd330b89f8f05620d477e58cce8e92be Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Fri, 11 Sep 2020 18:09:15 -0400
Subject: [PATCH 7/7] correct versionadded for 1.2

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 7637f28e9ae49..fe94a722f2ec8 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2299,7 +2299,7 @@ def TextParser(*args, **kwds):
         values. The options are None for the ordinary converter,
         'high' for the high-precision converter, and 'round_trip' for the
         round-trip converter.
-        .. versionchanged:: 1.1.2
+        .. versionchanged:: 1.2
     """
     kwds["engine"] = "python"
     return TextFileReader(*args, **kwds)