Skip to content

Commit 671c4b3

Browse files
bertrandhaut authored and jreback committed
ENH: add decimal support in to_csv (GH8448)
Remove numpy directory; reformatting internals.py; test for to_csv decimal separator option; Joris' comments; issue number as comment
1 parent bb9d35c commit 671c4b3

File tree

6 files changed

+44
-9
lines changed

6 files changed

+44
-9
lines changed

doc/source/whatsnew/v0.16.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ New features
6262
- ``StringMethods.pad()`` and ``center()`` now accept ``fillchar`` option to specify filling character (:issue:`9352`)
6363
- Added ``StringMethods.zfill()``, which behaves the same as standard ``str`` (:issue:`9387`)
6464
- Added ``days_in_month`` (compatibility alias ``daysinmonth``) property to ``Timestamp``, ``DatetimeIndex``, ``Period``, ``PeriodIndex``, and ``Series.dt`` (:issue:`9572`)
65+
- Added ``decimal`` option in ``to_csv`` to provide formatting for non-'.' decimal separators (:issue:`781`)
6566

6667
DataFrame Assign
6768
~~~~~~~~~~~~~~~~

pandas/core/format.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1173,7 +1173,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
11731173
mode='w', nanRep=None, encoding=None, quoting=None,
11741174
line_terminator='\n', chunksize=None, engine=None,
11751175
tupleize_cols=False, quotechar='"', date_format=None,
1176-
doublequote=True, escapechar=None):
1176+
doublequote=True, escapechar=None, decimal='.'):
11771177

11781178
self.engine = engine # remove for 0.13
11791179
self.obj = obj
@@ -1185,6 +1185,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
11851185
self.sep = sep
11861186
self.na_rep = na_rep
11871187
self.float_format = float_format
1188+
self.decimal = decimal
11881189

11891190
self.header = header
11901191
self.index = index
@@ -1513,6 +1514,7 @@ def _save_chunk(self, start_i, end_i):
15131514
b = self.blocks[i]
15141515
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
15151516
float_format=self.float_format,
1517+
decimal=self.decimal,
15161518
date_format=self.date_format)
15171519

15181520
for col_loc, col in zip(b.mgr_locs, d):

pandas/core/frame.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
11121112
mode='w', encoding=None, quoting=None,
11131113
quotechar='"', line_terminator='\n', chunksize=None,
11141114
tupleize_cols=False, date_format=None, doublequote=True,
1115-
escapechar=None, **kwds):
1115+
escapechar=None, decimal='.', **kwds):
11161116
r"""Write DataFrame to a comma-separated values (csv) file
11171117
11181118
Parameters
@@ -1164,6 +1164,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
11641164
or new (expanded format) if False)
11651165
date_format : string, default None
11661166
Format string for datetime objects
1167+
decimal: string, default '.'
1168+
Character recognized as decimal separator. E.g. use ',' for European data
11671169
"""
11681170

11691171
formatter = fmt.CSVFormatter(self, path_or_buf,
@@ -1178,7 +1180,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
11781180
tupleize_cols=tupleize_cols,
11791181
date_format=date_format,
11801182
doublequote=doublequote,
1181-
escapechar=escapechar)
1183+
escapechar=escapechar,
1184+
decimal=decimal)
11821185
formatter.save()
11831186

11841187
if path_or_buf is None:

pandas/core/internals.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,7 +1219,7 @@ def _try_cast(self, element):
12191219
except: # pragma: no cover
12201220
return element
12211221

1222-
def to_native_types(self, slicer=None, na_rep='', float_format=None,
1222+
def to_native_types(self, slicer=None, na_rep='', float_format=None, decimal='.',
12231223
**kwargs):
12241224
""" convert to our native types format, slicing if desired """
12251225

@@ -1229,10 +1229,22 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
12291229
values = np.array(values, dtype=object)
12301230
mask = isnull(values)
12311231
values[mask] = na_rep
1232-
if float_format:
1232+
1233+
1234+
if float_format and decimal != '.':
1235+
formatter = lambda v : (float_format % v).replace('.',decimal,1)
1236+
elif decimal != '.':
1237+
formatter = lambda v : ('%g' % v).replace('.',decimal,1)
1238+
elif float_format:
1239+
formatter = lambda v : float_format % v
1240+
else:
1241+
formatter = None
1242+
1243+
if formatter:
12331244
imask = (~mask).ravel()
12341245
values.flat[imask] = np.array(
1235-
[float_format % val for val in values.ravel()[imask]])
1246+
[formatter(val) for val in values.ravel()[imask]])
1247+
12361248
return values.tolist()
12371249

12381250
def should_store(self, value):

pandas/core/series.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2325,7 +2325,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
23252325
def to_csv(self, path, index=True, sep=",", na_rep='',
23262326
float_format=None, header=False,
23272327
index_label=None, mode='w', nanRep=None, encoding=None,
2328-
date_format=None):
2328+
date_format=None, decimal='.'):
23292329
"""
23302330
Write Series to a comma-separated values (csv) file
23312331
@@ -2353,14 +2353,16 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
23532353
non-ascii, for python versions prior to 3
23542354
date_format: string, default None
23552355
Format string for datetime objects.
2356+
decimal: string, default '.'
2357+
Character recognized as decimal separator. E.g. use ',' for European data
23562358
"""
23572359
from pandas.core.frame import DataFrame
23582360
df = DataFrame(self)
23592361
# result is only a string if no path provided, otherwise None
23602362
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
23612363
float_format=float_format, header=header,
23622364
index_label=index_label, mode=mode, nanRep=nanRep,
2363-
encoding=encoding, date_format=date_format)
2365+
encoding=encoding, date_format=date_format, decimal=decimal)
23642366
if path is None:
23652367
return result
23662368

pandas/tests/test_format.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1738,7 +1738,7 @@ def test_to_html_columns_arg(self):
17381738
self.assertNotIn('<th>B</th>', result)
17391739

17401740
def test_to_html_multiindex(self):
1741-
columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2),
1741+
columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2),
17421742
np.mod(lrange(4), 2))),
17431743
names=['CL0', 'CL1'])
17441744
df = DataFrame([list('abcd'), list('efgh')], columns=columns)
@@ -2396,6 +2396,21 @@ def test_csv_to_string(self):
23962396
expected = ',col\n0,1\n1,2\n'
23972397
self.assertEqual(df.to_csv(), expected)
23982398

2399+
def test_to_csv_decimal(self):
2400+
# GH 781
2401+
df = DataFrame({'col1' : [1], 'col2' : ['a'], 'col3' : [10.1] })
2402+
2403+
expected_default = ',col1,col2,col3\n0,1,a,10.1\n'
2404+
self.assertEqual(df.to_csv(), expected_default)
2405+
2406+
expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n'
2407+
self.assertEqual(df.to_csv(decimal=',',sep=';'), expected_european_excel)
2408+
2409+
expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n'
2410+
self.assertEqual(df.to_csv(float_format = '%.2f'), expected_float_format_default)
2411+
2412+
expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n'
2413+
self.assertEqual(df.to_csv(decimal=',',sep=';', float_format = '%.2f'), expected_float_format)
23992414

24002415
class TestSeriesFormatting(tm.TestCase):
24012416
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)