Skip to content

Commit dc4fbec

Browse files
Merge remote-tracking branch 'upstream/master' into GH36666_beta
2 parents 76e6602 + 75a5fa7 commit dc4fbec

File tree

118 files changed

+2504
-1835
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+2504
-1835
lines changed

.pre-commit-config.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@ repos:
5353
types: [rst]
5454
args: [--filename=*.rst]
5555
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
56+
- id: incorrect-sphinx-directives
57+
name: Check for incorrect Sphinx directives
58+
language: pygrep
59+
entry: >-
60+
\.\. (autosummary|contents|currentmodule|deprecated
61+
|function|image|important|include|ipython|literalinclude
62+
|math|module|note|raw|seealso|toctree|versionadded
63+
|versionchanged|warning):[^:]
64+
files: \.(py|pyx|rst)$
5665
- repo: https://github.com/asottile/yesqa
5766
rev: v1.2.2
5867
hooks:
@@ -61,4 +70,6 @@ repos:
6170
rev: v3.2.0
6271
hooks:
6372
- id: end-of-file-fixer
64-
exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
73+
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
74+
- id: trailing-whitespace
75+
exclude: \.(html|svg)$

asv_bench/benchmarks/io/pickle.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,11 @@ def time_read_pickle(self):
2424
def time_write_pickle(self):
2525
self.df.to_pickle(self.fname)
2626

27+
def peakmem_read_pickle(self):
28+
read_pickle(self.fname)
29+
30+
def peakmem_write_pickle(self):
31+
self.df.to_pickle(self.fname)
32+
2733

2834
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/rolling.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,21 @@ class ExpandingMethods:
7676

7777
def setup(self, constructor, dtype, method):
7878
N = 10 ** 5
79+
N_groupby = 100
7980
arr = (100 * np.random.random(N)).astype(dtype)
8081
self.expanding = getattr(pd, constructor)(arr).expanding()
82+
self.expanding_groupby = (
83+
pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
84+
.groupby("B")
85+
.expanding()
86+
)
8187

8288
def time_expanding(self, constructor, dtype, method):
8389
getattr(self.expanding, method)()
8490

91+
def time_expanding_groupby(self, constructor, dtype, method):
92+
getattr(self.expanding_groupby, method)()
93+
8594

8695
class EWMMethods:
8796

asv_bench/benchmarks/timeseries.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
import dateutil
44
import numpy as np
55

6-
from pandas import DataFrame, Series, date_range, period_range, to_datetime
6+
from pandas import (
7+
DataFrame,
8+
Series,
9+
date_range,
10+
period_range,
11+
timedelta_range,
12+
to_datetime,
13+
)
714

815
from pandas.tseries.frequencies import infer_freq
916

@@ -121,12 +128,15 @@ def time_convert(self):
121128

122129
class Iteration:
123130

124-
params = [date_range, period_range]
131+
params = [date_range, period_range, timedelta_range]
125132
param_names = ["time_index"]
126133

127134
def setup(self, time_index):
128135
N = 10 ** 6
129-
self.idx = time_index(start="20140101", freq="T", periods=N)
136+
if time_index is timedelta_range:
137+
self.idx = time_index(start=0, freq="T", periods=N)
138+
else:
139+
self.idx = time_index(start="20140101", freq="T", periods=N)
130140
self.exit = 10000
131141

132142
def time_iter(self, time_index):

ci/code_checks.sh

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
180180
invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/
181181
RET=$(($RET + $?)) ; echo $MSG "DONE"
182182

183-
MSG='Check for python2-style file encodings' ; echo $MSG
184-
invgrep -R --include="*.py" --include="*.pyx" -E "# -\*- coding: utf-8 -\*-" pandas scripts
185-
RET=$(($RET + $?)) ; echo $MSG "DONE"
186-
187-
MSG='Check for python2-style super usage' ; echo $MSG
188-
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
189-
RET=$(($RET + $?)) ; echo $MSG "DONE"
190-
191183
MSG='Check for use of builtin filter function' ; echo $MSG
192184
invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
193185
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -206,18 +198,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
206198
invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
207199
RET=$(($RET + $?)) ; echo $MSG "DONE"
208200

209-
MSG='Check for python2 new-style classes and for empty parentheses' ; echo $MSG
210-
invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\((object)?\):" pandas asv_bench/benchmarks scripts
211-
RET=$(($RET + $?)) ; echo $MSG "DONE"
212-
213201
MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
214202
invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
215203
RET=$(($RET + $?)) ; echo $MSG "DONE"
216204

217-
MSG='Check for incorrect sphinx directives' ; echo $MSG
218-
invgrep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
219-
RET=$(($RET + $?)) ; echo $MSG "DONE"
220-
221205
# Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
222206
MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG
223207
invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
@@ -259,15 +243,15 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
259243
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
260244
RET=$(($RET + $?)) ; echo $MSG "DONE"
261245

262-
MSG='Check for use of xrange instead of range' ; echo $MSG
263-
invgrep -R --include=*.{py,pyx} 'xrange' pandas
264-
RET=$(($RET + $?)) ; echo $MSG "DONE"
265-
266246
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
267247
INVGREP_APPEND=" <- trailing whitespaces found"
268248
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
269249
RET=$(($RET + $?)) ; echo $MSG "DONE"
270250
unset INVGREP_APPEND
251+
252+
MSG='Check code for instances of os.remove' ; echo $MSG
253+
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
254+
RET=$(($RET + $?)) ; echo $MSG "DONE"
271255
fi
272256

273257
### CODE ###

doc/source/getting_started/install.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref
262262
Jinja2 2.10 Conditional formatting with DataFrame.style
263263
PyQt4 Clipboard I/O
264264
PyQt5 Clipboard I/O
265-
PyTables 3.4.4 HDF5-based reading / writing
265+
PyTables 3.5.1 HDF5-based reading / writing
266266
SQLAlchemy 1.2.8 SQL support for databases other than sqlite
267267
SciPy 1.2.0 Miscellaneous statistical functions
268268
xlsxwriter 1.0.2 Excel writing
@@ -280,7 +280,6 @@ psycopg2 2.7 PostgreSQL engine for sqlalchemy
280280
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
281281
pymysql 0.7.11 MySQL engine for sqlalchemy
282282
pyreadstat SPSS files (.sav) reading
283-
pytables 3.5.1 HDF5 reading / writing
284283
pyxlsb 1.0.6 Reading for xlsb files
285284
qtpy Clipboard I/O
286285
s3fs 0.4.0 Amazon S3 access

doc/source/reference/series.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ Attributes
2222
:toctree: api/
2323

2424
Series.index
25-
26-
.. autosummary::
27-
:toctree: api/
28-
2925
Series.array
3026
Series.values
3127
Series.dtype

doc/source/reference/window.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Standard moving window functions
3232
Rolling.apply
3333
Rolling.aggregate
3434
Rolling.quantile
35+
Rolling.sem
3536
Window.mean
3637
Window.sum
3738
Window.var
@@ -61,6 +62,7 @@ Standard expanding window functions
6162
Expanding.apply
6263
Expanding.aggregate
6364
Expanding.quantile
65+
Expanding.sem
6466

6567
Exponentially-weighted moving window functions
6668
----------------------------------------------

doc/source/user_guide/computation.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ We provide a number of common statistical functions:
328328
:meth:`~Rolling.apply`, Generic apply
329329
:meth:`~Rolling.cov`, Sample covariance (binary)
330330
:meth:`~Rolling.corr`, Sample correlation (binary)
331+
:meth:`~Rolling.sem`, Standard error of mean
331332

332333
.. _computation.window_variance.caveats:
333334

@@ -938,6 +939,7 @@ Method summary
938939
:meth:`~Expanding.apply`, Generic apply
939940
:meth:`~Expanding.cov`, Sample covariance (binary)
940941
:meth:`~Expanding.corr`, Sample correlation (binary)
942+
:meth:`~Expanding.sem`, Standard error of mean
941943

942944
.. note::
943945

doc/source/user_guide/io.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
2323
text;`JSON <https://www.json.org/>`__;:ref:`read_json<io.json_reader>`;:ref:`to_json<io.json_writer>`
2424
text;`HTML <https://en.wikipedia.org/wiki/HTML>`__;:ref:`read_html<io.read_html>`;:ref:`to_html<io.html>`
2525
text; Local clipboard;:ref:`read_clipboard<io.clipboard>`;:ref:`to_clipboard<io.clipboard>`
26-
;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
26+
binary;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
2727
binary;`OpenDocument <http://www.opendocumentformat.org>`__;:ref:`read_excel<io.ods>`;
2828
binary;`HDF5 Format <https://support.hdfgroup.org/HDF5/whatishdf5.html>`__;:ref:`read_hdf<io.hdf5>`;:ref:`to_hdf<io.hdf5>`
2929
binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`

doc/source/user_guide/text.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,10 +302,10 @@ positional argument (a regex object) and return a string.
302302
return m.group(0)[::-1]
303303
304304
305-
pd.Series(
306-
["foo 123", "bar baz", np.nan],
307-
dtype="string"
308-
).str.replace(pat, repl, regex=True)
305+
pd.Series(["foo 123", "bar baz", np.nan], dtype="string").str.replace(
306+
pat, repl, regex=True
307+
)
308+
309309
310310
# Using regex groups
311311
pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"

doc/source/whatsnew/v1.1.4.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ Fixed regressions
2727

2828
Bug fixes
2929
~~~~~~~~~
30-
-
30+
- Bug causing ``groupby(...).sum()`` and similar to not preserve metadata (:issue:`29442`)
31+
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` raising a ``ValueError`` when the target was read-only (:issue:`37174`)
3132

3233
.. ---------------------------------------------------------------------------
3334

doc/source/whatsnew/v1.2.0.rst

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,32 @@ For example:
9696
buffer = io.BytesIO()
9797
data.to_csv(buffer, mode="w+b", encoding="utf-8", compression="gzip")
9898
99+
Support for short caption and table position in ``to_latex``
100+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
101+
102+
:meth:`DataFrame.to_latex` now allows one to specify
103+
a floating table position (:issue:`35281`)
104+
and a short caption (:issue:`36267`).
105+
106+
New keyword ``position`` is implemented to set the position.
107+
108+
.. ipython:: python
109+
110+
data = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
111+
table = data.to_latex(position='ht')
112+
print(table)
113+
114+
Usage of keyword ``caption`` is extended.
115+
Besides taking a single string as an argument,
116+
one can optionally provide a tuple of ``(full_caption, short_caption)``
117+
to add a short caption macro.
118+
119+
.. ipython:: python
120+
121+
data = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
122+
table = data.to_latex(caption=('the full long caption', 'short caption'))
123+
print(table)
124+
99125
.. _whatsnew_120.read_csv_table_precision_default:
100126

101127
Change in default floating precision for ``read_csv`` and ``read_table``
@@ -191,6 +217,10 @@ Other enhancements
191217
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
192218
- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
193219
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
220+
- Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`).
221+
- :meth:`Rolling.var()` and :meth:`Rolling.std()` use Kahan summation and Welford's method to avoid numerical issues (:issue:`37051`)
222+
- :meth:`DataFrame.plot` now recognizes ``xlabel`` and ``ylabel`` arguments for plots of type ``scatter`` and ``hexbin`` (:issue:`37001`)
223+
- :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`)
194224

195225
.. _whatsnew_120.api_breaking.python:
196226

@@ -314,6 +344,8 @@ Performance improvements
314344
avoiding creating these again, if created on either. This can speed up operations that depend on creating copies of existing indexes (:issue:`36840`)
315345
- Performance improvement in :meth:`RollingGroupby.count` (:issue:`35625`)
316346
- Small performance decrease to :meth:`Rolling.min` and :meth:`Rolling.max` for fixed windows (:issue:`36567`)
347+
- Reduced peak memory usage in :meth:`DataFrame.to_pickle` when using ``protocol=5`` in Python 3.8+ (:issue:`34244`)
348+
- Performance improvement in :class:`ExpandingGroupby` (:issue:`37064`)
317349

318350
.. ---------------------------------------------------------------------------
319351
@@ -344,8 +376,7 @@ Datetimelike
344376
Timedelta
345377
^^^^^^^^^
346378
- Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`)
347-
-
348-
-
379+
- Bug in parsing of ISO 8601 durations in :class:`Timedelta`, :meth:`pd.to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`)
349380

350381
Timezones
351382
^^^^^^^^^
@@ -366,6 +397,8 @@ Numeric
366397
- Bug in :class:`IntegerArray` multiplication with ``timedelta`` and ``np.timedelta64`` objects (:issue:`36870`)
367398
- Bug in :meth:`DataFrame.diff` with ``datetime64`` dtypes including ``NaT`` values failing to fill ``NaT`` results correctly (:issue:`32441`)
368399
- Bug in :class:`DataFrame` arithmetic ops incorrectly accepting keyword arguments (:issue:`36843`)
400+
- Bug in :class:`IntervalArray` comparisons with :class:`Series` not returning :class:`Series` (:issue:`36908`)
401+
- Bug in :class:`DataFrame` allowing arithmetic operations with list of array-likes with undefined results. Behavior changed to raising ``ValueError`` (:issue:`36702`)
369402

370403
Conversion
371404
^^^^^^^^^^
@@ -394,6 +427,9 @@ Indexing
394427
- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where int64 arrays are returned instead of intp. (:issue:`36359`)
395428
- Bug in :meth:`DataFrame.sort_index` where passing the ``ascending`` parameter as a list on a single-level index gave a wrong result (:issue:`32334`)
396429
- Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`)
430+
- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)
431+
- Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`)
432+
- Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`)
397433

398434
Missing
399435
^^^^^^^
@@ -426,6 +462,8 @@ I/O
426462
- Bug in :func:`read_table` and :func:`read_csv` when ``delim_whitespace=True`` and ``sep=default`` (:issue:`36583`)
427463
- Bug in :meth:`to_json` with ``lines=True`` and ``orient='records'`` where the last line of the output was not terminated with a newline character (:issue:`36888`)
428464
- Bug in :meth:`read_parquet` with fixed offset timezones. String representation of timezones was not recognized (:issue:`35997`, :issue:`36004`)
465+
- Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
466+
- Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
429467

430468
Plotting
431469
^^^^^^^^
@@ -453,6 +491,8 @@ Groupby/resample/rolling
453491
- Bug in :meth:`DataFrameGroupBy.ffill` and :meth:`DataFrameGroupBy.bfill` where a ``NaN`` group would return filled values instead of ``NaN`` when ``dropna=True`` (:issue:`34725`)
454492
- Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`)
455493
- Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with partial centered window (:issue:`36040`).
494+
- Bug in :meth:`DataFrameGroupBy.rolling` returning wrong values with a time-aware window containing ``NaN``. A ``ValueError`` is now raised because such windows are not monotonic (:issue:`34617`)
495+
- Bug in :meth:`Rolling.__iter__` where a ``ValueError`` was not raised when ``min_periods`` was larger than ``window`` (:issue:`37156`)
456496

457497
Reshaping
458498
^^^^^^^^^

0 commit comments

Comments
 (0)