Skip to content

Commit 6f79400

Browse files
authored
Merge branch 'master' into enh-column-wise-fillna
2 parents 6060212 + a43de44 commit 6f79400

File tree

211 files changed

+3791
-9549
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

211 files changed

+3791
-9549
lines changed

.devcontainer.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// For format details, see https://aka.ms/vscode-remote/devcontainer.json or the definition README at
2+
// https://github.com/microsoft/vscode-dev-containers/tree/master/containers/python-3-miniconda
3+
{
4+
"name": "pandas",
5+
"context": ".",
6+
"dockerFile": "Dockerfile",
7+
8+
// Use 'settings' to set *default* container specific settings.json values on container create.
9+
// You can edit these settings after create using File > Preferences > Settings > Remote.
10+
"settings": {
11+
"terminal.integrated.shell.linux": "/bin/bash",
12+
"python.condaPath": "/opt/conda/bin/conda",
13+
"python.pythonPath": "/opt/conda/bin/python",
14+
"python.formatting.provider": "black",
15+
"python.linting.enabled": true,
16+
"python.linting.flake8Enabled": true,
17+
"python.linting.pylintEnabled": false,
18+
"python.linting.mypyEnabled": true,
19+
"python.testing.pytestEnabled": true,
20+
"python.testing.cwd": "pandas/tests"
21+
},
22+
23+
// Add the IDs of extensions you want installed when the container is created in the array below.
24+
"extensions": [
25+
"ms-python.python",
26+
"ms-vscode.cpptools"
27+
]
28+
}

.travis.yml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ python: 3.7
77
# travis cache --delete inside the project directory from the travis command line client
88
# The cache directories will be deleted if anything in ci/ changes in a commit
99
cache:
10-
ccache: true
11-
directories:
12-
- $HOME/.cache # cython cache
13-
- $HOME/.ccache # compiler cache
10+
ccache: true
11+
directories:
12+
- $HOME/.cache # cython cache
13+
- $HOME/.ccache # compiler cache
1414

1515
env:
1616
global:
@@ -20,13 +20,13 @@ env:
2020
- secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA="
2121

2222
git:
23-
# for cloning
24-
depth: false
23+
# for cloning
24+
depth: false
2525

2626
matrix:
27-
fast_finish: true
27+
fast_finish: true
2828

29-
include:
29+
include:
3030
- env:
3131
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
3232

@@ -40,6 +40,9 @@ matrix:
4040
- postgresql
4141

4242
- env:
43+
# Enabling Deprecations when running tests
44+
# PANDAS_TESTING_MODE="deprecate" causes DeprecationWarning messages to be displayed in the logs
45+
# See pandas/_testing.py for more details.
4346
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
4447
services:
4548
- mysql
@@ -70,7 +73,6 @@ before_install:
7073
# This overrides travis and tells it to look nowhere.
7174
- export BOTO_CONFIG=/dev/null
7275

73-
7476
install:
7577
- echo "install start"
7678
- ci/prep_cython_cache.sh
@@ -87,5 +89,5 @@ script:
8789
after_script:
8890
- echo "after_script start"
8991
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
90-
- ci/print_skipped.py
92+
- ci/print_skipped.py
9193
- echo "after_script done"

Dockerfile

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
FROM continuumio/miniconda3
2+
3+
# if you forked pandas, you can pass in your own GitHub username to use your fork
4+
# e.g. gh_username=myname
5+
ARG gh_username=pandas-dev
6+
ARG pandas_home="/home/pandas"
7+
8+
# Avoid warnings by switching to noninteractive
9+
ENV DEBIAN_FRONTEND=noninteractive
10+
11+
# Configure apt and install packages
12+
RUN apt-get update \
13+
&& apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
14+
#
15+
# Install git, iproute2, process tools (procps) and lsb-release (common in install instructions for CLIs)
16+
&& apt-get -y install git iproute2 procps lsb-release \
17+
#
18+
# Install C compilers (gcc not enough, so just went with build-essential which admittedly might be overkill),
19+
# needed to build pandas C extensions
20+
&& apt-get -y install build-essential \
21+
#
22+
# cleanup
23+
&& apt-get autoremove -y \
24+
&& apt-get clean -y \
25+
&& rm -rf /var/lib/apt/lists/*
26+
27+
# Switch back to dialog for any ad-hoc use of apt-get
28+
ENV DEBIAN_FRONTEND=dialog
29+
30+
# Clone pandas repo
31+
RUN mkdir "$pandas_home" \
32+
&& git clone "https://github.com/$gh_username/pandas.git" "$pandas_home" \
33+
&& cd "$pandas_home" \
34+
&& git remote add upstream "https://github.com/pandas-dev/pandas.git" \
35+
&& git pull upstream master
36+
37+
# Because it is surprisingly difficult to activate a conda environment inside a Dockerfile
38+
# (from personal experience and per https://github.com/ContinuumIO/docker-images/issues/89),
39+
# we just update the base/root one from the 'environment.yml' file instead of creating a new one.
40+
#
41+
# Set up environment
42+
RUN conda env update -n base -f "$pandas_home/environment.yml"
43+
44+
# Build C extensions and pandas
45+
RUN cd "$pandas_home" \
46+
&& python setup.py build_ext --inplace -j 4 \
47+
&& python -m pip install -e .

LICENSE

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
BSD 3-Clause License
22

3-
Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
3+
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
44
All rights reserved.
55

6+
Copyright (c) 2011-2020, Open source contributors.
7+
68
Redistribution and use in source and binary forms, with or without
79
modification, are permitted provided that the following conditions are met:
810

asv_bench/asv.conf.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"matplotlib": [],
4444
"sqlalchemy": [],
4545
"scipy": [],
46+
"numba": [],
4647
"numexpr": [],
4748
"pytables": [null, ""], // platform dependent, see excludes below
4849
"tables": [null, ""],

asv_bench/benchmarks/reshape.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ def time_pivot_table_categorical_observed(self):
161161
observed=True,
162162
)
163163

164+
def time_pivot_table_margins_only_column(self):
165+
self.df.pivot_table(columns=["key2", "key3"], margins=True)
166+
164167

165168
class Crosstab:
166169
def setup(self):

asv_bench/benchmarks/rolling.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,27 @@ def time_rolling(self, constructor, window, dtype, function, raw):
4444
self.roll.apply(function, raw=raw)
4545

4646

47+
class Engine:
48+
params = (
49+
["DataFrame", "Series"],
50+
["int", "float"],
51+
[np.sum, lambda x: np.sum(x) + 5],
52+
["cython", "numba"],
53+
)
54+
param_names = ["constructor", "dtype", "function", "engine"]
55+
56+
def setup(self, constructor, dtype, function, engine):
57+
N = 10 ** 3
58+
arr = (100 * np.random.random(N)).astype(dtype)
59+
self.data = getattr(pd, constructor)(arr)
60+
61+
def time_rolling_apply(self, constructor, dtype, function, engine):
62+
self.data.rolling(10).apply(function, raw=True, engine=engine)
63+
64+
def time_expanding_apply(self, constructor, dtype, function, engine):
65+
self.data.expanding().apply(function, raw=True, engine=engine)
66+
67+
4768
class ExpandingMethods:
4869

4970
params = (

ci/code_checks.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,8 @@ fi
314314
### DOCSTRINGS ###
315315
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
316316

317-
MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA01, SA02, SA03, SA05)' ; echo $MSG
318-
$BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA01,SA02,SA03,SA05
317+
MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA02, SA03, SA05)' ; echo $MSG
318+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03,SA05
319319
RET=$(($RET + $?)) ; echo $MSG "DONE"
320320

321321
fi

ci/deps/azure-37-locale.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,6 @@ dependencies:
3434
- xlsxwriter
3535
- xlwt
3636
- pyarrow>=0.15
37+
- pip
38+
- pip:
39+
- pyxlsb

ci/deps/azure-macos-36.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ dependencies:
3333
- pip
3434
- pip:
3535
- pyreadstat
36+
- pyxlsb

ci/deps/azure-windows-37.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,6 @@ dependencies:
3535
- xlsxwriter
3636
- xlwt
3737
- pyreadstat
38+
- pip
39+
- pip:
40+
- pyxlsb

ci/deps/travis-36-cov.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,4 @@ dependencies:
5151
- coverage
5252
- pandas-datareader
5353
- python-dateutil
54+
- pyxlsb

ci/print_skipped.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
import os
33
import xml.etree.ElementTree as et
44

doc/make.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
"""
33
Python script for building documentation.
44

doc/source/conf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# All configuration values have a default; values that are commented out
1111
# serve to show the default.
1212

13+
from datetime import datetime
1314
import importlib
1415
import inspect
1516
import logging
@@ -137,7 +138,7 @@
137138

138139
# General information about the project.
139140
project = "pandas"
140-
copyright = "2008-2020, the pandas development team"
141+
copyright = f"2008-{datetime.now().year}, the pandas development team"
141142

142143
# The version info for the project you're documenting, acts as replacement for
143144
# |version| and |release|, also used in various other places throughout the

doc/source/development/contributing.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,17 @@ requires a C compiler and Python environment. If you're making documentation
146146
changes, you can skip to :ref:`contributing.documentation` but you won't be able
147147
to build the documentation locally before pushing your changes.
148148

149+
Using a Docker Container
150+
~~~~~~~~~~~~~~~~~~~~~~~~
151+
152+
Instead of manually setting up a development environment, you can use Docker to
153+
automatically create the environment with just a few commands. Pandas provides a `Dockerfile`
154+
in the root directory to build a Docker image with a full pandas development environment.
155+
156+
Even easier, you can use the DockerFile to launch a remote session with Visual Studio Code,
157+
a popular free IDE, using the `.devcontainer.json` file.
158+
See https://code.visualstudio.com/docs/remote/containers for details.
159+
149160
.. _contributing.dev_c:
150161

151162
Installing a C compiler
@@ -1525,3 +1536,19 @@ The branch will still exist on GitHub, so to delete it there do::
15251536
git push origin --delete shiny-new-feature
15261537

15271538
.. _Gitter: https://gitter.im/pydata/pandas
1539+
1540+
1541+
Tips for a successful Pull Request
1542+
==================================
1543+
1544+
If you have made it to the `Review your code`_ phase, one of the core contributors may
1545+
take a look. Please note however that a handful of people are responsible for reviewing
1546+
all of the contributions, which can often lead to bottlenecks.
1547+
1548+
To improve the chances of your pull request being reviewed, you should:
1549+
1550+
- **Reference an open issue** for non-trivial changes to clarify the PR's purpose
1551+
- **Ensure you have appropriate tests**. These should be the first part of any PR
1552+
- **Keep your pull requests as simple as possible**. Larger PRs take longer to review
1553+
- **Ensure that CI is in a green state**. Reviewers may not even look otherwise
1554+
- **Keep** `Updating your pull request`_, either by request or every few days

doc/source/ecosystem.rst

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,16 @@ Pyjanitor provides a clean API for cleaning data, using method chaining.
4141
Engarde is a lightweight library used to explicitly state assumptions about your datasets
4242
and check that they're *actually* true.
4343

44+
`pandas-path <https://github.com/drivendataorg/pandas-path/>`__
45+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
46+
47+
Since Python 3.4, `pathlib <https://docs.python.org/3/library/pathlib.html>`_ has been
48+
included in the Python standard library. Path objects provide a simple
49+
and delightful way to interact with the file system. The pandas-path package enables the
50+
Path API for pandas through a custom accessor ``.path``. Getting just the filenames from
51+
a series of full file paths is as simple as ``my_files.path.name``. Other convenient operations like
52+
joining paths, replacing file extensions, and checking if files exist are also available.
53+
4454
.. _ecosystem.stats:
4555

4656
Statistics and machine learning
@@ -112,16 +122,14 @@ also goes beyond matplotlib and pandas with the option to perform statistical
112122
estimation while plotting, aggregating across observations and visualizing the
113123
fit of statistical models to emphasize patterns in a dataset.
114124

115-
`yhat/ggpy <https://github.com/yhat/ggpy>`__
116-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
125+
`plotnine <https://github.com/has2k1/plotnine/>`__
126+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
117127

118128
Hadley Wickham's `ggplot2 <https://ggplot2.tidyverse.org/>`__ is a foundational exploratory visualization package for the R language.
119129
Based on `"The Grammar of Graphics" <https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html>`__ it
120130
provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data.
121-
It's really quite incredible. Various implementations to other languages are available,
122-
but a faithful implementation for Python users has long been missing. Although still young
123-
(as of Jan-2014), the `yhat/ggpy <https://github.com/yhat/ggpy>`__ project has been
124-
progressing quickly in that direction.
131+
Various implementations in other languages are available.
132+
A good implementation for Python users is `has2k1/plotnine <https://github.com/has2k1/plotnine/>`__.
125133

126134
`IPython Vega <https://github.com/vega/ipyvega>`__
127135
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -386,12 +394,16 @@ A directory of projects providing
386394
:ref:`extension accessors <extending.register-accessors>`. This is for users to
387395
discover new accessors and for library authors to coordinate on the namespace.
388396

389-
============== ========== =========================
390-
Library Accessor Classes
391-
============== ========== =========================
392-
`cyberpandas`_ ``ip`` ``Series``
393-
`pdvega`_ ``vgplot`` ``Series``, ``DataFrame``
394-
============== ========== =========================
397+
=============== ========== ========================= ===============================================================
398+
Library Accessor Classes Description
399+
=============== ========== ========================= ===============================================================
400+
`cyberpandas`_ ``ip`` ``Series`` Provides common operations for working with IP addresses.
401+
`pdvega`_ ``vgplot`` ``Series``, ``DataFrame`` Provides plotting functions from the Altair_ library.
402+
`pandas_path`_ ``path`` ``Index``, ``Series`` Provides `pathlib.Path`_ functions for Series and Index.
403+
=============== ========== ========================= ===============================================================
395404

396405
.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest
397406
.. _pdvega: https://altair-viz.github.io/pdvega/
407+
.. _Altair: https://altair-viz.github.io/
408+
.. _pandas_path: https://github.com/drivendataorg/pandas-path/
409+
.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html

doc/source/getting_started/dsintro.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ Like a NumPy array, a pandas Series has a :attr:`~Series.dtype`.
136136
137137
This is often a NumPy dtype. However, pandas and 3rd-party libraries
138138
extend NumPy's type system in a few places, in which case the dtype would
139-
be a :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within
139+
be an :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within
140140
pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`basics.dtypes`
141141
for more.
142142

doc/source/getting_started/install.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and
264264
pymysql 0.7.11 MySQL engine for sqlalchemy
265265
pyreadstat SPSS files (.sav) reading
266266
pytables 3.4.2 HDF5 reading / writing
267+
pyxlsb 1.0.6 Reading xlsb files
267268
qtpy Clipboard I/O
268269
s3fs 0.3.0 Amazon S3 access
269270
tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)

doc/source/user_guide/computation.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ Numba will be applied in potentially two routines:
348348

349349
1. If ``func`` is a standard Python function, the engine will `JIT <http://numba.pydata.org/numba-doc/latest/user/overview.html>`__
350350
the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again.
351+
351352
2. The engine will JIT the for loop where the apply function is applied to each window.
352353

353354
The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the

0 commit comments

Comments
 (0)