diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 9e74eb46f7b1f..6844df495547a 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -57,10 +57,10 @@ DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver") RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb") -# holds deprecated option metdata +# holds deprecated option metadata _deprecated_options: Dict[str, DeprecatedOption] = {} -# holds registered option metdata +# holds registered option metadata _registered_options: Dict[str, RegisteredOption] = {} # holds the current values for registered options diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9e5fa75ebeceb..abb8a6d388d26 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -791,7 +791,7 @@ def group_quantile(ndarray[float64_t] out, out[i] = NaN else: # Calculate where to retrieve the desired value - # Casting to int will intentionaly truncate result + # Casting to int will intentionally truncate result idx = grp_start + (q * (non_na_sz - 1)) val = values[sort_arr[idx]] diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 0ed48efb03035..ac8172146d351 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -288,7 +288,7 @@ cdef class IndexEngine: def get_indexer_non_unique(self, targets): """ - Return an indexer suitable for takng from a non unique index + Return an indexer suitable for taking from a non unique index return the labels in the same order ast the target and a missing indexer into the targets (which correspond to the -1 indices in the results diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e9a486894fbf0..3f578a453aa1d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -510,7 +510,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): @cython.boundscheck(False) def array_equivalent_object(left: object[:], right: object[:]) -> bool: """ - Perform an element by 
element comparion on 1-d object arrays + Perform an element by element comparison on 1-d object arrays taking into account nan positions. """ cdef: diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index 77ec519cc24da..bcf6350aa9090 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -498,7 +498,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) */ #define kh_n_buckets(h) ((h)->n_buckets) -/* More conenient interfaces */ +/* More convenient interfaces */ /*! @function @abstract Instantiate a hash set containing integer keys diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index a847b0f5d5102..26b00c0cacd31 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -150,7 +150,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { case '7': case '8': case '9': { - // FIXME: Check for arithemtic overflow here + // FIXME: Check for arithmetic overflow here // PERF: Don't do 64-bit arithmetic here unless we know we have // to intValue = intValue * 10ULL + (JSLONG)(chr - 48); @@ -235,7 +235,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } BREAK_FRC_LOOP: - // FIXME: Check for arithemtic overflow here + // FIXME: Check for arithmetic overflow here ds->lastType = JT_DOUBLE; ds->start = offset; return ds->dec->newDouble( @@ -282,7 +282,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } BREAK_EXP_LOOP: - // FIXME: Check for arithemtic overflow here + // FIXME: Check for arithmetic overflow here ds->lastType = JT_DOUBLE; ds->start = offset; return ds->dec->newDouble( diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 5d17d3a2d7bcb..37e9c36a85327 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1632,7 +1632,7 @@ char 
**NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, sprintf(buf, "%" NPY_INT64_FMT, value); len = strlen(cLabel); } - } else { // Fallack to string representation + } else { // Fallback to string representation PyObject *str = PyObject_Str(item); if (str == NULL) { Py_DECREF(item); diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e4e7f65db8dea..a44f374264f09 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -324,7 +324,7 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implmented." + raise NotImplementedError("Timestamp.strptime() is not implemented. " "Use to_datetime() to parse date strings.") @classmethod diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 935f657416396..ce16a1620eed5 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -290,7 +290,7 @@ def update_dtype(self, dtype): Returns ------- SparseDtype - A new SparseDtype with the corret `dtype` and fill value + A new SparseDtype with the correct `dtype` and fill value for that `dtype`. Raises diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 0d30aa06cd466..de254f662bb32 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -86,7 +86,7 @@ def __from_arrow__(self, array): results = [] for arr in chunks: - # using _from_sequence to ensure None is convered to NA + # using _from_sequence to ensure None is converted to NA str_arr = StringArray._from_sequence(np.array(arr)) results.append(str_arr) @@ -153,7 +153,7 @@ class StringArray(PandasArray): ... ValueError: StringArray requires an object-dtype ndarray of strings. 
- For comparision methods, this returns a :class:`pandas.BooleanArray` + For comparison methods, this returns a :class:`pandas.BooleanArray` >>> pd.array(["a", None, "c"], dtype="string") == "a" diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index e088400b25f0f..700d8d503d086 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -1,5 +1,5 @@ """ -Provide basic components for groupby. These defintiions +Provide basic components for groupby. These definitions hold the whitelist of methods that are exposed on the SeriesGroupBy and the DataFrameGroupBy objects. """ diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index fd91e78451da9..8fb81faf313d7 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -1,5 +1,5 @@ """ -Ops for masked ararys. +Ops for masked arrays. """ from typing import Optional, Union diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 109df6584641d..124bd31c8d308 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1640,7 +1640,7 @@ def _get_format_datetime64_from_values( """ given values and a date_format, return a string format """ if isinstance(values, np.ndarray) and values.ndim > 1: - # We don't actaully care about the order of values, and DatetimeIndex + # We don't actually care about the order of values, and DatetimeIndex # only accepts 1D values values = values.ravel() diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 8a4a72021eb43..d9711f4f4626a 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -65,7 +65,7 @@ def read_gbq( *New in version 0.2.0 of pandas-gbq*. dialect : str, default 'legacy' - Note: The default value is changing to 'standard' in a future verion. + Note: The default value is changing to 'standard' in a future version. SQL syntax dialect to use. 
Value can be one of: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 30c1c2d59e983..6cb811bb97755 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -314,7 +314,7 @@ def __init__( timedeltas = obj.select_dtypes(include=["timedelta"]).columns if len(timedeltas): obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) - # Convert PeriodIndex to datetimes before serialzing + # Convert PeriodIndex to datetimes before serializing if is_period_dtype(obj.index): obj.index = obj.index.to_timestamp() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c87edcc602686..a887a537a2201 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3492,7 +3492,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): # 2) index_names (column names) # # Both must be non-null to ensure a successful construction. Otherwise, - # we have to create a generic emtpy Index. + # we have to create a generic empty Index. if (index_col is None or index_col is False) or index_names is None: index = Index([]) else: diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 46ca7bd8f760a..014581682ac59 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -183,7 +183,7 @@ def _values_for_factorize(self): def _values_for_argsort(self): # Disable NumPy's shape inference by including an empty tuple... - # If all the elemnts of self are the same size P, NumPy will + # If all the elements of self are the same size P, NumPy will # cast them to an (N, P) array, instead of an (N,) array of tuples. 
frozen = [()] + [tuple(x.items()) for x in self] return np.array(frozen, dtype=object)[1:] diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index f95137cd1bf88..36cdaa8a6029b 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -798,7 +798,7 @@ def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range("12/5/2011", "12/5/2011") rng2 = bdate_range("12/2/2011", "12/5/2011") - rng2._data.freq = BDay() # TODO: shouldnt this already be set? + rng2._data.freq = BDay() # TODO: shouldn't this already be set? result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) @@ -855,7 +855,7 @@ def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C") rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C") - rng2._data.freq = CDay() # TODO: shouldnt this already be set? + rng2._data.freq = CDay() # TODO: shouldn't this already be set? 
result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 08c14c36a195e..6e919571d1423 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1061,7 +1061,7 @@ class TestToDatetimeUnit: @pytest.mark.parametrize("cache", [True, False]) def test_unit(self, cache): # GH 11758 - # test proper behavior with erros + # test proper behavior with errors with pytest.raises(ValueError): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index d24f91a2c9e13..17ab85033acfb 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -228,7 +228,7 @@ def test_insert(self): def test_delete(self): idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") - # prserve freq + # preserve freq expected_0 = timedelta_range(start="2 Days", periods=4, freq="D", name="idx") expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx") @@ -257,7 +257,7 @@ def test_delete(self): def test_delete_slice(self): idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx") - # prserve freq + # preserve freq expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx") expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx") diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 5cea4fb5acca0..71a186dc2f3b0 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -84,7 +84,7 @@ def test_matplotlib_formatters(self): units = pytest.importorskip("matplotlib.units") # Can't make any assertion about the start state. 
- # We we check that toggling converters off remvoes it, and toggling it + # We check that toggling converters off removes it, and toggling it # on restores it. with cf.option_context("plotting.matplotlib.register_converters", True):