Skip to content

CLN: Assorted cleanups #27094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 28, 2019
Merged
3 changes: 2 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
else:
if axis > 0:
swapped = True
values = values.swapaxes(0, axis)
assert axis == 1, axis
values = values.T
if arity > 1:
raise NotImplementedError("arity of more than 1 is not "
"supported for the 'how' argument")
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

from .blocks import ( # noqa: F401
Block, BoolBlock, CategoricalBlock, ComplexBlock, DatetimeBlock,
DatetimeTZBlock, ExtensionBlock, FloatBlock, IntBlock, ObjectBlock,
Expand All @@ -9,7 +10,7 @@
from .blocks import _safe_reshape # noqa: F401; io.packers
from .blocks import make_block # noqa: F401; io.pytables, io.packers
from .managers import ( # noqa: F401; reshape.concat, reshape.merge
_transform_index,
concatenate_block_managers)
from .managers import items_overlap_with_suffix # noqa: F401; reshape.merge

from .blocks import _block_shape # noqa:F401; io.pytables
32 changes: 24 additions & 8 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,16 +722,28 @@ def replace(self, to_replace, value, inplace=False, filter=None,
try:
values, to_replace = self._try_coerce_args(self.values,
to_replace)
mask = missing.mask_missing(values, to_replace)
if filter is not None:
filtered_out = ~self.mgr_locs.isin(filter)
mask[filtered_out.nonzero()[0]] = False
except (TypeError, ValueError):
# GH 22083, TypeError or ValueError occurred within error handling
# causes infinite loop. Cast and retry only if not objectblock.
if is_object_dtype(self):
raise

# try again with a compatible block
block = self.astype(object)
return block.replace(to_replace=original_to_replace,
value=value,
inplace=inplace,
filter=filter,
regex=regex,
convert=convert)

mask = missing.mask_missing(values, to_replace)
if filter is not None:
filtered_out = ~self.mgr_locs.isin(filter)
mask[filtered_out.nonzero()[0]] = False

try:
blocks = self.putmask(mask, value, inplace=inplace)
if convert:
blocks = [b.convert(by_item=True, numeric=False,
copy=not inplace) for b in blocks]
return blocks
except (TypeError, ValueError):
# GH 22083, TypeError or ValueError occurred within error handling
# causes infinite loop. Cast and retry only if not objectblock.
Expand All @@ -746,6 +758,10 @@ def replace(self, to_replace, value, inplace=False, filter=None,
filter=filter,
regex=regex,
convert=convert)
if convert:
blocks = [b.convert(by_item=True, numeric=False,
copy=not inplace) for b in blocks]
return blocks

def _replace_single(self, *args, **kwargs):
""" no-op on a non-ObjectBlock """
Expand Down
42 changes: 0 additions & 42 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,48 +1859,6 @@ def _compare_or_regex_search(a, b, regex=False):
return result


# TODO: this is no longer used in this module, could be moved to concat
def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
    """
    If two indices overlap, add suffixes to the overlapping entries.

    Parameters
    ----------
    left, right : Index
        The two sets of labels to compare. ``left`` must provide
        ``intersection``; both are handed to ``_transform_index`` when a
        rename is needed.
    lsuffix, rsuffix : str or None
        Suffix appended to the overlapping labels of ``left`` / ``right``.
        ``None`` leaves that side's labels as they are. An empty string
        appends nothing, but the overlapping labels are still converted
        to ``str`` by the formatting step.

    Returns
    -------
    tuple
        ``(left, right)`` unchanged (the very same objects) when no label
        is shared, otherwise the two results of ``_transform_index``.

    Raises
    ------
    ValueError
        If labels overlap and neither suffix is truthy.
    """
    to_rename = left.intersection(right)

    # Explicit length check rather than ``not to_rename``: the intersection
    # is an Index-like object, not a plain container.
    if len(to_rename) == 0:
        return left, right

    if not lsuffix and not rsuffix:
        raise ValueError('columns overlap but no suffix specified: '
                         '{rename}'.format(rename=to_rename))

    def renamer(x, suffix):
        """
        Rename a single label.

        Parameters
        ----------
        x : original column name
        suffix : str or None

        Returns
        -------
        x : the label with ``suffix`` appended when it is one of the
            overlapping labels and ``suffix`` is not None, otherwise the
            label as-is.
        """
        if x in to_rename and suffix is not None:
            return '{x}{suffix}'.format(x=x, suffix=suffix)
        return x

    # One renamer per side so each side gets its own suffix.
    lrenamer = partial(renamer, suffix=lsuffix)
    rrenamer = partial(renamer, suffix=rsuffix)

    return (_transform_index(left, lrenamer),
            _transform_index(right, rrenamer))


def _transform_index(index, func, level=None):
"""
Apply function to all values found in index.
Expand Down
54 changes: 48 additions & 6 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import copy
from functools import partial
import string
import warnings

Expand All @@ -27,8 +28,7 @@
from pandas.core.arrays.categorical import _recode_for_categories
import pandas.core.common as com
from pandas.core.frame import _merge_doc
from pandas.core.internals import (
concatenate_block_managers, items_overlap_with_suffix)
from pandas.core.internals import _transform_index, concatenate_block_managers
import pandas.core.sorting as sorting
from pandas.core.sorting import is_int64_overflow_possible

Expand Down Expand Up @@ -555,8 +555,8 @@ def get_result(self):
ldata, rdata = self.left._data, self.right._data
lsuf, rsuf = self.suffixes

llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf,
rdata.items, rsuf)
llabels, rlabels = _items_overlap_with_suffix(ldata.items, lsuf,
rdata.items, rsuf)

lindexers = {1: left_indexer} if left_indexer is not None else {}
rindexers = {1: right_indexer} if right_indexer is not None else {}
Expand Down Expand Up @@ -1303,8 +1303,8 @@ def get_result(self):
ldata, rdata = self.left._data, self.right._data
lsuf, rsuf = self.suffixes

llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf,
rdata.items, rsuf)
llabels, rlabels = _items_overlap_with_suffix(ldata.items, lsuf,
rdata.items, rsuf)

if self.fill_method == 'ffill':
left_join_indexer = libjoin.ffill_indexer(left_indexer)
Expand Down Expand Up @@ -1809,3 +1809,45 @@ def validate_operand(obj):
else:
raise TypeError('Can only merge Series or DataFrame objects, '
'a {obj} was passed'.format(obj=type(obj)))


def _items_overlap_with_suffix(left, lsuffix, right, rsuffix):
    """
    If two indices overlap, add suffixes to overlapping entries.

    If corresponding suffix is empty, the entry is simply converted to string.

    Parameters
    ----------
    left, right : Index
        Labels of the two operands; ``left.intersection(right)`` decides
        which labels count as overlapping.
    lsuffix, rsuffix : str or None
        Suffix for the overlapping labels of ``left`` / ``right``. A side
        whose suffix is ``None`` is not renamed.

    Returns
    -------
    tuple
        ``(left, right)`` themselves when nothing overlaps, otherwise the
        pair produced by applying the per-side renamer through
        ``_transform_index``.

    Raises
    ------
    ValueError
        If labels overlap and both suffixes are falsy.
    """
    # NOTE(review): a reviewer asked for these arguments to be type
    # annotated; this can be done in a later PR.
    to_rename = left.intersection(right)
    if len(to_rename) == 0:
        # Nothing shared: hand the inputs back untouched.
        return left, right

    if not lsuffix and not rsuffix:
        raise ValueError('columns overlap but no suffix specified: '
                         '{rename}'.format(rename=to_rename))

    def renamer(x, suffix):
        """
        Rename the left and right indices.

        If there is overlap, and suffix is not None, add
        suffix, otherwise, leave it as-is.

        Parameters
        ----------
        x : original column name
        suffix : str or None

        Returns
        -------
        x : renamed column name
        """
        if x in to_rename and suffix is not None:
            return '{x}{suffix}'.format(x=x, suffix=suffix)
        return x

    lrenamer = partial(renamer, suffix=lsuffix)
    rrenamer = partial(renamer, suffix=rsuffix)

    return (_transform_index(left, lrenamer),
            _transform_index(right, rrenamer))
1 change: 1 addition & 0 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def test_bad_url_protocol(self):
self.read_html('git://github.com', match='.*Water.*')

@network
@pytest.mark.slow
def test_invalid_url(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

two versions of this test get run and each takes 150s locally

try:
with pytest.raises(URLError):
Expand Down