Skip to content

BUG: correctly lookup global constants in query/eval #7181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 20, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,8 @@ Bug Fixes
- Bug in ``PeriodIndex`` string slicing with out of bounds values (:issue:`5407`)
- Fixed a memory error in the hashtable implementation/factorizer on resizing of large tables (:issue:`7157`)
- Bug in ``isnull`` when applied to 0-dimensional object arrays (:issue:`7176`)
- Bug in ``query``/``eval`` where global constants were not looked up correctly
(:issue:`7178`)

pandas 0.13.1
-------------
Expand Down
5 changes: 4 additions & 1 deletion pandas/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pandas.core.common as com
from pandas.core.base import StringMixin
from pandas.computation.common import _ensure_decoded, _result_type_many
from pandas.computation.scope import _DEFAULT_GLOBALS


_reductions = 'sum', 'prod'
Expand Down Expand Up @@ -48,7 +49,9 @@ def __init__(self, name, env, side=None, encoding=None):
self._name = name
self.env = env
self.side = side
self.is_local = text_type(name).startswith(_LOCAL_TAG)
tname = text_type(name)
self.is_local = (tname.startswith(_LOCAL_TAG) or
tname in _DEFAULT_GLOBALS)
self._value = self._resolve_name()
self.encoding = encoding

Expand Down
13 changes: 7 additions & 6 deletions pandas/computation/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
"""

import sys
import operator
import struct
import inspect
import datetime
import itertools
import pprint

import numpy as np

import pandas as pd
from pandas.compat import DeepChainMap, map, StringIO
from pandas.core import common as com
from pandas.core.base import StringMixin
from pandas.computation.ops import UndefinedVariableError, _LOCAL_TAG
import pandas.computation as compu


def _ensure_scope(level, global_dict=None, local_dict=None, resolvers=(),
Expand Down Expand Up @@ -45,14 +45,15 @@ def _raw_hex_id(obj):
return ''.join(map(_replacer, packed))



# Mapping from the identifier as written in an expression string to the
# object it stands for.  Both ``inf`` and ``Inf`` map to numpy's infinity.
# Each key appears exactly once and every entry ends with a comma so that
# new names can be appended without touching the preceding line.
_DEFAULT_GLOBALS = {
    'Timestamp': pd.lib.Timestamp,
    'datetime': datetime.datetime,
    'True': True,
    'False': False,
    'list': list,
    'tuple': tuple,
    'inf': np.inf,
    'Inf': np.inf,
}


Expand Down Expand Up @@ -186,7 +187,7 @@ def resolve(self, key, is_local):
# e.g., df[df > 0]
return self.temps[key]
except KeyError:
raise UndefinedVariableError(key, is_local)
raise compu.ops.UndefinedVariableError(key, is_local)

def swapkey(self, old_key, new_key, new_value=None):
"""Replace a variable name, with a potentially new value.
Expand Down
36 changes: 24 additions & 12 deletions pandas/computation/tests/test_eval.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/usr/bin/env python

import functools
from itertools import product
from distutils.version import LooseVersion

import nose
from nose.tools import assert_raises, assert_true, assert_false, assert_equal
from nose.tools import assert_raises

from numpy.random import randn, rand, randint
import numpy as np
Expand Down Expand Up @@ -887,7 +886,7 @@ def check_complex_series_frame_alignment(self, engine, parser):
expected = expected2 + df

res = pd.eval('df2 + s + df', engine=engine, parser=parser)
assert_equal(res.shape, expected.shape)
tm.assert_equal(res.shape, expected.shape)
assert_frame_equal(res, expected)

@slow
Expand Down Expand Up @@ -930,13 +929,13 @@ def check_performance_warning_for_poor_alignment(self, engine, parser):
pd.eval('df + s', engine=engine, parser=parser)

if not is_python_engine:
assert_equal(len(w), 1)
tm.assert_equal(len(w), 1)
msg = str(w[0].message)
expected = ("Alignment difference on axis {0} is larger"
" than an order of magnitude on term {1!r}, "
"by more than {2:.4g}; performance may suffer"
"".format(1, 'df', np.log10(s.size - df.shape[1])))
assert_equal(msg, expected)
tm.assert_equal(msg, expected)

def test_performance_warning_for_poor_alignment(self):
for engine, parser in ENGINES_PARSERS:
Expand Down Expand Up @@ -982,17 +981,17 @@ def test_simple_arith_ops(self):
else:
expec = _eval_single_bin(1, op, 1, self.engine)
x = self.eval(ex, engine=self.engine, parser=self.parser)
assert_equal(x, expec)
tm.assert_equal(x, expec)

expec = _eval_single_bin(x, op, 1, self.engine)
y = self.eval(ex2, local_dict={'x': x}, engine=self.engine,
parser=self.parser)
assert_equal(y, expec)
tm.assert_equal(y, expec)

expec = _eval_single_bin(1, op, x + 1, self.engine)
y = self.eval(ex3, local_dict={'x': x},
engine=self.engine, parser=self.parser)
assert_equal(y, expec)
tm.assert_equal(y, expec)

def test_simple_bool_ops(self):
for op, lhs, rhs in product(expr._bool_ops_syms, (True, False),
Expand Down Expand Up @@ -1024,7 +1023,7 @@ def test_4d_ndarray_fails(self):

def test_constant(self):
x = self.eval('1')
assert_equal(x, 1)
tm.assert_equal(x, 1)

def test_single_variable(self):
df = DataFrame(randn(10, 2))
Expand Down Expand Up @@ -1379,7 +1378,7 @@ def check_no_new_locals(self, engine, parser):
pd.eval('x + 1', local_dict=lcls, engine=engine, parser=parser)
lcls2 = locals().copy()
lcls2.pop('lcls')
assert_equal(lcls, lcls2)
tm.assert_equal(lcls, lcls2)

def test_no_new_locals(self):
for engine, parser in product(_engines, expr._parsers):
Expand All @@ -1391,7 +1390,7 @@ def check_no_new_globals(self, engine, parser):
gbls = globals().copy()
pd.eval('x + 1', engine=engine, parser=parser)
gbls2 = globals().copy()
assert_equal(gbls, gbls2)
tm.assert_equal(gbls, gbls2)

def test_no_new_globals(self):
for engine, parser in product(_engines, expr._parsers):
Expand Down Expand Up @@ -1556,14 +1555,27 @@ def check_bool_ops_fails_on_scalars(gen, lhs, cmp, rhs, engine, parser):
def test_bool_ops_fails_on_scalars():
    # Generator test: yields one ``check_bool_ops_fails_on_scalars`` case
    # for every combination of engine, parser, left operand type, boolean
    # operator and right operand type.
    _bool_ops_syms = 'and', 'or'
    dtypes = int, float
    # Factories producing a random scalar of the requested type; defined
    # once (a repeated identical assignment was redundant).
    gen = {int: lambda: np.random.randint(10), float: np.random.randn}
    for engine, parser, dtype1, cmp, dtype2 in product(_engines, expr._parsers,
                                                       dtypes, _bool_ops_syms,
                                                       dtypes):
        yield (check_bool_ops_fails_on_scalars, gen, gen[dtype1](), cmp,
               gen[dtype2](), engine, parser)


def check_inf(engine, parser):
    # The bare name ``inf`` is evaluated inside the expression string;
    # adding a finite number to it must still compare equal to ``np.inf``.
    tm.skip_if_no_ne(engine)
    result = pd.eval('inf + 1', engine=engine, parser=parser)
    tm.assert_equal(result, np.inf)


def test_inf():
    # Generator test: emit one ``check_inf`` case per (engine, parser) pair
    # listed in ``ENGINES_PARSERS``.
    for pair in ENGINES_PARSERS:
        engine, parser = pair
        yield check_inf, engine, parser


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
12 changes: 12 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -13284,6 +13284,18 @@ def test_index_resolvers_come_after_columns_with_the_same_name(self):
expected = df.loc[df.index[df.index > 5]]
tm.assert_frame_equal(result, expected)

def test_inf(self):
    # ``query`` must resolve the bare name ``inf`` so a column can be
    # compared against infinity with ``==`` and ``!=``.
    n = 10
    df = DataFrame({'a': np.random.rand(n), 'b': np.random.rand(n)})
    # Put inf into every other row of column 'a' -- the column the queries
    # below compare.  Addressing the column as ``0`` does not refer to 'a'
    # (the frame's columns are labelled 'a' and 'b'), which would leave the
    # ``a == inf`` case with no matching rows to verify.
    df.loc[::2, 'a'] = np.inf
    # Fixed-order pairs of (operator symbol, equivalent Python function).
    cases = (('==', operator.eq), ('!=', operator.ne))
    for op, f in cases:
        q = 'a %s inf' % op
        expected = df[f(df.a, np.inf)]
        result = df.query(q, engine=self.engine, parser=self.parser)
        tm.assert_frame_equal(result, expected)


class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas):

Expand Down