Commit f3408193 authored by René Paris's avatar René Paris Committed by Reinhard Biegel
Browse files

feat(pyOpenPASS): Introduce approximate operator



The approximate operator ~= is now also available for value comparison
in query strings, such as "col1 ~= 3.0 and col2 = 0.0". In such cases the
query is expanded to a boundary of +/- 1E-3 x VALUE, here for col1
"(col1 > 2.997000E+00 and col1 < 3.003000E+00)". For a value of 0, +/- 1E-3
is taken as the absolute boundary.
Signed-off-by: René Paris's avatarRene Paris <rene.paris@in-tech.com>
parent 7743b343
......@@ -24,6 +24,34 @@ class Query:
self.operator = match.group(4)
self.value = match.group(5)
@staticmethod
def is_numeric(value):
try:
_ = float(value)
return True
except:
return False
def _expand_approximate(self, filter):
match = re.search(r'([^ )(]+)\s*~=\s*([^ )(]+)', filter)
if match:
lhs, rhs = match[1], match[2]
if self.is_numeric(lhs):
lhs, rhs = (rhs, lhs)
if self.is_numeric(lhs):
raise Exception(f"Unable to expand '{match[0]}': Both sides are numeric (maybe you forgot to quote a column name?)")
rhs = float(rhs)
delta = rhs * 1e-3 if rhs != 0 else 1e-3
return filter.replace(match[0], f'({lhs} > {(rhs - delta):.6E} and {lhs} < {(rhs + delta):.6E})')
return filter
def _quote_columns_containing_dashes(self, filter):
matches = re.findall(r'([a-zA-Z0-9_]*(?:-+[a-zA-Z0-9_]*)+[a-zA-Z0-9_]*)', filter)
for match in matches:
if not self.is_numeric(match):
filter = re.sub(match, f'`{match}`', filter)
return filter
def _parse_filter(self, filter):
iloc = None
match = re.search(r'(Timestep\s*==\s*{\s*first\s*})\s*([^\s]*)\s*', filter)
......@@ -34,8 +62,9 @@ class Query:
raise Exception("'Timestep == {first}' must succeeded by 'and'")
filter = filter[len(match[0]):]
iloc = 0
# quote columns containing `-` in name using backticks (required by DataFrame.query())
return re.sub(r'([a-zA-Z0-9_]+(?:-+[a-zA-Z0-9_]*)+[a-zA-Z0-9_]*)', r'`\1`', filter), iloc
filter = self._expand_approximate(filter)
filter = self._quote_columns_containing_dashes(filter)
return filter, iloc
def __init__(self, query):
self.raw_query = query
......@@ -70,7 +99,7 @@ class Query:
raw_query = raw_query.replace(
f'#({event})', f'Event_{normalize(event)}')
matches = set(re.findall(r"\|.*?(((?=[^'])[A-Za-z\d_-]+)-(\d+)(?=[^']))", raw_query))
matches = set(re.findall(r"\|.*?(((?=[^`])[A-Za-z\d_-]+)-(\d+)(?=[^`]))", raw_query))
for match, column, shift in matches:
new_column = f'{column}_prev{shift}'
......
......@@ -54,8 +54,8 @@ def test_query_parser_time_shift():
def test_query_parser_time_shift_not_applied_to_quoted_strings():
q = Query(
"count(aggregate | some == 0 and 'some-2' == 1) > 0")
assert(q.parsed == "count(aggregate | some == 0 and 'some-2' == 1) > 0")
"count(aggregate | some == 0 and `some-2` == 1) > 0")
assert(q.parsed == "count(aggregate | some == 0 and `some-2` == 1) > 0")
assert(len(q.shifted_columns) == 0)
......@@ -127,3 +127,19 @@ def test_query__first_timestep_token_not_followed_by_and__raises_error():
with pytest.raises(Exception) as e:
q = Query("count(aggregate | Timestep == {first} or some == 1) > 123")
assert 'and' in e.what()
def test_query_parser__value_approximate_equal__expands_to_boundary():
    """``some ~= 0`` must become a range check of +/- 1E-3 around zero."""
    expected_filter = "(some > -1.000000E-03 and some < 1.000000E-03)"
    parsed_query = Query("count(aggregate-0-A | some ~= 0) == 0")
    assert parsed_query.pd.filter == expected_filter
def test_query_parser__value_approximate_equal__reverse():
    """``0 ~= some`` (number on the left) must yield the same range check."""
    expected_filter = "(some > -1.000000E-03 and some < 1.000000E-03)"
    parsed_query = Query("count(aggregate-0-A | 0 ~= some) == 0")
    assert parsed_query.pd.filter == expected_filter
def test_query_parser__value_approximate_equal__autoscale():
    """For a non-zero value the range must scale with the value (0.1 here)."""
    expected_filter = "(some > 9.990000E-02 and some < 1.001000E-01)"
    parsed_query = Query("count(aggregate-0-A | some ~= 0.1) == 0")
    assert parsed_query.pd.filter == expected_filter
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment