Commit f3408193 by René Paris Committed by Reinhard Biegel

### feat(pyOpenPASS): Introduce approximate operator

```

The approximate operator ~= is now also available for value comparison
in query strings, such as "col1 ~= 3.0 and col2 == 0.0". In such cases the
query is expanded to a boundary of +/- 1E-3 x VALUE, here for col1
"(col1 > 2.997000E+00 and col1 < 3.003000E+00)". For a value of 0, +/- 1E-3
is taken as the absolute boundary instead.

Signed-off-by: Rene Paris <rene.paris@in-tech.com>
```
parent 7743b343
 ... ... @@ -24,6 +24,34 @@ class Query: self.operator = match.group(4) self.value = match.group(5) @staticmethod def is_numeric(value): try: _ = float(value) return True except: return False def _expand_approximate(self, filter): match = re.search(r'([^ )(]+)\s*~=\s*([^ )(]+)', filter) if match: lhs, rhs = match[1], match[2] if self.is_numeric(lhs): lhs, rhs = (rhs, lhs) if self.is_numeric(lhs): raise Exception(f"Unable to expand '{match[0]}': Both sides are numeric (maybe you forgot to quote a column name?)") rhs = float(rhs) delta = rhs * 1e-3 if rhs != 0 else 1e-3 return filter.replace(match[0], f'({lhs} > {(rhs - delta):.6E} and {lhs} < {(rhs + delta):.6E})') return filter def _quote_columns_containing_dashes(self, filter): matches = re.findall(r'([a-zA-Z0-9_]*(?:-+[a-zA-Z0-9_]*)+[a-zA-Z0-9_]*)', filter) for match in matches: if not self.is_numeric(match): filter = re.sub(match, f'`{match}`', filter) return filter def _parse_filter(self, filter): iloc = None match = re.search(r'(Timestep\s*==\s*{\s*first\s*})\s*([^\s]*)\s*', filter) ... ... @@ -34,8 +62,9 @@ class Query: raise Exception("'Timestep == {first}' must succeeded by 'and'") filter = filter[len(match[0]):] iloc = 0 # quote columns containing `-` in name using backticks (required by DataFrame.query()) return re.sub(r'([a-zA-Z0-9_]+(?:-+[a-zA-Z0-9_]*)+[a-zA-Z0-9_]*)', r'`\1`', filter), iloc filter = self._expand_approximate(filter) filter = self._quote_columns_containing_dashes(filter) return filter, iloc def __init__(self, query): self.raw_query = query ... ... @@ -70,7 +99,7 @@ class Query: raw_query = raw_query.replace( f'#({event})', f'Event_{normalize(event)}') matches = set(re.findall(r"\|.*?(((?=[^'])[A-Za-z\d_-]+)-(\d+)(?=[^']))", raw_query)) matches = set(re.findall(r"\|.*?(((?=[^`])[A-Za-z\d_-]+)-(\d+)(?=[^`]))", raw_query)) for match, column, shift in matches: new_column = f'{column}_prev{shift}' ... ...
 ... ... @@ -54,8 +54,8 @@ def test_query_parser_time_shift(): def test_query_parser_time_shift_not_applied_to_quoted_strings(): q = Query( "count(aggregate | some == 0 and 'some-2' == 1) > 0") assert(q.parsed == "count(aggregate | some == 0 and 'some-2' == 1) > 0") "count(aggregate | some == 0 and `some-2` == 1) > 0") assert(q.parsed == "count(aggregate | some == 0 and `some-2` == 1) > 0") assert(len(q.shifted_columns) == 0) ... ... @@ -127,3 +127,19 @@ def test_query__first_timestep_token_not_followed_by_and__raises_error(): with pytest.raises(Exception) as e: q = Query("count(aggregate | Timestep == {first} or some == 1) > 123") assert 'and' in e.what() def test_query_parser__value_approximate_equal__expands_to_boundary(): q = Query( "count(aggregate-0-A | some ~= 0) == 0") assert q.pd.filter == "(some > -1.000000E-03 and some < 1.000000E-03)" def test_query_parser__value_approximate_equal__reverse(): q = Query( "count(aggregate-0-A | 0 ~= some) == 0") assert q.pd.filter == "(some > -1.000000E-03 and some < 1.000000E-03)" def test_query_parser__value_approximate_equal__autoscale(): q = Query( "count(aggregate-0-A | some ~= 0.1) == 0") assert q.pd.filter == "(some > 9.990000E-02 and some < 1.001000E-01)"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!