From 44339ce029d86ef0e43c01a21606e631641cc72a Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Mon, 2 Dec 2024 17:24:04 +0000
Subject: [PATCH] Add NodeTransformRule to pushdown regex terms known to fail index expansion

---
Review notes (text between the "---" marker and the first "diff --git" line is
commentary and is not part of the change):

* The line structure of this patch was rebuilt from a whitespace-collapsed
  copy. The number of added lines per new file matches the hunk headers and
  the diffstat (44, 52 and 74).
* Generic type arguments that had been stripped from the copy are restored:
  Set<FieldPattern> in EvaluationOnlyPushdownRule and in the test,
  List<NodeTransformRule> in the test, and the <p> paragraph tag in the class
  Javadoc. No statement was otherwise changed.
* EvaluationOnlyPushdownRule.apply only acts on ASTERNode instances. It builds
  a FieldPattern from the node's identifier and the string form of its literal
  and, when that exact pair is in the configured set, returns a copy of the
  node wrapped in an EVALUATION_ONLY marker. Every other node is returned
  unchanged. Matching is plain equality on both strings (see
  FieldPattern.equals/hashCode), which is what testNoPushdown exercises.
* setFieldPatterns adds the given entries to the existing set; it does not
  replace entries configured earlier.
* The QueryLogicFactory.xml hunk lost all of its markup in the copy this was
  rebuilt from. Only the residue is kept below, verbatim on the hunk header
  line; its 16 added lines must be restored from the original commit before
  this patch can be applied.

 .../rules/EvaluationOnlyPushdownRule.java     | 44 +++++++++++
 .../query/planner/rules/FieldPattern.java     | 52 +++++++++++++
 .../rules/EvaluationOnlyPushdownRuleTest.java | 74 +++++++++++++++++++
 .../datawave/query/QueryLogicFactory.xml      | 16 ++++
 4 files changed, 186 insertions(+)
 create mode 100644 warehouse/query-core/src/main/java/datawave/query/planner/rules/EvaluationOnlyPushdownRule.java
 create mode 100644 warehouse/query-core/src/main/java/datawave/query/planner/rules/FieldPattern.java
 create mode 100644 warehouse/query-core/src/test/java/datawave/query/planner/rules/EvaluationOnlyPushdownRuleTest.java

diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/rules/EvaluationOnlyPushdownRule.java b/warehouse/query-core/src/main/java/datawave/query/planner/rules/EvaluationOnlyPushdownRule.java
new file mode 100644
index 00000000000..e909aa293f8
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/rules/EvaluationOnlyPushdownRule.java
@@ -0,0 +1,44 @@
+package datawave.query.planner.rules;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTERNode;
+import org.apache.commons.jexl3.parser.JexlNode;
+
+import datawave.query.config.ShardQueryConfiguration;
+import datawave.query.jexl.JexlASTHelper;
+import datawave.query.jexl.nodes.QueryPropertyMarker;
+import datawave.query.jexl.visitors.RebuildingVisitor;
+import datawave.query.util.MetadataHelper;
+
+/**
+ * Push certain regex terms down to evaluation, thus skipping index expansion and field index execution
+ * <p>
+ * Note: cannot configure index only fields
+ */
+public class EvaluationOnlyPushdownRule implements NodeTransformRule {
+
+    private final Set<FieldPattern> fieldPatterns = new HashSet<>();
+
+    @Override
+    public JexlNode apply(JexlNode node, ShardQueryConfiguration config, MetadataHelper helper) {
+        if (node instanceof ASTERNode) {
+            String field = JexlASTHelper.getIdentifier(node);
+            String pattern = String.valueOf(JexlASTHelper.getLiteralValue(node));
+            if (fieldPatterns.contains(new FieldPattern(field, pattern))) {
+                JexlNode copy = RebuildingVisitor.copy(node);
+                return QueryPropertyMarker.create(copy, QueryPropertyMarker.MarkerType.EVALUATION_ONLY);
+            }
+        }
+        return node;
+    }
+
+    public Set<FieldPattern> getFieldPatterns() {
+        return fieldPatterns;
+    }
+
+    public void setFieldPatterns(Set<FieldPattern> fieldPatterns) {
+        this.fieldPatterns.addAll(fieldPatterns);
+    }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/rules/FieldPattern.java b/warehouse/query-core/src/main/java/datawave/query/planner/rules/FieldPattern.java
new file mode 100644
index 00000000000..d73e9c07583
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/rules/FieldPattern.java
@@ -0,0 +1,52 @@
+package datawave.query.planner.rules;
+
+import org.apache.commons.lang3.builder.EqualsBuilder;
+import org.apache.commons.lang3.builder.HashCodeBuilder;
+
+public class FieldPattern {
+    private String field;
+    private String pattern;
+
+    public FieldPattern() {
+        // empty constructor
+    }
+
+    public FieldPattern(String field, String pattern) {
+        this.field = field;
+        this.pattern = pattern;
+    }
+
+    public String getField() {
+        return field;
+    }
+
+    public void setField(String field) {
+        this.field = field;
+    }
+
+    public String getPattern() {
+        return pattern;
+    }
+
+    public void setPattern(String pattern) {
+        this.pattern = pattern;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o)
+            return true;
+
+        if (o == null || getClass() != o.getClass())
+            return false;
+
+        FieldPattern that = (FieldPattern) o;
+
+        return new EqualsBuilder().append(field, that.field).append(pattern, that.pattern).isEquals();
+    }
+
+    @Override
+    public int hashCode() {
+        return new HashCodeBuilder(17, 37).append(field).append(pattern).toHashCode();
+    }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/planner/rules/EvaluationOnlyPushdownRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/planner/rules/EvaluationOnlyPushdownRuleTest.java
new file mode 100644
index 00000000000..98aac1ea61f
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/planner/rules/EvaluationOnlyPushdownRuleTest.java
@@ -0,0 +1,74 @@
+package datawave.query.planner.rules;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.JexlNode;
+import org.apache.commons.jexl3.parser.ParseException;
+import org.junit.jupiter.api.Test;
+
+import datawave.query.jexl.JexlASTHelper;
+import datawave.query.jexl.visitors.JexlStringBuildingVisitor;
+
+public class EvaluationOnlyPushdownRuleTest {
+
+    @Test
+    public void testPushdown() {
+        test("FIELD_A =~ 'abc.*'", "((_Eval_ = true) && (FIELD_A =~ 'abc.*'))");
+        test("FIELD_B =~ 'xyz.*'", "((_Eval_ = true) && (FIELD_B =~ 'xyz.*'))");
+    }
+
+    @Test
+    public void testNoPushdown() {
+        // field does not match, regex does, rule not applied
+        test("FIELD_D =~ 'abc.*'");
+        test("FIELD_E =~ 'xyz.*'");
+
+        // field matches, regex does not, rule not applied
+        test("FIELD_A =~ '123.*'");
+        test("FIELD_B =~ '456.*'");
+    }
+
+    private void test(String query) {
+        test(query, query);
+    }
+
+    private void test(String query, String expected) {
+        ASTJexlScript script = parse(query);
+        NodeTransformVisitor visitor = getVisitor();
+
+        JexlNode visited = (JexlNode) script.jjtAccept(visitor, null);
+
+        String result = JexlStringBuildingVisitor.buildQuery(visited);
+        assertEquals(expected, result);
+    }
+
+    private ASTJexlScript parse(String query) {
+        try {
+            return JexlASTHelper.parseAndFlattenJexlQuery(query);
+        } catch (ParseException e) {
+            fail("Failed to parse query: " + query, e);
+            throw new RuntimeException(e);
+        }
+    }
+
+    private NodeTransformVisitor getVisitor() {
+        Set<FieldPattern> fieldPatterns = new HashSet<>();
+        fieldPatterns.add(new FieldPattern("FIELD_A", "abc.*"));
+        fieldPatterns.add(new FieldPattern("FIELD_B", "xyz.*"));
+
+        EvaluationOnlyPushdownRule rule = new EvaluationOnlyPushdownRule();
+        rule.setFieldPatterns(fieldPatterns);
+
+        List<NodeTransformRule> rules = new ArrayList<>();
+        rules.add(rule);
+
+        return new NodeTransformVisitor(null, null, rules);
+    }
+}
diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
index 7c043448b9e..c97bfeef532 100644
--- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
+++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
@@ -361,10 +361,26 @@ [0-9a-zA-Z]{0,2}[.?*]+ + + + + + + + + + + + + + + + +