Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NodeTransformRule to pushdown regex terms known to fail index exp… #2657

Open
wants to merge 1 commit into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package datawave.query.planner.rules;

import java.util.HashSet;
import java.util.Set;

import org.apache.commons.jexl3.parser.ASTERNode;
import org.apache.commons.jexl3.parser.JexlNode;

import datawave.query.config.ShardQueryConfiguration;
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.nodes.QueryPropertyMarker;
import datawave.query.jexl.visitors.RebuildingVisitor;
import datawave.query.util.MetadataHelper;

/**
 * Pushes specific regex terms down to evaluation, thus skipping index expansion and field index execution.
 * <p>
 * A regex node is marked evaluation-only when the pair made of its identifier and the string form of its literal is equal to one of the configured
 * {@link FieldPattern}s. The comparison is plain equality on both strings; the configured pattern is not compiled or matched as a regex here.
 * <p>
 * Note: index only fields cannot be configured. Nothing in this class checks for that, so the configuration must be kept correct by whoever maintains it.
 */
public class EvaluationOnlyPushdownRule implements NodeTransformRule {

    /** Configured (field, pattern) pairs that trigger the pushdown. */
    private final Set<FieldPattern> fieldPatterns = new HashSet<>();

    /**
     * Wraps the node in an evaluation-only marker if it is a regex node whose field and pattern are configured.
     *
     * @param node
     *            the node to inspect
     * @param config
     *            the query configuration (not used by this rule)
     * @param helper
     *            the metadata helper (not used by this rule)
     * @return a marked copy of the node when it matches a configured pair, otherwise the node itself
     */
    @Override
    public JexlNode apply(JexlNode node, ShardQueryConfiguration config, MetadataHelper helper) {
        // with nothing configured the rule is a no-op, so do not bother inspecting the node
        if (!(node instanceof ASTERNode) || fieldPatterns.isEmpty()) {
            return node;
        }

        String field = JexlASTHelper.getIdentifier(node);
        String pattern = String.valueOf(JexlASTHelper.getLiteralValue(node));
        if (!fieldPatterns.contains(new FieldPattern(field, pattern))) {
            return node;
        }

        // mark a copy rather than the node that was passed in
        JexlNode copy = RebuildingVisitor.copy(node);
        return QueryPropertyMarker.create(copy, QueryPropertyMarker.MarkerType.EVALUATION_ONLY);
    }

    /**
     * @return the live set of configured field patterns
     */
    public Set<FieldPattern> getFieldPatterns() {
        return fieldPatterns;
    }

    /**
     * Replaces the configured field patterns with the given ones.
     *
     * @param fieldPatterns
     *            the new patterns; {@code null} is treated as an empty set
     */
    public void setFieldPatterns(Set<FieldPattern> fieldPatterns) {
        // copy first so that passing in the set returned by getFieldPatterns() does not wipe the configuration
        Set<FieldPattern> replacement = new HashSet<>();
        if (fieldPatterns != null) {
            replacement.addAll(fieldPatterns);
        }
        this.fieldPatterns.clear();
        this.fieldPatterns.addAll(replacement);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package datawave.query.planner.rules;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;

/**
 * A field name paired with a pattern string.
 * <p>
 * Both values are mutable and both take part in {@link #equals(Object)} and {@link #hashCode()}, so an instance should not be modified while it is held in a
 * hash-based collection.
 */
public class FieldPattern {
    private String field;
    private String pattern;

    /**
     * Creates an instance with no field and no pattern; both are expected to be supplied through the setters.
     */
    public FieldPattern() {
        // empty constructor
    }

    /**
     * @param field
     *            the field name
     * @param pattern
     *            the pattern string
     */
    public FieldPattern(String field, String pattern) {
        this.field = field;
        this.pattern = pattern;
    }

    public String getField() {
        return field;
    }

    public void setField(String field) {
        this.field = field;
    }

    public String getPattern() {
        return pattern;
    }

    public void setPattern(String pattern) {
        this.pattern = pattern;
    }

    /**
     * Two instances are equal when they are of the same class and have equal field and pattern values.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        FieldPattern that = (FieldPattern) o;

        return new EqualsBuilder().append(field, that.field).append(pattern, that.pattern).isEquals();
    }

    @Override
    public int hashCode() {
        return new HashCodeBuilder(17, 37).append(field).append(pattern).toHashCode();
    }

    /**
     * @return a readable form showing both values, for use in logs and assertion messages
     */
    @Override
    public String toString() {
        return "FieldPattern{field=" + field + ", pattern=" + pattern + "}";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package datawave.query.planner.rules;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.ParseException;
import org.junit.jupiter.api.Test;

import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.visitors.JexlStringBuildingVisitor;

/**
 * Exercises {@link EvaluationOnlyPushdownRule} through a {@link NodeTransformVisitor} configured with two field/pattern pairs: FIELD_A with 'abc.*' and
 * FIELD_B with 'xyz.*'.
 */
public class EvaluationOnlyPushdownRuleTest {

    @Test
    public void testPushdown() {
        // both the field and the regex match a configured pair, so the term is marked
        assertTransformed("FIELD_A =~ 'abc.*'", "((_Eval_ = true) && (FIELD_A =~ 'abc.*'))");
        assertTransformed("FIELD_B =~ 'xyz.*'", "((_Eval_ = true) && (FIELD_B =~ 'xyz.*'))");
    }

    @Test
    public void testNoPushdown() {
        // field does not match, regex does, rule not applied
        assertUnchanged("FIELD_D =~ 'abc.*'");
        assertUnchanged("FIELD_E =~ 'xyz.*'");

        // field matches, regex does not, rule not applied
        assertUnchanged("FIELD_A =~ '123.*'");
        assertUnchanged("FIELD_B =~ '456.*'");
    }

    /**
     * Asserts that the query comes out of the visitor exactly as it went in.
     *
     * @param query
     *            the query string
     */
    private void assertUnchanged(String query) {
        assertTransformed(query, query);
    }

    /**
     * Parses the query, runs it through the visitor and compares the rebuilt query string with the expected one.
     *
     * @param query
     *            the query string
     * @param expected
     *            the expected query string after the rule has run
     */
    private void assertTransformed(String query, String expected) {
        ASTJexlScript parsed = parseQuery(query);
        NodeTransformVisitor transformer = buildVisitor();

        JexlNode transformed = (JexlNode) parsed.jjtAccept(transformer, null);

        assertEquals(expected, JexlStringBuildingVisitor.buildQuery(transformed));
    }

    /**
     * Parses and flattens the query, failing the test if it cannot be parsed.
     *
     * @param query
     *            the query string
     * @return the parsed script
     */
    private ASTJexlScript parseQuery(String query) {
        try {
            return JexlASTHelper.parseAndFlattenJexlQuery(query);
        } catch (ParseException e) {
            // fail always throws; its generic return type lets it satisfy the return statement
            return fail("Failed to parse query: " + query, e);
        }
    }

    /**
     * Builds a visitor whose only rule is an {@link EvaluationOnlyPushdownRule}; the visitor is created with no configuration and no metadata helper.
     *
     * @return the visitor
     */
    private NodeTransformVisitor buildVisitor() {
        Set<FieldPattern> configured = new HashSet<>();
        configured.add(new FieldPattern("FIELD_A", "abc.*"));
        configured.add(new FieldPattern("FIELD_B", "xyz.*"));

        EvaluationOnlyPushdownRule pushdownRule = new EvaluationOnlyPushdownRule();
        pushdownRule.setFieldPatterns(configured);

        List<NodeTransformRule> ruleList = new ArrayList<>();
        ruleList.add(pushdownRule);

        return new NodeTransformVisitor(null, null, ruleList);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,26 @@
<value>[0-9a-zA-Z]{0,2}[.?*]+</value>
</util:list>

<!-- Field/pattern pairs handed to the EvaluationOnlyPushdownRule bean in transformRuleList through its fieldPatterns property. -->
<!-- Each entry is a FieldPattern bean populated via its field and pattern properties. -->
<util:set id="evalOnlyFieldPatterns" value-type="datawave.query.planner.rules.FieldPattern">
<bean class="datawave.query.planner.rules.FieldPattern">
<property name="field" value="FIELD_A"/>
<property name="pattern" value="abc.*"/>
</bean>
<bean class="datawave.query.planner.rules.FieldPattern">
<property name="field" value="FIELD_B"/>
<property name="pattern" value="xyz.*"/>
</bean>
</util:set>

<!-- NodeTransformRule beans; every rule is declared with prototype scope. -->
<util:list id="transformRuleList" value-type="datawave.query.planner.rules.NodeTransformRule">
<bean scope="prototype" class="datawave.query.planner.rules.RegexSimplifierTransformRule"/>
<bean scope="prototype" class="datawave.query.planner.rules.RegexDotallTransformRule" />
<bean scope="prototype" class="datawave.query.planner.rules.RegexPushdownTransformRule">
<property name="regexPatterns" ref="pushdownRegexPatterns"/>
</bean>
<!-- Marks regex terms whose field and pattern both equal an entry of evalOnlyFieldPatterns as evaluation-only. -->
<bean scope="prototype" class="datawave.query.planner.rules.EvaluationOnlyPushdownRule">
<property name="fieldPatterns" ref="evalOnlyFieldPatterns"/>
</bean>
</util:list>

<bean id="DefaultQueryPlanner" scope="prototype" class="datawave.query.planner.DefaultQueryPlanner" >
Expand Down
Loading