diff --git a/common-test/pom.xml b/common-test/pom.xml index 21eb3afd15b..a02bcd01b65 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-common-test ${project.artifactId} diff --git a/contrib/datawave-quickstart/bin/query.sh b/contrib/datawave-quickstart/bin/query.sh index 83ca1c9a877..1eb874bffa6 100644 --- a/contrib/datawave-quickstart/bin/query.sh +++ b/contrib/datawave-quickstart/bin/query.sh @@ -55,7 +55,7 @@ function datawaveQuery() { local curlcmd="/usr/bin/curl \ --silent --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \ - --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" \ + --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" --keepalive-time 180 \ --header 'Content-Type: application/x-www-form-urlencoded;charset=UTF-8' --header 'Accept: application/json' \ ${DW_REQUEST_HEADERS} ${DW_CURL_DATA} -X POST ${DW_QUERY_URI}/${DW_QUERY_LOGIC}/${DW_QUERY_CREATE_MODE}" @@ -333,7 +333,7 @@ function closeQuery() { local curlcmd="/usr/bin/curl \ --silent --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \ - --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" \ + --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" --keepalive-time 180 \ -X PUT ${DW_QUERY_URI}/${DW_QUERY_ID}/close" local response="$( eval "${curlcmd}" )" @@ -368,7 +368,7 @@ function getNextPage() { local curlcmd="/usr/bin/curl \ --silent --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \ - --insecure --header 'Accept: application/json' ${DW_REQUEST_HEADERS} --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" \ + --insecure --header 'Accept: application/json' ${DW_REQUEST_HEADERS} --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" --keepalive-time 180 \ -X GET ${DW_QUERY_URI}/${DW_QUERY_ID}/next" local response="$( eval "${curlcmd}" )" diff --git a/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh b/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh index dcf5f169186..b2e3999be1c 100755 --- a/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh +++ b/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh @@ -280,7 +280,7 @@ function runTest() { TEST_COMMAND="${CURL} ${CURL_ADDITIONAL_OPTS} --silent \ --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \ ---insecure --cert '${DW_CURL_CERT}' --key '${DW_CURL_KEY_RSA}' --cacert '${DW_CURL_CA}' ${TEST_URL_OPTS}" +--insecure --cert '${DW_CURL_CERT}' --keepalive-time 180 --key '${DW_CURL_KEY_RSA}' --cacert '${DW_CURL_CA}' ${TEST_URL_OPTS}" if [ "${LIST_TESTS}" == true ] ; then printCurrentTestInfo @@ -533,4 +533,4 @@ if [ "${LIST_TESTS}" != true ] ; then printTestSummary cleanup exitWithTestStatus -fi \ No newline at end of file +fi diff --git a/docs/pom.xml b/docs/pom.xml index fa33bf0b6bd..54da592e601 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-docs diff --git a/pom.xml b/pom.xml index 47609e5b8d8..77f5a7325d7 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT pom DataWave DataWave
is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. diff --git a/properties/default.properties b/properties/default.properties index 61bf5ceb4cb..8765bc2e3ee 100644 --- a/properties/default.properties +++ b/properties/default.properties @@ -137,8 +137,11 @@ jboss.runas.user=jboss # Defines the size parameters of the worker's task thread pool # Suggest setting values here based on accumulo connection pool sizes, available cores, and expected access patterns +# From WildFly manual: Workers for I/O channel notification. The maximum number of threads for the worker task thread pool. +# default cpuCount * 16. Once this is filled, tasks that cannot be queued will be rejected. wildfly.io.worker.default.task-max-threads=16 # How many I/O (selector) threads should be maintained. Generally this number should be a small constant multiple of the number of available cores. +# From WildFly manual: Specify the number of I/O threads to create for the worker. Default cpuCount * 2 wildfly.io.worker.default.io-threads=2 ############################ diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index b0d95fa8a3b..1ef25f01fb4 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index a3842f8776a..cc6cf63a47f 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT assemble-datawave pom diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index 56b154db743..55dba081ae8 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 961a0c76f08..0523b656f11 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 20ce0f2def3..10ee3d50084 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 7b94a9365ff..025579e84fd 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-core jar diff --git a/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java b/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java index b12b5c0dc62..c7be52b5f5e 100644 --- a/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java +++ b/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java @@ -101,6 +101,24 @@ public void testComparator() { Collections.sort(result); Assert.assertEquals(listExpected, result); } + + @Test + public void testBuilderReset() { + 
TermWeightPosition.Builder builder = new TermWeightPosition.Builder(); + TermWeightPosition expected = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build(); + TermWeightPosition position = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build(); + Assert.assertEquals(expected, position); + + expected = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build(); + builder.reset(); + position = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build(); + Assert.assertEquals(expected, position); + + expected = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(false).build(); + builder.reset(); + position = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(false).build(); + Assert.assertEquals(expected, position); + } @Test public void testPositionScoreToTermWeightScore() { diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index eaaa7a22113..07ec8c9db82 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 6ed51d49fc3..fb513db5a5a 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index 87e21afebc7..9fdfff2cd6f 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 2d5462a9d2c..edb3acded7b 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index 6d3b7496572..4a5e176927a 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 77cbc366673..e1feb1f82bf 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-core jar diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java index 32ae7825e59..18d940a3beb 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java @@ -82,7 +82,7 @@ private HttpServer createFileServer(String path, int port) throws Exception { } private String readFile(String path) { - StringBuffer sb 
= new StringBuffer(); + StringBuilder sb = new StringBuilder(); InputStream istream = getClass().getClassLoader().getResourceAsStream(path); try (Scanner scanner = new Scanner(istream)) { diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java index 427021fa074..568a87e5253 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java @@ -437,7 +437,7 @@ private void validateContent(Path dir) throws IOException { private void validateContent(File dir, String fileName) throws IOException { File expectedFile = new File(dir, fileName); assertTrue("Could not find " + expectedFile, expectedFile.exists()); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 8d32cd0e832..7afbf225d97 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index adef6b6c024..8902dc2337e 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index f00bdc10298..536d28c7978 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java b/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java index 802d82d4f5a..6f162ace3ee 100644 --- a/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java +++ b/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java @@ -53,27 +53,30 @@ public boolean nextKeyValue() throws IOException { // followed by a blank line, followed by our entries // This is here to account for that boolean hasNext, completeRecord; + StringBuilder sb = new StringBuilder(); do { hasNext = super.nextKeyValue(); if (this.value != null && !this.value.toString().isEmpty() && !this.value.toString().equals(rawHeader)) { // update value to be list of field/value pairings - StringBuffer fvBuf = new StringBuffer(); String[] values = this.value.toString().split(((NYCTLCHelper) helper).getSeparator()); - if (values.length > ((NYCTLCHelper) helper).getParsedHeader().length) + if (values.length > ((NYCTLCHelper) helper).getParsedHeader().length) { log.debug("More values present than expected."); - + } int numFields = Math.min(values.length, ((NYCTLCHelper) helper).getParsedHeader().length); completeRecord = true; for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) { - fvBuf.append(((NYCTLCHelper) helper).getParsedHeader()[fieldIdx] + "=" + values[fieldIdx]); - if ((fieldIdx + 1) < numFields) - 
fvBuf.append(((NYCTLCHelper) helper).getSeparator()); + sb.append(((NYCTLCHelper) helper).getParsedHeader()[fieldIdx] + "=" + values[fieldIdx]); + if ((fieldIdx + 1) < numFields) { + sb.append(((NYCTLCHelper) helper).getSeparator()); + } } - this.value = new Text(fvBuf.toString()); - } else + this.value = new Text(sb.toString()); + sb.setLength(0); + } else { completeRecord = false; + } } while (hasNext && !completeRecord); return hasNext; diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index 2213e1a3ea9..096438278fa 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index d0c2c6ea77c..e8d0220ca46 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 958e508dfa5..d22826faf1b 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index f575fce751a..8a3a7bd42a3 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index da2b206525f..09b4017922e 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index d2ca55c172d..a562298c569 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index 81a32f03261..34ffa4678f6 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 373854ddf54..8b85d24f924 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 5.6.0-SNAPSHOT + 5.7.0-SNAPSHOT datawave-query-core jar diff --git a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java index ececf339d05..b519d521da4 100644 --- a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java +++ b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java @@ -137,6 +137,10 @@ public class QueryParameters { * Used to limit the number of values returned for specific fields */ public static final String LIMIT_FIELDS = "limit.fields"; + /** + * Used to 
tie field groups together such that if a field in one group is not being limited the fields in matching groups will not be limited. + */ + public static final String MATCHING_FIELD_SETS = "matching.field.sets"; public static final String GROUP_FIELDS = "group.fields"; public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size"; diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index 0228705545d..5d52053362f 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -248,6 +248,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement private boolean termFrequenciesRequired = false; // Limit count of returned values for arbitrary fields. private Set limitFields = Collections.emptySet(); + private Set matchingFieldSets = Collections.emptySet(); /** * should limit fields be applied early */ @@ -524,6 +525,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) { this.setQueryTermFrequencyFields(null == other.getQueryTermFrequencyFields() ? null : Sets.newHashSet(other.getQueryTermFrequencyFields())); this.setTermFrequenciesRequired(other.isTermFrequenciesRequired()); this.setLimitFields(null == other.getLimitFields() ? null : Sets.newHashSet(other.getLimitFields())); + this.setMatchingFieldSets(null == other.getMatchingFieldSets() ? null : Sets.newHashSet(other.getMatchingFieldSets())); this.setLimitFieldsPreQueryEvaluation(other.isLimitFieldsPreQueryEvaluation()); this.setLimitFieldsField(other.getLimitFieldsField()); this.setHitList(other.isHitList()); @@ -1565,6 +1567,18 @@ public String getLimitFieldsAsString() { return StringUtils.join(this.getLimitFields(), Constants.PARAM_VALUE_SEP); } + public Set getMatchingFieldSets() { + return matchingFieldSets; + } + + public void setMatchingFieldSets(Set matchingFieldSets) { + this.matchingFieldSets = matchingFieldSets; + } + + public String getMatchingFieldSetsAsString() { + return StringUtils.join(this.getMatchingFieldSets(), Constants.PARAM_VALUE_SEP); + } + public boolean isLimitFieldsPreQueryEvaluation() { return limitFieldsPreQueryEvaluation; } diff --git a/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java b/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java index 228ac486e84..b96e92bb495 100644 --- a/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java @@ -18,16 +18,53 @@ import java.util.Map.Entry; import java.util.Set; +/** + *

+ * LimitFields will reduce the attributes in a document given the limits specified for fields. Attributes that are in the set of hits for a document will never
+ * be dropped. Also matching field sets can be specified which will avoid dropping fields in the same group when the values match between the fields in a
+ * matching field set. For example given the following field/values:
+ * <ul>
+ * <li>NAME.PERSON.1 = sam</li>
+ * <li>AGE.PERSON.1 = 10</li>
+ * <li>NAME.PERSON.2 = frank</li>
+ * <li>AGE.PERSON.2 = 11</li>
+ * <li>ACTOR.ACTOR.1 = sam</li>
+ * <li>FILM.ACTOR.1 = Johnny Goes Home</li>
+ * <li>ACTOR.ACTOR.2 = frank</li>
+ * <li>FILM.ACTOR.2 = Johnny Head</li>
+ * </ul>
+ * <ul>
+ * <li>and limit fields NAME=-1, AGE=-1, FILM=-1</li>
+ * <li>and a matching field set of NAME=ACTOR</li>
+ * <li>and a hit term of FILM.ACTOR.1=Johnny Goes Home</li>
+ * </ul>
+ * In this case the following fields should be returned:
+ * <ul>
+ * <li>ACTOR.ACTOR.1 = sam</li>
+ * <li>FILM.ACTOR.1 = Johnny Goes Home</li>
+ * <li>NAME.PERSON.1 = sam</li>
+ * <li>AGE.PERSON.1 = 10</li>
+ * </ul>
+ */ public class LimitFields implements Function,Entry> { private static final Logger log = Logger.getLogger(LimitFields.class); - public static final String ORIGINAL_COUNT_SUFFIX = "ORIGINAL_COUNT"; + public static final String ORIGINAL_COUNT_SUFFIX = "_ORIGINAL_COUNT"; - private Map limitFieldsMap; + // A map of fields and the number of values to limit the fields by + private final Map limitFieldsMap; - public LimitFields(Map limitFieldsMap) { + // A collection of field sets where if the values match then those values + // should not be dropped + private final Set> matchingFieldSets; + + public LimitFields(Map limitFieldsMap, Set> matchingFieldSets) { this.limitFieldsMap = limitFieldsMap; + this.matchingFieldSets = matchingFieldSets; if (log.isTraceEnabled()) log.trace("limitFieldsMap set to:" + limitFieldsMap); } @@ -41,6 +78,8 @@ public Entry apply(Entry entry) { CountMap countMissesRemainingForFieldMap = new CountMap(); CountMap countKeepersForFieldMap = new CountMap(); + MatchingFieldGroups matchingFieldGroups = new MatchingFieldGroups(matchingFieldSets); + int attributesToDrop = 0; // first pass is to set all of the hits to be kept, the misses to drop, and count em all @@ -69,20 +108,24 @@ public Entry apply(Entry entry) { for (Attribute> value : attrSet) { if (isHit(keyWithGrouping, value, hitTermMap)) { keepers++; + matchingFieldGroups.addHit(keyNoGrouping, value); } else { value.setToKeep(false); missesRemaining++; attributesToDrop++; + matchingFieldGroups.addPotential(keyNoGrouping, keyWithGrouping, value); } total++; } } else { if (isHit(keyWithGrouping, attr, hitTermMap)) { keepers++; + matchingFieldGroups.addHit(keyNoGrouping, attr); } else { attr.setToKeep(false); missesRemaining++; attributesToDrop++; + matchingFieldGroups.addPotential(keyNoGrouping, keyWithGrouping, attr); } total++; } @@ -92,7 +135,58 @@ public Entry apply(Entry entry) { } } - // second pass is to set any misses back to be kept if the limit allows + // the second pass is to process the limited fields that have matching groups + matchingFieldGroups.processMatches(); + if (matchingFieldGroups.hasMatches()) { + for (Map.Entry>> de : document.entrySet()) { + String keyWithGrouping = de.getKey(); + String keyNoGrouping = removeGrouping(keyWithGrouping); + + // if this was a limited field + if (this.limitFieldsMap.containsKey(keyNoGrouping)) { + + int keepers = countKeepersForFieldMap.get(keyNoGrouping); + int missesRemaining = countMissesRemainingForFieldMap.get(keyNoGrouping); + + // if we have matching group + if (matchingFieldGroups.isMatchingGroup(keyWithGrouping)) { + boolean foundMiss = false; + Attribute attr = de.getValue(); + if (attr instanceof Attributes) { + Attributes attrs = (Attributes) attr; + Set>> attrSet = attrs.getAttributes(); + + for (Attribute> value : attrSet) { + // if this was an attribute previously set to not keep, then it is one of the misses (not a hit) + if (!value.isToKeep()) { + value.setToKeep(true); + keepers++; + missesRemaining--; + attributesToDrop--; + foundMiss = true; + } + } + } else { + // if this was an attribute previously set to not keep, then it is one of the misses (not a hit) + if (!attr.isToKeep()) { + attr.setToKeep(true); + keepers++; + missesRemaining--; + attributesToDrop--; + foundMiss = true; + } + } + + if (foundMiss) { + countKeepersForFieldMap.put(keyNoGrouping, keepers); + countMissesRemainingForFieldMap.put(keyNoGrouping, missesRemaining); + } + } + } + } + } + + // third pass is to set any misses back to be kept if the limit allows for (Map.Entry>> 
de : document.entrySet()) { String keyWithGrouping = de.getKey(); String keyNoGrouping = removeGrouping(keyWithGrouping); @@ -202,7 +296,9 @@ private boolean isHit(String keyWithGrouping, Attribute attr, Multimap thing = (Type) clazz.newInstance(); thing.setDelegateFromString(String.valueOf(hitValue)); hitValue = thing; - } // otherwise, s is not a Type, just compare to value in hitTermMap using 'equals' + } else { // otherwise, s is not a Type, just compare as string values + s = String.valueOf(s); + } if (s.equals(hitValue)) { return true; } diff --git a/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldGroups.java b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldGroups.java new file mode 100644 index 00000000000..2099789e8aa --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldGroups.java @@ -0,0 +1,90 @@ +package datawave.query.function; + +import com.google.common.base.Joiner; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import datawave.query.attributes.Attribute; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class MatchingFieldGroups { + + private final Multimap matchingFieldGroups; + private final Set matchingGroups; + private final Multimap potentialMatches; + + public MatchingFieldGroups(Set> matchingFieldSets) { + matchingFieldGroups = HashMultimap.create(); + if (matchingFieldSets != null) { + for (Set matchingFieldSet : matchingFieldSets) { + MatchingFieldHits matchingFieldGroup = new MatchingFieldHits(); + for (String field : matchingFieldSet) { + matchingFieldGroups.put(field, matchingFieldGroup); + } + } + } + matchingGroups = new HashSet<>(); + potentialMatches = ArrayListMultimap.create(); + } + + public void addHit(String keyNoGrouping, Attribute attr) { + if (matchingFieldGroups.containsKey(keyNoGrouping)) { + for (MatchingFieldHits matchingFieldGroup : matchingFieldGroups.get(keyNoGrouping)) { + matchingFieldGroup.addHitTermValue(getStringValue(attr)); + } + } + } + + public void addPotential(String keyNoGrouping, String keyWithGrouping, Attribute attr) { + if (matchingFieldGroups.containsKey(keyNoGrouping)) { + String group = getGroup(keyWithGrouping); + if (group != null) { + potentialMatches.put(keyNoGrouping, new String[] {group, getStringValue(attr)}); + } + } + } + + public void processMatches() { + for (Map.Entry potentialEntry : potentialMatches.entries()) { + String keyNoGrouping = potentialEntry.getKey(); + String group = potentialEntry.getValue()[0]; + String value = potentialEntry.getValue()[1]; + if (!matchingGroups.contains(group)) { + for (MatchingFieldHits matchingFieldGroup : matchingFieldGroups.get(keyNoGrouping)) { + if (matchingFieldGroup.containsHitTermValue(value)) { + matchingGroups.add(group); + break; + } + } + } + } + } + + public boolean hasMatches() { + return !matchingGroups.isEmpty(); + } + + public boolean isMatchingGroup(String keyWithGrouping) { + String group = getGroup(keyWithGrouping); + if (group != null) { + return matchingGroups.contains(group); + } + return false; + } + + static String getStringValue(Attribute attr) { + return String.valueOf(attr.getData()); + } + + static String getGroup(String keyWithGrouping) { + String[] keyTokens = LimitFields.getCommonalityAndGroupingContext(keyWithGrouping); + if (keyTokens != null) { + return Joiner.on('.').join(keyTokens); + } + return null; + } + +} diff --git 
a/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldHits.java b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldHits.java new file mode 100644 index 00000000000..5a9a10a23e4 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldHits.java @@ -0,0 +1,20 @@ +package datawave.query.function; + +import java.util.HashSet; +import java.util.Set; + +public class MatchingFieldHits { + private final Set hitTermValues; + + public MatchingFieldHits() { + this.hitTermValues = new HashSet<>(); + } + + public void addHitTermValue(String value) { + hitTermValues.add(value); + } + + public boolean containsHitTermValue(String value) { + return hitTermValues.contains(value); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java index 673ba34eed4..3b98b76f885 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java @@ -3,7 +3,6 @@ import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; -import com.google.common.base.Throwables; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -981,10 +980,9 @@ public Entry apply(@Nullable Entry input) { // note that we have already reduced the document to those attributes to keep. This will reduce the attributes further // base on those fields we are limiting. if (gatherTimingDetails()) { - documents = Iterators.transform(documents, - new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, new LimitFields(this.getLimitFieldsMap()))); + documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, getLimitFields())); } else { - documents = Iterators.transform(documents, new LimitFields(this.getLimitFieldsMap())); + documents = Iterators.transform(documents, getLimitFields()); } } @@ -1155,6 +1153,10 @@ protected JexlEvaluation getJexlEvaluation(String query, NestedQueryIterator root, String prefix) debugBooleanLogicIterator(child, prefix + " "); } } + protected DocumentProjection getProjection() { DocumentProjection projection = new DocumentProjection(this.isIncludeGroupingContext(), this.isReducedResponse(), isTrackSizes()); if (this.useWhiteListedFields) { + // make sure we include any fields being matched in the limit fields mechanism + if (!this.matchingFieldSets.isEmpty()) { + this.whiteListedFields.addAll(getMatchingFieldList()); + } projection.setIncludes(this.whiteListedFields); return projection; } else if (this.useBlackListedFields) { + // make sure we are not excluding any fields being matched in the limit fields mechanism + if (!this.matchingFieldSets.isEmpty()) { + this.blackListedFields.removeAll(getMatchingFieldList()); + } projection.setExcludes(this.blackListedFields); return projection; } else { @@ -1318,6 +1329,10 @@ protected DocumentProjection getCompositeProjection() { } } } + // make sure we include any fields being matched in the limit fields mechanism + if (!this.matchingFieldSets.isEmpty()) { + composites.removeAll(getMatchingFieldList()); + } projection.setExcludes(composites); return projection; } diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java 
b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java index ef7cea911bf..a2a684df706 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java @@ -135,6 +135,7 @@ public class QueryOptions implements OptionDescriber { public static final String TERM_FREQUENCIES_REQUIRED = "term.frequencies.are.required"; public static final String CONTENT_EXPANSION_FIELDS = "content.expansion.fields"; public static final String LIMIT_FIELDS = "limit.fields"; + public static final String MATCHING_FIELD_SETS = "matching.field.sets"; public static final String LIMIT_FIELDS_PRE_QUERY_EVALUATION = "limit.fields.pre.query.evaluation"; public static final String LIMIT_FIELDS_FIELD = "limit.fields.field"; public static final String GROUP_FIELDS = "group.fields"; @@ -291,6 +292,7 @@ public class QueryOptions implements OptionDescriber { protected boolean useBlackListedFields = false; protected Set blackListedFields = new HashSet<>(); protected Map limitFieldsMap = new HashMap<>(); + protected Set> matchingFieldSets = new HashSet<>(); protected boolean limitFieldsPreQueryEvaluation = false; protected String limitFieldsField = null; @@ -500,6 +502,7 @@ public void deepCopy(QueryOptions other) { this.compressResults = other.compressResults; this.limitFieldsMap = other.limitFieldsMap; + this.matchingFieldSets = other.matchingFieldSets; this.limitFieldsPreQueryEvaluation = other.limitFieldsPreQueryEvaluation; this.limitFieldsField = other.limitFieldsField; this.groupFields = other.groupFields; @@ -997,6 +1000,18 @@ public void setLimitFieldsMap(Map limitFieldsMap) { this.limitFieldsMap = limitFieldsMap; } + public Set> getMatchingFieldSets() { + return matchingFieldSets; + } + + public List getMatchingFieldList() { + return this.matchingFieldSets.stream().flatMap(s -> s.stream()).collect(Collectors.toList()); + } + + public void setMatchingFieldSets(Set> matchingFieldSets) { + this.matchingFieldSets = matchingFieldSets; + } + public boolean isLimitFieldsPreQueryEvaluation() { return limitFieldsPreQueryEvaluation; } @@ -1133,6 +1148,7 @@ public IteratorOptions describeOptions() { options.put(DOCUMENT_PERMUTATION_CLASSES, "Classes implementing DocumentPermutation which can transform the document prior to evaluation (e.g. 
expand/mutate fields)."); options.put(LIMIT_FIELDS, "limit fields"); + options.put(MATCHING_FIELD_SETS, "matching field sets (used along with limit fields)"); options.put(GROUP_FIELDS, "group fields"); options.put(GROUP_FIELDS_BATCH_SIZE, "group fields.batch.size"); options.put(UNIQUE_FIELDS, "unique fields"); @@ -1469,6 +1485,16 @@ public boolean validateOptions(Map options) { } } + if (options.containsKey(MATCHING_FIELD_SETS)) { + String matchingFieldSets = options.get(MATCHING_FIELD_SETS); + for (String fieldSet : Splitter.on(',').omitEmptyStrings().trimResults().split(matchingFieldSets)) { + String[] fields = Iterables.toArray(Splitter.on('=').omitEmptyStrings().trimResults().split(fieldSet), String.class); + if (fields.length != 0) { + this.getMatchingFieldSets().add(new HashSet(Arrays.asList(fields))); + } + } + } + if (options.containsKey(LIMIT_FIELDS_PRE_QUERY_EVALUATION)) { this.setLimitFieldsPreQueryEvaluation(Boolean.parseBoolean(options.get(LIMIT_FIELDS_PRE_QUERY_EVALUATION))); } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java b/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java index 64eb2be9d84..f4e8c76ccc6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java @@ -772,6 +772,12 @@ private static int numNegations(JexlNode node) { } } + /** + * Iterate through provided node and its children, then return a list of nodes that are an instance of ASTEQNode. + * + * @param node + * @return List of ASTEQNode nodes. + */ public static List getEQNodes(JexlNode node) { List eqNodes = Lists.newArrayList(); @@ -780,6 +786,12 @@ public static List getEQNodes(JexlNode node) { return eqNodes; } + /** + * Check if the provided node is an instance of ASTEQNode. If yes, then add the node to the provided list. + * + * @param node + * @param eqNodes + */ private static void getEQNodes(JexlNode node, List eqNodes) { if (node instanceof ASTEQNode) { eqNodes.add((ASTEQNode) node); @@ -790,6 +802,12 @@ private static void getEQNodes(JexlNode node, List eqNodes) { } } + /** + * Iterate through provided node and its children, then return a list of nodes that are an instance of ASTERNode. + * + * @param node + * @return List of ASTERNode nodes. + */ public static List getERNodes(JexlNode node) { List erNodes = Lists.newArrayList(); @@ -798,6 +816,12 @@ public static List getERNodes(JexlNode node) { return erNodes; } + /** + * Check if the provided node is an instance of ASTERNode. If yes, then add the node to the provided list. + * + * @param node + * @param erNodes + */ private static void getERNodes(JexlNode node, List erNodes) { if (node instanceof ASTERNode) { erNodes.add((ASTERNode) node); @@ -808,6 +832,43 @@ private static void getERNodes(JexlNode node, List erNodes) { } } + /** + * Iterate through provided node and its children, then return a list of nodes matching the provided class. The provided class must extend JexlNode. + * + * @see org.apache.commons.jexl2.parser.JexlNode + * + * @param node + * @param typeKey + * @return List of nodes matching provided class. + */ + public static List getNodesOfType(JexlNode node, Class typeKey) { + List nodes = Lists.newArrayList(); + + getNodesOfType(node, nodes, typeKey); + + return nodes; + } + + /** + * Check if the provided node is an instance of the provided class. If yes, then add the node to the provided list. The provided class must extend JexlNode. 
+ * + * @see org.apache.commons.jexl2.parser.JexlNode + * + * @param node + * @param nodes + * @param typeKey + */ + @SuppressWarnings("unchecked") + private static void getNodesOfType(JexlNode node, List nodes, Class typeKey) { + if (typeKey.isInstance(node)) { + nodes.add((T) node); + } else { + for (int i = 0; i < node.jjtGetNumChildren(); i++) { + getNodesOfType(node.jjtGetChild(i), nodes, typeKey); + } + } + } + public static List getLiteralValues(JexlNode node) { return getLiterals(node).stream().map(JexlASTHelper::getLiteralValue).collect(Collectors.toList()); } @@ -1207,7 +1268,15 @@ public static LiteralRange getBigDecimalBoundedRange(JexlNode[] chil return range; } - public static boolean isWithinOr(JexlNode node) { + /** + * Iterate through provided node's ancestors and return true if any are an instance of ASTOrNode. + * + * @see org.apache.commons.jexl2.parser.ASTOrNode + * + * @param node + * @return True if any ancestor is an instance of ASTOrNode. If not, then False. + */ + public static boolean isDescendantOfOr(JexlNode node) { if (null != node && null != node.jjtGetParent()) { JexlNode parent = node.jjtGetParent(); @@ -1215,35 +1284,100 @@ public static boolean isWithinOr(JexlNode node) { return true; } - return isWithinOr(parent); + return isDescendantOfOr(parent); } return false; } - public static boolean isWithinNot(JexlNode node) { - while (null != node && null != node.jjtGetParent()) { + /** + * Iterate through provided node's ancestors and return true if any are an instance of ASTNotNode. + * + * @see org.apache.commons.jexl2.parser.ASTNotNode + * + * @param node + * @return True if any ancestor is an instance of ASTNotNode. If not, then False. + */ + public static boolean isDescendantOfNot(JexlNode node) { + if (null != node && null != node.jjtGetParent()) { JexlNode parent = node.jjtGetParent(); if (parent instanceof ASTNotNode) { return true; } - return isWithinNot(parent); + return isDescendantOfNot(parent); } return false; } - public static boolean isWithinAnd(JexlNode node) { - while (null != node && null != node.jjtGetParent()) { + /** + * Iterate through provided node's ancestors and return true if any are an instance of ASTAndNode. + * + * @see org.apache.commons.jexl2.parser.ASTAndNode + * + * @param node + * @return True if any ancestor is an instance of ASTAndNode. If not, then False. + */ + public static boolean isDescendantOfAnd(JexlNode node) { + if (null != node && null != node.jjtGetParent()) { JexlNode parent = node.jjtGetParent(); if (parent instanceof ASTAndNode) { return true; } - return isWithinAnd(parent); + return isDescendantOfAnd(parent); + } + + return false; + } + + /** + * Iterate through provided node's ancestors and return true if any are an instance of the provided class. The provided class must extend JexlNode. + * + * @see org.apache.commons.jexl2.parser.JexlNode + * + * @param node + * @param typeKey + * @return True if any ancestor is an instance of the provided class. If not, then False. + */ + public static boolean isDescendantOfNodeType(JexlNode node, Class typeKey) { + if (null != node && null != node.jjtGetParent()) { + JexlNode parent = node.jjtGetParent(); + + if (typeKey.isInstance(parent)) { + return true; + } + + return isDescendantOfNodeType(parent, typeKey); + } + + return false; + } + + /** + * Iterate through all descendants of the provided node and return true if any are an instance of the provided class. The provided class must extend + * JexlNode. 
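As an aside for illustration (not part of the diff), the sketch below shows how the new generic helpers getNodesOfType, isDescendantOfNodeType and descendantsContainNodeType might be used together. It assumes the usual generic signature <T extends JexlNode> List<T> getNodesOfType(JexlNode, Class<T>) implied by the unchecked cast above, and the existing JexlASTHelper.parseJexlQuery entry point, which does not appear in this hunk.

import java.util.List;

import org.apache.commons.jexl2.parser.ASTEQNode;
import org.apache.commons.jexl2.parser.ASTJexlScript;
import org.apache.commons.jexl2.parser.ASTNotNode;
import org.apache.commons.jexl2.parser.ParseException;

import datawave.query.jexl.JexlASTHelper;

public class NodeTypeHelperSketch {
    public static void main(String[] args) throws ParseException {
        // parse a small query into a JEXL syntax tree (parseJexlQuery is assumed here)
        ASTJexlScript script = JexlASTHelper.parseJexlQuery("FOO == 'bar' && !(BAR == 'baz')");

        // collect every equality node anywhere under the script root
        List<ASTEQNode> eqNodes = JexlASTHelper.getNodesOfType(script, ASTEQNode.class);

        for (ASTEQNode eq : eqNodes) {
            // true only for the equality node nested under the negation
            System.out.println("negated=" + JexlASTHelper.isDescendantOfNodeType(eq, ASTNotNode.class));
        }

        // true because an ASTNotNode exists somewhere below the root
        System.out.println("containsNot=" + JexlASTHelper.descendantsContainNodeType(script, ASTNotNode.class));
    }
}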
+ * + * @see org.apache.commons.jexl2.parser.JexlNode + * + * @param node + * @param typeKey + * @return True if any descendant is an instance of the provided class. If not, then False. + */ + public static boolean descendantsContainNodeType(JexlNode node, Class typeKey) { + if (node != null) { + int numChildren = node.jjtGetNumChildren(); + + for (int i = 0; i < numChildren; i++) { + JexlNode child = node.jjtGetChild(i); + + if (typeKey.isInstance(child) || descendantsContainNodeType(child, typeKey)) { + return true; + } + } } return false; diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 361df1b39a0..64cec329b20 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -532,20 +532,21 @@ private void configureIterator(ShardQueryConfiguration config, IteratorSetting c // fields setCommonIteratorOptions(config, cfg); - addOption(cfg, QueryOptions.LIMIT_FIELDS, config.getLimitFieldsAsString(), true); - addOption(cfg, QueryOptions.GROUP_FIELDS, config.getGroupFieldsAsString(), true); - addOption(cfg, QueryOptions.GROUP_FIELDS_BATCH_SIZE, config.getGroupFieldsBatchSizeAsString(), true); - addOption(cfg, QueryOptions.UNIQUE_FIELDS, config.getUniqueFields().toString(), true); - addOption(cfg, QueryOptions.EXCERPT_FIELDS, config.getExcerptFields().toString(), true); + addOption(cfg, QueryOptions.LIMIT_FIELDS, config.getLimitFieldsAsString(), false); + addOption(cfg, QueryOptions.MATCHING_FIELD_SETS, config.getMatchingFieldSetsAsString(), false); + addOption(cfg, QueryOptions.GROUP_FIELDS, config.getGroupFieldsAsString(), false); + addOption(cfg, QueryOptions.GROUP_FIELDS_BATCH_SIZE, config.getGroupFieldsBatchSizeAsString(), false); + addOption(cfg, QueryOptions.UNIQUE_FIELDS, config.getUniqueFields().toString(), false); + addOption(cfg, QueryOptions.EXCERPT_FIELDS, config.getExcerptFields().toString(), false); addOption(cfg, QueryOptions.EXCERPT_ITERATOR, config.getExcerptIterator().getName(), false); addOption(cfg, QueryOptions.HIT_LIST, Boolean.toString(config.isHitList()), false); addOption(cfg, QueryOptions.TERM_FREQUENCY_FIELDS, Joiner.on(',').join(config.getQueryTermFrequencyFields()), false); - addOption(cfg, QueryOptions.TERM_FREQUENCIES_REQUIRED, Boolean.toString(config.isTermFrequenciesRequired()), true); + addOption(cfg, QueryOptions.TERM_FREQUENCIES_REQUIRED, Boolean.toString(config.isTermFrequenciesRequired()), false); addOption(cfg, QueryOptions.QUERY, newQueryString, false); addOption(cfg, QueryOptions.QUERY_ID, config.getQuery().getId().toString(), false); addOption(cfg, QueryOptions.FULL_TABLE_SCAN_ONLY, Boolean.toString(isFullTable), false); - addOption(cfg, QueryOptions.TRACK_SIZES, Boolean.toString(config.isTrackSizes()), true); - addOption(cfg, QueryOptions.ACTIVE_QUERY_LOG_NAME, config.getActiveQueryLogName(), true); + addOption(cfg, QueryOptions.TRACK_SIZES, Boolean.toString(config.isTrackSizes()), false); + addOption(cfg, QueryOptions.ACTIVE_QUERY_LOG_NAME, config.getActiveQueryLogName(), false); // Set the start and end dates configureTypeMappings(config, cfg, metadataHelper, compressMappings); } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java index 8a66f558b42..a3b67158020 
100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java @@ -31,6 +31,10 @@ public static void apply(Map optionsMap, ShardQueryConfiguration String[] lf = StringUtils.split(value, Constants.PARAM_VALUE_SEP); config.setLimitFields(Sets.newHashSet(lf)); break; + case QueryParameters.MATCHING_FIELD_SETS: + String[] mfs = StringUtils.split(value, Constants.PARAM_VALUE_SEP); + config.setMatchingFieldSets(Sets.newHashSet(mfs)); + break; case QueryParameters.GROUP_FIELDS: String[] groups = StringUtils.split(value, Constants.PARAM_VALUE_SEP); config.setGroupFields(Sets.newHashSet(groups)); diff --git a/warehouse/query-core/src/main/java/datawave/query/postprocessing/tf/TermOffsetPopulator.java b/warehouse/query-core/src/main/java/datawave/query/postprocessing/tf/TermOffsetPopulator.java index e34f8bdcfa0..78428c2dadc 100644 --- a/warehouse/query-core/src/main/java/datawave/query/postprocessing/tf/TermOffsetPopulator.java +++ b/warehouse/query-core/src/main/java/datawave/query/postprocessing/tf/TermOffsetPopulator.java @@ -11,13 +11,13 @@ import datawave.data.type.Type; import datawave.ingest.protobuf.TermWeight; import datawave.ingest.protobuf.TermWeightPosition; +import datawave.query.data.parsers.TermFrequencyKey; +import datawave.query.jexl.functions.TermFrequencyList; +import datawave.query.predicate.EventDataQueryFilter; import datawave.query.Constants; import datawave.query.attributes.Content; import datawave.query.attributes.Document; -import datawave.query.jexl.functions.TermFrequencyList; import datawave.query.jexl.visitors.LiteralNodeSubsetVisitor; -import datawave.query.predicate.EventDataQueryFilter; -import datawave.util.StringUtils; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.PartialKey; import org.apache.accumulo.core.data.Range; @@ -154,34 +154,28 @@ public Map getContextMap(Key docKey, Set keys, Set f if (evaluationFilter != null) { evaluationFilter.startNewDocument(docKey); } - + + TermFrequencyKey parser = new TermFrequencyKey(); + TermWeightPosition.Builder position = new TermWeightPosition.Builder(); Map termOffsetMap = Maps.newHashMap(); - + while (tfSource.hasTop()) { Key key = tfSource.getTopKey(); - FieldValue fv = FieldValue.getFieldValue(key); - - // add the zone and term to our internal document - Content attr = new Content(fv.getValue(), source.getTopKey(), evaluationFilter == null || evaluationFilter.keep(key)); + parser.parse(key); - // no need to apply the evaluation filter here as the TermFrequencyIterator above is already doing more filtering than we can do here. - // So this filter is simply extraneous. However if the an EventDataQueryFilter implementation gets smarter somehow, then it can be added back in - // here. - // For example the AncestorQueryLogic may require this.... - // if (evaluationFilter == null || evaluationFilter.apply(Maps.immutableEntry(key, StringUtils.EMPTY_STRING))) { - - this.document.put(fv.getField(), attr); + // add the zone and term to our internal document. 
+ Content attr = new Content(parser.getValue(), source.getTopKey(), evaluationFilter == null || evaluationFilter.keep(key)); + + this.document.put(parser.getField(), attr); TreeMultimap offsets = TreeMultimap.create(); try { TermWeight.Info twInfo = TermWeight.Info.parseFrom(tfSource.getTopValue().get()); // if no content expansion fields then assume every field is permitted for unfielded content functions - TermFrequencyList.Zone twZone = new TermFrequencyList.Zone(fv.getField(), - (contentExpansionFields == null || contentExpansionFields.isEmpty() || contentExpansionFields.contains(fv.getField())), - TermFrequencyList.getEventId(key)); - - TermWeightPosition.Builder position = new TermWeightPosition.Builder(); + boolean isContentExpansionField = contentExpansionFields == null || contentExpansionFields.isEmpty() || contentExpansionFields.contains(parser.getField()); + TermFrequencyList.Zone twZone = new TermFrequencyList.Zone(parser.getField(), isContentExpansionField, TermFrequencyList.getEventId(key)); + for (int i = 0; i < twInfo.getTermOffsetCount(); i++) { position.setTermWeightOffsetInfo(twInfo, i); offsets.put(twZone, position.build()); @@ -190,14 +184,13 @@ public Map getContextMap(Key docKey, Set keys, Set f } catch (InvalidProtocolBufferException e) { log.error("Could not deserialize TermWeight protocol buffer for: " + source.getTopKey()); - - return null; + return Collections.emptyMap(); } // First time looking up this term in a field - TermFrequencyList tfl = termOffsetMap.get(fv.getValue()); + TermFrequencyList tfl = termOffsetMap.get(parser.getValue()); if (null == tfl) { - termOffsetMap.put(fv.getValue(), new TermFrequencyList(offsets)); + termOffsetMap.put(parser.getValue(), new TermFrequencyList(offsets)); } else { // Merge in the offsets for the current field+term with all previous // offsets from other fields in the same term @@ -215,7 +208,6 @@ public Map getContextMap(Key docKey, Set keys, Set f // Load the actual map into map that will be put into the JexlContext Map map = new HashMap<>(); map.put(Constants.TERM_OFFSET_MAP_JEXL_VARIABLE_NAME, new TermOffsetMap(termOffsetMap)); - return map; } @@ -402,130 +394,4 @@ public static Multimap getTermFrequencyFieldValues(Multimap\0} - */ - public static class FieldValue implements Comparable { - private int nullOffset; - private String valueField; - - public FieldValue(String field, String value) { - this.nullOffset = value.length(); - this.valueField = value + '\0' + field; - } - - /** - * A distance between this field value and another. Here we want a distance that correlates with the number of keys between here and there for the same. - * Essentially we want the inverse of the number of bytes that match. document. 
- * - * @param fv - * a field value - * @return a distance between here and there (negative means there is before here) - */ - public double distance(FieldValue fv) { - byte[] s1 = getValueField().getBytes(); - byte[] s2 = fv.getValueField().getBytes(); - int len = Math.min(s1.length, s2.length); - - int matches = 0; - int lastCharDiff = 0; - - for (int i = 0; i <= len; i++) { - lastCharDiff = getValue(s2, i) - getValue(s1, i); - if (lastCharDiff == 0) { - matches++; - } else { - break; - } - } - - return Math.copySign(1.0d / (matches + 1), lastCharDiff); - } - - private int getValue(byte[] bytes, int index) { - if (index >= bytes.length) { - return 0; - } else { - return bytes[index]; - } - } - - public String getValueField() { - return valueField; - } - - public String getField() { - return valueField.substring(nullOffset + 1); - } - - public String getValue() { - return valueField.substring(0, nullOffset); - } - - @Override - public int compareTo(FieldValue o) { - return valueField.compareTo(o.valueField); - } - - @Override - public int hashCode() { - return valueField.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof FieldValue) { - return valueField.equals(((FieldValue) obj).valueField); - } - return false; - } - - @Override - public String toString() { - return getField() + " -> " + getValue(); - } - - public static FieldValue getFieldValue(Key key) { - return getFieldValue(key.getColumnQualifier()); - } - - public static FieldValue getFieldValue(Text cqText) { - if (cqText == null) { - return null; - } - return getFieldValue(cqText.toString()); - } - - public static FieldValue getFieldValue(String cq) { - if (cq == null) { - return null; - } - - // pull apart the cq - String[] cqParts = StringUtils.split(cq, '\0'); - - // if we do not even have the first datatype\0uid, then lets find it - if (cqParts.length <= 2) { - return null; - } - - // get the value and field - String value = ""; - String field = ""; - if (cqParts.length >= 4) { - field = cqParts[cqParts.length - 1]; - value = cqParts[2]; - // in case the value had null characters therein - for (int i = 3; i < (cqParts.length - 1); i++) { - value = value + '\0' + cqParts[i]; - } - } else if (cqParts.length == 3) { - value = cqParts[2]; - } - - return new FieldValue(field, value); - } - - } } diff --git a/warehouse/query-core/src/main/java/datawave/query/predicate/NegationPredicate.java b/warehouse/query-core/src/main/java/datawave/query/predicate/NegationPredicate.java index eb4469da456..054a25a7777 100644 --- a/warehouse/query-core/src/main/java/datawave/query/predicate/NegationPredicate.java +++ b/warehouse/query-core/src/main/java/datawave/query/predicate/NegationPredicate.java @@ -55,7 +55,7 @@ public Object visit(ASTNENode node, Object data) { @Override public Object visit(ASTEQNode node, Object data) { - hasNot |= JexlASTHelper.isWithinNot(node); + hasNot |= JexlASTHelper.isDescendantOfNot(node); return super.visit(node, data); } diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/BatchScannerSession.java b/warehouse/query-core/src/main/java/datawave/query/tables/BatchScannerSession.java index 0b6799a6f6f..9edf703d1fc 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/BatchScannerSession.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/BatchScannerSession.java @@ -48,8 +48,6 @@ public class BatchScannerSession extends ScannerSession implements Iterator>, FutureCallback, SessionArbiter, UncaughtExceptionHandler { - private static 
final int THIRTY_MINUTES = 108000000; - private static final double RANGE_MULTIPLIER = 5; private static final double QUEUE_MULTIPLIER = 25; @@ -75,8 +73,6 @@ public class BatchScannerSession extends ScannerSession implements Iterator> visitorFunctions = Lists.newArrayList(); @@ -166,10 +162,8 @@ public BatchScannerSession(String tableName, Set auths, Resource currentBatch = Queues.newLinkedBlockingDeque(); setThreads(1); - - listenerService = Executors.newFixedThreadPool(1); - - addListener(new BatchScannerListener(), listenerService); + + addListener(new BatchScannerListener(), MoreExecutors.newDirectExecutorService()); serverFailureMap = Maps.newConcurrentMap(); @@ -370,26 +364,26 @@ protected void submitTasks(List newChunks) { chunk.setQueryId(settings.getId().toString()); - scan = new SpeculativeScan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, listenerService); + scan = new SpeculativeScan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, service); scan.setVisitors(visitorFunctions); Scan childScan = new Scan(localTableName, localAuths, new ScannerChunk(chunk), delegatorReference, BatchResource.class, - ((SpeculativeScan) scan).getQueue(), listenerService); + ((SpeculativeScan) scan).getQueue(), service); childScan.setVisitors(visitorFunctions); ((SpeculativeScan) scan).addScan(childScan); childScan = new Scan(localTableName, localAuths, new ScannerChunk(chunk), delegatorReference, delegatedResourceInitializer, - ((SpeculativeScan) scan).getQueue(), listenerService); + ((SpeculativeScan) scan).getQueue(), service); childScan.setVisitors(visitorFunctions); ((SpeculativeScan) scan).addScan(childScan); } else { - scan = new Scan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, listenerService); + scan = new Scan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, service); } if (backoffEnabled) { @@ -430,16 +424,16 @@ protected void submitTasks() { if (log.isTraceEnabled()) { log.trace("Using speculative execution"); } - scan = new SpeculativeScan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, listenerService); + scan = new SpeculativeScan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, service); ((SpeculativeScan) scan).addScan(new Scan(localTableName, localAuths, new ScannerChunk(chunk), delegatorReference, BatchResource.class, - ((SpeculativeScan) scan).getQueue(), listenerService)); + ((SpeculativeScan) scan).getQueue(), service)); ((SpeculativeScan) scan).addScan(new Scan(localTableName, localAuths, new ScannerChunk(chunk), delegatorReference, - delegatedResourceInitializer, ((SpeculativeScan) scan).getQueue(), listenerService)); + delegatedResourceInitializer, ((SpeculativeScan) scan).getQueue(), service)); } else { - scan = new Scan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, listenerService); + scan = new Scan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, service); } if (backoffEnabled) { @@ -645,7 +639,6 @@ public void failed(State from, Throwable failure) { */ protected void shutdownServices() { service.shutdownNow(); - listenerService.shutdownNow(); int count = 0; try { while (!service.awaitTermination(250, TimeUnit.MILLISECONDS) && count < MAX_WAIT) { @@ -665,7 +658,6 @@ 
protected void shutdownServices() { public void close() { stopAsync(); service.shutdownNow(); - listenerService.shutdownNow(); } public void addVisitor(Function visitorFunction) { diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index b4b8727941a..1858ca43fa9 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -49,7 +49,6 @@ import datawave.query.util.MetadataHelper; import datawave.query.util.MetadataHelperFactory; import datawave.query.util.QueryStopwatch; -import datawave.util.StringUtils; import datawave.util.time.TraceStopwatch; import datawave.webservice.common.connection.AccumuloConnectionFactory; import datawave.webservice.common.logging.ThreadConfigurableLogger; @@ -72,6 +71,7 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.security.Authorizations; import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; import java.io.IOException; @@ -304,7 +304,7 @@ public String getJexlQueryString(Query settings) throws ParseException { // enforce mandatoryQuerySyntax if set if (null != this.mandatoryQuerySyntax) { - if (org.apache.commons.lang.StringUtils.isEmpty(querySyntax)) { + if (StringUtils.isEmpty(querySyntax)) { throw new IllegalStateException("Must specify one of the following syntax options: " + this.mandatoryQuerySyntax); } else { if (!this.mandatoryQuerySyntax.contains(querySyntax)) { @@ -316,7 +316,7 @@ public String getJexlQueryString(Query settings) throws ParseException { QueryParser querySyntaxParser = getParser(); - if (org.apache.commons.lang.StringUtils.isBlank(querySyntax)) { + if (StringUtils.isBlank(querySyntax)) { // Default to the class's query parser when one is not provided // Falling back to Jexl when one is not set on this class if (null == querySyntaxParser) { @@ -591,7 +591,7 @@ public QueryLogicTransformer getTransformer(Query settings) { boolean reducedInSettings = false; String reducedResponseStr = settings.findParameter(QueryOptions.REDUCED_RESPONSE).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(reducedResponseStr)) { + if (StringUtils.isNotBlank(reducedResponseStr)) { reducedInSettings = Boolean.parseBoolean(reducedResponseStr); } boolean reduced = (this.isReducedResponse() || reducedInSettings); @@ -655,7 +655,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting TraceStopwatch stopwatch = config.getTimers().newStartedStopwatch("ShardQueryLogic - Parse query parameters"); boolean rawDataOnly = false; String rawDataOnlyStr = settings.findParameter(QueryParameters.RAW_DATA_ONLY).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(rawDataOnlyStr)) { + if (StringUtils.isNotBlank(rawDataOnlyStr)) { rawDataOnly = Boolean.valueOf(rawDataOnlyStr); // if the master option raw.data.only is set, then set all of the transforming options appropriately. 
// note that if any of these other options are set, then it overrides the settings here @@ -694,7 +694,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the datatype set if specified String typeList = settings.findParameter(QueryParameters.DATATYPE_FILTER_SET).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(typeList)) { + if (StringUtils.isNotBlank(typeList)) { HashSet typeFilter = new HashSet<>(); typeFilter.addAll(Arrays.asList(StringUtils.split(typeList, Constants.PARAM_VALUE_SEP))); @@ -707,7 +707,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the list of fields to project up the stack. May be null. String projectFields = settings.findParameter(QueryParameters.RETURN_FIELDS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(projectFields)) { + if (StringUtils.isNotBlank(projectFields)) { List projectFieldsList = Arrays.asList(StringUtils.split(projectFields, Constants.PARAM_VALUE_SEP)); // Only set the projection fields if we were actually given some @@ -726,7 +726,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // if the TRANSFORM_CONTENT_TO_UID is false, then unset the list of content field names preventing the DocumentTransformer from // transforming them. String transformContentStr = settings.findParameter(QueryParameters.TRANSFORM_CONTENT_TO_UID).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(transformContentStr)) { + if (StringUtils.isNotBlank(transformContentStr)) { if (!Boolean.valueOf(transformContentStr)) { setContentFieldNames(Collections.EMPTY_LIST); } @@ -734,7 +734,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the list of blacklisted fields. May be null. 
String tBlacklistedFields = settings.findParameter(QueryParameters.BLACKLISTED_FIELDS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(tBlacklistedFields)) { + if (StringUtils.isNotBlank(tBlacklistedFields)) { List blacklistedFieldsList = Arrays.asList(StringUtils.split(tBlacklistedFields, Constants.PARAM_VALUE_SEP)); // Only set the blacklisted fields if we were actually given some @@ -753,7 +753,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the LIMIT_FIELDS parameter if given String limitFields = settings.findParameter(QueryParameters.LIMIT_FIELDS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(limitFields)) { + if (StringUtils.isNotBlank(limitFields)) { List limitFieldsList = Arrays.asList(StringUtils.split(limitFields, Constants.PARAM_VALUE_SEP)); // Only set the limit fields if we were actually given some @@ -762,22 +762,33 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting } } + // Get the MATCHING_FIELD_SETS parameter if given + String matchingFieldSets = settings.findParameter(QueryParameters.MATCHING_FIELD_SETS).getParameterValue().trim(); + if (StringUtils.isNotBlank(matchingFieldSets)) { + List matchingFieldSetsList = Arrays.asList(StringUtils.split(matchingFieldSets, Constants.PARAM_VALUE_SEP)); + + // Only set the matching field sets if we were actually given some + if (!matchingFieldSetsList.isEmpty()) { + config.setMatchingFieldSets(new HashSet<>(matchingFieldSetsList)); + } + } + String limitFieldsPreQueryEvaluation = settings.findParameter(QueryOptions.LIMIT_FIELDS_PRE_QUERY_EVALUATION).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(limitFieldsPreQueryEvaluation)) { + if (StringUtils.isNotBlank(limitFieldsPreQueryEvaluation)) { Boolean limitFieldsPreQueryEvaluationValue = Boolean.parseBoolean(limitFieldsPreQueryEvaluation); this.setLimitFieldsPreQueryEvaluation(limitFieldsPreQueryEvaluationValue); config.setLimitFieldsPreQueryEvaluation(limitFieldsPreQueryEvaluationValue); } String limitFieldsField = settings.findParameter(QueryOptions.LIMIT_FIELDS_FIELD).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(limitFieldsField)) { + if (StringUtils.isNotBlank(limitFieldsField)) { this.setLimitFieldsField(limitFieldsField); config.setLimitFieldsField(limitFieldsField); } // Get the GROUP_FIELDS parameter if given String groupFields = settings.findParameter(QueryParameters.GROUP_FIELDS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(groupFields)) { + if (StringUtils.isNotBlank(groupFields)) { List groupFieldsList = Arrays.asList(StringUtils.split(groupFields, Constants.PARAM_VALUE_SEP)); // Only set the group fields if we were actually given some @@ -789,7 +800,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting } String groupFieldsBatchSizeString = settings.findParameter(QueryParameters.GROUP_FIELDS_BATCH_SIZE).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(groupFieldsBatchSizeString)) { + if (StringUtils.isNotBlank(groupFieldsBatchSizeString)) { int groupFieldsBatchSize = Integer.parseInt(groupFieldsBatchSizeString); this.setGroupFieldsBatchSize(groupFieldsBatchSize); config.setGroupFieldsBatchSize(groupFieldsBatchSize); @@ -797,7 +808,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the UNIQUE_FIELDS parameter if given String
uniqueFieldsParam = settings.findParameter(QueryParameters.UNIQUE_FIELDS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(uniqueFieldsParam)) { + if (StringUtils.isNotBlank(uniqueFieldsParam)) { UniqueFields uniqueFields = UniqueFields.from(uniqueFieldsParam); // Only set the unique fields if we were actually given some if (!uniqueFields.isEmpty()) { @@ -808,7 +819,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the EXCERPT_FIELDS parameter if given String excerptFieldsParam = settings.findParameter(QueryParameters.EXCERPT_FIELDS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(excerptFieldsParam)) { + if (StringUtils.isNotBlank(excerptFieldsParam)) { ExcerptFields excerptFields = ExcerptFields.from(excerptFieldsParam); // Only set the excerpt fields if we were actually given some if (!excerptFieldsParam.isEmpty()) { @@ -819,35 +830,35 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the HIT_LIST parameter if given String hitListString = settings.findParameter(QueryParameters.HIT_LIST).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(hitListString)) { + if (StringUtils.isNotBlank(hitListString)) { Boolean hitListBool = Boolean.parseBoolean(hitListString); config.setHitList(hitListBool); } // Get the BYPASS_ACCUMULO parameter if given String bypassAccumuloString = settings.findParameter(BYPASS_ACCUMULO).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(bypassAccumuloString)) { + if (StringUtils.isNotBlank(bypassAccumuloString)) { Boolean bypassAccumuloBool = Boolean.parseBoolean(bypassAccumuloString); config.setBypassAccumulo(bypassAccumuloBool); } // Get the DATE_INDEX_TIME_TRAVEL parameter if given String dateIndexTimeTravelString = settings.findParameter(QueryOptions.DATE_INDEX_TIME_TRAVEL).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(dateIndexTimeTravelString)) { + if (StringUtils.isNotBlank(dateIndexTimeTravelString)) { Boolean dateIndexTimeTravel = Boolean.parseBoolean(dateIndexTimeTravelString); config.setDateIndexTimeTravel(dateIndexTimeTravel); } // get the RAW_TYPES parameter if given String rawTypesString = settings.findParameter(QueryParameters.RAW_TYPES).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(rawTypesString)) { + if (StringUtils.isNotBlank(rawTypesString)) { Boolean rawTypesBool = Boolean.parseBoolean(rawTypesString); config.setRawTypes(rawTypesBool); } // Get the FILTER_MASKED_VALUES spring setting String filterMaskedValuesStr = settings.findParameter(QueryParameters.FILTER_MASKED_VALUES).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(filterMaskedValuesStr)) { + if (StringUtils.isNotBlank(filterMaskedValuesStr)) { Boolean filterMaskedValuesBool = Boolean.parseBoolean(filterMaskedValuesStr); this.setFilterMaskedValues(filterMaskedValuesBool); config.setFilterMaskedValues(filterMaskedValuesBool); @@ -855,14 +866,14 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the INCLUDE_DATATYPE_AS_FIELD spring setting String includeDatatypeAsFieldStr = settings.findParameter(QueryParameters.INCLUDE_DATATYPE_AS_FIELD).getParameterValue().trim(); - if (((org.apache.commons.lang.StringUtils.isNotBlank(includeDatatypeAsFieldStr) && Boolean.valueOf(includeDatatypeAsFieldStr))) + if 
(((StringUtils.isNotBlank(includeDatatypeAsFieldStr) && Boolean.valueOf(includeDatatypeAsFieldStr))) || (this.getIncludeDataTypeAsField() && !rawDataOnly)) { config.setIncludeDataTypeAsField(true); } // Get the INCLUDE_RECORD_ID spring setting String includeRecordIdStr = settings.findParameter(QueryParameters.INCLUDE_RECORD_ID).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(includeRecordIdStr)) { + if (StringUtils.isNotBlank(includeRecordIdStr)) { boolean includeRecordIdBool = Boolean.parseBoolean(includeRecordIdStr) && !rawDataOnly; this.setIncludeRecordId(includeRecordIdBool); config.setIncludeRecordId(includeRecordIdBool); @@ -870,7 +881,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the INCLUDE_HIERARCHY_FIELDS spring setting String includeHierarchyFieldsStr = settings.findParameter(QueryParameters.INCLUDE_HIERARCHY_FIELDS).getParameterValue().trim(); - if (((org.apache.commons.lang.StringUtils.isNotBlank(includeHierarchyFieldsStr) && Boolean.valueOf(includeHierarchyFieldsStr))) + if (((StringUtils.isNotBlank(includeHierarchyFieldsStr) && Boolean.valueOf(includeHierarchyFieldsStr))) || (this.getIncludeHierarchyFields() && !rawDataOnly)) { config.setIncludeHierarchyFields(true); @@ -880,7 +891,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the query profile to allow us to select the tune profile of the query String queryProfile = settings.findParameter(QueryParameters.QUERY_PROFILE).getParameterValue().trim(); - if ((org.apache.commons.lang.StringUtils.isNotBlank(queryProfile))) { + if ((StringUtils.isNotBlank(queryProfile))) { selectedProfile = configuredProfiles.get(queryProfile); @@ -892,21 +903,21 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting // Get the include.grouping.context = true/false spring setting String includeGroupingContextStr = settings.findParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT).getParameterValue().trim(); - if (((org.apache.commons.lang.StringUtils.isNotBlank(includeGroupingContextStr) && Boolean.valueOf(includeGroupingContextStr))) + if (((StringUtils.isNotBlank(includeGroupingContextStr) && Boolean.valueOf(includeGroupingContextStr))) || (this.getIncludeGroupingContext() && !rawDataOnly)) { config.setIncludeGroupingContext(true); } // Check if the default modelName and modelTableNames have been overridden by custom parameters. 
String parameterModelName = settings.findParameter(QueryParameters.PARAMETER_MODEL_NAME).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(parameterModelName)) { + if (StringUtils.isNotBlank(parameterModelName)) { this.setModelName(parameterModelName); } config.setModelName(this.getModelName()); String parameterModelTableName = settings.findParameter(QueryParameters.PARAMETER_MODEL_TABLE_NAME).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(parameterModelTableName)) { + if (StringUtils.isNotBlank(parameterModelTableName)) { this.setModelTableName(parameterModelTableName); } @@ -927,7 +938,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting config.setLimitTermExpansionToModel(this.isExpansionLimitedToModelContents()); String reducedResponseStr = settings.findParameter(QueryOptions.REDUCED_RESPONSE).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(reducedResponseStr)) { + if (StringUtils.isNotBlank(reducedResponseStr)) { Boolean reducedResponseValue = Boolean.parseBoolean(reducedResponseStr); this.setReducedResponse(reducedResponseValue); config.setReducedResponse(reducedResponseValue); @@ -938,14 +949,14 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting final String postProcessingOptions = settings.findParameter(QueryOptions.POSTPROCESSING_OPTIONS).getParameterValue().trim(); // build the post p - if (org.apache.commons.lang.StringUtils.isNotBlank(postProcessingClasses)) { + if (StringUtils.isNotBlank(postProcessingClasses)) { List filterClasses = config.getFilterClassNames(); if (null == filterClasses) { filterClasses = new ArrayList<>(); } - for (String fClassName : StringUtils.splitIterable(postProcessingClasses, ',', true)) { + for (String fClassName : new datawave.util.StringUtils.SplitIterable(postProcessingClasses, ',', true)) { filterClasses.add(fClassName); } config.setFilterClassNames(filterClasses); @@ -955,11 +966,11 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting config.putFilterOptions(options); } - if (org.apache.commons.lang.StringUtils.isNotBlank(postProcessingOptions)) { - for (String filterOptionStr : StringUtils.splitIterable(postProcessingOptions, ',', true)) { - if (org.apache.commons.lang.StringUtils.isNotBlank(filterOptionStr)) { + if (StringUtils.isNotBlank(postProcessingOptions)) { + for (String filterOptionStr : new datawave.util.StringUtils.SplitIterable(postProcessingOptions, ',', true)) { + if (StringUtils.isNotBlank(filterOptionStr)) { final String filterValueString = settings.findParameter(filterOptionStr).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(filterValueString)) { + if (StringUtils.isNotBlank(filterValueString)) { config.putFilterOptions(filterOptionStr, filterValueString); } } @@ -968,7 +979,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting } String tCompressServerSideResults = settings.findParameter(QueryOptions.COMPRESS_SERVER_SIDE_RESULTS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(tCompressServerSideResults)) { + if (StringUtils.isNotBlank(tCompressServerSideResults)) { boolean compress = Boolean.parseBoolean(tCompressServerSideResults); config.setCompressServerSideResults(compress); } @@ -989,7 +1000,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting QueryLogicTransformer transformer = 
getTransformer(settings); if (transformer instanceof WritesQueryMetrics) { String logTimingDetailsStr = settings.findParameter(QueryOptions.LOG_TIMING_DETAILS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(logTimingDetailsStr)) { + if (StringUtils.isNotBlank(logTimingDetailsStr)) { setLogTimingDetails(Boolean.valueOf(logTimingDetailsStr)); } if (getLogTimingDetails()) { @@ -998,7 +1009,7 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting } else { String collectTimingDetailsStr = settings.findParameter(QueryOptions.COLLECT_TIMING_DETAILS).getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(collectTimingDetailsStr)) { + if (StringUtils.isNotBlank(collectTimingDetailsStr)) { setCollectTimingDetails(Boolean.valueOf(collectTimingDetailsStr)); } } @@ -1015,7 +1026,7 @@ void configureDocumentAggregation(Query settings) { Parameter disabledIndexOnlyDocument = settings.findParameter(QueryOptions.DISABLE_DOCUMENTS_WITHOUT_EVENTS); if (null != disabledIndexOnlyDocument) { final String disabledIndexOnlyDocumentStr = disabledIndexOnlyDocument.getParameterValue().trim(); - if (org.apache.commons.lang.StringUtils.isNotBlank(disabledIndexOnlyDocumentStr)) { + if (StringUtils.isNotBlank(disabledIndexOnlyDocumentStr)) { Boolean disabledIndexOnlyDocuments = Boolean.parseBoolean(disabledIndexOnlyDocumentStr); setDisableIndexOnlyDocuments(disabledIndexOnlyDocuments); } @@ -1258,6 +1269,14 @@ public void setLimitFields(Set limitFields) { getConfig().setLimitFields(limitFields); } + public Set getMatchingFieldSets() { + return getConfig().getMatchingFieldSets(); + } + + public void setMatchingFieldSets(Set matchingFieldSets) { + getConfig().setMatchingFieldSets(matchingFieldSets); + } + public boolean isLimitFieldsPreQueryEvaluation() { return getConfig().isLimitFieldsPreQueryEvaluation(); } @@ -2043,6 +2062,7 @@ public Set getOptionalQueryParameters() { optionalParams.add(QueryOptions.HIT_LIST); optionalParams.add(QueryOptions.DATE_INDEX_TIME_TRAVEL); optionalParams.add(QueryParameters.LIMIT_FIELDS); + optionalParams.add(QueryParameters.MATCHING_FIELD_SETS); optionalParams.add(QueryParameters.GROUP_FIELDS); optionalParams.add(QueryParameters.UNIQUE_FIELDS); optionalParams.add(QueryOptions.LOG_TIMING_DETAILS); diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java index a429aca0640..a49dc277c76 100644 --- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java @@ -511,7 +511,7 @@ public void testIsTldQuery() { */ @Test public void testCheckForNewAdditions() throws IOException { - int expectedObjectCount = 199; + int expectedObjectCount = 201; ShardQueryConfiguration config = ShardQueryConfiguration.create(); ObjectMapper mapper = new ObjectMapper(); JsonNode root = mapper.readTree(mapper.writeValueAsString(config)); diff --git a/warehouse/query-core/src/test/java/datawave/query/function/HitsAreAlwaysIncludedTest.java b/warehouse/query-core/src/test/java/datawave/query/function/HitsAreAlwaysIncludedTest.java index 10f4e60d3f3..07012936417 100644 --- a/warehouse/query-core/src/test/java/datawave/query/function/HitsAreAlwaysIncludedTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/function/HitsAreAlwaysIncludedTest.java @@ -176,13 
+176,17 @@ protected void runTestQuery(AccumuloClient client, String queryString, Date star GenericQueryConfiguration config = logic.initialize(client, settings, authSet); logic.setupQuery(config); + List extraValues = new ArrayList<>(); + Set docs = new HashSet<>(); for (Entry entry : logic) { Document d = deserializer.apply(entry).getValue(); + log.trace(entry.getKey() + " => " + d); docs.add(d); Attribute hitAttribute = d.get(JexlEvaluation.HIT_TERM_FIELD); + Attribute recordId = d.get(Document.DOCKEY_FIELD_NAME); if (hitAttribute instanceof Attributes) { Attributes attributes = (Attributes) hitAttribute; @@ -208,31 +212,43 @@ protected void runTestQuery(AccumuloClient client, String queryString, Date star for (Entry>> dictionaryEntry : dictionary.entrySet()) { Attribute> attribute = dictionaryEntry.getValue(); + if (attribute == hitAttribute || attribute == recordId) { + continue; + } + if (attribute instanceof Attributes) { for (Attribute attr : ((Attributes) attribute).getAttributes()) { String toFind = dictionaryEntry.getKey() + ":" + attr; boolean found = goodResults.remove(toFind); - if (found) + if (found) { log.debug("removed " + toFind); - else - log.debug("Did not remove " + toFind); + } else if (toFind.contains(LimitFields.ORIGINAL_COUNT_SUFFIX)) { + log.debug("Ignoring original count field " + toFind); + } else { + extraValues.add('"' + toFind + '"'); + } } } else { String toFind = dictionaryEntry.getKey() + ":" + dictionaryEntry.getValue(); boolean found = goodResults.remove(toFind); - if (found) + if (found) { log.debug("removed " + toFind); - else - log.debug("Did not remove " + toFind); + } else if (toFind.contains(LimitFields.ORIGINAL_COUNT_SUFFIX)) { + log.debug("Ignoring original count field " + toFind); + } else { + extraValues.add('"' + toFind + '"'); + } } } - - Assert.assertTrue(goodResults + " good results was not empty", goodResults.isEmpty()); } + + Assert.assertTrue(goodResults + " good results was not empty", goodResults.isEmpty()); + Assert.assertTrue(extraValues + " extra values was not empty", extraValues.isEmpty()); Assert.assertTrue("No docs were returned!", !docs.isEmpty()); + Assert.assertEquals("Expected exactly one document", 1, docs.size()); } @Test @@ -256,12 +272,28 @@ public void testHitForIndexedQueryTerm() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "true"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "FOO_3_BAR == 'defg'"; - Set goodResults = Sets.newHashSet("FOO_1_BAR.FOO.3:good", "FOO_3_BAR.FOO.3:defg", "FOO_3.FOO.3.3:defg", "FOO_4.FOO.4.3:yes", - "FOO_1.FOO.1.3:good"); + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_1_BAR.FOO.3:good", + "FOO_3_BAR.FOO.3:defg", + "FOO_3.FOO.3.3:defg", + "FOO_4.FOO.4.3:yes", + "FOO_1.FOO.1.3:good", + // the additional values included per the limits + "FOO_1.FOO.1.0:yawn", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + "FOO_3.FOO.3.0:abcd", + "FOO_3_BAR.FOO.0:abcd", + "FOO_4.FOO.4.0:purr", + "FOO_4.FOO.4.1:purr"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.3:defg"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, 
expectedHits, goodResults); @@ -272,10 +304,26 @@ public void testHitForIndexedQueryTermWithOptionsInQueryFunction() throws Except Map extraParameters = new HashMap<>(); String queryString = "FOO_3_BAR == 'defg' and f:options('include.grouping.context', 'true', " - + "'hit.list', 'true', 'limit.fields', 'FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4')"; - - Set goodResults = Sets.newHashSet("FOO_1_BAR.FOO.3:good", "FOO_3_BAR.FOO.3:defg", "FOO_3.FOO.3.3:defg", "FOO_4.FOO.4.3:yes", - "FOO_1.FOO.1.3:good"); + + "'hit.list', 'true', 'limit.fields', 'FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0')"; + + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_1_BAR.FOO.3:good", + "FOO_3_BAR.FOO.3:defg", + "FOO_3.FOO.3.3:defg", + "FOO_4.FOO.4.3:yes", + "FOO_1.FOO.1.3:good", + // the additional values included per the limits + "FOO_1.FOO.1.0:yawn", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + "FOO_3.FOO.3.0:abcd", + "FOO_3_BAR.FOO.0:abcd", + "FOO_4.FOO.4.0:purr", + "FOO_4.FOO.4.1:purr"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.3:defg"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); @@ -286,12 +334,28 @@ public void testHitForIndexedQueryOnUnrealmed() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "true"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "FOO_3 == 'defg'"; - Set goodResults = Sets.newHashSet("FOO_1_BAR.FOO.3:good", "FOO_3_BAR.FOO.3:defg", "FOO_3.FOO.3.3:defg", "FOO_4.FOO.4.3:yes", - "FOO_1.FOO.1.3:good"); + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_1_BAR.FOO.3:good", + "FOO_3_BAR.FOO.3:defg", + "FOO_3.FOO.3.3:defg", + "FOO_4.FOO.4.3:yes", + "FOO_1.FOO.1.3:good", + // the additional values included per the limits + "FOO_1.FOO.1.0:yawn", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + "FOO_3.FOO.3.0:abcd", + "FOO_3_BAR.FOO.0:abcd", + "FOO_4.FOO.4.0:purr", + "FOO_4.FOO.4.1:purr"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3.FOO.3.3:defg"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); @@ -302,12 +366,26 @@ public void testHitForIndexedQueryAndAnyfieldLimit() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "true"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "_ANYFIELD_=2"); + extraParameters.put("limit.fields", "_ANYFIELD_=2,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "FOO_3_BAR == 'defg'"; - Set goodResults = Sets.newHashSet("FOO_1_BAR.FOO.3:good", "FOO_3_BAR.FOO.3:defg", "FOO_3.FOO.3.3:defg", "FOO_4.FOO.4.3:yes", - "FOO_1.FOO.1.3:good"); + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_1_BAR.FOO.3:good", + "FOO_3_BAR.FOO.3:defg", + "FOO_3.FOO.3.3:defg", + "FOO_4.FOO.4.3:yes", + "FOO_1.FOO.1.3:good", + // the additional values included per 
the limits + "FOO_1.FOO.1.0:yawn", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + "FOO_3.FOO.3.0:abcd", + "FOO_3_BAR.FOO.0:abcd", + "FOO_4.FOO.4.0:purr"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.3:defg"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); @@ -318,12 +396,25 @@ public void testHitForIndexedAndUnindexedQueryAndAnyfieldLimit() throws Exceptio Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "true"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "_ANYFIELD_=2,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "FOO_3_BAR == 'defg' and FOO_1 == 'good'"; - - Set goodResults = Sets.newHashSet("FOO_1_BAR.FOO.3:good", "FOO_3_BAR.FOO.3:defg", "FOO_3.FOO.3.3:defg", "FOO_4.FOO.4.3:yes", - "FOO_1.FOO.1.3:good"); + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_1_BAR.FOO.3:good", + "FOO_3_BAR.FOO.3:defg", + "FOO_3.FOO.3.3:defg", + "FOO_4.FOO.4.3:yes", + // the additional values included per the limits + "FOO_1.FOO.1.0:yawn", + "FOO_1.FOO.1.3:good", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + "FOO_3.FOO.3.0:abcd", + "FOO_3_BAR.FOO.0:abcd", + "FOO_4.FOO.4.0:purr"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.3:defg", "FOO_1.FOO.1.3:good"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); @@ -334,14 +425,30 @@ public void testHitWithoutGroupingContext() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "false"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "FOO_3_BAR == 'defg'"; - // there is no grouping context so i can expect only the original term, not the related ones (in the same group) - Set goodResults = Sets.newHashSet("FOO_3_BAR:defg"); + // there is no grouping context so I can expect only the original term, not the related ones (in the same group) + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit + "FOO_3_BAR:defg", + // the additional values included per the limits + "FOO_1:yawn", + "FOO_1:good", + "FOO_1_BAR:yawn", + "FOO_1_BAR:good", + "FOO_1_BAR_1:2021-03-24T16:00:00.000Z", + "FOO_3:abcd", + "FOO_3:bcde", + "FOO_3_BAR:abcd", + "FOO_4:purr", + "FOO_4:yes"); + //@formatter:on + Set expectedHits = Sets.newHashSet("FOO_3_BAR:defg"); - runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, new HashSet<>(goodResults), goodResults); + runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); } @Test @@ -349,13 +456,28 @@ public void testHitWithRange() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "false"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", 
"FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "((_Bounded_ = true) && (FOO_1_BAR_1 >= '2021-03-01 00:00:00' && FOO_1_BAR_1 <= '2021-04-01 00:00:00'))"; - // there is no grouping context so i can expect only the original term, not the related ones (in the same group) + // there is no grouping context so I can expect only the original term, not the related ones (in the same group) Set expectedHits = Sets.newHashSet("FOO_1_BAR_1:Wed Mar 24 16:00:00 GMT 2021"); - Set goodResults = Sets.newHashSet("FOO_1_BAR_1:2021-03-24T16:00:00.000Z"); + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit + "FOO_1_BAR_1:2021-03-24T16:00:00.000Z", + // the additional values included per the limits + "FOO_1:yawn", + "FOO_1:good", + "FOO_1_BAR:yawn", + "FOO_1_BAR:good", + "FOO_3:abcd", + "FOO_3:bcde", + "FOO_3_BAR:abcd", + "FOO_3_BAR:bcde", + "FOO_4:purr", + "FOO_4:yes"); + //@formatter:on runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); } @@ -365,13 +487,28 @@ public void testHitWithDate() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "false"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=2,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0"); String queryString = "FOO_1_BAR_1 == '2021-03-24T16:00:00.000Z'"; - // there is no grouping context so i can expect only the original term, not the related ones (in the same group) + // there is no grouping context so I can expect only the original term, not the related ones (in the same group) Set expectedHits = Sets.newHashSet("FOO_1_BAR_1:Wed Mar 24 16:00:00 GMT 2021"); - Set goodResults = Sets.newHashSet("FOO_1_BAR_1:2021-03-24T16:00:00.000Z"); + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit + "FOO_1_BAR_1:2021-03-24T16:00:00.000Z", + // the additional values included per the limits + "FOO_1:yawn", + "FOO_1:good", + "FOO_1_BAR:yawn", + "FOO_1_BAR:good", + "FOO_3:abcd", + "FOO_3:bcde", + "FOO_3_BAR:abcd", + "FOO_3_BAR:bcde", + "FOO_4:purr", + "FOO_4:yes"); + //@formatter:on runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); } @@ -381,37 +518,252 @@ public void testHitWithExceededOrThreshold() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "false"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=4,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=1,FOO_4=3,FOO_1_BAR_1=4,BAR_1=0,BAR_2=0,BAR_3=0"); logic.setMaxOrExpansionThreshold(1); ivaratorConfig(); String queryString = "FOO_3_BAR == 'defg' || FOO_3_BAR == 'abcd'"; - // there is no grouping context so i can expect only the original term, not the related ones (in the same group) - Set goodResults = Sets.newHashSet("FOO_3_BAR:defg", "FOO_3_BAR:abcd"); + // there is no grouping context so I can expect only the original term, not the related ones (in the same group) + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hits + "FOO_3_BAR:defg", + "FOO_3_BAR:abcd", + // the additional values included per the limits + "FOO_1:yawn", + "FOO_1:good", + "FOO_1_BAR:yawn", 
+ "FOO_1_BAR:good", + "FOO_1_BAR_1:2021-03-24T16:00:00.000Z", + "FOO_3:abcd", + "FOO_3:bcde", + "FOO_4:purr", + "FOO_4:yes"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3_BAR:defg", "FOO_3_BAR:abcd"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); } @Test - public void testHitWithFunction() throws Exception { + public void testHitsOnly() throws Exception { Map extraParameters = new HashMap<>(); extraParameters.put("include.grouping.context", "false"); extraParameters.put("hit.list", "true"); - extraParameters.put("limit.fields", "FOO_1_BAR=3,FOO_1=2,FOO_3=2,FOO_3_BAR=4,FOO_4=3,FOO_1_BAR_1=4"); + extraParameters.put("limit.fields", "FOO_1_BAR=-1,FOO_1=-1,FOO_3=-1,FOO_3_BAR=-1,FOO_4=-1,FOO_1_BAR_1=-1,BAR_1=0,BAR_2=0,BAR_3=0"); logic.setMaxOrExpansionThreshold(1); ivaratorConfig(); String queryString = "FOO_3_BAR == 'defg' || FOO_3_BAR == 'abcd'"; - // there is no grouping context so i can expect only the original term, not the related ones (in the same group) + // there is no grouping context so I can expect only the original term, not the related ones (in the same group) + //@formatter:off Set goodResults = Sets.newHashSet("FOO_3_BAR:defg", "FOO_3_BAR:abcd"); + //@formatter:on Set expectedHits = Sets.newHashSet("FOO_3_BAR:defg", "FOO_3_BAR:abcd"); runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); } + @Test + public void testGroupedHitsOnly() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("hit.list", "true"); + extraParameters.put("limit.fields", "FOO_1_BAR=-1,FOO_1=-1,FOO_3=-1,FOO_3_BAR=-1,FOO_4=-1,FOO_1_BAR_1=-1,BAR_1=0,BAR_2=0,BAR_3=0"); + logic.setMaxOrExpansionThreshold(1); + ivaratorConfig(); + + String queryString = "FOO_3_BAR == 'defg' || FOO_3_BAR == 'abcd'"; + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_1_BAR.FOO.3:good", + "FOO_3_BAR.FOO.3:defg", + "FOO_3.FOO.3.3:defg", + "FOO_4.FOO.4.3:yes", + // the additional values included per the limits + "FOO_1.FOO.1.3:good", + "FOO_1.FOO.1.0:yawn", + "FOO_4.FOO.4.0:purr", + "FOO_3.FOO.3.0:abcd", + "FOO_3_BAR.FOO.0:abcd", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z"); + //@formatter:on + Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.3:defg", "FOO_3_BAR.FOO.0:abcd"); + + runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); + } + + @Test + public void testGroupedHitsWithMatchingField() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("hit.list", "true"); + extraParameters.put("limit.fields", "FOO_1_BAR=-1,FOO_1=-1,FOO_3=-1,FOO_3_BAR=-1,FOO_4=-1,FOO_1_BAR_1=-1,BAR_1=0,BAR_2=0,BAR_3=0"); + extraParameters.put("matching.field.sets", "FOO_4"); + logic.setMaxOrExpansionThreshold(1); + ivaratorConfig(); + + String queryString = "FOO_3_BAR == 'abcd'"; + + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_3_BAR.FOO.0:abcd", + "FOO_1.FOO.1.0:yawn", + "FOO_4.FOO.4.0:purr", + "FOO_3.FOO.3.0:abcd", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + // the additional values included per the matching field sets + "FOO_1.FOO.1.1:yawn", + 
"FOO_4.FOO.4.1:purr", + "FOO_3.FOO.3.1:bcde", + "FOO_3_BAR.FOO.1:bcde", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1.FOO.1.2:yawn", + "FOO_4.FOO.4.2:purr", + "FOO_3.FOO.3.2:cdef", + "FOO_3_BAR.FOO.2:cdef", + "FOO_1_BAR.FOO.2:yawn"); + //@formatter:on + Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.0:abcd"); + + runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); + } + + @Test + public void testGroupedHitsWithMatchingFields() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("hit.list", "true"); + extraParameters.put("limit.fields", "FOO_1_BAR=-1,FOO_1=-1,FOO_3=-1,FOO_3_BAR=-1,FOO_4=-1,FOO_1_BAR_1=-1,BAR_1=0,BAR_2=0,BAR_3=0"); + extraParameters.put("matching.field.sets", "FOO_4=BAR_1"); + logic.setMaxOrExpansionThreshold(1); + ivaratorConfig(); + + String queryString = "FOO_3_BAR == 'abcd'"; + + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_3_BAR.FOO.0:abcd", + "FOO_1.FOO.1.0:yawn", + "FOO_4.FOO.4.0:purr", + "FOO_3.FOO.3.0:abcd", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + // the additional values included per the matching field sets + "FOO_1.FOO.1.1:yawn", + "FOO_4.FOO.4.1:purr", + "FOO_3.FOO.3.1:bcde", + "FOO_3_BAR.FOO.1:bcde", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1.FOO.1.2:yawn", + "FOO_4.FOO.4.2:purr", + "FOO_3.FOO.3.2:cdef", + "FOO_3_BAR.FOO.2:cdef", + "FOO_1_BAR.FOO.2:yawn", + "BAR_1.BAR.1.3:purr", + "BAR_2.BAR.2.3:tiger", + "BAR_3.BAR.3.3:spotted"); + //@formatter:on + Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.0:abcd"); + + runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); + } + + @Test + public void testGroupedHitsWithMoreMatchingFields() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("hit.list", "true"); + extraParameters.put("limit.fields", "FOO_1_BAR=-1,FOO_1=-1,FOO_3=-1,FOO_3_BAR=-1,FOO_4=-1,FOO_1_BAR_1=-1,BAR_1=0,BAR_2=0,BAR_3=0"); + extraParameters.put("matching.field.sets", "FOO_4=BAR_1=FOO_1"); + logic.setMaxOrExpansionThreshold(1); + ivaratorConfig(); + + String queryString = "FOO_3_BAR == 'abcd'"; + + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_3_BAR.FOO.0:abcd", + "FOO_1.FOO.1.0:yawn", + "FOO_4.FOO.4.0:purr", + "FOO_3.FOO.3.0:abcd", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + // the additional values included per the matching field sets + "FOO_1.FOO.1.1:yawn", + "FOO_4.FOO.4.1:purr", + "FOO_3.FOO.3.1:bcde", + "FOO_3_BAR.FOO.1:bcde", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1.FOO.1.2:yawn", + "FOO_4.FOO.4.2:purr", + "FOO_3.FOO.3.2:cdef", + "FOO_3_BAR.FOO.2:cdef", + "FOO_1_BAR.FOO.2:yawn", + "BAR_1.BAR.1.2:yawn", + "BAR_2.BAR.2.2:siberian", + "BAR_3.BAR.3.2:pink", + "BAR_1.BAR.1.3:purr", + "BAR_2.BAR.2.3:tiger", + "BAR_3.BAR.3.3:spotted"); + //@formatter:on + Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.0:abcd"); + + runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); + } + + @Test + public void testGroupedHitsWithMatchingFieldSets() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("hit.list", 
"true"); + extraParameters.put("limit.fields", "FOO_1_BAR=-1,FOO_1=-1,FOO_3=-1,FOO_3_BAR=-1,FOO_4=-1,FOO_1_BAR_1=-1,BAR_1=0,BAR_2=0,BAR_3=0"); + extraParameters.put("matching.field.sets", "FOO_4=BAR_1,FOO_1=BAR_1"); + logic.setMaxOrExpansionThreshold(1); + ivaratorConfig(); + + String queryString = "FOO_3_BAR == 'abcd'"; + + //@formatter:off + Set goodResults = Sets.newHashSet( + // the hit and associated fields in the same group + "FOO_3_BAR.FOO.0:abcd", + "FOO_1.FOO.1.0:yawn", + "FOO_4.FOO.4.0:purr", + "FOO_3.FOO.3.0:abcd", + "FOO_1_BAR.FOO.0:yawn", + "FOO_1_BAR_1.FOO.0:2021-03-24T16:00:00.000Z", + // the additional values included per the matching field sets + "FOO_1.FOO.1.1:yawn", + "FOO_4.FOO.4.1:purr", + "FOO_3.FOO.3.1:bcde", + "FOO_3_BAR.FOO.1:bcde", + "FOO_1_BAR.FOO.1:yawn", + "FOO_1.FOO.1.2:yawn", + "FOO_4.FOO.4.2:purr", + "FOO_3.FOO.3.2:cdef", + "FOO_3_BAR.FOO.2:cdef", + "FOO_1_BAR.FOO.2:yawn", + "BAR_1.BAR.1.2:yawn", + "BAR_2.BAR.2.2:siberian", + "BAR_3.BAR.3.2:pink", + "BAR_1.BAR.1.3:purr", + "BAR_2.BAR.2.3:tiger", + "BAR_3.BAR.3.3:spotted"); + //@formatter:on + Set expectedHits = Sets.newHashSet("FOO_3_BAR.FOO.0:abcd"); + + runTestQuery(queryString, format.parse("20091231"), format.parse("20150101"), extraParameters, expectedHits, goodResults); + } + protected void ivaratorConfig() throws IOException { final URL hdfsConfig = this.getClass().getResource("/testhadoop.config"); Assert.assertNotNull(hdfsConfig); diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/ParentQueryIteratorTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/ParentQueryIteratorTest.java index b8f9cf3f5d4..91c177ec4ea 100644 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/ParentQueryIteratorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/ParentQueryIteratorTest.java @@ -1,18 +1,388 @@ package datawave.query.iterator; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import datawave.query.Constants; +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Attributes; +import datawave.query.attributes.Document; import datawave.query.function.RangeProvider; +import datawave.query.function.deserializer.DocumentDeserializer; +import datawave.query.function.deserializer.KryoDocumentDeserializer; import datawave.query.predicate.ParentRangeProvider; +import datawave.query.predicate.TimeFilter; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator; +import org.junit.Before; import org.junit.Test; +import java.util.Collections; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; + import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; public class ParentQueryIteratorTest { - + + private static final String ID_PREFIX = "idpart1.idpart2."; + DocumentDeserializer deserializer = null; + + @Before + public void setup() { + this.deserializer = new KryoDocumentDeserializer(); + } + + @Test + public void test() throws Throwable { + ParentQueryIterator qitr = new ParentQueryIterator(); + Map options = Maps.newHashMap(); + + SortedMap data = QueryIteratorTest.createTestData(ID_PREFIX + "idpart3"); + + createChildren(data); + + 
options.put(QueryOptions.DISABLE_EVALUATION, "false"); + options.put(QueryOptions.QUERY, "FOO=='bars'"); + options.put(QueryOptions.TYPE_METADATA, "FOO:[test:datawave.data.type.LcNoDiacriticsType]"); + options.put(QueryOptions.REDUCED_RESPONSE, "false"); + options.put(Constants.RETURN_TYPE, "kryo"); + options.put(QueryOptions.FULL_TABLE_SCAN_ONLY, "false"); + options.put(QueryOptions.FILTER_MASKED_VALUES, "true"); + options.put(QueryOptions.TERM_FREQUENCY_FIELDS, "FOO"); + options.put(QueryOptions.INCLUDE_DATATYPE, "true"); + options.put(QueryOptions.INDEX_ONLY_FIELDS, "FOO"); + options.put(QueryOptions.START_TIME, "0"); + options.put(QueryOptions.END_TIME, Long.toString(Long.MAX_VALUE)); + options.put(QueryOptions.POSTPROCESSING_CLASSES, ""); + options.put(QueryOptions.INCLUDE_GROUPING_CONTEXT, "false"); + options.put(QueryOptions.NON_INDEXED_DATATYPES, ""); + options.put(QueryOptions.CONTAINS_INDEX_ONLY_TERMS, "true"); + + // the iterator will npe if these guys aren't set + qitr.setTimeFilter(TimeFilter.alwaysTrue()); + + qitr.init(new SortedMapIterator(data), options, new SourceManagerTest.MockIteratorEnvironment()); + qitr.seek(new Range(new Key("20121126_0", "foobar\u0000idpart1.idpart2.idpart31"), true, new Key("2121126_0", "foobar\u0000idpart1.idpart2" + "\0"), + false), Collections.emptySet(), false); + + assertTrue(qitr.hasTop()); + Key topKey = qitr.getTopKey(); + Key expectedKey = new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31.1", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + assertEquals(expectedKey, topKey); + + Entry doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + Attribute recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_0/foobar/idpart1.idpart2.idpart31", recordId.getData()); + + assertTrue(qitr.hasTop()); + qitr.next(); + expectedKey = new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31.2", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + topKey = qitr.getTopKey(); + assertEquals(expectedKey, topKey); + + doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_0/foobar/idpart1.idpart2.idpart31", recordId.getData()); + + qitr.next(); + + assertTrue(qitr.hasTop()); + + expectedKey = new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31.3", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + topKey = qitr.getTopKey(); + assertEquals(expectedKey, topKey); + + doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_0/foobar/idpart1.idpart2.idpart31", recordId.getData()); + + qitr.next(); + + assertFalse(qitr.hasTop()); + } + @Test public void testGetRangeProvider() { ParentQueryIterator iterator = new ParentQueryIterator(); RangeProvider provider = iterator.getRangeProvider(); assertEquals(ParentRangeProvider.class.getSimpleName(), provider.getClass().getSimpleName()); } - + + private void createChildren(SortedMap map) { + long ts = QueryIteratorTest.getTimeStamp(); + + long ts2 
= ts + 10000; + long ts3 = ts + 200123; + + map.put(new Key("20121126_0", "fi\0" + "FOO", "bars\0" + "foobar\0" + ID_PREFIX + "idpart31." + 1, ts), new Value(new byte[0])); + map.put(new Key("20121126_0", "fi\0" + "FOO", "bars\0" + "foobar\0" + ID_PREFIX + "idpart31." + 2, ts), new Value(new byte[0])); + map.put(new Key("20121126_0", "fi\0" + "FOO", "bars\0" + "foobar\0" + ID_PREFIX + "idpart31." + 3, ts), new Value(new byte[0])); + map.put(new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31." + 1, "FOO\0bars", ts), new Value(new byte[0])); + map.put(new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31." + 1, "BAR\0foo", ts2), new Value(new byte[0])); + map.put(new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31." + 2, "FOO\0bars", ts), new Value(new byte[0])); + map.put(new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31." + 2, "BAR\0foo", ts2), new Value(new byte[0])); + map.put(new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31." + 3, "FOO\0bars", ts), new Value(new byte[0])); + map.put(new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart31." + 3, "BAR\0foo", ts2), new Value(new byte[0])); + + map.put(new Key("20121126_1", "fi\0" + "FOO", "bar\0" + "foobar\0" + ID_PREFIX + "idpart32." + 4, ts), new Value(new byte[0])); + map.put(new Key("20121126_1", "fi\0" + "FOO", "bar\0" + "foobar\0" + ID_PREFIX + "idpart32." + 5, ts), new Value(new byte[0])); + map.put(new Key("20121126_1", "fi\0" + "FOO", "bar\0" + "foobar\0" + ID_PREFIX + "idpart32." + 6, ts), new Value(new byte[0])); + map.put(new Key("20121126_1", "foobar\0" + ID_PREFIX + "idpart32." + 4, "FOO\0bar", ts), new Value(new byte[0])); + map.put(new Key("20121126_1", "foobar\0" + ID_PREFIX + "idpart32." + 5, "FOO\0bar", ts), new Value(new byte[0])); + map.put(new Key("20121126_1", "foobar\0" + ID_PREFIX + "idpart32." + 5, "BAR\0foo", ts2), new Value(new byte[0])); + map.put(new Key("20121126_1", "foobar\0" + ID_PREFIX + "idpart32." + 6, "FOO\0bar", ts2), new Value(new byte[0])); + + map.put(new Key("20121126_2", "fi\0" + "FOO", "bar\0" + "foobar\0" + ID_PREFIX + "idpart33." + 7, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "fi\0" + "FOO", "bar\0" + "foobar\0" + ID_PREFIX + "idpart33." + 8, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "fi\0" + "FOO", "bar\0" + "foobar\0" + ID_PREFIX + "idpart33." + 9, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart33." + 7, "FOO\0bar", ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart33." + 7, "BAR\0foo", ts3), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart33." + 8, "FOO\0bar", ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart33." + 8, "BAR\0foo", ts3), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart33." + 9, "FOO\0bar", ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart33." 
+ 9, "BAR\0foo", ts3), new Value(new byte[0])); + } + + @Test + public void testParentFiOnlyDocsAllowed() throws Throwable { + ParentQueryIterator qitr = new ParentQueryIterator(); + Map options = Maps.newHashMap(); + + SortedMap data = QueryIteratorTest.createTestData(ID_PREFIX + "idpart3"); + + createOrphanedChildren(data); + + options.put(QueryOptions.DISABLE_EVALUATION, "false"); + options.put(QueryOptions.QUERY, "FOO=='baz'"); + options.put(QueryOptions.TYPE_METADATA, "FOO:[test:datawave.data.type.LcNoDiacriticsType]"); + options.put(QueryOptions.REDUCED_RESPONSE, "false"); + options.put(Constants.RETURN_TYPE, "kryo"); + options.put(QueryOptions.FULL_TABLE_SCAN_ONLY, "false"); + options.put(QueryOptions.FILTER_MASKED_VALUES, "true"); + options.put(QueryOptions.TERM_FREQUENCY_FIELDS, "FOO"); + options.put(QueryOptions.INCLUDE_DATATYPE, "true"); + options.put(QueryOptions.INDEX_ONLY_FIELDS, "FOO"); + options.put(QueryOptions.START_TIME, "0"); + options.put(QueryOptions.END_TIME, Long.toString(Long.MAX_VALUE)); + options.put(QueryOptions.POSTPROCESSING_CLASSES, ""); + options.put(QueryOptions.INCLUDE_GROUPING_CONTEXT, "false"); + options.put(QueryOptions.NON_INDEXED_DATATYPES, ""); + options.put(QueryOptions.CONTAINS_INDEX_ONLY_TERMS, "true"); + + // the iterator will npe if these guys aren't set + qitr.setTimeFilter(TimeFilter.alwaysTrue()); + + qitr.init(new SortedMapIterator(data), options, new SourceManagerTest.MockIteratorEnvironment()); + qitr.seek(new Range(new Key("20121126_2", "foobar\u0000idpart1.idpart2.idpart34"), true, new Key("2121126_3", "foobar\u0000idpart1.idpart2.idpart35"), + false), Collections.emptySet(), false); + + assertTrue(qitr.hasTop()); + Key topKey = qitr.getTopKey(); + Key expectedKey = new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart36.1", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + assertEquals(expectedKey, topKey); + + Entry doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + Attribute recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_2/foobar/idpart1.idpart2.idpart36", recordId.getData()); + + assertTrue(qitr.hasTop()); + qitr.next(); + expectedKey = new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart36.2", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + topKey = qitr.getTopKey(); + assertEquals(expectedKey, topKey); + + doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_2/foobar/idpart1.idpart2.idpart36", recordId.getData()); + + qitr.next(); + + assertFalse(qitr.hasTop()); + } + + @Test + public void testParentNoFiOnlyDocs() throws Throwable { + ParentQueryIterator qitr = new ParentQueryIterator(); + Map options = Maps.newHashMap(); + + SortedMap data = QueryIteratorTest.createTestData(ID_PREFIX + "idpart3"); + + createOrphanedChildren(data); + + options.put(QueryOptions.DISABLE_EVALUATION, "false"); + options.put(QueryOptions.QUERY, "FOO=='baz'"); + options.put(QueryOptions.TYPE_METADATA, "FOO:[test:datawave.data.type.LcNoDiacriticsType]"); + options.put(QueryOptions.REDUCED_RESPONSE, "false"); + options.put(Constants.RETURN_TYPE, "kryo"); + options.put(QueryOptions.FULL_TABLE_SCAN_ONLY, 
"false"); + options.put(QueryOptions.FILTER_MASKED_VALUES, "true"); + options.put(QueryOptions.TERM_FREQUENCY_FIELDS, "FOO"); + options.put(QueryOptions.INCLUDE_DATATYPE, "true"); + options.put(QueryOptions.INDEX_ONLY_FIELDS, "FOO"); + options.put(QueryOptions.START_TIME, "0"); + options.put(QueryOptions.DISABLE_DOCUMENTS_WITHOUT_EVENTS, "true"); + options.put(QueryOptions.END_TIME, Long.toString(Long.MAX_VALUE)); + options.put(QueryOptions.POSTPROCESSING_CLASSES, ""); + options.put(QueryOptions.INCLUDE_GROUPING_CONTEXT, "false"); + options.put(QueryOptions.NON_INDEXED_DATATYPES, ""); + options.put(QueryOptions.CONTAINS_INDEX_ONLY_TERMS, "true"); + + // the iterator will npe if these guys aren't set + qitr.setTimeFilter(TimeFilter.alwaysTrue()); + + qitr.init(new SortedMapIterator(data), options, new SourceManagerTest.MockIteratorEnvironment()); + qitr.seek(new Range(new Key("20121126_2", "foobar\u0000idpart1.idpart2.idpart34"), true, new Key("2121126_3", "foobar\u0000idpart1.idpart2.idpart35"), + false), Collections.emptySet(), false); + + assertTrue(qitr.hasTop()); + Key topKey = qitr.getTopKey(); + Key expectedKey = new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart36.1", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + assertEquals(expectedKey, topKey); + + Entry doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + Attribute recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_2/foobar/idpart1.idpart2.idpart36", recordId.getData()); + + assertTrue(qitr.hasTop()); + qitr.next(); + expectedKey = new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart36.2", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()); + topKey = qitr.getTopKey(); + assertEquals(expectedKey, topKey); + + doc = deserializer.apply(Maps.immutableEntry(topKey, qitr.getTopValue())); + + recordId = doc.getValue().get(Document.DOCKEY_FIELD_NAME); + if (recordId instanceof Attributes) { + recordId = ((Attributes) recordId).getAttributes().iterator().next(); + } + assertEquals("20121126_2/foobar/idpart1.idpart2.idpart36", recordId.getData()); + + qitr.next(); + + assertFalse(qitr.hasTop()); + } + + private void createOrphanedChildren(SortedMap map) { + long ts = QueryIteratorTest.getTimeStamp(); + + long ts3 = ts + 200123; + + // scenario 1, fi keys for child docs, but on cihldren or parent + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart34." + 1, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart34." + 2, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart34." + 3, ts), new Value(new byte[0])); + + // scenario 2, fi keys for child docs, children exist but no parent + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart35." + 1, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart35." + 2, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart35." + 1, "FOO\0baz", ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart35." 
+ 2, "BAR\0foo", ts3), new Value(new byte[0])); + + // scenario 3, fi keys child docs, no children and parent exists + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart36." + 1, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "fi\0" + "FOO", "baz\0" + "foobar\0" + ID_PREFIX + "idpart36." + 2, ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart36", "FOO\0baz", ts), new Value(new byte[0])); + map.put(new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart36", "BAR\0foo", ts3), new Value(new byte[0])); + } + + @Test + public void testTearDown() throws Exception { + SortedMapIterator iter = new SortedMapIterator(QueryIteratorTest.createTestData(ID_PREFIX + "idpart3")); + Set expectation = Sets.newHashSet( + new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart3" + 1, QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()), + new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart3" + 2, QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()), + new Key("20121126_0", "foobar\0" + ID_PREFIX + "idpart3" + 3, QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()), + new Key("20121126_1", "foobar\0" + ID_PREFIX + "idpart3" + 5, QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()), + new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart3" + 7, QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()), + new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart3" + 8, QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp()), + new Key("20121126_2", "foobar\0" + ID_PREFIX + "idpart39", QueryIteratorTest.DEFAULT_CQ, "", QueryIteratorTest.getTimeStamp())); + + Map options = Maps.newHashMap(); + + options.put(QueryOptions.DISABLE_EVALUATION, "false"); + options.put(QueryOptions.QUERY, "FOO == 'bar' && BAR == 'foo'"); + options.put(QueryOptions.TYPE_METADATA, "FOO:[test:datawave.data.type.LcNoDiacriticsType]"); + options.put(QueryOptions.REDUCED_RESPONSE, "true"); + options.put(Constants.RETURN_TYPE, "kryo"); + options.put(QueryOptions.FULL_TABLE_SCAN_ONLY, "false"); + options.put(QueryOptions.FILTER_MASKED_VALUES, "true"); + options.put(QueryOptions.INCLUDE_DATATYPE, "true"); + options.put(QueryOptions.INDEX_ONLY_FIELDS, ""); + options.put(QueryOptions.START_TIME, "0"); + options.put(QueryOptions.END_TIME, Long.toString(Long.MAX_VALUE)); + options.put(QueryOptions.POSTPROCESSING_CLASSES, ""); + options.put(QueryOptions.INCLUDE_GROUPING_CONTEXT, "false"); + options.put(QueryOptions.NON_INDEXED_DATATYPES, ""); + options.put(QueryOptions.CONTAINS_INDEX_ONLY_TERMS, "false"); + + ParentQueryIterator qi = new ParentQueryIterator(); + + qi.init(iter, options, new SourceManagerTest.MockIteratorEnvironment()); + + qi.seek(new Range(new Key("20121126"), false, new Key("20121127"), false), + Collections.emptyList(), false); + + while (qi.hasTop()) { + System.out.println("begin loop1: " + expectation); + + Key tk = qi.getTopKey(); + assertTrue("Could not remove " + tk + " from " + expectation.size(), expectation.remove(tk)); + + String cf = tk.getColumnFamily().toString(); + + if (cf.contains("idpart35")) { + break; + } + + qi.next(); + + System.out.println("ender loop1: " + expectation); + } + + qi = new ParentQueryIterator(); + + qi.init(iter, options, new SourceManagerTest.MockIteratorEnvironment()); + + qi.seek(new Range(new Key("20121126_1", "foobar\0" + ID_PREFIX + "idpart35"), false, new Key("20121127"), false), + Collections.emptyList(), false); 
+
+        while (qi.hasTop()) {
+            Key tk = qi.getTopKey();
+            assertTrue("Could not remove " + tk + " from " + expectation, expectation.remove(tk));
+            qi.next();
+        }
+
+        assertTrue("Still had expected keys: " + expectation, expectation.isEmpty());
+    }
 }
diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorTest.java
index fbcbb55f60b..b8f78b29274 100644
--- a/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorTest.java
@@ -1,38 +1,104 @@
 package datawave.query.iterator;
 
+import com.google.common.collect.Maps;
 import datawave.query.function.DocumentRangeProvider;
 import datawave.query.function.Equality;
 import datawave.query.function.PrefixEquality;
 import datawave.query.function.RangeProvider;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
 import org.apache.hadoop.io.Text;
 import org.junit.Test;
 
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 /**
  * Unit test for {@link QueryIterator}.
- * 
+ *
  * Currently only covers some helper methods.
  */
 public class QueryIteratorTest {
-    
+    public static final String DEFAULT_CQ = "\uffff";
+    private static final SimpleDateFormat shardFormatter = new SimpleDateFormat("yyyyMMdd HHmmss");
+    private static long ts = -1;
+
+    public static SortedMap createTestData() throws ParseException {
+        return createTestData("");
+    }
+
+    public static SortedMap createTestData(String preId) throws ParseException {
+        shardFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
+        ts = shardFormatter.parse("20121126 123023").getTime();
+        long ts2 = ts + 10000;
+        long ts3 = ts + 200123;
+
+        TreeMap map = Maps.newTreeMap();
+
+        map.put(new Key("20121126_0", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 1, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 2, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 3, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 1, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 1, "BAR\0foo", ts2), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 2, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 2, "BAR\0foo", ts2), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 3, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 3, "BAR\0foo", ts2), new Value(new byte[0]));
+
+        map.put(new Key("20121126_0", "foobar\0" + preId + 23, "FOO\0bar1", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 23, "BAR\0foo1", ts2), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 24, "FOO\0bar2", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 24, "BAR\0foo2", ts2), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 25, "FOO\0bar3", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_0", "foobar\0" + preId + 25, "BAR\0foo3", ts2), new Value(new byte[0]));
+
+        map.put(new Key("20121126_1", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 4, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_1", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 5, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_1", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 6, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_1", "foobar\0" + preId + 4, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_1", "foobar\0" + preId + 5, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_1", "foobar\0" + preId + 5, "BAR\0foo", ts2), new Value(new byte[0]));
+        map.put(new Key("20121126_1", "foobar\0" + preId + 6, "FOO\0bar", ts), new Value(new byte[0]));
+
+        map.put(new Key("20121126_2", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 7, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 8, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "fi\0" + "FOO", "bar\0" + "foobar\0" + preId + 9, ts), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "foobar\0" + preId + 7, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "foobar\0" + preId + 7, "BAR\0foo", ts3), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "foobar\0" + preId + 8, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "foobar\0" + preId + 8, "BAR\0foo", ts3), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "foobar\0" + preId + 9, "FOO\0bar", ts), new Value(new byte[0]));
+        map.put(new Key("20121126_2", "foobar\0" + preId + 9, "BAR\0foo", ts3), new Value(new byte[0]));
+        map.put(new Key("20121126_3", "fi\0" + "FOOSICKLES", "bar\0" + "foobar\0" + 33, ts), new Value(new byte[0]));
+
+        return map;
+    }
+
+    public static long getTimeStamp() {
+        return ts;
+    }
+
     @Test
     public void testIsDocumentSpecificRange_withInfiniteKeys() {
         // Test the case of an infinite start key
         Key end = new Key("20190314_0", "dataType\0doc0\0");
         Range infiniteStartRange = new Range(null, end);
         assertFalse(QueryIterator.isDocumentSpecificRange(infiniteStartRange));
-        
+
         // Test the case of an infinite end key
         Key start = new Key("20190314_0", "dataType\0doc0");
         Range infiniteEndRange = new Range(start, null);
         assertFalse(QueryIterator.isDocumentSpecificRange(infiniteEndRange));
     }
-    
+
     @Test
     public void testIsDocumentSpecificRange_spansMultipleRows() {
         Key start = new Key("20190314_0", "dataType\0doc0");
@@ -40,7 +106,7 @@ public void testIsDocumentSpecificRange_spansMultipleRows() {
         Range multipleRowRange = new Range(start, end);
         assertFalse(QueryIterator.isDocumentSpecificRange(multipleRowRange));
     }
-    
+
     @Test
     public void testIsDocumentSpecificRange_withDocRange() {
         Key start = new Key("20190314_0", "dataType\0doc0");
@@ -48,7 +114,7 @@ public void testIsDocumentSpecificRange_withDocRange() {
         Range docRange = new Range(start, end);
         assertTrue(QueryIterator.isDocumentSpecificRange(docRange));
     }
-    
+
     @Test
     public void testIsDocumentSpecificRange_withShardRange() {
         Key start = new Key("20190314_0");
@@ -56,7 +122,7 @@ public void testIsDocumentSpecificRange_withShardRange() {
         Range shardRange = new Range(start, end);
         assertFalse(QueryIterator.isDocumentSpecificRange(shardRange));
     }
-    
+
     @Test
     public void testIsDocumentSpecificRange_withRebuiltShardRange() {
         Key start = new Key("20190314_0", "dataType\0doc0");
@@ -64,7 +130,7 @@ public void testIsDocumentSpecificRange_withRebuiltShardRange() {
         Range range = new Range(start, false, end, false);
         assertFalse(QueryIterator.isDocumentSpecificRange(range));
     }
-    
+
     /**
      *
      * Shard key format
@@ -76,26 +142,26 @@ public void testIsDocumentSpecificRange_withRebuiltShardRange() {
     @Test
     public void testRowColfamToString() {
         String expected = "20190314_0 test%00;doc0:FOO%00;bar";
-        
+
         Text row = new Text("20190314_0");
         Text cf = new Text("test\0doc0");
         Text cq = new Text("FOO\0bar");
         Key key = new Key(row, cf, cq);
-        
+
         String parsed = QueryIterator.rowColFamToString(key);
         assertEquals(expected, parsed);
-        
+
         // Test the null case as well
         assertEquals("null", QueryIterator.rowColFamToString(null));
     }
-    
+
     @Test
     public void testGetRangeProvider() {
         QueryIterator iterator = new QueryIterator();
         RangeProvider provider = iterator.getRangeProvider();
         assertEquals(DocumentRangeProvider.class.getSimpleName(), provider.getClass().getSimpleName());
     }
-    
+
     @Test
     public void testGetEquality() {
         QueryIterator iterator = new QueryIterator();
diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/SourceManagerTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/SourceManagerTest.java
index 7758018da51..c6518a3bb58 100644
--- a/warehouse/query-core/src/test/java/datawave/query/iterator/SourceManagerTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/iterator/SourceManagerTest.java
@@ -1,26 +1,11 @@
 package datawave.query.iterator;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TimeZone;
-import java.util.TreeMap;
-
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.sample.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.DefaultConfiguration;
-import org.apache.accumulo.core.conf.SiteConfiguration;
 import org.apache.accumulo.core.crypto.CryptoFactoryLoader;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
@@ -38,8 +23,21 @@
 import org.junit.Before;
 import org.junit.Test;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
 
 public class SourceManagerTest {
     private static final SimpleDateFormat shardFormatter = new SimpleDateFormat("yyyyMMdd HHmmss");
@@ -346,14 +344,14 @@ public SortedKeyValueIterator deepCopy(IteratorEnvironment env) {
         }
     }
     
-    public class MockIteratorEnvironment implements IteratorEnvironment {
-        
+    public static class MockIteratorEnvironment implements IteratorEnvironment {
+
         AccumuloConfiguration conf;
-        
+
         public MockIteratorEnvironment(AccumuloConfiguration conf) {
             this.conf = conf;
         }
-        
+
         public MockIteratorEnvironment() {
             this.conf = DefaultConfiguration.getInstance();
         }
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/JexlASTHelperTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/JexlASTHelperTest.java
index 4ed55d87444..bdf1b6cfa51 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/JexlASTHelperTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/JexlASTHelperTest.java
@@ -5,15 +5,22 @@
 import com.google.common.collect.Sets;
 import datawave.data.type.LcNoDiacriticsType;
 import datawave.data.type.NumberType;
+import datawave.query.attributes.Document;
+import datawave.query.function.JexlEvaluation;
 import datawave.query.jexl.JexlNodeFactory.ContainerType;
 import datawave.query.jexl.visitors.JexlStringBuildingVisitor;
 import datawave.query.jexl.visitors.PrintingVisitor;
+import datawave.query.language.parser.jexl.LuceneToJexlQueryParser;
 import datawave.query.util.MockMetadataHelper;
+import datawave.query.util.Tuple3;
+import org.apache.accumulo.core.data.Key;
 import org.apache.commons.jexl2.parser.ASTAndNode;
 import org.apache.commons.jexl2.parser.ASTEQNode;
 import org.apache.commons.jexl2.parser.ASTERNode;
 import org.apache.commons.jexl2.parser.ASTJexlScript;
+import org.apache.commons.jexl2.parser.ASTNotNode;
 import org.apache.commons.jexl2.parser.ASTNumberLiteral;
+import org.apache.commons.jexl2.parser.ASTOrNode;
 import org.apache.commons.jexl2.parser.ASTReference;
 import org.apache.commons.jexl2.parser.JexlNode;
 import org.apache.commons.jexl2.parser.ParseException;
@@ -43,7 +50,7 @@ public void test() throws Exception {
         List eqNodes = JexlASTHelper.getEQNodes(query);
         
         for (JexlNode eqNode : eqNodes) {
-            assertFalse(JexlASTHelper.isWithinOr(eqNode));
+            assertFalse(JexlASTHelper.isDescendantOfOr(eqNode));
         }
     }
     
@@ -62,7 +69,7 @@ public void test1() throws Exception {
             String value = JexlASTHelper.getLiteralValue(eqNode).toString();
             assertTrue(expectations.containsKey(value));
             
-            assertEquals(expectations.get(value), JexlASTHelper.isWithinOr(eqNode));
+            assertEquals(expectations.get(value), JexlASTHelper.isDescendantOfOr(eqNode));
         }
     }
     
@@ -81,7 +88,7 @@ public void test2() throws Exception {
             String value = JexlASTHelper.getLiteralValue(eqNode).toString();
             assertTrue(expectations.containsKey(value));
             
-            assertEquals(expectations.get(value), JexlASTHelper.isWithinOr(eqNode));
+            assertEquals(expectations.get(value), JexlASTHelper.isDescendantOfOr(eqNode));
         }
     }
     
@@ -97,7 +104,7 @@ public void test3() throws Exception {
             String value = JexlASTHelper.getLiteralValue(eqNode).toString();
             assertTrue(expectations.containsKey(value));
             
-            assertEquals(expectations.get(value), JexlASTHelper.isWithinOr(eqNode));
+            assertEquals(expectations.get(value), JexlASTHelper.isDescendantOfOr(eqNode));
         }
     }
     
@@ -115,7 +122,7 @@ public void test4() throws Exception {
             String value = JexlASTHelper.getLiteralValue(eqNode).toString();
             assertTrue(expectations.containsKey(value));
             
-            assertEquals(expectations.get(value), JexlASTHelper.isWithinOr(eqNode));
+            assertEquals(expectations.get(value), JexlASTHelper.isDescendantOfOr(eqNode));
         }
         
         List erNodes = JexlASTHelper.getERNodes(query);
@@ -127,7 +134,7 @@ public void test4() throws Exception {
         for (JexlNode erNode : erNodes) {
             String value = JexlASTHelper.getLiteralValue(erNode).toString();
             assertTrue(expectations.containsKey(value));
-            assertEquals(expectations.get(value), JexlASTHelper.isWithinOr(erNode));
+            assertEquals(expectations.get(value), JexlASTHelper.isDescendantOfOr(erNode));
         }
     }
     
@@ -783,4 +790,210 @@ private void testDeconstructionGroupingFalse(String expected, String input) {
         assertEquals(expected, actual);
     }
     
+    // Verify the nesting order between 'or' and 'and' in a Jexl query.
+    @Test
+    public void testArtificialParenthesisTreeOrAnd() throws Exception {
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery("FOO == '1' or FOO == '2' and BAR == '3'");
+        // Parentheses artificially added should be: "((FOO == '1') or ((FOO == '2') and (BAR == '3')))"
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTAndNode.class));
+        }
+        
+        for (JexlNode andNode : andNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfOr(andNode));
+        }
+    }
+    
+    // Verify the nesting order between 'and' and 'not' in a Jexl query.
+    @Test
+    public void testArtificialParenthesisTreeAndNot() throws Exception {
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery("not (FOO == '1') and BAR == '3'");
+        // Parentheses artificially added should be: "((not (FOO == '1')) and (BAR == '3'))"
+        
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        List notNodes = JexlASTHelper.getNodesOfType(query, ASTNotNode.class);
+        
+        for (JexlNode andNode : andNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(andNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode notNode : notNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTAndNode.class));
+        }
+    }
+    
+    // Verify the nesting order between 'or' and 'not' in a Jexl query.
+    @Test
+    public void testArtificialParenthesisTreeOrNot() throws Exception {
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery("not (FOO == '1') or BAR == '3'");
+        // Parentheses artificially added should be: "((not (FOO == '1')) or (BAR == '3'))"
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List notNodes = JexlASTHelper.getNodesOfType(query, ASTNotNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode notNode : notNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTOrNode.class));
+        }
+    }
+    
+    // Verify the nesting order when 'or', 'and', and 'not' are all present in a Jexl query.
+    @Test
+    public void testArtificialParenthesisTreeOrAndNot() throws Exception {
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery("FOO == '1' or not (BAR == '3') and FOO == '2'");
+        // Parentheses artificially added should be: "((FOO == '1') or ((not (BAR == '3')) and (FOO == '2')))"
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        List notNodes = JexlASTHelper.getNodesOfType(query, ASTNotNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTAndNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode andNode : andNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(andNode, ASTOrNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(andNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode notNode : notNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTOrNode.class));
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTAndNode.class));
+        }
+    }
+    
+    @Test
+    public void testChildrenContainNodeType() throws Exception {
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery("not FOO == '1' or (FOO == '2' and BAR == '3')");
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertFalse(JexlASTHelper.descendantsContainNodeType(orNode, ASTOrNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTAndNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTNotNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTEQNode.class));
+        }
+        
+        for (JexlNode andNode : andNodes) {
+            assertFalse(JexlASTHelper.descendantsContainNodeType(andNode, ASTOrNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(andNode, ASTEQNode.class));
+        }
+    }
+    
+    @Test
+    public void testArtificialParenthesisQueryMeaningChange() throws Exception {
+        // Query with parentheses
+        String parenthesisQueryString = "(FOO == '1' or FOO == '2') and BAR == '3'";
+        
+        // Query without parentheses
+        String noParenthesisQueryString = "FOO == '1' or FOO == '2' and BAR == '3'";
+        // Parentheses artificially added should be: "((FOO == '1') or ((FOO == '2') and (BAR == '3')))"
+        
+        DatawaveJexlContext djc = new DatawaveJexlContext();
+        djc.set("FOO", 1);
+        djc.set("BAR", 2);
+        
+        Tuple3 evalArgs = new Tuple3<>(new Key(), new Document(), djc);
+        
+        JexlEvaluation eval = new JexlEvaluation(parenthesisQueryString);
+        assertFalse(eval.apply(evalArgs));
+        
+        JexlEvaluation evalBad = new JexlEvaluation(noParenthesisQueryString);
+        assertTrue(evalBad.apply(evalArgs));
+    }
+    
+    // Verify the nesting order between 'or' and 'and' in a Lucene query.
+    @Test
+    public void testLuceneArtificialParenthesisTreeOrAnd() throws Exception {
+        String queryString = "FOO:1 OR FOO:2 AND BAR:3";
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery(new LuceneToJexlQueryParser().convertToJexlNode(queryString).toString());
+        // Parentheses artificially added should be: "((FOO:1) OR ((FOO:2) AND (BAR:3)))"
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTAndNode.class));
+        }
+        
+        for (JexlNode andNode : andNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfOr(andNode));
+        }
+    }
+    
+    // Verify the nesting order between 'AND' and 'NOT' in a Lucene query. #EXCLUDE() utilizes ASTNotNode.
+    @Test
+    public void testLuceneArtificialParenthesisTreeAndNot() throws Exception {
+        String queryString = "#EXCLUDE(FOO) AND BAR:3";
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery(new LuceneToJexlQueryParser().convertToJexlNode(queryString).toString());
+        // Parentheses artificially added should be: "((#EXCLUDE(FOO)) AND (BAR:3))"
+        
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        List notNodes = JexlASTHelper.getNodesOfType(query, ASTNotNode.class);
+        
+        for (JexlNode andNode : andNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(andNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode notNode : notNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTAndNode.class));
+        }
+    }
+    
+    // Verify the nesting order between 'OR' and 'NOT' in a Lucene query. #EXCLUDE() utilizes ASTNotNode.
+    @Test
+    public void testLuceneArtificialParenthesisTreeOrNot() throws Exception {
+        String queryString = "#EXCLUDE(FOO) OR BAR:3";
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery(new LuceneToJexlQueryParser().convertToJexlNode(queryString).toString());
+        // Parentheses artificially added should be: "((#EXCLUDE(FOO)) OR (BAR:3))"
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List notNodes = JexlASTHelper.getNodesOfType(query, ASTNotNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode notNode : notNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTOrNode.class));
+        }
+    }
+    
+    // Verify the nesting order when 'OR', 'AND', and 'NOT' are all present in a Lucene query. 'NOT' becomes 'AND NOT'.
+    @Test
+    public void testLuceneArtificialParenthesisTreeOrAndNot() throws Exception {
+        String queryString = "BAR:3 OR FOO:1 NOT FOO:2";
+        ASTJexlScript query = JexlASTHelper.parseJexlQuery(new LuceneToJexlQueryParser().convertToJexlNode(queryString).toString());
+        // Parentheses artificially added should be: "((BAR:3) OR ((FOO:1) AND (NOT (FOO:2))))"
+        
+        List orNodes = JexlASTHelper.getNodesOfType(query, ASTOrNode.class);
+        List andNodes = JexlASTHelper.getNodesOfType(query, ASTAndNode.class);
+        List notNodes = JexlASTHelper.getNodesOfType(query, ASTNotNode.class);
+        
+        for (JexlNode orNode : orNodes) {
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTAndNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(orNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode andNode : andNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(andNode, ASTOrNode.class));
+            assertTrue(JexlASTHelper.descendantsContainNodeType(andNode, ASTNotNode.class));
+        }
+        
+        for (JexlNode notNode : notNodes) {
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTOrNode.class));
+            assertTrue(JexlASTHelper.isDescendantOfNodeType(notNode, ASTAndNode.class));
+        }
+    }
+    
 }
diff --git a/warehouse/query-core/src/test/java/datawave/query/planner/GeoSortedQueryDataTest.java b/warehouse/query-core/src/test/java/datawave/query/planner/GeoSortedQueryDataTest.java
index 674a2301c71..285a7070576 100644
--- a/warehouse/query-core/src/test/java/datawave/query/planner/GeoSortedQueryDataTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/planner/GeoSortedQueryDataTest.java
@@ -217,11 +217,12 @@ public static void setupEnvVariables() {
     }
     
     public static void resolveEnvVariables(Configuration conf) {
+        StringBuilder sb = new StringBuilder();
         Pattern p = Pattern.compile("\\$\\{(\\w+)\\}|\\$(\\w+)");
         for (Map.Entry entry : conf) {
             boolean reset = false;
             Matcher m = p.matcher(entry.getKey());
-            StringBuffer sb = new StringBuffer();
+
             while (m.find()) {
                 String envVarName = null == m.group(1) ? m.group(2) : m.group(1);
                 String envVarValue = System.getProperty(envVarName);
@@ -232,7 +233,7 @@ public static void resolveEnvVariables(Configuration conf) {
             String key = sb.toString();
             
             m = p.matcher(entry.getValue());
-            sb = new StringBuffer();
+            sb.setLength(0);
             while (m.find()) {
                 String envVarName = null == m.group(1) ? m.group(2) : m.group(1);
                 String envVarValue = System.getProperty(envVarName);
@@ -241,6 +242,7 @@ public static void resolveEnvVariables(Configuration conf) {
             }
             m.appendTail(sb);
             String value = sb.toString();
+            sb.setLength(0);
             
             if (reset) {
                 conf.unset(entry.getKey());
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/LimitFieldsTestingIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/LimitFieldsTestingIngest.java
index 8a5289f6f02..6d642741554 100644
--- a/warehouse/query-core/src/test/java/datawave/query/util/LimitFieldsTestingIngest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/util/LimitFieldsTestingIngest.java
@@ -76,7 +76,19 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw
             mutation.put(datatype + "\u0000" + myUID, "FOO_3.FOO.3.3" + "\u0000" + "defg", columnVisibility, timeStamp, emptyValue);
             mutation.put(datatype + "\u0000" + myUID, "FOO_3_BAR.FOO.3" + "\u0000" + "defg", columnVisibility, timeStamp, emptyValue);
             mutation.put(datatype + "\u0000" + myUID, "FOO_1_BAR.FOO.3" + "\u0000" + "good", columnVisibility, timeStamp, emptyValue);
-            
+
+            mutation.put(datatype + "\u0000" + myUID, "BAR_1.BAR.1.1" + "\u0000" + "growl", columnVisibility, timeStamp, emptyValue);
+            mutation.put(datatype + "\u0000" + myUID, "BAR_2.BAR.2.1" + "\u0000" + "big cat", columnVisibility, timeStamp, emptyValue);
+            mutation.put(datatype + "\u0000" + myUID, "BAR_3.BAR.3.1" + "\u0000" + "fluffy", columnVisibility, timeStamp, emptyValue);
+
+            mutation.put(datatype + "\u0000" + myUID, "BAR_1.BAR.1.2" + "\u0000" + "yawn", columnVisibility, timeStamp, emptyValue);
+            mutation.put(datatype + "\u0000" + myUID, "BAR_2.BAR.2.2" + "\u0000" + "siberian", columnVisibility, timeStamp, emptyValue);
+            mutation.put(datatype + "\u0000" + myUID, "BAR_3.BAR.3.2" + "\u0000" + "pink", columnVisibility, timeStamp, emptyValue);
+
+            mutation.put(datatype + "\u0000" + myUID, "BAR_1.BAR.1.3" + "\u0000" + "purr", columnVisibility, timeStamp, emptyValue);
+            mutation.put(datatype + "\u0000" + myUID, "BAR_2.BAR.2.3" + "\u0000" + "tiger", columnVisibility, timeStamp, emptyValue);
+            mutation.put(datatype + "\u0000" + myUID, "BAR_3.BAR.3.3" + "\u0000" + "spotted", columnVisibility, timeStamp, emptyValue);
+
             bw.addMutation(mutation);
             
         } finally {
diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml
index d248628f0db..fd52ae379a6 100644
--- a/warehouse/regression-testing/pom.xml
+++ b/warehouse/regression-testing/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave
         datawave-warehouse-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-regression-testing
     ${project.artifactId}
diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml
index 995c4f20fb8..3924ca0b643 100644
--- a/web-services/accumulo/pom.xml
+++ b/web-services/accumulo/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-accumulo
     ejb
diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml
index 5a2d9d1df4e..18459c96e66 100644
--- a/web-services/atom/pom.xml
+++ b/web-services/atom/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-atom
     ejb
diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml
index d0d661d382c..a577fba7bc8 100644
--- a/web-services/cached-results/pom.xml
+++ b/web-services/cached-results/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-cached-results
     ejb
diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml
index bc6360083a9..97fc4a2b63c 100644
--- a/web-services/client/pom.xml
+++ b/web-services/client/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-client
     jar
diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml
index 5bec18f46c5..06d97c3bc6c 100644
--- a/web-services/common-util/pom.xml
+++ b/web-services/common-util/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-common-util
     jar
diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml
index 18241f8ba72..e16ca743992 100644
--- a/web-services/common/pom.xml
+++ b/web-services/common/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-common
     ejb
diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml
index ed0776b08dc..5b23484ae2b 100644
--- a/web-services/deploy/application/pom.xml
+++ b/web-services/deploy/application/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-deploy-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-deploy-application
     ear
diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml
index c481e5229d7..72d470f844f 100644
--- a/web-services/deploy/configuration/pom.xml
+++ b/web-services/deploy/configuration/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-deploy-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-deploy-configuration
     jar
diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml
index 4abe9fbde80..94a15087966 100644
--- a/web-services/deploy/docs/pom.xml
+++ b/web-services/deploy/docs/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-deploy-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-deploy-docs
     war
diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml
index 291ab469541..7362b6fc4dc 100644
--- a/web-services/deploy/pom.xml
+++ b/web-services/deploy/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     gov.nsa.datawave.webservices
     datawave-ws-deploy-parent
diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml
index 0bdb2c9f2ee..7bf0f82e898 100644
--- a/web-services/deploy/spring-framework-integration/pom.xml
+++ b/web-services/deploy/spring-framework-integration/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-deploy-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     spring-framework-integration
     ${project.artifactId}
diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml
index b38ca422f51..30df4b17a5a 100644
--- a/web-services/dictionary/pom.xml
+++ b/web-services/dictionary/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-dictionary
     ejb
diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml
index b79064acb65..11bba1840fb 100644
--- a/web-services/examples/client-login/pom.xml
+++ b/web-services/examples/client-login/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-examples-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-examples-client-login
     ejb
diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml
index 75ecd032363..5e800e4d4ac 100644
--- a/web-services/examples/http-client/pom.xml
+++ b/web-services/examples/http-client/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-examples-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-examples-http-client
     jar
diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml
index d00922b951b..216eaa2adbb 100644
--- a/web-services/examples/jms-client/pom.xml
+++ b/web-services/examples/jms-client/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-examples-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-examples-jms-client
     jar
diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml
index ea78eebdf08..644e5c42c61 100644
--- a/web-services/examples/pom.xml
+++ b/web-services/examples/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-examples-parent
     pom
diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml
index 313d1b140c0..f20788dcceb 100644
--- a/web-services/examples/query-war/pom.xml
+++ b/web-services/examples/query-war/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-examples-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-examples-query-war
     war
diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml
index b0ff25a0b48..249750373fb 100644
--- a/web-services/map-reduce-embedded/pom.xml
+++ b/web-services/map-reduce-embedded/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-map-reduce-embedded
     jar
diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml
index 60f4be375fc..0ddd5d84a54 100644
--- a/web-services/map-reduce-status/pom.xml
+++ b/web-services/map-reduce-status/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-map-reduce-status
     ejb
diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml
index a55d485d888..55457bff811 100644
--- a/web-services/map-reduce/pom.xml
+++ b/web-services/map-reduce/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-map-reduce
     ejb
diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml
index 743f7199f2c..fc0589590a2 100644
--- a/web-services/model/pom.xml
+++ b/web-services/model/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-model
     ejb
diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml
index 89154108e8b..e85e8a6473c 100644
--- a/web-services/modification/pom.xml
+++ b/web-services/modification/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-modification
     ejb
diff --git a/web-services/pom.xml b/web-services/pom.xml
index db35bcd5f37..d2cbedeae9b 100644
--- a/web-services/pom.xml
+++ b/web-services/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave
         datawave-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     gov.nsa.datawave.webservices
     datawave-ws-parent
diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml
index 29f91b952aa..7c8a392fb9c 100644
--- a/web-services/query-websocket/pom.xml
+++ b/web-services/query-websocket/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-query-websocket
     war
diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml
index c95552e97f3..10847392483 100644
--- a/web-services/query/pom.xml
+++ b/web-services/query/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-query
     ejb
diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml
index af4f2a40006..4601e3f8a41 100644
--- a/web-services/rest-api/pom.xml
+++ b/web-services/rest-api/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-rest-api
     war
diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml
index c6d7db5e182..df882a7525b 100644
--- a/web-services/security/pom.xml
+++ b/web-services/security/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-security
     ejb
diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml
index 19227b5d1d1..265e9fa22e5 100644
--- a/web-services/web-root/pom.xml
+++ b/web-services/web-root/pom.xml
@@ -4,7 +4,7 @@
     
         gov.nsa.datawave.webservices
         datawave-ws-parent
-        5.6.0-SNAPSHOT
+        5.7.0-SNAPSHOT
     
     datawave-ws-web-root
     war