diff --git a/common-test/pom.xml b/common-test/pom.xml
index 21eb3afd15b..a02bcd01b65 100644
--- a/common-test/pom.xml
+++ b/common-test/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-common-test</artifactId>
     <name>${project.artifactId}</name>
diff --git a/contrib/datawave-quickstart/bin/query.sh b/contrib/datawave-quickstart/bin/query.sh
index 83ca1c9a877..1eb874bffa6 100644
--- a/contrib/datawave-quickstart/bin/query.sh
+++ b/contrib/datawave-quickstart/bin/query.sh
@@ -55,7 +55,7 @@ function datawaveQuery() {
local curlcmd="/usr/bin/curl \
--silent --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \
- --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" \
+ --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" --keepalive-time 180 \
--header 'Content-Type: application/x-www-form-urlencoded;charset=UTF-8' --header 'Accept: application/json' \
${DW_REQUEST_HEADERS} ${DW_CURL_DATA} -X POST ${DW_QUERY_URI}/${DW_QUERY_LOGIC}/${DW_QUERY_CREATE_MODE}"
@@ -333,7 +333,7 @@ function closeQuery() {
local curlcmd="/usr/bin/curl \
--silent --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \
- --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" \
+ --insecure --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" --keepalive-time 180 \
-X PUT ${DW_QUERY_URI}/${DW_QUERY_ID}/close"
local response="$( eval "${curlcmd}" )"
@@ -368,7 +368,7 @@ function getNextPage() {
local curlcmd="/usr/bin/curl \
--silent --write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \
- --insecure --header 'Accept: application/json' ${DW_REQUEST_HEADERS} --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" \
+ --insecure --header 'Accept: application/json' ${DW_REQUEST_HEADERS} --cert "${DW_CURL_CERT}" --key "${DW_CURL_KEY_RSA}" --cacert "${DW_CURL_CA}" --keepalive-time 180 \
-X GET ${DW_QUERY_URI}/${DW_QUERY_ID}/next"
local response="$( eval "${curlcmd}" )"
diff --git a/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh b/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh
index dcf5f169186..b2e3999be1c 100755
--- a/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh
+++ b/contrib/datawave-quickstart/bin/services/datawave/test-web/run.sh
@@ -280,7 +280,7 @@ function runTest() {
TEST_COMMAND="${CURL} ${CURL_ADDITIONAL_OPTS} --silent \
--write-out 'HTTP_STATUS_CODE:%{http_code};TOTAL_TIME:%{time_total};CONTENT_TYPE:%{content_type}' \
---insecure --cert '${DW_CURL_CERT}' --key '${DW_CURL_KEY_RSA}' --cacert '${DW_CURL_CA}' ${TEST_URL_OPTS}"
+--insecure --cert '${DW_CURL_CERT}' --keepalive-time 180 --key '${DW_CURL_KEY_RSA}' --cacert '${DW_CURL_CA}' ${TEST_URL_OPTS}"
if [ "${LIST_TESTS}" == true ] ; then
printCurrentTestInfo
@@ -533,4 +533,4 @@ if [ "${LIST_TESTS}" != true ] ; then
printTestSummary
cleanup
exitWithTestStatus
-fi
\ No newline at end of file
+fi
diff --git a/docs/pom.xml b/docs/pom.xml
index fa33bf0b6bd..54da592e601 100644
--- a/docs/pom.xml
+++ b/docs/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-docs</artifactId>
diff --git a/pom.xml b/pom.xml
index 47609e5b8d8..77f5a7325d7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
     <modelVersion>4.0.0</modelVersion>
     <groupId>gov.nsa.datawave</groupId>
     <artifactId>datawave-parent</artifactId>
-    <version>5.6.0-SNAPSHOT</version>
+    <version>5.7.0-SNAPSHOT</version>
     <packaging>pom</packaging>
     <name>DataWave</name>
     <description>DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data.</description>
diff --git a/properties/default.properties b/properties/default.properties
index 61bf5ceb4cb..8765bc2e3ee 100644
--- a/properties/default.properties
+++ b/properties/default.properties
@@ -137,8 +137,11 @@ jboss.runas.user=jboss
# Defines the size parameters of the worker's task thread pool
# Suggest setting values here based on accumulo connection pool sizes, available cores, and expected access patterns
+# From the WildFly manual: workers for I/O channel notification. The maximum number of threads for the worker task thread pool.
+# Defaults to cpuCount * 16. Once the pool is full, tasks that cannot be queued will be rejected.
wildfly.io.worker.default.task-max-threads=16
# How many I/O (selector) threads should be maintained. Generally this number should be a small constant multiple of the number of available cores.
+# From the WildFly manual: the number of I/O threads to create for the worker. Defaults to cpuCount * 2.
wildfly.io.worker.default.io-threads=2
############################
diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml
index b0d95fa8a3b..1ef25f01fb4 100644
--- a/warehouse/accumulo-extensions/pom.xml
+++ b/warehouse/accumulo-extensions/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-accumulo-extensions</artifactId>
     <name>${project.artifactId}</name>
diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml
index a3842f8776a..cc6cf63a47f 100644
--- a/warehouse/assemble/datawave/pom.xml
+++ b/warehouse/assemble/datawave/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>assemble-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>assemble-datawave</artifactId>
     <packaging>pom</packaging>
diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml
index 56b154db743..55dba081ae8 100644
--- a/warehouse/assemble/pom.xml
+++ b/warehouse/assemble/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>assemble-parent</artifactId>
     <packaging>pom</packaging>
diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml
index 961a0c76f08..0523b656f11 100644
--- a/warehouse/assemble/webservice/pom.xml
+++ b/warehouse/assemble/webservice/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>assemble-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>assemble-webservice</artifactId>
     <name>${project.artifactId}</name>
diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml
index 20ce0f2def3..10ee3d50084 100644
--- a/warehouse/common/pom.xml
+++ b/warehouse/common/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-common</artifactId>
     <name>${project.artifactId}</name>
diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml
index 7b94a9365ff..025579e84fd 100644
--- a/warehouse/core/pom.xml
+++ b/warehouse/core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java b/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java
index b12b5c0dc62..c7be52b5f5e 100644
--- a/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java
+++ b/warehouse/core/src/test/java/datawave/ingest/protobuf/TermWeightPositionTest.java
@@ -101,6 +101,24 @@ public void testComparator() {
Collections.sort(result);
Assert.assertEquals(listExpected, result);
}
+
+    @Test
+    public void testBuilderReset() {
+        TermWeightPosition.Builder builder = new TermWeightPosition.Builder();
+        // Building twice from the same builder state should yield equal positions
+        TermWeightPosition expected = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build();
+        TermWeightPosition position = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build();
+        Assert.assertEquals(expected, position);
+
+        // A reset builder repopulated with the same values should build an equal position
+        expected = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build();
+        builder.reset();
+        position = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(true).build();
+        Assert.assertEquals(expected, position);
+
+        // The same holds with zero-offset matching disabled
+        expected = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(false).build();
+        builder.reset();
+        position = builder.setOffset(1).setPrevSkips(0).setScore(0).setZeroOffsetMatch(false).build();
+        Assert.assertEquals(expected, position);
+    }
+
@Test
public void testPositionScoreToTermWeightScore() {
diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml
index eaaa7a22113..07ec8c9db82 100644
--- a/warehouse/data-dictionary-core/pom.xml
+++ b/warehouse/data-dictionary-core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-data-dictionary-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml
index 6ed51d49fc3..fb513db5a5a 100644
--- a/warehouse/edge-dictionary-core/pom.xml
+++ b/warehouse/edge-dictionary-core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-edge-dictionary-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml
index 87e21afebc7..9fdfff2cd6f 100644
--- a/warehouse/edge-model-configuration-core/pom.xml
+++ b/warehouse/edge-model-configuration-core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-edge-model-configuration-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml
index 2d5462a9d2c..edb3acded7b 100644
--- a/warehouse/index-stats/pom.xml
+++ b/warehouse/index-stats/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-index-stats</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml
index 6d3b7496572..4a5e176927a 100644
--- a/warehouse/ingest-configuration/pom.xml
+++ b/warehouse/ingest-configuration/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-configuration</artifactId>
diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml
index 77cbc366673..e1feb1f82bf 100644
--- a/warehouse/ingest-core/pom.xml
+++ b/warehouse/ingest-core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java
index 32ae7825e59..18d940a3beb 100644
--- a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java
+++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/XMLFieldConfigHelperTest.java
@@ -82,7 +82,7 @@ private HttpServer createFileServer(String path, int port) throws Exception {
}
private String readFile(String path) {
- StringBuffer sb = new StringBuffer();
+ StringBuilder sb = new StringBuilder();
InputStream istream = getClass().getClassLoader().getResourceAsStream(path);
try (Scanner scanner = new Scanner(istream)) {
diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java
index 427021fa074..568a87e5253 100644
--- a/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java
+++ b/warehouse/ingest-core/src/test/java/datawave/ingest/mapreduce/job/SafeFileOutputCommitterTest.java
@@ -437,7 +437,7 @@ private void validateContent(Path dir) throws IOException {
private void validateContent(File dir, String fileName) throws IOException {
File expectedFile = new File(dir, fileName);
assertTrue("Could not find " + expectedFile, expectedFile.exists());
- StringBuffer expectedOutput = new StringBuffer();
+ StringBuilder expectedOutput = new StringBuilder();
expectedOutput.append(key1).append('\t').append(val1).append("\n");
expectedOutput.append(val1).append("\n");
expectedOutput.append(val2).append("\n");
diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml
index 8d32cd0e832..7afbf225d97 100644
--- a/warehouse/ingest-csv/pom.xml
+++ b/warehouse/ingest-csv/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-csv</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml
index adef6b6c024..8902dc2337e 100644
--- a/warehouse/ingest-json/pom.xml
+++ b/warehouse/ingest-json/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-json</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml
index f00bdc10298..536d28c7978 100644
--- a/warehouse/ingest-nyctlc/pom.xml
+++ b/warehouse/ingest-nyctlc/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-nyctlc</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java b/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java
index 802d82d4f5a..6f162ace3ee 100644
--- a/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java
+++ b/warehouse/ingest-nyctlc/src/main/java/datawave/ingest/nyctlc/NYCTLCReader.java
@@ -53,27 +53,30 @@ public boolean nextKeyValue() throws IOException {
// followed by a blank line, followed by our entries
// This is here to account for that
boolean hasNext, completeRecord;
+ StringBuilder sb = new StringBuilder();
do {
hasNext = super.nextKeyValue();
if (this.value != null && !this.value.toString().isEmpty() && !this.value.toString().equals(rawHeader)) {
// update value to be list of field/value pairings
- StringBuffer fvBuf = new StringBuffer();
String[] values = this.value.toString().split(((NYCTLCHelper) helper).getSeparator());
- if (values.length > ((NYCTLCHelper) helper).getParsedHeader().length)
+ if (values.length > ((NYCTLCHelper) helper).getParsedHeader().length) {
log.debug("More values present than expected.");
-
+ }
int numFields = Math.min(values.length, ((NYCTLCHelper) helper).getParsedHeader().length);
completeRecord = true;
for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
- fvBuf.append(((NYCTLCHelper) helper).getParsedHeader()[fieldIdx] + "=" + values[fieldIdx]);
- if ((fieldIdx + 1) < numFields)
- fvBuf.append(((NYCTLCHelper) helper).getSeparator());
+                    sb.append(((NYCTLCHelper) helper).getParsedHeader()[fieldIdx]).append('=').append(values[fieldIdx]);
+ if ((fieldIdx + 1) < numFields) {
+ sb.append(((NYCTLCHelper) helper).getSeparator());
+ }
}
- this.value = new Text(fvBuf.toString());
- } else
+ this.value = new Text(sb.toString());
+ sb.setLength(0);
+ } else {
completeRecord = false;
+ }
} while (hasNext && !completeRecord);
return hasNext;
diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml
index 2213e1a3ea9..096438278fa 100644
--- a/warehouse/ingest-scripts/pom.xml
+++ b/warehouse/ingest-scripts/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-scripts</artifactId>
     <name>${project.artifactId}</name>
diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml
index d0c2c6ea77c..e8d0220ca46 100644
--- a/warehouse/ingest-wikipedia/pom.xml
+++ b/warehouse/ingest-wikipedia/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ingest-wikipedia</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml
index 958e508dfa5..d22826faf1b 100644
--- a/warehouse/metrics-core/pom.xml
+++ b/warehouse/metrics-core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-metrics-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml
index f575fce751a..8a3a7bd42a3 100644
--- a/warehouse/ops-tools/config-compare/pom.xml
+++ b/warehouse/ops-tools/config-compare/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-ops-tools-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ops-tools-config-compare</artifactId>
diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml
index da2b206525f..09b4017922e 100644
--- a/warehouse/ops-tools/index-validation/pom.xml
+++ b/warehouse/ops-tools/index-validation/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-ops-tools-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ops-tools-index-validation</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml
index d2ca55c172d..a562298c569 100644
--- a/warehouse/ops-tools/pom.xml
+++ b/warehouse/ops-tools/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-ops-tools-parent</artifactId>
     <packaging>pom</packaging>
diff --git a/warehouse/pom.xml b/warehouse/pom.xml
index 81a32f03261..34ffa4678f6 100644
--- a/warehouse/pom.xml
+++ b/warehouse/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-warehouse-parent</artifactId>
     <packaging>pom</packaging>
diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml
index 373854ddf54..8b85d24f924 100644
--- a/warehouse/query-core/pom.xml
+++ b/warehouse/query-core/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>gov.nsa.datawave</groupId>
         <artifactId>datawave-warehouse-parent</artifactId>
-        <version>5.6.0-SNAPSHOT</version>
+        <version>5.7.0-SNAPSHOT</version>
     </parent>
     <artifactId>datawave-query-core</artifactId>
     <packaging>jar</packaging>
diff --git a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java
index ececf339d05..b519d521da4 100644
--- a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java
+++ b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java
@@ -137,6 +137,10 @@ public class QueryParameters {
* Used to limit the number of values returned for specific fields
*/
public static final String LIMIT_FIELDS = "limit.fields";
+ /**
+     * Used to tie field groups together such that, if a field in one group is not being limited, the fields in the matching groups will not be limited either.
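+     * For example, a matching field set of NAME=ACTOR ties NAME and ACTOR together: when a NAME value matches a hit ACTOR value (or vice versa), the fields
+     * in that value's group are kept rather than limited.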
+ */
+ public static final String MATCHING_FIELD_SETS = "matching.field.sets";
public static final String GROUP_FIELDS = "group.fields";
public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size";
diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java
index 0228705545d..5d52053362f 100644
--- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java
+++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java
@@ -248,6 +248,7 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
private boolean termFrequenciesRequired = false;
// Limit count of returned values for arbitrary fields.
     private Set<String> limitFields = Collections.emptySet();
+    private Set<String> matchingFieldSets = Collections.emptySet();
/**
* should limit fields be applied early
*/
@@ -524,6 +525,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) {
this.setQueryTermFrequencyFields(null == other.getQueryTermFrequencyFields() ? null : Sets.newHashSet(other.getQueryTermFrequencyFields()));
this.setTermFrequenciesRequired(other.isTermFrequenciesRequired());
this.setLimitFields(null == other.getLimitFields() ? null : Sets.newHashSet(other.getLimitFields()));
+ this.setMatchingFieldSets(null == other.getMatchingFieldSets() ? null : Sets.newHashSet(other.getMatchingFieldSets()));
this.setLimitFieldsPreQueryEvaluation(other.isLimitFieldsPreQueryEvaluation());
this.setLimitFieldsField(other.getLimitFieldsField());
this.setHitList(other.isHitList());
@@ -1565,6 +1567,18 @@ public String getLimitFieldsAsString() {
return StringUtils.join(this.getLimitFields(), Constants.PARAM_VALUE_SEP);
}
+    public Set<String> getMatchingFieldSets() {
+ return matchingFieldSets;
+ }
+
+    public void setMatchingFieldSets(Set<String> matchingFieldSets) {
+ this.matchingFieldSets = matchingFieldSets;
+ }
+
+ public String getMatchingFieldSetsAsString() {
+ return StringUtils.join(this.getMatchingFieldSets(), Constants.PARAM_VALUE_SEP);
+ }
+
public boolean isLimitFieldsPreQueryEvaluation() {
return limitFieldsPreQueryEvaluation;
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java b/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java
index 228ac486e84..b96e92bb495 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java
@@ -18,16 +18,53 @@
import java.util.Map.Entry;
import java.util.Set;
+/**
+ * LimitFields will reduce the attributes in a document given the limits specified for fields. Attributes that are in the set of hits for a document will never
+ * be dropped. Also, matching field sets can be specified, which will avoid dropping fields in the same group when the values match between the fields in a
+ * matching field set. For example, given the following field/values:
+ *
+ * <ul>
+ * <li>NAME.PERSON.1 = sam</li>
+ * <li>AGE.PERSON.1 = 10</li>
+ * <li>NAME.PERSON.2 = frank</li>
+ * <li>AGE.PERSON.2 = 11</li>
+ * <li>ACTOR.ACTOR.1 = sam</li>
+ * <li>FILM.ACTOR.1 = Johnny Goes Home</li>
+ * <li>ACTOR.ACTOR.2 = frank</li>
+ * <li>FILM.ACTOR.2 = Johnny Head</li>
+ * </ul>
+ *
+ * <ul>
+ * <li>and limit fields NAME=-1, AGE=-1, FILM=-1</li>
+ * <li>and a matching field set of NAME=ACTOR</li>
+ * <li>and a hit term of FILM.ACTOR.1=Johnny Goes Home</li>
+ * </ul>
+ *
+ * In this case the following fields should be returned:
+ *
+ * <ul>
+ * <li>ACTOR.ACTOR.1 = sam</li>
+ * <li>FILM.ACTOR.1 = Johnny Goes Home</li>
+ * <li>NAME.PERSON.1 = sam</li>
+ * <li>AGE.PERSON.1 = 10</li>
+ * </ul>
+ */
 public class LimitFields implements Function<Entry<Key,Document>,Entry<Key,Document>> {
private static final Logger log = Logger.getLogger(LimitFields.class);
- public static final String ORIGINAL_COUNT_SUFFIX = "ORIGINAL_COUNT";
+ public static final String ORIGINAL_COUNT_SUFFIX = "_ORIGINAL_COUNT";
-    private Map<String,Integer> limitFieldsMap;
+    // A map of fields to the number of values to limit each field by
+    private final Map<String,Integer> limitFieldsMap;
-    public LimitFields(Map<String,Integer> limitFieldsMap) {
+    // A collection of field sets where, if the values match between the fields in a set,
+    // those values should not be dropped
+    private final Set<Set<String>> matchingFieldSets;
+
+    public LimitFields(Map<String,Integer> limitFieldsMap, Set<Set<String>> matchingFieldSets) {
         this.limitFieldsMap = limitFieldsMap;
+        this.matchingFieldSets = matchingFieldSets;
         if (log.isTraceEnabled())
             log.trace("limitFieldsMap set to:" + limitFieldsMap);
     }
@@ -41,6 +78,8 @@ public Entry<Key,Document> apply(Entry<Key,Document> entry) {
CountMap countMissesRemainingForFieldMap = new CountMap();
CountMap countKeepersForFieldMap = new CountMap();
+ MatchingFieldGroups matchingFieldGroups = new MatchingFieldGroups(matchingFieldSets);
+
int attributesToDrop = 0;
// first pass is to set all of the hits to be kept, the misses to drop, and count em all
@@ -69,20 +108,24 @@ public Entry apply(Entry entry) {
             for (Attribute<? extends Comparable<?>> value : attrSet) {
if (isHit(keyWithGrouping, value, hitTermMap)) {
keepers++;
+ matchingFieldGroups.addHit(keyNoGrouping, value);
} else {
value.setToKeep(false);
missesRemaining++;
attributesToDrop++;
+ matchingFieldGroups.addPotential(keyNoGrouping, keyWithGrouping, value);
}
total++;
}
} else {
if (isHit(keyWithGrouping, attr, hitTermMap)) {
keepers++;
+ matchingFieldGroups.addHit(keyNoGrouping, attr);
} else {
attr.setToKeep(false);
missesRemaining++;
attributesToDrop++;
+ matchingFieldGroups.addPotential(keyNoGrouping, keyWithGrouping, attr);
}
total++;
}
@@ -92,7 +135,58 @@ public Entry apply(Entry entry) {
}
}
- // second pass is to set any misses back to be kept if the limit allows
+ // the second pass is to process the limited fields that have matching groups
+ matchingFieldGroups.processMatches();
+ if (matchingFieldGroups.hasMatches()) {
+            for (Map.Entry<String,Attribute<? extends Comparable<?>>> de : document.entrySet()) {
+ String keyWithGrouping = de.getKey();
+ String keyNoGrouping = removeGrouping(keyWithGrouping);
+
+ // if this was a limited field
+ if (this.limitFieldsMap.containsKey(keyNoGrouping)) {
+
+ int keepers = countKeepersForFieldMap.get(keyNoGrouping);
+ int missesRemaining = countMissesRemainingForFieldMap.get(keyNoGrouping);
+
+ // if we have matching group
+ if (matchingFieldGroups.isMatchingGroup(keyWithGrouping)) {
+ boolean foundMiss = false;
+                    Attribute<?> attr = de.getValue();
+ if (attr instanceof Attributes) {
+ Attributes attrs = (Attributes) attr;
+                        Set<Attribute<? extends Comparable<?>>> attrSet = attrs.getAttributes();
+
+                        for (Attribute<? extends Comparable<?>> value : attrSet) {
+ // if this was an attribute previously set to not keep, then it is one of the misses (not a hit)
+ if (!value.isToKeep()) {
+ value.setToKeep(true);
+ keepers++;
+ missesRemaining--;
+ attributesToDrop--;
+ foundMiss = true;
+ }
+ }
+ } else {
+ // if this was an attribute previously set to not keep, then it is one of the misses (not a hit)
+ if (!attr.isToKeep()) {
+ attr.setToKeep(true);
+ keepers++;
+ missesRemaining--;
+ attributesToDrop--;
+ foundMiss = true;
+ }
+ }
+
+ if (foundMiss) {
+ countKeepersForFieldMap.put(keyNoGrouping, keepers);
+ countMissesRemainingForFieldMap.put(keyNoGrouping, missesRemaining);
+ }
+ }
+ }
+ }
+ }
+
+ // third pass is to set any misses back to be kept if the limit allows
         for (Map.Entry<String,Attribute<? extends Comparable<?>>> de : document.entrySet()) {
String keyWithGrouping = de.getKey();
String keyNoGrouping = removeGrouping(keyWithGrouping);
@@ -202,7 +296,9 @@ private boolean isHit(String keyWithGrouping, Attribute<?> attr, Multimap<String,String> hitTermMap) {
                 Type<?> thing = (Type<?>) clazz.newInstance();
thing.setDelegateFromString(String.valueOf(hitValue));
hitValue = thing;
- } // otherwise, s is not a Type, just compare to value in hitTermMap using 'equals'
+ } else { // otherwise, s is not a Type, just compare as string values
+ s = String.valueOf(s);
+ }
if (s.equals(hitValue)) {
return true;
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldGroups.java b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldGroups.java
new file mode 100644
index 00000000000..2099789e8aa
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldGroups.java
@@ -0,0 +1,90 @@
+package datawave.query.function;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import datawave.query.attributes.Attribute;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
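+/**
+ * Used by LimitFields to track hit-term values per matching field set, along with the potential (group, value)
+ * pairs seen for limited fields. After processMatches() is called, isMatchingGroup() reports the groups whose
+ * values matched a hit value within one of the matching field sets.
+ */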
+public class MatchingFieldGroups {
+
+    private final Multimap<String,MatchingFieldHits> matchingFieldGroups;
+    private final Set<String> matchingGroups;
+    private final Multimap<String,String[]> potentialMatches;
+
+    public MatchingFieldGroups(Set<Set<String>> matchingFieldSets) {
+ matchingFieldGroups = HashMultimap.create();
+ if (matchingFieldSets != null) {
+            for (Set<String> matchingFieldSet : matchingFieldSets) {
+ MatchingFieldHits matchingFieldGroup = new MatchingFieldHits();
+ for (String field : matchingFieldSet) {
+ matchingFieldGroups.put(field, matchingFieldGroup);
+ }
+ }
+ }
+ matchingGroups = new HashSet<>();
+ potentialMatches = ArrayListMultimap.create();
+ }
+
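+    /** Record a hit-term value against every matching field set that contains this field. */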
+    public void addHit(String keyNoGrouping, Attribute<?> attr) {
+ if (matchingFieldGroups.containsKey(keyNoGrouping)) {
+ for (MatchingFieldHits matchingFieldGroup : matchingFieldGroups.get(keyNoGrouping)) {
+ matchingFieldGroup.addHitTermValue(getStringValue(attr));
+ }
+ }
+ }
+
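+    /** Record a non-hit value and its grouping context as a potential match for this field. */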
+    public void addPotential(String keyNoGrouping, String keyWithGrouping, Attribute<?> attr) {
+ if (matchingFieldGroups.containsKey(keyNoGrouping)) {
+ String group = getGroup(keyWithGrouping);
+ if (group != null) {
+ potentialMatches.put(keyNoGrouping, new String[] {group, getStringValue(attr)});
+ }
+ }
+ }
+
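+    /**
+     * For each potential (group, value) pair, mark the group as matching when the value was also seen on a hit
+     * term within the same matching field set.
+     */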
+ public void processMatches() {
+        for (Map.Entry<String,String[]> potentialEntry : potentialMatches.entries()) {
+ String keyNoGrouping = potentialEntry.getKey();
+ String group = potentialEntry.getValue()[0];
+ String value = potentialEntry.getValue()[1];
+ if (!matchingGroups.contains(group)) {
+ for (MatchingFieldHits matchingFieldGroup : matchingFieldGroups.get(keyNoGrouping)) {
+ if (matchingFieldGroup.containsHitTermValue(value)) {
+ matchingGroups.add(group);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ public boolean hasMatches() {
+ return !matchingGroups.isEmpty();
+ }
+
+ public boolean isMatchingGroup(String keyWithGrouping) {
+ String group = getGroup(keyWithGrouping);
+ if (group != null) {
+ return matchingGroups.contains(group);
+ }
+ return false;
+ }
+
+    static String getStringValue(Attribute<?> attr) {
+ return String.valueOf(attr.getData());
+ }
+
+ static String getGroup(String keyWithGrouping) {
+ String[] keyTokens = LimitFields.getCommonalityAndGroupingContext(keyWithGrouping);
+ if (keyTokens != null) {
+ return Joiner.on('.').join(keyTokens);
+ }
+ return null;
+ }
+
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldHits.java b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldHits.java
new file mode 100644
index 00000000000..5a9a10a23e4
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/function/MatchingFieldHits.java
@@ -0,0 +1,20 @@
+package datawave.query.function;
+
+import java.util.HashSet;
+import java.util.Set;
+
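+/**
+ * The set of hit-term values collected for a single matching field set.
+ */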
+public class MatchingFieldHits {
+    private final Set<String> hitTermValues;
+
+ public MatchingFieldHits() {
+ this.hitTermValues = new HashSet<>();
+ }
+
+ public void addHitTermValue(String value) {
+ hitTermValues.add(value);
+ }
+
+ public boolean containsHitTermValue(String value) {
+ return hitTermValues.contains(value);
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java
index 673ba34eed4..3b98b76f885 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java
@@ -3,7 +3,6 @@
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
-import com.google.common.base.Throwables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -981,10 +980,9 @@ public Entry<Key,Document> apply(@Nullable Entry<Key,Document> input) {
// note that we have already reduced the document to those attributes to keep. This will reduce the attributes further
// base on those fields we are limiting.
if (gatherTimingDetails()) {
- documents = Iterators.transform(documents,
- new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, new LimitFields(this.getLimitFieldsMap())));
+ documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, getLimitFields()));
} else {
- documents = Iterators.transform(documents, new LimitFields(this.getLimitFieldsMap()));
+ documents = Iterators.transform(documents, getLimitFields());
}
}
@@ -1155,6 +1153,10 @@ private void debugBooleanLogicIterator(NestedIterator<Key> root, String prefix) {
debugBooleanLogicIterator(child, prefix + " ");
}
}
+
protected DocumentProjection getProjection() {
DocumentProjection projection = new DocumentProjection(this.isIncludeGroupingContext(), this.isReducedResponse(), isTrackSizes());
if (this.useWhiteListedFields) {
+ // make sure we include any fields being matched in the limit fields mechanism
+ if (!this.matchingFieldSets.isEmpty()) {
+ this.whiteListedFields.addAll(getMatchingFieldList());
+ }
projection.setIncludes(this.whiteListedFields);
return projection;
} else if (this.useBlackListedFields) {
+ // make sure we are not excluding any fields being matched in the limit fields mechanism
+ if (!this.matchingFieldSets.isEmpty()) {
+ this.blackListedFields.removeAll(getMatchingFieldList());
+ }
projection.setExcludes(this.blackListedFields);
return projection;
} else {
@@ -1318,6 +1329,10 @@ protected DocumentProjection getCompositeProjection() {
}
}
}
+ // make sure we include any fields being matched in the limit fields mechanism
+ if (!this.matchingFieldSets.isEmpty()) {
+ composites.removeAll(getMatchingFieldList());
+ }
projection.setExcludes(composites);
return projection;
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
index ef7cea911bf..a2a684df706 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
@@ -135,6 +135,7 @@ public class QueryOptions implements OptionDescriber {
public static final String TERM_FREQUENCIES_REQUIRED = "term.frequencies.are.required";
public static final String CONTENT_EXPANSION_FIELDS = "content.expansion.fields";
public static final String LIMIT_FIELDS = "limit.fields";
+ public static final String MATCHING_FIELD_SETS = "matching.field.sets";
public static final String LIMIT_FIELDS_PRE_QUERY_EVALUATION = "limit.fields.pre.query.evaluation";
public static final String LIMIT_FIELDS_FIELD = "limit.fields.field";
public static final String GROUP_FIELDS = "group.fields";
@@ -291,6 +292,7 @@ public class QueryOptions implements OptionDescriber {
protected boolean useBlackListedFields = false;
     protected Set<String> blackListedFields = new HashSet<>();
     protected Map<String,Integer> limitFieldsMap = new HashMap<>();
+    protected Set<Set<String>> matchingFieldSets = new HashSet<>();
protected boolean limitFieldsPreQueryEvaluation = false;
protected String limitFieldsField = null;
@@ -500,6 +502,7 @@ public void deepCopy(QueryOptions other) {
this.compressResults = other.compressResults;
this.limitFieldsMap = other.limitFieldsMap;
+ this.matchingFieldSets = other.matchingFieldSets;
this.limitFieldsPreQueryEvaluation = other.limitFieldsPreQueryEvaluation;
this.limitFieldsField = other.limitFieldsField;
this.groupFields = other.groupFields;
@@ -997,6 +1000,18 @@ public void setLimitFieldsMap(Map limitFieldsMap) {
this.limitFieldsMap = limitFieldsMap;
}
+    public Set<Set<String>> getMatchingFieldSets() {
+ return matchingFieldSets;
+ }
+
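+    /**
+     * @return all fields named in any matching field set, flattened into a single list
+     */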
+    public List<String> getMatchingFieldList() {
+ return this.matchingFieldSets.stream().flatMap(s -> s.stream()).collect(Collectors.toList());
+ }
+
+    public void setMatchingFieldSets(Set<Set<String>> matchingFieldSets) {
+ this.matchingFieldSets = matchingFieldSets;
+ }
+
public boolean isLimitFieldsPreQueryEvaluation() {
return limitFieldsPreQueryEvaluation;
}
@@ -1133,6 +1148,7 @@ public IteratorOptions describeOptions() {
options.put(DOCUMENT_PERMUTATION_CLASSES,
"Classes implementing DocumentPermutation which can transform the document prior to evaluation (e.g. expand/mutate fields).");
options.put(LIMIT_FIELDS, "limit fields");
+ options.put(MATCHING_FIELD_SETS, "matching field sets (used along with limit fields)");
options.put(GROUP_FIELDS, "group fields");
options.put(GROUP_FIELDS_BATCH_SIZE, "group fields.batch.size");
options.put(UNIQUE_FIELDS, "unique fields");
@@ -1469,6 +1485,16 @@ public boolean validateOptions(Map<String,String> options) {
}
}
+ if (options.containsKey(MATCHING_FIELD_SETS)) {
+ String matchingFieldSets = options.get(MATCHING_FIELD_SETS);
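+            // e.g. "NAME=ACTOR" yields the matching field set {NAME, ACTOR}; multiple sets are comma-delimited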
+ for (String fieldSet : Splitter.on(',').omitEmptyStrings().trimResults().split(matchingFieldSets)) {
+ String[] fields = Iterables.toArray(Splitter.on('=').omitEmptyStrings().trimResults().split(fieldSet), String.class);
+ if (fields.length != 0) {
+                    this.getMatchingFieldSets().add(new HashSet<>(Arrays.asList(fields)));
+ }
+ }
+ }
+
if (options.containsKey(LIMIT_FIELDS_PRE_QUERY_EVALUATION)) {
this.setLimitFieldsPreQueryEvaluation(Boolean.parseBoolean(options.get(LIMIT_FIELDS_PRE_QUERY_EVALUATION)));
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java b/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java
index 64eb2be9d84..f4e8c76ccc6 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/JexlASTHelper.java
@@ -772,6 +772,12 @@ private static int numNegations(JexlNode node) {
}
}
+ /**
+     * Iterate through the provided node and its children, then return a list of the nodes that are an instance of ASTEQNode.
+     *
+     * @param node
+     *            the node to search from
+     * @return the list of ASTEQNode nodes found
+ */
     public static List<ASTEQNode> getEQNodes(JexlNode node) {
         List<ASTEQNode> eqNodes = Lists.newArrayList();
@@ -780,6 +786,12 @@ public static List<ASTEQNode> getEQNodes(JexlNode node) {
return eqNodes;
}
+ /**
+ * Check if the provided node is an instance of ASTEQNode. If yes, then add the node to the provided list.
+ *
+     * @param node
+     *            the node to check
+     * @param eqNodes
+     *            the list to which matching nodes are added
+ */
     private static void getEQNodes(JexlNode node, List<ASTEQNode> eqNodes) {
if (node instanceof ASTEQNode) {
eqNodes.add((ASTEQNode) node);
@@ -790,6 +802,12 @@ private static void getEQNodes(JexlNode node, List<ASTEQNode> eqNodes) {
}
}
+ /**
+     * Iterate through the provided node and its children, then return a list of the nodes that are an instance of ASTERNode.
+     *
+     * @param node
+     *            the node to search from
+     * @return the list of ASTERNode nodes found
+ */
     public static List<ASTERNode> getERNodes(JexlNode node) {
         List<ASTERNode> erNodes = Lists.newArrayList();
@@ -798,6 +816,12 @@ public static List<ASTERNode> getERNodes(JexlNode node) {
return erNodes;
}
+ /**
+ * Check if the provided node is an instance of ASTERNode. If yes, then add the node to the provided list.
+ *
+     * @param node
+     *            the node to check
+     * @param erNodes
+     *            the list to which matching nodes are added
+ */
     private static void getERNodes(JexlNode node, List<ASTERNode> erNodes) {
if (node instanceof ASTERNode) {
erNodes.add((ASTERNode) node);
@@ -808,6 +832,43 @@ private static void getERNodes(JexlNode node, List<ASTERNode> erNodes) {
}
}
+ /**
+     * Iterate through the provided node and its children, then return a list of nodes matching the provided class. The provided class must extend JexlNode.
+     *
+     * @see org.apache.commons.jexl2.parser.JexlNode
+     *
+     * @param node
+     *            the node to search from
+     * @param typeKey
+     *            the class of nodes to collect
+ * @return List of nodes matching provided class.
+ */
+    public static <T extends JexlNode> List<T> getNodesOfType(JexlNode node, Class<T> typeKey) {
+        List<T> nodes = Lists.newArrayList();
+
+        getNodesOfType(node, nodes, typeKey);
+
+        return nodes;
+    }
+
+ /**
+ * Check if the provided node is an instance of the provided class. If yes, then add the node to the provided list. The provided class must extend JexlNode.
+ *
+ * @see org.apache.commons.jexl2.parser.JexlNode
+ *
+     * @param node
+     *            the node to check
+     * @param nodes
+     *            the list to which matching nodes are added
+     * @param typeKey
+     *            the class of nodes to collect
+ */
+ @SuppressWarnings("unchecked")
+    private static <T extends JexlNode> void getNodesOfType(JexlNode node, List<T> nodes, Class<T> typeKey) {
+ if (typeKey.isInstance(node)) {
+ nodes.add((T) node);
+ } else {
+ for (int i = 0; i < node.jjtGetNumChildren(); i++) {
+ getNodesOfType(node.jjtGetChild(i), nodes, typeKey);
+ }
+ }
+ }
+
public static List