From a781aac94452b66045da3b0ca215a62c25da74c5 Mon Sep 17 00:00:00 2001
From: Ivan Bella
Date: Tue, 11 Apr 2023 12:19:35 +0000
Subject: [PATCH] * Refactored Document and TimingMetadata to not extend
 Attribute

* Simplified any logic looking for nested documents
* Ensured everything is actually serializable
* Made the projection testing a little more robust
* Updated to avoid creating a copy of the document
---
 .../datawave/query/attributes/Attribute.java  | 130 ++--------
 ...buteBag.java => AttributeBagMetadata.java} |  69 +++---
 .../query/attributes/AttributeMetadata.java   | 223 ++++++++++++++++++
 .../datawave/query/attributes/Attributes.java |  25 +-
 .../datawave/query/attributes/Document.java   | 177 ++++++++------
 .../datawave/query/attributes/Metadata.java   |   3 -
 .../query/attributes/TimingMetadata.java      | 208 +++++++++++-----
 .../query/common/grouping/GroupingUtil.java   |   2 +-
 .../query/function/DocumentProjection.java    |  90 +------
 .../datawave/query/function/LogTiming.java    |   5 +-
 .../datawave/query/iterator/QueryOptions.java |  16 +-
 .../EventDataQueryExpressionVisitor.java      |   6 +-
 .../query/predicate/EmptyDocumentFilter.java  |   2 +-
 .../DocumentTransformerSupport.java           |  14 +-
 .../java/datawave/query/QueryAuthsTest.java   |  13 +-
 .../function/DocumentProjectionTest.java      | 109 +++------
 .../testframework/QueryLogicTestHarness.java  |   5 +-
 .../transformer/UniqueTransformTest.java      |   8 +-
 18 files changed, 613 insertions(+), 492 deletions(-)
 rename warehouse/query-core/src/main/java/datawave/query/attributes/{AttributeBag.java => AttributeBagMetadata.java} (74%)
 create mode 100644 warehouse/query-core/src/main/java/datawave/query/attributes/AttributeMetadata.java
 delete mode 100644 warehouse/query-core/src/main/java/datawave/query/attributes/Metadata.java

diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java
index d45660e1f56..3e32de0c77d 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java
@@ -10,7 +10,6 @@
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.log4j.Logger;
@@ -26,136 +25,63 @@ public abstract class Attribute<T extends Comparable<T>> implements WritableComparable<T>, KryoSerializable {
 
     private static final Logger log = Logger.getLogger(Attribute.class);
-    private static final Text EMPTY_TEXT = new Text();
 
     /**
      * The metadata for this attribute. Really only the column visibility and timestamp are preserved in this metadata when serializing and deserializing.
     * However, more information (e.g. the document key) can be maintained in this field for use locally.
     */
-    protected Key metadata = null;
+    protected AttributeMetadata metadata = new AttributeMetadata();
 
     protected boolean toKeep = true; // a flag denoting whether this attribute is to be kept in the returned results (transient or not)
 
     protected boolean fromIndex = true; // Assume attributes are from the index unless specified otherwise.
public Attribute() {} - public Attribute(Key metadata, boolean toKeep) { + public Attribute(boolean toKeep) { this.toKeep = toKeep; - setMetadata(metadata); } - public boolean isMetadataSet() { - return (metadata != null); + public Attribute(Key key, boolean toKeep) { + this.toKeep = toKeep; + metadata.setMetadata(key); } - public ColumnVisibility getColumnVisibility() { - if (isMetadataSet()) { - return metadata.getColumnVisibilityParsed(); - } - return Constants.EMPTY_VISIBILITY; + public boolean isMetadataSet() { + return metadata.isMetadataSet(); } - public void setColumnVisibility(ColumnVisibility columnVisibility) { - if (isMetadataSet()) { - metadata = new Key(metadata.getRow(), metadata.getColumnFamily(), metadata.getColumnQualifier(), columnVisibility, metadata.getTimestamp()); - } else { - metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, columnVisibility, -1); - } + public Key getMetadata() { + return metadata.getMetadata(); } - public long getTimestamp() { - if (isMetadataSet()) { - return metadata.getTimestamp(); - } - return -1; + public void setMetadata(Key key) { + metadata.setMetadata(key); } - public void setTimestamp(long ts) { - if (isMetadataSet()) { - metadata = new Key(metadata.getRow(), metadata.getColumnFamily(), metadata.getColumnQualifier(), metadata.getColumnVisibility(), ts); - } else { - metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, Constants.EMPTY_VISIBILITY, ts); - } + public void setMetadata(ColumnVisibility vis, long ts) { + metadata.setMetadata(vis, ts); } - /* - * - * Set the metadata. This should only be set here or from extended classes. - */ - protected void setMetadata(ColumnVisibility vis, long ts) { - if (isMetadataSet()) { - metadata = new Key(metadata.getRow(), metadata.getColumnFamily(), metadata.getColumnQualifier(), vis, ts); - } else { - metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, vis, ts); - } - } - - private static final ByteSequence EMPTY_BYTE_SEQUENCE = new ArrayByteSequence(new byte[0]); - - /* - * Given a key, set the metadata. Expected input keys can be an event key, an fi key, or a tf key. Expected metadata is row=shardid, cf = type\0uid; cq = - * empty; cv, ts left as is. - */ - protected void setMetadata(Key key) { - if (key == null) { - this.metadata = null; - } else { - // convert the key to the form shard type\0uid cv, ts. 
Possible inputs are an event key, a fi key, or a tf key - final ByteSequence row = key.getRowData(), cf = key.getColumnFamilyData(), cv = key.getColumnVisibilityData(); - if (isFieldIndex(cf)) { - // find the first null byte in the cq and take everything after that (cq = Normalized Field Value\0Data Type\0UID) - final ByteSequence cq = key.getColumnQualifierData(); - int nullOffset = 0; - for (int i = 0; i < cq.length(); i++) { - if (cq.byteAt(i) == '\0') { - nullOffset = i; - break; - } - } - this.metadata = new Key(row.getBackingArray(), row.offset(), row.length(), cq.getBackingArray(), nullOffset + 1, cq.length() - (nullOffset + 1), - EMPTY_BYTE_SEQUENCE.getBackingArray(), EMPTY_BYTE_SEQUENCE.offset(), EMPTY_BYTE_SEQUENCE.length(), cv.getBackingArray(), - cv.offset(), cv.length(), key.getTimestamp()); - } else if (isTermFrequency(cf)) { - // find the second null byte in the cq and take everything before that (cq = DataType\0UID\0Normalized Field Value\0Field Name) - final ByteSequence cq = key.getColumnQualifierData(); - int nullOffset = 0; - int count = 0; - for (int i = 0; i < cf.length(); i++) { - if (cf.byteAt(i) == '\0') { - count++; - if (count == 2) { - nullOffset = i; - break; - } - } - } - this.metadata = new Key(row.getBackingArray(), row.offset(), row.length(), cq.getBackingArray(), cq.offset(), nullOffset, - EMPTY_BYTE_SEQUENCE.getBackingArray(), EMPTY_BYTE_SEQUENCE.offset(), EMPTY_BYTE_SEQUENCE.length(), cv.getBackingArray(), - cv.offset(), cv.length(), key.getTimestamp()); - } else { - this.metadata = new Key(row.getBackingArray(), row.offset(), row.length(), cf.getBackingArray(), cf.offset(), cf.length(), - EMPTY_BYTE_SEQUENCE.getBackingArray(), EMPTY_BYTE_SEQUENCE.offset(), EMPTY_BYTE_SEQUENCE.length(), cv.getBackingArray(), - cv.offset(), cv.length(), key.getTimestamp()); - } - } + public ColumnVisibility getColumnVisibility() { + return metadata.getColumnVisibility(); } - protected boolean isFieldIndex(ByteSequence cf) { - return (cf.length() >= 3 && cf.byteAt(0) == 'f' && cf.byteAt(1) == 'i' && cf.byteAt(2) == '\0'); + public void setColumnVisibility(ColumnVisibility vis) { + metadata.setColumnVisibility(vis); } - protected boolean isTermFrequency(ByteSequence cf) { - return (cf.length() == 2 && cf.byteAt(0) == 't' && cf.byteAt(1) == 'f'); + public long getTimestamp() { + return metadata.getTimestamp(); } - public Key getMetadata() { - return metadata; + public void setTimestamp(long ts) { + metadata.setTimestamp(ts); } /** * Unset the metadata. This should only be set here or from extended classes. 
*/
     protected void clearMetadata() {
-        metadata = null;
+        metadata.setMetadata(null);
     }
 
     protected void writeMetadata(DataOutput out, Boolean reducedResponse) throws IOException {
@@ -249,17 +175,7 @@ public int size() {
     }
 
     private long getMetadataSizeInBytes() {
-        long size = 0;
-        if (isMetadataSet()) {
-            size += roundUp(33);
-            // 33 is object overhead, 4 array refs, 1 long and 1 boolean
-            size += roundUp(metadata.getRowData().length() + 12);
-            size += roundUp(metadata.getColumnFamilyData().length() + 12);
-            size += roundUp(metadata.getColumnQualifierData().length() + 12);
-            size += roundUp(metadata.getColumnVisibilityData().length() + 12);
-            // 12 is array overhead
-        }
-        return size;
+        return metadata.getSizeInBytes();
     }
 
     public long sizeInBytes() {
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java b/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBagMetadata.java
similarity index 74%
rename from warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java
rename to warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBagMetadata.java
index 4278f2235f9..5d5a31efcfe 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBagMetadata.java
@@ -3,7 +3,6 @@
 import java.io.Serializable;
 import java.util.Collection;
 
-import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.commons.lang.mutable.MutableLong;
 import org.apache.log4j.Logger;
@@ -11,45 +10,49 @@
 import com.google.common.collect.Sets;
 
 import datawave.marking.MarkingFunctions;
-import datawave.marking.MarkingFunctions.Exception;
 import datawave.marking.MarkingFunctionsFactory;
 
-public abstract class AttributeBag<T extends Comparable<T>> extends Attribute<T> implements Serializable {
-
-    private static final long serialVersionUID = 1L;
-    private static final Logger log = Logger.getLogger(AttributeBag.class);
-    protected long shardTimestamp = Long.MAX_VALUE;
-    protected boolean validMetadata = false;
+public class AttributeBagMetadata extends AttributeMetadata implements Serializable {
+    private static final long serialVersionUID = -1;
+    private static final Logger log = Logger.getLogger(AttributeBagMetadata.class);
 
     private static final long ONE_DAY_MS = 1000l * 60 * 60 * 24;
-    protected static final MarkingFunctions markingFunctions = MarkingFunctionsFactory.createMarkingFunctions();
+    private static final MarkingFunctions markingFunctions = MarkingFunctionsFactory.createMarkingFunctions();
 
-    public MarkingFunctions getMarkingFunctions() {
-        return markingFunctions;
-    }
+    private long shardTimestamp = Long.MAX_VALUE;
 
-    protected AttributeBag() {
-        this(true);
+    public interface AttributesGetter {
+        Collection<Attribute<? extends Comparable<?>>> getAttributes();
     }
 
-    public AttributeBag(boolean toKeep) {
-        super(null, toKeep);
+    private final AttributesGetter attributes;
+
+    private boolean validMetadata = false;
+
+    public AttributeBagMetadata(AttributesGetter attributes) {
+        this.attributes = attributes;
     }
 
-    public AttributeBag(Key metadata, boolean toKeep) {
-        super(metadata, toKeep);
+    public boolean isValidMetadata() {
+        return (this.validMetadata && isMetadataSet());
     }
 
     public void invalidateMetadata() {
-        this.validMetadata = false;
+        setValidMetadata(false);
     }
 
-    public boolean isValidMetadata() {
-        return (this.validMetadata && isMetadataSet());
+    public void setValidMetadata(boolean valid) {
+        this.validMetadata = valid;
+    }
+
+    public long getShardTimestamp() {
+        return shardTimestamp;
+    }
 
-    public abstract Collection<Attribute<? extends Comparable<?>>> getAttributes();
+    public void setShardTimestamp(long shardTimestamp) {
+        this.shardTimestamp = shardTimestamp;
+    }
 
     @Override
     public long getTimestamp() {
@@ -59,7 +62,6 @@ public long getTimestamp() {
         return super.getTimestamp();
     }
 
-    @Override
     public ColumnVisibility getColumnVisibility() {
         if (isValidMetadata() == false)
             this.updateMetadata();
@@ -70,25 +72,25 @@ private void updateMetadata() {
         long ts = updateTimestamps();
         ColumnVisibility vis = super.getColumnVisibility();
         try {
-            vis = this.combineAndSetColumnVisibilities(getAttributes());
-        } catch (Exception e) {
+            vis = this.combineAndSetColumnVisibilities(attributes.getAttributes());
+        } catch (MarkingFunctions.Exception e) {
             log.error("got error combining visibilities", e);
         }
         setMetadata(vis, ts);
-        validMetadata = true;
+        setValidMetadata(true);
     }
 
-    protected ColumnVisibility combineAndSetColumnVisibilities(Collection<Attribute<? extends Comparable<?>>> attributes) throws Exception {
+    protected ColumnVisibility combineAndSetColumnVisibilities(Collection<Attribute<? extends Comparable<?>>> attributes) throws MarkingFunctions.Exception {
         Collection<ColumnVisibility> columnVisibilities = Sets.newHashSet();
        for (Attribute<?> attr : attributes) {
             columnVisibilities.add(attr.getColumnVisibility());
         }
-        return AttributeBag.markingFunctions.combine(columnVisibilities);
+        return markingFunctions.combine(columnVisibilities);
     }
 
     private long updateTimestamps() {
         MutableLong ts = new MutableLong(Long.MAX_VALUE);
-        for (Attribute<?> attribute : getAttributes()) {
+        for (Attribute<?> attribute : attributes.getAttributes()) {
             mergeTimestamps(attribute, ts);
         }
         return ts.longValue();
@@ -96,9 +98,9 @@ private long updateTimestamps() {
 
     private void mergeTimestamps(Attribute<?> other, MutableLong ts) {
         // if this is a set of attributes, then examine each one. Note not recursing on a Document as it should have already applied the shard time.
-        if (other instanceof AttributeBag) {
+        if (other instanceof Attributes) {
             // recurse on the sub attributes
-            for (Attribute<?> attribute : ((AttributeBag<?>) other).getAttributes()) {
+            for (Attribute<?> attribute : ((Attributes) other).getAttributes()) {
                 mergeTimestamps(attribute, ts);
             }
         } else if (other.isMetadataSet()) {
@@ -143,9 +145,4 @@ private void mergeTimestamps(Attribute<?> other, MutableLong ts) {
         }
     }
 
-    @Override
-    public void setToKeep(boolean toKeep) {
-        super.setToKeep(toKeep);
-        // do not change values of child attributes to avoid overriding earlier decisions
-    }
 }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeMetadata.java b/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeMetadata.java
new file mode 100644
index 00000000000..a879b652945
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeMetadata.java
@@ -0,0 +1,223 @@
+package datawave.query.attributes;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+
+import org.apache.accumulo.core.data.ArrayByteSequence;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.commons.lang3.SerializationUtils;
+import org.apache.hadoop.io.Text;
+
+import datawave.query.Constants;
+
+public class AttributeMetadata implements Comparable<AttributeMetadata>, Serializable {
+    private static final long serialVersionUID = -1;
+    private static final Text EMPTY_TEXT = new Text();
+    private static final ByteSequence EMPTY_BYTE_SEQUENCE = new ArrayByteSequence(new byte[0]);
+
+    private transient Key metadata;
+
+    public Key getMetadata() {
+        return metadata;
+    }
+
+    public boolean isMetadataSet() {
+        return metadata != null;
+    }
+
+    public ColumnVisibility getColumnVisibility() {
+        if (isMetadataSet()) {
+            return metadata.getColumnVisibilityParsed();
+        }
+        return Constants.EMPTY_VISIBILITY;
+    }
+
+    public void setColumnVisibility(ColumnVisibility columnVisibility) {
+        if (isMetadataSet()) {
+            metadata = new Key(metadata.getRow(), metadata.getColumnFamily(), metadata.getColumnQualifier(), columnVisibility, metadata.getTimestamp());
+        } else {
+            metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, columnVisibility, -1);
+        }
+    }
+
+    public long getTimestamp() {
+        if (isMetadataSet()) {
+            return metadata.getTimestamp();
+        }
+        return -1;
+    }
+
+    public void setTimestamp(long ts) {
+        if (isMetadataSet()) {
+            metadata = new Key(metadata.getRow(), metadata.getColumnFamily(), metadata.getColumnQualifier(), metadata.getColumnVisibility(), ts);
+        } else {
+            metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, Constants.EMPTY_VISIBILITY, ts);
+        }
+    }
+
+    /*
+     *
+     * Set the metadata. This should only be set here or from extended classes.
+     */
+    public void setMetadata(ColumnVisibility vis, long ts) {
+        if (isMetadataSet()) {
+            metadata = new Key(metadata.getRow(), metadata.getColumnFamily(), metadata.getColumnQualifier(), vis, ts);
+        } else {
+            metadata = new Key(EMPTY_TEXT, EMPTY_TEXT, EMPTY_TEXT, vis, ts);
+        }
+    }
+
+    /*
+     * Given a key, set the metadata. Expected input keys can be an event key, an fi key, or a tf key. Expected metadata is row=shardid, cf = type\0uid; cq =
+     * empty; cv, ts left as is.
+     */
+    public void setMetadata(Key key) {
+        if (key == null) {
+            this.metadata = null;
+        } else {
+            // convert the key to the form shard type\0uid cv, ts.
Possible inputs are an event key, a fi key, or a tf key
+            final ByteSequence row = key.getRowData(), cf = key.getColumnFamilyData(), cv = key.getColumnVisibilityData();
+            if (isFieldIndex(cf)) {
+                // find the first null byte in the cq and take everything after that (cq = Normalized Field Value\0Data Type\0UID)
+                final ByteSequence cq = key.getColumnQualifierData();
+                int nullOffset = 0;
+                for (int i = 0; i < cq.length(); i++) {
+                    if (cq.byteAt(i) == '\0') {
+                        nullOffset = i;
+                        break;
+                    }
+                }
+                this.metadata = new Key(row.getBackingArray(), row.offset(), row.length(), cq.getBackingArray(), nullOffset + 1, cq.length() - (nullOffset + 1),
+                                EMPTY_BYTE_SEQUENCE.getBackingArray(), EMPTY_BYTE_SEQUENCE.offset(), EMPTY_BYTE_SEQUENCE.length(), cv.getBackingArray(),
+                                cv.offset(), cv.length(), key.getTimestamp());
+            } else if (isTermFrequency(cf)) {
+                // find the second null byte in the cq and take everything before that (cq = DataType\0UID\0Normalized Field Value\0Field Name)
+                final ByteSequence cq = key.getColumnQualifierData();
+                int nullOffset = 0;
+                int count = 0;
+                for (int i = 0; i < cq.length(); i++) {
+                    if (cq.byteAt(i) == '\0') {
+                        count++;
+                        if (count == 2) {
+                            nullOffset = i;
+                            break;
+                        }
+                    }
+                }
+                this.metadata = new Key(row.getBackingArray(), row.offset(), row.length(), cq.getBackingArray(), cq.offset(), nullOffset,
+                                EMPTY_BYTE_SEQUENCE.getBackingArray(), EMPTY_BYTE_SEQUENCE.offset(), EMPTY_BYTE_SEQUENCE.length(), cv.getBackingArray(),
+                                cv.offset(), cv.length(), key.getTimestamp());
+            } else {
+                this.metadata = new Key(row.getBackingArray(), row.offset(), row.length(), cf.getBackingArray(), cf.offset(), cf.length(),
+                                EMPTY_BYTE_SEQUENCE.getBackingArray(), EMPTY_BYTE_SEQUENCE.offset(), EMPTY_BYTE_SEQUENCE.length(), cv.getBackingArray(),
+                                cv.offset(), cv.length(), key.getTimestamp());
+            }
+        }
+    }
+
+    protected boolean isFieldIndex(ByteSequence cf) {
+        return (cf.length() >= 3 && cf.byteAt(0) == 'f' && cf.byteAt(1) == 'i' && cf.byteAt(2) == '\0');
+    }
+
+    protected boolean isTermFrequency(ByteSequence cf) {
+        return (cf.length() == 2 && cf.byteAt(0) == 't' && cf.byteAt(1) == 'f');
+    }
+
+    private void writeObject(ObjectOutputStream stream) throws IOException {
+        stream.writeBoolean(isMetadataSet());
+        if (isMetadataSet()) {
+            writeText(metadata.getRow(), stream);
+            writeText(metadata.getColumnFamily(), stream);
+            writeText(metadata.getColumnQualifier(), stream);
+            writeText(metadata.getColumnVisibility(), stream);
+            stream.writeLong(metadata.getTimestamp());
+            stream.writeBoolean(metadata.isDeleted());
+        }
+    }
+
+    private void writeText(Text text, ObjectOutputStream stream) throws IOException {
+        // write the length as a 4-byte int to match the readInt() in readText
+        stream.writeInt(text.getLength());
+        stream.write(text.getBytes(), 0, text.getLength());
+    }
+
+    private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
+        boolean hasMetadata = stream.readBoolean();
+        if (hasMetadata) {
+            Text row = readText(stream);
+            Text cf = readText(stream);
+            Text cq = readText(stream);
+            Text cv = readText(stream);
+            long ts = stream.readLong();
+            boolean deleted = stream.readBoolean();
+            metadata = new Key(row, cf, cq, cv, ts);
+            metadata.setDeleted(deleted);
+        }
+    }
+
+    private Text readText(ObjectInputStream stream) throws IOException {
+        int len = stream.readInt();
+        byte[] bytes = new byte[len];
+        stream.readFully(bytes);
+        return new Text(bytes);
+    }
+
+    @Override
+    public int compareTo(AttributeMetadata o) {
+        if (metadata == null) {
+            return (o.metadata == null) ? 0 : -1;
+        } else if (o.metadata == null) {
+            return 1;
+        }
+        return metadata.compareTo(o.getMetadata());
+    }
+
+    public long getSizeInBytes() {
+        // 4 for the key reference
+        long size = 4;
+        if (isMetadataSet()) {
+            size += roundUp(33);
+            // 33 is object overhead, 4 array refs, 1 long and 1 boolean
+            size += roundUp(metadata.getRowData().length() + 12);
+            size += roundUp(metadata.getColumnFamilyData().length() + 12);
+            size += roundUp(metadata.getColumnQualifierData().length() + 12);
+            size += roundUp(metadata.getColumnVisibilityData().length() + 12);
+            // 12 is array overhead
+        }
+        return size;
+    }
+
+    protected static long roundUp(long size) {
+        long extra = size % 8;
+        if (extra > 0) {
+            size = size + 8 - extra;
+        }
+        return size;
+    }
+
+    @Override
+    public int hashCode() {
+        return isMetadataSet() ? metadata.hashCode() : 0;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj instanceof AttributeMetadata) {
+            AttributeMetadata other = (AttributeMetadata) obj;
+            return isMetadataSet() ? metadata.equals(other.metadata) : !other.isMetadataSet();
+        }
+        return false;
+    }
+
+    @Override
+    protected Object clone() throws CloneNotSupportedException {
+        AttributeMetadata clone = new AttributeMetadata();
+        clone.setMetadata(metadata);
+        return clone;
+    }
+
+    @Override
+    public String toString() {
+        return String.valueOf(metadata);
+    }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java
index 50a2e5ea684..20287d8c4cb 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java
@@ -24,11 +24,11 @@
 import datawave.query.collections.FunctionalSet;
 import datawave.query.jexl.DatawaveJexlContext;
 
-public class Attributes extends AttributeBag<Attributes> implements Serializable {
+public class Attributes extends Attribute<Attributes> implements Serializable, AttributeBagMetadata.AttributesGetter {
     private static final long serialVersionUID = 1L;
     private static final Logger log = Logger.getLogger(Attributes.class);
 
-    private Set<Attribute<? extends Comparable<?>>> attributes;
+    private final Set<Attribute<? extends Comparable<?>>> attributes = new LinkedHashSet<>();
 
     private int _count = 0;
     // cache the size in bytes as it can be expensive to compute on the fly if we have many attributes
     private long _bytes = super.sizeInBytes(16) + 16 + 48;
@@ -53,8 +53,8 @@ public Attributes(boolean toKeep) {
 
     public Attributes(boolean toKeep, boolean trackSizes) {
         super(toKeep);
-        attributes = new LinkedHashSet<>();
         this.trackSizes = trackSizes;
+        this.metadata = new AttributeBagMetadata(this);
     }
 
     public Attributes(Collection<Attribute<? extends Comparable<?>>> attributes, boolean toKeep) {
@@ -69,6 +69,15 @@ public Attributes(Collection<Attribute<? extends Comparable<?>>> attributes, boolean toKeep) {
         }
     }
 
+    private void invalidateMetadata() {
+        ((AttributeBagMetadata) metadata).invalidateMetadata();
+    }
+
+    private boolean isValidMetadata() {
+        return ((AttributeBagMetadata) metadata).isValidMetadata();
+    }
+
+    @Override
     public Set<Attribute<? extends Comparable<?>>> getAttributes() {
         return Collections.unmodifiableSet(this.attributes);
     }
@@ -135,7 +144,7 @@ public void readFields(DataInput in) throws IOException {
         this._count = WritableUtils.readVInt(in);
         this.trackSizes = in.readBoolean();
         int numAttrs = WritableUtils.readVInt(in);
-        this.attributes = new LinkedHashSet<>();
+        this.attributes.clear();
         for (int i = 0; i < numAttrs; i++) {
             String attrClassName = WritableUtils.readString(in);
             Class<?> clz;
@@ -166,7 +175,7 @@ public void readFields(DataInput in) throws IOException {
             this.attributes.add(attr);
         }
 
-        this.invalidateMetadata();
+        invalidateMetadata();
     }
 
     @Override
@@ -309,7 +318,7 @@ public void read(Kryo kryo, Input input) {
         this.trackSizes = input.readBoolean();
         int numAttrs =
input.readInt(true);
-        this.attributes = new LinkedHashSet<>();
+        this.attributes.clear();
         for (int i = 0; i < numAttrs; i++) {
             String attrClassName = input.readString();
             Class<?> clz;
@@ -357,8 +366,8 @@ public Attributes copy() {
             attrs.add((Attribute<?>) attr.copy());
         }
 
-        attrs.setMetadata(getMetadata());
-        attrs.validMetadata = this.validMetadata;
+        attrs.metadata.setMetadata(getMetadata());
+        ((AttributeBagMetadata) attrs.metadata).setValidMetadata(isValidMetadata());
 
         return attrs;
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java
index 01eceaa96bd..4963d9050f8 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java
@@ -17,11 +17,14 @@
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.log4j.Logger;
 
 import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.KryoSerializable;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 import com.google.common.collect.Iterators;
@@ -39,7 +42,7 @@
 import datawave.query.util.TypeMetadata;
 import datawave.util.time.DateHelper;
 
-public class Document extends AttributeBag<Document> implements Serializable {
+public class Document implements Serializable, AttributeBagMetadata.AttributesGetter, Comparable<Document>, WritableComparable<Document>, KryoSerializable {
     private static final long serialVersionUID = 1L;
     private static final Logger log = Logger.getLogger(Document.class);
@@ -47,8 +50,11 @@ public class Document extends AttributeBag<Document> implements Serializable {
 
     public static final String DOCKEY_FIELD_NAME = "RECORD_ID";
 
     private int _count = 0;
-    long _bytes = 0;
-    TreeMap<String,Attribute<? extends Comparable<?>>> dict;
+    private long _bytes = 0;
+    private TreeMap<String,Attribute<? extends Comparable<?>>> dict;
+    private AttributeBagMetadata metadata;
+    private boolean toKeep;
+    private TimingMetadata timingMetadata;
 
     /**
      * should sizes of the documents be tracked
@@ -60,20 +66,6 @@
      */
     private boolean intermediateResult;
 
-    private static final long ONE_DAY_MS = 1000l * 60 * 60 * 24;
-
-    public MarkingFunctions getMarkingFunctions() {
-        return MarkingFunctions.Factory.createMarkingFunctions();
-    }
-
-    public Map<String,String> getMarkings() {
-        try {
-            MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
-            return markingFunctions.translateFromColumnVisibility(getColumnVisibility());
-        } catch (MarkingFunctions.Exception e) {}
-        return Collections.emptyMap();
-    }
-
     public Document() {
         this(null, true);
     }
@@ -83,8 +75,10 @@ public Document(Key key, boolean toKeep) {
     }
 
     public Document(Key key, boolean toKeep, boolean trackSizes) {
-        super(key, toKeep);
         dict = new TreeMap<>();
+        metadata = new AttributeBagMetadata(this);
+        metadata.setMetadata(key);
+        this.toKeep = toKeep;
         this.trackSizes = trackSizes;
     }
 
@@ -106,6 +100,34 @@ public Document(Key key, Set<Key> docKeys, boolean fromIndex, Iterator
     public Collection<Attribute<? extends Comparable<?>>> getAttributes() {
         return Collections.unmodifiableCollection(this.dict.values());
     }
@@ -127,6 +149,14 @@
     public Iterator<Entry<String,Attribute<? extends Comparable<?>>>> iterator() {
         return getDictionary().entrySet().iterator();
     }
 
+    public boolean isToKeep() {
+        return toKeep;
+    }
+
+    public void setToKeep(boolean
toKeep) {
+        this.toKeep = toKeep;
+    }
+
     /**
      * Given an iterator over {@code Entry<Key,Value>}, and a set of normalizers, this method will merge the attributes scanned over by the supplied iterator
      * into this Document.
@@ -154,14 +184,14 @@
     public Document consumeRawData(Key docKey, Set<Key> docKeys, Iterator<Entry<Key,Value>> iter, TypeMetadata typeMetadata, CompositeMetadata compositeMetadata,
                     boolean includeGroupingContext, boolean keepRecordId, EventDataQueryFilter attrFilter, boolean fromIndex) {
-        invalidateMetadata();
+        metadata.invalidateMetadata();
         // extract the sharded time from the dockey if possible
         try {
-            this.shardTimestamp = DateHelper.parseWithGMT(docKey.getRow().toString()).getTime();
+            metadata.setShardTimestamp(DateHelper.parseWithGMT(docKey.getRow().toString()).getTime());
         } catch (DateTimeParseException e) {
             log.warn("Unable to parse document key row as a shard id of the form yyyyMMdd...: " + docKey.getRow(), e);
             // leave the shardTimestamp empty
-            this.shardTimestamp = Long.MAX_VALUE;
+            metadata.setShardTimestamp(Long.MAX_VALUE);
         }
 
         // Extract the fieldName from the Key
@@ -312,7 +342,7 @@ public void put(String key, Attribute<? extends Comparable<?>> value, Boolean includeGroupingContext,
                 _bytes += Attribute.sizeInBytes(key);
             }
 
-            invalidateMetadata();
+            metadata.invalidateMetadata();
         } else {
             if (!existingAttr.equals(value)) {
                 Attributes attrs = null;
@@ -389,7 +419,7 @@ public void put(String key, Attribute<? extends Comparable<?>> value, Boolean includeGroupingContext,
                 }
             }
 
-            invalidateMetadata();
+            metadata.invalidateMetadata();
         }
         // else, a Document cannot contain the same Field:Value, thus
         // when we find a duplicate value in the same field, we ignore it.
@@ -444,7 +474,7 @@ public Attribute<? extends Comparable<?>> remove(String key) {
             this._bytes -= attr.sizeInBytes();
             this._bytes -= Attribute.sizeInBytes(key);
         }
-        invalidateMetadata();
+        metadata.invalidateMetadata();
 
         return this._getDictionary().remove(key);
     }
@@ -468,62 +498,42 @@ private void _removeAll(Map<String,Attribute<? extends Comparable<?>>> dict, String key) {
         while (iter.hasNext()) {
             Entry<String,Attribute<? extends Comparable<?>>> entry = iter.next();
 
-            if (entry.getKey().equals(key) || entry.getValue() instanceof Document) {
+            if (entry.getKey().equals(key)) {
                 // Remove the Attribute's size
                 this._count -= entry.getValue().size();
                 if (trackSizes) {
                     this._bytes -= entry.getValue().sizeInBytes();
                 }
-                invalidateMetadata();
+                metadata.invalidateMetadata();
 
-                if (entry.getKey().equals(key)) {
-                    iter.remove();
-                    if (trackSizes) {
-                        this._bytes -= Attribute.sizeInBytes(key);
-                    }
-                } else {
-                    // Recursively delete if it's a Document
-                    Document subDocument = (Document) entry.getValue();
-                    subDocument.invalidateMetadata();
-
-                    // Recursive delete
-                    subDocument.removeAll(key);
-
-                    // Re-add what's left from this subDocument after
-                    // the recursive deletion
-                    this._count += subDocument.size();
-                    if (trackSizes) {
-                        this._bytes += subDocument.sizeInBytes();
-                    }
+                iter.remove();
+                if (trackSizes) {
+                    this._bytes -= Attribute.sizeInBytes(key);
                 }
             }
         }
     }
 
-    @Override
     public int size() {
         return _count;
     }
 
-    @Override
     public long sizeInBytes() {
         if (trackSizes) {
-            return super.sizeInBytes(40) + _bytes + (this.dict.size() * 24) + 40;
-            // 32 for local members
+            return metadata.getSizeInBytes() + _bytes + (this.dict.size() * 24) + 88;
             // 24 for TreeMap.Entry overhead, and members
+            // 32 for local members
             // 56 for TreeMap members and overhead
         } else {
             return 1;
         }
     }
 
-    @Override
     public Object getData() {
         return Collections.unmodifiableMap(this.dict);
     }
 
-    @Override
-    public Attribute<?> reduceToKeep() {
+    public Document reduceToKeep() {
         for (Iterator<Entry<String,Attribute<? extends Comparable<?>>>> it = dict.entrySet().iterator();
it.hasNext();) { Entry>> entry = it.next(); Attribute attr = entry.getValue(); @@ -555,7 +565,7 @@ public Attribute reduceToKeep() { it.remove(); } } - invalidateMetadata(); + metadata.invalidateMetadata(); return this; } @@ -564,7 +574,6 @@ public void write(DataOutput out) throws IOException { write(out, false); } - @Override public void write(DataOutput out, boolean reducedResponse) throws IOException { WritableUtils.writeVInt(out, _count); out.writeBoolean(trackSizes); @@ -584,7 +593,12 @@ public void write(DataOutput out, boolean reducedResponse) throws IOException { entry.getValue().write(out); } - WritableUtils.writeVLong(out, shardTimestamp); + WritableUtils.writeVLong(out, metadata.getShardTimestamp()); + + out.writeBoolean(hasTimingMetadata()); + if (hasTimingMetadata()) { + timingMetadata.write(out); + } } @Override @@ -632,9 +646,14 @@ public void readFields(DataInput in) throws IOException { this.dict.put(fieldName, attr); } - this.shardTimestamp = WritableUtils.readVLong(in); + metadata.setShardTimestamp(WritableUtils.readVLong(in)); + + metadata.invalidateMetadata(); - invalidateMetadata(); + if (in.readBoolean()) { + timingMetadata = new TimingMetadata(); + timingMetadata.readFields(in); + } } @SuppressWarnings("unchecked") @@ -724,7 +743,6 @@ public int hashCode() { } @SuppressWarnings("unchecked") - @Override public Collection visit(Collection queryFieldNames, DatawaveJexlContext context) { if (log.isTraceEnabled()) { log.trace("queryFieldNames: " + queryFieldNames); @@ -800,7 +818,6 @@ public void write(Kryo kryo, Output output) { write(kryo, output, false); } - @Override public void write(Kryo kryo, Output output, Boolean reducedResponse) { output.writeInt(this._count, true); output.writeBoolean(trackSizes); @@ -819,7 +836,12 @@ public void write(Kryo kryo, Output output, Boolean reducedResponse) { attribute.write(kryo, output, reducedResponse); } - output.writeLong(this.shardTimestamp); + output.writeLong(this.metadata.getShardTimestamp()); + + output.writeBoolean(hasTimingMetadata()); + if (hasTimingMetadata()) { + timingMetadata.write(kryo, output); + } } @Override @@ -866,24 +888,14 @@ public void read(Kryo kryo, Input input) { this.dict.put(fieldName, attr); } - this.shardTimestamp = input.readLong(); + this.metadata.setShardTimestamp(input.readLong()); - this.invalidateMetadata(); - } + this.metadata.invalidateMetadata(); - @Override - public Document copy() { - Document d = new Document(this.getMetadata(), this.isToKeep(), trackSizes); - - // _count will be set via put operations - Set>>> entries = this._getDictionary().entrySet(); - for (Entry>> entry : entries) { - d.put(entry.getKey(), (Attribute) entry.getValue().copy()); + if (input.readBoolean()) { + this.timingMetadata = new TimingMetadata(); + this.timingMetadata.read(kryo, input); } - - d.shardTimestamp = this.shardTimestamp; - - return d; } public void setIntermediateResult(boolean intermediateResult) { @@ -894,4 +906,19 @@ public boolean isIntermediateResult() { return intermediateResult; } + public TimingMetadata getTimingMetadata() { + return timingMetadata; + } + + public void setTimingMetadata(TimingMetadata timingMetadata) { + this.timingMetadata = timingMetadata; + } + + public void clearTimingMetadata() { + this.timingMetadata = null; + } + + public boolean hasTimingMetadata() { + return this.timingMetadata != null; + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Metadata.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Metadata.java deleted 
file mode 100644
index 32766ba9dc3..00000000000
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Metadata.java
+++ /dev/null
@@ -1,3 +0,0 @@
-package datawave.query.attributes;
-
-public class Metadata extends Document {}
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/TimingMetadata.java b/warehouse/query-core/src/main/java/datawave/query/attributes/TimingMetadata.java
index 97d851bbaad..f1763badb6c 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/TimingMetadata.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/TimingMetadata.java
@@ -1,13 +1,27 @@
 package datawave.query.attributes;
 
-import java.util.LinkedHashMap;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Collections;
 import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.commons.lang3.builder.CompareToBuilder;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.KryoSerializable;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
 
 /**
  * Holds timing information for query iterator next, source, seek, and yield counts.
  */
-public class TimingMetadata extends Metadata {
-
+public class TimingMetadata implements WritableComparable<TimingMetadata>, KryoSerializable, Comparable<TimingMetadata>, Serializable {
+    private static final long serialVersionUID = -1;
     private static final String NEXT_COUNT = "NEXT_COUNT";
     private static final String SOURCE_COUNT = "SOURCE_COUNT";
     private static final String SEEK_COUNT = "SEEK_COUNT";
@@ -15,94 +29,170 @@ public class TimingMetadata extends Metadata {
     private static final String STAGE_TIMERS = "STAGE_TIMERS";
     private static final String HOST = "HOST";
 
-    public long getNextCount() {
-        Numeric numericValue = (Numeric) get(NEXT_COUNT);
-        if (numericValue != null) {
-            return ((Number) numericValue.getData()).longValue();
-        } else {
-            return 0;
+    private String host;
+    private TreeMap<String,Long> metadata = new TreeMap<>();
+    private TreeMap<String,Long> stageTimers = new TreeMap<>();
+
+    public long get(String name) {
+        if (metadata.containsKey(name)) {
+            return metadata.get(name).longValue();
         }
+        return 0;
     }
 
-    public void setNextCount(long nextCount) {
-        put(NEXT_COUNT, new Numeric(nextCount, this.getMetadata(), this.isToKeep()));
+    public void put(String name, long value) {
+        metadata.put(name, Long.valueOf(value));
+    }
+
+    public long getNextCount() {
+        return get(NEXT_COUNT);
+    }
+
+    public void setNextCount(long nextCount) {
+        put(NEXT_COUNT, nextCount);
     }
 
     public long getSourceCount() {
-        Numeric numericValue = (Numeric) get(SOURCE_COUNT);
-        if (numericValue != null) {
-            return ((Number) numericValue.getData()).longValue();
-        } else {
-            return 0;
-        }
+        return get(SOURCE_COUNT);
     }
 
     public void setSourceCount(long sourceCount) {
-        put(SOURCE_COUNT, new Numeric(sourceCount, this.getMetadata(), this.isToKeep()));
+        put(SOURCE_COUNT, sourceCount);
     }
 
     public long getSeekCount() {
-        Numeric numericValue = (Numeric) get(SEEK_COUNT);
-        if (numericValue != null) {
-            return ((Number) numericValue.getData()).longValue();
-        } else {
-            return 0;
-        }
+        return get(SEEK_COUNT);
     }
 
     public void setSeekCount(long seekCount) {
-        put(SEEK_COUNT, new Numeric(seekCount, this.getMetadata(), this.isToKeep()));
+        put(SEEK_COUNT, seekCount);
     }
 
     public long getYieldCount() {
-        Numeric numericValue = (Numeric) get(YIELD_COUNT);
-        if (numericValue != null) {
-            return ((Number) numericValue.getData()).longValue();
-        } else {
-            return 0L;
-        }
+        return get(YIELD_COUNT);
     }
 
     public void setYieldCount(long yieldCount) {
-        put(YIELD_COUNT, new Numeric(yieldCount, this.getMetadata(), this.isToKeep()));
+        put(YIELD_COUNT, yieldCount);
     }
 
-    public void addStageTimer(String stageName, Numeric elapsed) {
-        Metadata stageTimers = (Metadata) get(STAGE_TIMERS);
-        if (stageTimers == null) {
-            stageTimers = new Metadata();
-            put(STAGE_TIMERS, stageTimers);
-        }
-        stageTimers.put(stageName, elapsed);
-        put(STAGE_TIMERS, stageTimers);
+    public void addStageTimer(String stageName, long elapsed) {
+        stageTimers.put(stageName, Long.valueOf(elapsed));
     }
 
     public Map<String,Long> getStageTimers() {
-        Map<String,Long> stageTimers = new LinkedHashMap<>();
-        Attribute<?> stageTimersAttribute = get(STAGE_TIMERS);
-        if (stageTimersAttribute instanceof Metadata) {
-            Metadata stageTimersMetadata = (Metadata) stageTimersAttribute;
-            for (Map.Entry<String,Attribute<? extends Comparable<?>>> entry : stageTimersMetadata.entrySet()) {
-                if (entry.getValue() instanceof Numeric) {
-                    Number value = (Number) entry.getValue().getData();
-                    stageTimers.put(entry.getKey(), value.longValue());
-                }
-            }
-        }
-        return stageTimers;
+        return Collections.unmodifiableMap(stageTimers);
    }
 
     public String getHost() {
-        Attribute<?> hostAttribute = get(HOST);
-        if (hostAttribute instanceof Content) {
-            return ((Content) hostAttribute).getContent();
-        } else {
-            return null;
-        }
+        return host;
     }
 
     public void setHost(String host) {
-        put(HOST, new Content(host, this.getMetadata(), this.isToKeep()));
+        this.host = host;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o instanceof TimingMetadata) {
+            return (compareTo((TimingMetadata) o) == 0);
+        }
+        return false;
+    }
+
+    @Override
+    public int hashCode() {
+        return metadata.hashCode() + stageTimers.hashCode() + (host == null ? 0 : host.hashCode());
+    }
+
+    @Override
+    public int compareTo(TimingMetadata o) {
+        // TreeMap is not Comparable, so compare the deterministic (sorted) string forms of the maps
+        return new CompareToBuilder().append(metadata.toString(), o.metadata.toString()).append(stageTimers.toString(), o.stageTimers.toString())
+                        .append(host, o.host).toComparison();
+    }
+
+    @Override
+    public void write(Kryo kryo, Output output) {
+        write(metadata, kryo, output);
+        write(stageTimers, kryo, output);
+        output.writeString(host == null ? "" : host);
    }
+
+    public void write(Map<String,Long> map, Kryo kryo, Output output) {
+        output.writeInt(map.size(), true);
+
+        for (Map.Entry<String,Long> entry : map.entrySet()) {
+            // Write out the field name
+            // writeAscii fails to be read correctly if the value has only one character
+            // need to use writeString here
+            output.writeString(entry.getKey());
+            output.writeLong(entry.getValue());
+        }
+    }
+
+    @Override
+    public void read(Kryo kryo, Input input) {
+        metadata = readMap(kryo, input);
+        stageTimers = readMap(kryo, input);
+        host = input.readString();
+        if (host.isEmpty()) {
+            host = null;
+        }
+    }
+
+    private TreeMap<String,Long> readMap(Kryo kryo, Input input) {
+        int numAttrs = input.readInt(true);
+        TreeMap<String,Long> map = new TreeMap<>();
+
+        for (int i = 0; i < numAttrs; i++) {
+            // Get the fieldName
+            String fieldName = input.readString();
+            long fieldValue = input.readLong();
+            map.put(fieldName, fieldValue);
+        }
+        return map;
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+        write(metadata, out);
+        write(stageTimers, out);
+        WritableUtils.writeString(out, host == null ? "" : host);
"" : host); + } + + private void write(Map metadata, DataOutput out) throws IOException { + // Write out the number of metadata entries we're going to store + WritableUtils.writeVInt(out, this.metadata.size()); + + for (Map.Entry entry : this.metadata.entrySet()) { + // Write out the field name + WritableUtils.writeString(out, entry.getKey()); + + // Write out the value + WritableUtils.writeVLong(out, entry.getValue()); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + metadata = readMap(in); + stageTimers = readMap(in); + host = WritableUtils.readString(in); + if (host.isEmpty()) { + host = null; + } + } + + private TreeMap readMap(DataInput in) throws IOException { + int numAttrs = WritableUtils.readVInt(in); + TreeMap map = new TreeMap<>(); + + for (int i = 0; i < numAttrs; i++) { + // Get the fieldName + String fieldName = WritableUtils.readString(in); + long fieldValue = WritableUtils.readVLong(in); + map.put(fieldName, fieldValue); + } + return map; + } + } diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java index 2ac3d8a4614..4ce116aa67c 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupingUtil.java @@ -313,7 +313,7 @@ private int compareMetadataRow(Attribute other) { return -1; } } else if (this.isMetadataSet()) { - return this.metadata.compareRow(other.getMetadata().getRow()); + return this.getMetadata().compareRow(other.getMetadata().getRow()); } else { return 0; } diff --git a/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java b/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java index 2ad0656f7ee..f6732ef31b2 100644 --- a/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java +++ b/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java @@ -83,102 +83,24 @@ public Projection getProjection() { */ @Override public Entry apply(Entry from) { - Document returnDoc = trim(from.getValue()); - return Maps.immutableEntry(from.getKey(), returnDoc); + trim(from.getValue()); + return Maps.immutableEntry(from.getKey(), from.getValue()); } - private Document trim(Document d) { + private void trim(Document d) { if (log.isTraceEnabled()) { log.trace("Applying projection " + projection + " to " + d); } Map>> dict = d.getDictionary(); - Document newDoc = new Document(); for (Entry>> entry : dict.entrySet()) { String fieldName = entry.getKey(); Attribute attr = entry.getValue(); - - if (projection.apply(fieldName)) { - - // If the projection is configured to exclude, we must fully traverse the subtree - if (projection.isUseExcludes()) { - if (attr instanceof Document) { - Document newSubDoc = trim((Document) attr); - - if (0 < newSubDoc.size()) { - newDoc.put(fieldName, newSubDoc.copy(), this.includeGroupingContext, this.reducedResponse); - } - - continue; - } else if (attr instanceof Attributes) { - Attributes subAttrs = trim((Attributes) attr, fieldName); - - if (0 < subAttrs.size()) { - newDoc.put(fieldName, subAttrs.copy(), this.includeGroupingContext, this.reducedResponse); - } - - continue; - } - } - - // We just want to add this subtree - newDoc.put(fieldName, (Attribute) attr.copy(), this.includeGroupingContext, this.reducedResponse); - - } else if (!projection.isUseExcludes()) { - // 
-                // initially retain a parent whose children do not match the includes,
-                // i.e., a child attribute does not match the includes
-                if (attr instanceof Document) {
-                    Document newSubDoc = trim((Document) attr);
-
-                    if (0 < newSubDoc.size()) {
-                        newDoc.put(fieldName, newSubDoc.copy(), this.includeGroupingContext, this.reducedResponse);
-                    }
-                } else if (attr instanceof Attributes) {
-                    // Since Document instances can be nested under attributes and vice-versa
-                    // all the way down, we need to pass along the fieldName so that when we
-                    // have come up with a nested document it can be evaluated by its own name
-                    Attributes subAttrs = trim((Attributes) attr, fieldName);
-
-                    if (0 < subAttrs.size()) {
-                        newDoc.put(fieldName, subAttrs.copy(), this.includeGroupingContext, this.reducedResponse);
-                    }
-                }
-            }
-        }
-
-        if (log.isTraceEnabled()) {
-            log.trace("Document after projection: " + newDoc);
-        }
-
-        return newDoc;
-    }
-
-    private Attributes trim(Attributes attrs, String fieldName) {
-        Attributes newAttrs = new Attributes(attrs.isToKeep(), trackSizes);
-        for (Attribute<? extends Comparable<?>> attr : attrs.getAttributes()) {
-            if (attr instanceof Document) {
-                Document newAttr = trim((Document) attr);
-
-                if (0 < newAttr.size()) {
-                    newAttrs.add(newAttr);
-                }
-            } else if (attr instanceof Attributes) {
-                Attributes newAttr = trim((Attributes) attr, fieldName);
-
-                if (0 < newAttr.size()) {
-                    newAttrs.add(newAttr);
-                }
-            } else if (projection.apply(fieldName)) {
-                // If we're trimming an Attributes and find an Attribute that
-                // doesn't nest more Attribute's (Document, Attributes), otherwise,
-                // we can retain the "singular" Attribute's (Content, Numeric, etc)
-                // if it applies
-                newAttrs.add(attr);
-            }
+            attr.setToKeep(attr.isToKeep() && projection.apply(fieldName));
         }
 
-        return newAttrs;
+        // reduce the document to those to keep
+        d.reduceToKeep();
     }
 }
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/LogTiming.java b/warehouse/query-core/src/main/java/datawave/query/function/LogTiming.java
index eed51322de1..0c50e74af4f 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/LogTiming.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/LogTiming.java
@@ -19,7 +19,6 @@
  */
 public class LogTiming implements Function<Entry<Key,Document>,Entry<Key,Document>> {
 
-    public static final String TIMING_METADATA = "TIMING_METADATA";
     protected QuerySpan spanRunner;
     private static String host = null;
     private static Logger log = Logger.getLogger(QuerySpan.class);
@@ -63,12 +62,12 @@ public static void addTimingMetadata(Document document, QuerySpan querySpan) {
                 double threshold = totalStageTimers * 0.05;
                 for (Entry<String,Long> e : querySpan.getStageTimers().entrySet()) {
                     if (e.getValue().longValue() >= threshold) {
-                        timingMetadata.addStageTimer(e.getKey(), new Numeric(e.getValue(), document.getMetadata(), document.isToKeep()));
+                        timingMetadata.addStageTimer(e.getKey(), e.getValue());
                     }
                 }
                 querySpan.reset();
             }
-            document.put(TIMING_METADATA, timingMetadata);
+            document.setTimingMetadata(timingMetadata);
         }
     }
 }
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
index bad9ebca615..e2f2d6b7f50 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
@@ -89,7 +89,6 @@
 import datawave.query.util.TypeMetadata;
 import
datawave.query.util.sortedset.FileSortedSet; import datawave.util.StringUtils; -import datawave.util.UniversalSet; /** * QueryOptions are set on the iterators. @@ -1286,17 +1285,15 @@ public boolean validateOptions(Map options) { if (options.containsKey(PROJECTION_FIELDS)) { this.projectResults = true; - this.useWhiteListedFields = true; String fieldList = options.get(PROJECTION_FIELDS); - if (fieldList != null && EVERYTHING.equals(fieldList)) { - this.whiteListedFields = UniversalSet.instance(); - } else if (fieldList != null && !fieldList.trim().equals("")) { + if (fieldList != null && !EVERYTHING.equals(fieldList) && !fieldList.trim().equals("")) { + this.useWhiteListedFields = true; this.whiteListedFields = new HashSet<>(); Collections.addAll(this.whiteListedFields, StringUtils.split(fieldList, Constants.PARAM_VALUE_SEP)); - } - if (options.containsKey(HIT_LIST) && Boolean.parseBoolean(options.get(HIT_LIST))) { - this.whiteListedFields.add(JexlEvaluation.HIT_TERM_FIELD); + if (options.containsKey(HIT_LIST) && Boolean.parseBoolean(options.get(HIT_LIST))) { + this.whiteListedFields.add(JexlEvaluation.HIT_TERM_FIELD); + } } } @@ -1316,6 +1313,9 @@ public boolean validateOptions(Map options) { } } + // if we never actually set an allow or deny list, then no need to project results + this.projectResults = this.useBlackListedFields || this.useWhiteListedFields; + this.evaluationFilter = null; this.getDocumentKey = GetStartKey.instance(); this.mustUseFieldIndex = false; diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/EventDataQueryExpressionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/EventDataQueryExpressionVisitor.java index f23205e7a6a..68c1a0fcd67 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/EventDataQueryExpressionVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/EventDataQueryExpressionVisitor.java @@ -33,8 +33,8 @@ import datawave.data.type.NoOpType; import datawave.data.type.Type; import datawave.query.attributes.Attribute; -import datawave.query.attributes.AttributeBag; import datawave.query.attributes.AttributeFactory; +import datawave.query.attributes.Attributes; import datawave.query.attributes.TypeAttribute; import datawave.query.data.parsers.DatawaveKey; import datawave.query.jexl.JexlASTHelper; @@ -535,8 +535,8 @@ public static Set extractTypes(AttributeFactory attrFactory, String fieldN TypeAttribute dta = (TypeAttribute) attr; Type t = dta.getType(); types.add(t); - } else if (AttributeBag.class.isAssignableFrom(attr.getClass())) { - attrQueue.addAll(((AttributeBag) attr).getAttributes()); + } else if (Attributes.class.isAssignableFrom(attr.getClass())) { + attrQueue.addAll(((Attributes) attr).getAttributes()); } else { log.warn("Unexpected attribute type when extracting type: " + attr.getClass().getCanonicalName()); } diff --git a/warehouse/query-core/src/main/java/datawave/query/predicate/EmptyDocumentFilter.java b/warehouse/query-core/src/main/java/datawave/query/predicate/EmptyDocumentFilter.java index 0cfd4e11eb7..7056c2fcf17 100644 --- a/warehouse/query-core/src/main/java/datawave/query/predicate/EmptyDocumentFilter.java +++ b/warehouse/query-core/src/main/java/datawave/query/predicate/EmptyDocumentFilter.java @@ -21,7 +21,7 @@ public class EmptyDocumentFilter implements Predicate> { @Override public boolean apply(Entry input) { - boolean nonempty = (input.getValue().size() > 0); + boolean nonempty = (!input.getValue().isEmpty() || 
input.getValue().hasTimingMetadata()); if (log.isTraceEnabled()) log.trace("Testing exclusion" + input.getValue()); diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransformerSupport.java b/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransformerSupport.java index 07da72a2614..aef3bdaa49d 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransformerSupport.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransformerSupport.java @@ -35,7 +35,6 @@ import datawave.query.cardinality.CardinalityConfiguration; import datawave.query.cardinality.CardinalityRecord; import datawave.query.function.JexlEvaluation; -import datawave.query.function.LogTiming; import datawave.query.function.deserializer.DocumentDeserializer; import datawave.query.iterator.QueryOptions; import datawave.query.iterator.profile.QuerySpan; @@ -204,10 +203,6 @@ protected Collection> buildDocumentFields(Key documentKey, String d Attribute attribute = null; for (Entry>> data : documentData.entrySet()) { - // skip metadata fields - if (data.getValue() instanceof datawave.query.attributes.Metadata) { - continue; - } fn = (documentName == null) ? data.getKey() : documentName; // Some fields were added by the queryPlanner. This will ensure that the original projectFields and blacklistFields are honored @@ -226,10 +221,8 @@ protected Collection> buildDocumentFields(Key documentKey, String d protected void extractMetrics(Document document, Key documentKey) { - Map>> dictionary = document.getDictionary(); - Attribute> timingMetadataAttribute = dictionary.get(LogTiming.TIMING_METADATA); - if (timingMetadataAttribute != null && timingMetadataAttribute instanceof TimingMetadata) { - TimingMetadata timingMetadata = (TimingMetadata) timingMetadataAttribute; + if (document.hasTimingMetadata()) { + TimingMetadata timingMetadata = document.getTimingMetadata(); long currentSourceCount = timingMetadata.getSourceCount(); long currentNextCount = timingMetadata.getNextCount(); long currentSeekCount = timingMetadata.getSeekCount(); @@ -258,7 +251,8 @@ protected void extractMetrics(Document document, Key documentKey) { log.info(sb.toString()); } } - if (dictionary.size() == 1) { + + if (document.isEmpty()) { // this document contained only timing metadata throw new EmptyObjectException(); } diff --git a/warehouse/query-core/src/test/java/datawave/query/QueryAuthsTest.java b/warehouse/query-core/src/test/java/datawave/query/QueryAuthsTest.java index e9b74a41c6a..ac99edaa1bd 100644 --- a/warehouse/query-core/src/test/java/datawave/query/QueryAuthsTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/QueryAuthsTest.java @@ -25,7 +25,6 @@ import datawave.query.attributes.Attribute; import datawave.query.attributes.Attributes; import datawave.query.attributes.Document; -import datawave.query.attributes.TimingMetadata; import datawave.query.testframework.AbstractFunctionalQuery; import datawave.query.testframework.AccumuloSetup; import datawave.query.testframework.BaseRawData; @@ -164,10 +163,6 @@ public VisibilityChecker(String... 
visibilities) { @Override public void assertValid(Document doc) { - if (doc instanceof TimingMetadata) { - return; - } - Map>> dict = doc.getDictionary(); for (Map.Entry>> entry : dict.entrySet()) { @@ -179,9 +174,7 @@ public void assertValid(Document doc) { Attribute attr = entry.getValue(); - if (attr instanceof Document) { - assertValid((Document) attr); - } else if (attr instanceof Attributes) { + if (attr instanceof Attributes) { assertValid((Attributes) attr, fieldName); } else { assertValid(attr, fieldName); @@ -192,9 +185,7 @@ public void assertValid(Document doc) { protected void assertValid(Attributes attrs, String fieldName) { for (Attribute> attr : attrs.getAttributes()) { - if (attr instanceof Document) { - assertValid((Document) attr); - } else if (attr instanceof Attributes) { + if (attr instanceof Attributes) { assertValid((Attributes) attr, fieldName); } else { assertValid(attr, fieldName); diff --git a/warehouse/query-core/src/test/java/datawave/query/function/DocumentProjectionTest.java b/warehouse/query-core/src/test/java/datawave/query/function/DocumentProjectionTest.java index 32921395ede..e8dc7862115 100644 --- a/warehouse/query-core/src/test/java/datawave/query/function/DocumentProjectionTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/function/DocumentProjectionTest.java @@ -25,6 +25,7 @@ public class DocumentProjectionTest { private final ColumnVisibility cv = new ColumnVisibility("PUBLIC"); private Document d; + private int ALL_FIELDS; @Before public void setup() { @@ -32,27 +33,23 @@ public void setup() { d.put("FOO", new Content("foofighter", new Key("row", "dt\0uid", "", cv, -1), true)); d.put("ID", new Numeric(123, new Key("row", "dt\0uid", "", cv, -1), true)); - Document primes = new Document(); - primes.put("PRIME", new Numeric(2, new Key("row", "dt\0uid", "", cv, -1), true)); - primes.put("PRIME", new Numeric(3, new Key("row", "dt\0uid", "", cv, -1), true)); - primes.put("PRIME", new Numeric(5, new Key("row", "dt\0uid", "", cv, -1), true)); - primes.put("PRIME", new Numeric(7, new Key("row", "dt\0uid", "", cv, -1), true)); - primes.put("PRIME", new Numeric(11, new Key("row", "dt\0uid", "", cv, -1), true)); + Attributes primes = new Attributes(true); + primes.add(new Numeric(2, new Key("row", "dt\0uid", "", cv, -1), true)); + primes.add(new Numeric(3, new Key("row", "dt\0uid", "", cv, -1), true)); + primes.add(new Numeric(5, new Key("row", "dt\0uid", "", cv, -1), true)); + primes.add(new Numeric(7, new Key("row", "dt\0uid", "", cv, -1), true)); + primes.add(new Numeric(11, new Key("row", "dt\0uid", "", cv, -1), true)); d.put("PRIMES", primes); - Attributes others = new Attributes(true); + d.put("FOO.1", new Content("bar", new Key("row", "dt\0uid", "", cv, -1), true), true, false); + d.put("ID.1", new Numeric(456, new Key("row", "dt\0uid", "", cv, -1), true), true, false); - Document sub1 = new Document(); - sub1.put("FOO.1", new Content("bar", new Key("row", "dt\0uid", "", cv, -1), true)); - sub1.put("ID.1", new Numeric(456, new Key("row", "dt\0uid", "", cv, -1), true)); - others.add(sub1); + d.put("FOO.2", new Content("baz", new Key("row", "dt\0uid", "", cv, -1), true), true, false); + d.put("ID.2", new Numeric(789, new Key("row", "dt\0uid", "", cv, -1), true), true, false); - Document sub2 = new Document(); - sub2.put("FOO.2", new Content("baz", new Key("row", "dt\0uid", "", cv, -1), true)); - sub2.put("ID.2", new Numeric(789, new Key("row", "dt\0uid", "", cv, -1), true)); - others.add(sub2); + d.put("PRIMARY_ID", new Numeric(789, new 
Key("row", "dt\0uid", "", cv, -1), true), true, false); - d.put("OTHERS", others); // others' attributes have grouping context + ALL_FIELDS = d.getDictionary().size() + primes.size() - 1; } @Test @@ -61,9 +58,9 @@ public void testIncludesSingleField() { DocumentProjection projection = new DocumentProjection(); projection.setIncludes(includes); - assertEquals(11, d.size()); + assertEquals(ALL_FIELDS, d.size()); Map.Entry result = projection.apply(Maps.immutableEntry(new Key(), d)); - assertEquals(4, result.getValue().size()); + assertEquals(0, result.getValue().size()); } @Test @@ -72,7 +69,7 @@ public void testIncludesTwoFields() { DocumentProjection projection = new DocumentProjection(); projection.setIncludes(includes); - assertEquals(11, d.size()); + assertEquals(ALL_FIELDS, d.size()); Map.Entry result = projection.apply(Maps.immutableEntry(new Key(), d)); assertEquals(6, result.getValue().size()); } @@ -82,33 +79,20 @@ public void testIncludesNoFieldsSpecified() { DocumentProjection projection = new DocumentProjection(); projection.setIncludes(Collections.emptySet()); - assertEquals(11, d.size()); + assertEquals(ALL_FIELDS, d.size()); Map.Entry result = projection.apply(Maps.immutableEntry(new Key(), d)); assertEquals(0, result.getValue().size()); } @Test public void testIncludesAllFields() { - Set includes = Sets.newHashSet("FOO", "ID", "PRIMES", "PRIME", "CHILDREN"); + Set includes = Sets.newHashSet("FOO", "ID", "PRIMES", "PRIME", "CHILDREN", "PRIMARY_ID"); DocumentProjection projection = new DocumentProjection(); projection.setIncludes(includes); - assertEquals(11, d.size()); + assertEquals(ALL_FIELDS, d.size()); Map.Entry result = projection.apply(Maps.immutableEntry(new Key(), d)); - assertEquals(11, result.getValue().size()); - } - - // even though the sub-document field is not on the includes list, all the fields in the - // sub-document are. Therefore, we keep the child document. 
@@ -117,53 +101,31 @@ public void testExcludeSingleField() {
         DocumentProjection projection = new DocumentProjection();
         projection.setExcludes(excludes);
 
-        assertEquals(11, d.size());
+        assertEquals(ALL_FIELDS, d.size());
         Map.Entry<Key,Document> result = projection.apply(Maps.immutableEntry(new Key(), d));
-        assertEquals(8, result.getValue().size());
-    }
-
-    @Test
-    public void testExcludeChildDocumentField() {
-        Set<String> excludes = Sets.newHashSet("CHILDREN");
-        DocumentProjection projection = new DocumentProjection();
-        projection.setExcludes(excludes);
-
-        assertEquals(11, d.size());
-        Map.Entry<Key,Document> result = projection.apply(Maps.immutableEntry(new Key(), d));
-        assertEquals(11, result.getValue().size());
+        assertEquals(ALL_FIELDS - 3, result.getValue().size());
     }
 
     @Test
     public void testExcludeAllFields() {
-        Set<String> excludes = Sets.newHashSet("FOO", "ID", "PRIMES", "PRIME", "CHILDREN");
+        Set<String> excludes = Sets.newHashSet("FOO", "ID", "PRIMES", "PRIME", "CHILDREN", "PRIMARY_ID");
         DocumentProjection projection = new DocumentProjection();
         projection.setExcludes(excludes);
 
-        assertEquals(11, d.size());
+        assertEquals(ALL_FIELDS, d.size());
         Map.Entry<Key,Document> result = projection.apply(Maps.immutableEntry(new Key(), d));
         assertEquals(0, result.getValue().size());
     }
 
-    @Test
-    public void testExcludeNestedField() {
-        Set<String> excludes = Sets.newHashSet("PRIME");
-        DocumentProjection projection = new DocumentProjection();
-        projection.setExcludes(excludes);
-
-        assertEquals(11, d.size());
-        Map.Entry<Key,Document> result = projection.apply(Maps.immutableEntry(new Key(), d));
-        assertEquals(6, result.getValue().size());
-    }
-
     @Test
     public void testConfirmFieldExcluded() {
         Set<String> excludes = Sets.newHashSet("PRIMES");
         DocumentProjection projection = new DocumentProjection();
         projection.setExcludes(excludes);
 
-        assertEquals(11, d.size());
+        assertEquals(ALL_FIELDS, d.size());
         Map.Entry<Key,Document> result = projection.apply(Maps.immutableEntry(new Key(), d));
-        assertEquals(6, result.getValue().size());
+        assertEquals(ALL_FIELDS - 5, result.getValue().size());
         assertFalse(result.getValue().containsKey("PRIMES")); // key no longer exists
     }
 
@@ -173,9 +135,9 @@ public void testConfirmGroupingContext() {
         DocumentProjection projection = new DocumentProjection();
         projection.setExcludes(excludes);
 
-        assertEquals(11, d.size());
+        assertEquals(ALL_FIELDS, d.size());
         Map.Entry<Key,Document> result = projection.apply(Maps.immutableEntry(new Key(), d));
-        assertEquals(8, result.getValue().size());
+        assertEquals(ALL_FIELDS - 3, result.getValue().size());
         assertFalse(result.getValue().containsKey("FOO")); // key no longer exists
     }
 
@@ -210,19 +172,12 @@ private Document buildExampleDocument() {
         d.put("NAME", new Content("bob", new Key("row", "dt\0uid", "", cv, -1), true));
         d.put("AGE", new Numeric(40, new Key("row", "dt\0uid", "", cv, -1), true));
 
-        Attributes children = new Attributes(true);
-
-        Document frank = new Document();
-        frank.put("NAME", new Content("frank", new Key("row", "dt\0uid", "", cv, -1), true));
-        frank.put("AGE", new Numeric(12, new Key("row", "dt\0uid", "", cv, -1), true));
-        children.add(frank);
+        d.put("NAME.1", new Content("frank", new Key("row", "dt\0uid", "", cv, -1), true), true, false);
+        d.put("AGE.1", new Numeric(12, new Key("row", "dt\0uid", "", cv, -1), true), true, false);
 
-        Document sally = new Document();
-        sally.put("NAME", new Content("sally", new Key("row", "dt\0uid", "", cv, -1), true));
-        sally.put("AGE", new Numeric(10, new Key("row", "dt\0uid", "", cv, -1), true));
-        children.add(sally);
+        d.put("NAME.2", new Content("sally", new Key("row", "dt\0uid", "", cv, -1), true), true, false);
+        d.put("AGE.2", new Numeric(10, new Key("row", "dt\0uid", "", cv, -1), true), true, false);
 
-        d.put("CHILDREN", children); // others' attributes have grouping context
         return d;
     }
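The rewritten fixtures lean on the four-argument Document.put overload seen throughout this file. A small sketch of the difference; the trailing booleans are assumed here to be includeGroupingContext and reducedResponse flags, which the patch itself does not spell out:

    Key k = new Key("row", "dt\0uid", "", cv, -1);
    Document d = new Document();
    d.put("NAME", new Content("bob", k, true));                  // plain top-level field
    // grouping-context sibling: the ".1" suffix replaces the old nested Document
    d.put("NAME.1", new Content("frank", k, true), true, false);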
Key("row", "dt\0uid", "", cv, -1), true), true, false); + d.put("AGE.1", new Numeric(ALL_FIELDS, new Key("row", "dt\0uid", "", cv, -1), true), true, false); - Document sally = new Document(); - sally.put("NAME", new Content("sally", new Key("row", "dt\0uid", "", cv, -1), true)); - sally.put("AGE", new Numeric(10, new Key("row", "dt\0uid", "", cv, -1), true)); - children.add(sally); + d.put("NAME.2", new Content("sally", new Key("row", "dt\0uid", "", cv, -1), true), true, false); + d.put("AGE.2", new Numeric(10, new Key("row", "dt\0uid", "", cv, -1), true), true, false); - d.put("CHILDREN", children); // others' attributes have grouping context return d; } diff --git a/warehouse/query-core/src/test/java/datawave/query/testframework/QueryLogicTestHarness.java b/warehouse/query-core/src/test/java/datawave/query/testframework/QueryLogicTestHarness.java index 3cdf08174bc..d60ee17bbaf 100644 --- a/warehouse/query-core/src/test/java/datawave/query/testframework/QueryLogicTestHarness.java +++ b/warehouse/query-core/src/test/java/datawave/query/testframework/QueryLogicTestHarness.java @@ -17,7 +17,6 @@ import datawave.query.attributes.AttributeFactory; import datawave.query.attributes.Attributes; import datawave.query.attributes.Document; -import datawave.query.attributes.TimingMetadata; import datawave.query.attributes.TypeAttribute; import datawave.query.function.deserializer.KryoDocumentDeserializer; import datawave.query.iterator.profile.FinalDocumentTrackingIterator; @@ -89,9 +88,7 @@ public void assertLogicResults(BaseQueryLogic> logic, Colle // AttributeFactory class int count = 0; for (Attribute> attribute : document.getAttributes()) { - if (attribute instanceof TimingMetadata) { - // ignore - } else if (attribute instanceof Attributes) { + if (attribute instanceof Attributes) { Attributes attrs = (Attributes) attribute; Collection> types = new HashSet<>(); for (Attribute> attr : attrs.getAttributes()) { diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java index 6992f2c19ad..29fbffd2d50 100644 --- a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java @@ -46,7 +46,6 @@ import datawave.query.attributes.TimingMetadata; import datawave.query.attributes.UniqueFields; import datawave.query.attributes.UniqueGranularity; -import datawave.query.function.LogTiming; import datawave.query.jexl.JexlASTHelper; public class UniqueTransformTest { @@ -303,7 +302,7 @@ public void testUniquenessWithTimingMetric() { TimingMetadata timingMetadata = new TimingMetadata(); timingMetadata.setNextCount(5l); - givenInputDocument(MARKER_STRING).withKeyValue(LogTiming.TIMING_METADATA, timingMetadata.toString()).isExpectedToBeUnique(); + givenInputDocument(MARKER_STRING).withTimingMetadata(timingMetadata).isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR0", randomValues.get(0)).isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR1", randomValues.get(1)).isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR1", randomValues.get(2)); @@ -607,6 +606,11 @@ InputDocumentBuilder isExpectedToBeUnique() { expectedUniqueDocuments.add(document); return this; } + + InputDocumentBuilder withTimingMetadata(TimingMetadata metadata) { + document.setTimingMetadata(metadata); + return this; + } } private class ExpectedOrderedFieldValuesBuilder {