Skip to content

Commit

Permalink
finish multidim range test + cleanup tests
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Alfonsi <[email protected]>
  • Loading branch information
Peter Alfonsi committed Dec 6, 2024
1 parent 69faca3 commit 859d30f
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 120 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ public byte[] serialize(Query object) {
switch (classByte) {
case POINT_RANGE_QUERY_BYTE:
serializePointRangeQuery(os, object);
break;
case DUMMY_QUERY_BYTE:
serializeDummyQuery(os, object);
/*default:
throw new UnsupportedOperationException("Invalid class byte");*/
break;
/*default:
throw new UnsupportedOperationException("Invalid class byte");*/
}
} catch (IOException e) {
throw new OpenSearchException("Error serializing query: ", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.opensearch.env.NodeEnvironment;
import org.opensearch.index.cache.query.QueryCacheStats;
import org.opensearch.indices.OpenSearchQueryCache;
import org.opensearch.search.aggregations.bucket.composite.CompositeKey;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -252,13 +253,7 @@ private void putIfAbsent(Query query, CacheAndCount cached, IndexReader.CacheHel
// TODO: This is a lot simpler than the LRUQC one - for now I'm not fully handling stats, and I dont have to deal with the singleton
// LRU stuff
final IndexReader.CacheKey key = cacheHelper.getKey();
LeafCache leafCache = outerCache.get(key);
if (leafCache == null) {
int nextId = nextLeafCacheId.incrementAndGet();
leafCache = new LeafCache(nextId, innerCache);
final LeafCache previous = outerCache.put(key, leafCache);
assert previous == null;
}
LeafCache leafCache = outerCache.computeIfAbsent(key, (k) -> new LeafCache(nextLeafCacheId.incrementAndGet(), innerCache));
leafCache.putIfAbsent(query, cached, getShardIdName(cacheHelper.getKey()));
// We also dont handle eviction; the TSC does.
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
package org.opensearch.cache.common.query;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
Expand Down Expand Up @@ -38,7 +36,6 @@
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.env.NodeEnvironment;
import org.opensearch.index.cache.query.QueryCacheStats;
import org.opensearch.indices.IndicesQueryCache;
import org.opensearch.node.Node;
import org.opensearch.plugins.CachePlugin;
import org.opensearch.plugins.Plugin;
Expand Down Expand Up @@ -100,27 +97,6 @@ public void testBasics_WithOpenSearchOnHeapCache() throws IOException {

testBasicsDummyQuery(cache, s, shard);

// TODO: not implementing shard closing logic for the PoC
/*IOUtils.close(r, dir);
// got emptied, but no changes to other metrics
stats = cache.getStats(shard);
assertEquals(0L, stats.getCacheSize());
assertEquals(20L, stats.getCacheCount());
assertEquals(1L, stats.getHitCount());
assertEquals(40L, stats.getMissCount());
assertTrue(stats.getMemorySizeInBytes() > 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);
cache.onClose(shard);
// forgot everything
stats = cache.getStats(shard);
assertEquals(0L, stats.getCacheSize());
assertEquals(0L, stats.getCacheCount());
assertEquals(0L, stats.getHitCount());
assertEquals(0L, stats.getMissCount());
assertTrue(stats.getMemorySizeInBytes() >= 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);*/

cache.close(); // this triggers some assertions
IOUtils.close(r, dir);
}
Expand Down Expand Up @@ -165,43 +141,19 @@ public void testBasics_WithTSC_WithSmallHeapSize() throws Exception {
}

private void testBasicsDummyQuery(TieredQueryCache cache, IndexSearcher s, ShardId shard) throws IOException {
QueryCacheStats stats = cache.getStats(shard);
assertEquals(0L, stats.getCacheSize());
assertEquals(0L, stats.getCacheCount());
assertEquals(0L, stats.getHitCount());
assertEquals(0L, stats.getMissCount());
assertEquals(0L, stats.getMemorySizeInBytes());
checkStats(cache.getStats(shard), 0, 0, 0, 0, false);

assertEquals(1, s.count(new DummyQuery(0)));

stats = cache.getStats(shard);
assertEquals(1L, stats.getCacheSize());
assertEquals(1L, stats.getCacheCount());
assertEquals(0L, stats.getHitCount());
assertEquals(2L, stats.getMissCount());
assertTrue(stats.getMemorySizeInBytes() > 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard), 1, 1, 0, 2, true);

int numEntries = 20;

for (int i = 1; i < numEntries; ++i) {
assertEquals(1, s.count(new DummyQuery(i)));
}

stats = cache.getStats(shard);
// assertEquals(10L, stats.getCacheSize()); // TODO: this is 10 bc it's expecting evictions after 10.
assertEquals(numEntries, stats.getCacheCount());
assertEquals(0L, stats.getHitCount());
assertEquals(2 * numEntries, stats.getMissCount());
assertTrue(stats.getMemorySizeInBytes() > 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard), 10, numEntries, 0, 2 * numEntries, true);

s.count(new DummyQuery(1)); // Pick 1 so the hit comes from disk

stats = cache.getStats(shard);
// assertEquals(10L, stats.getCacheSize());
assertEquals(numEntries, stats.getCacheCount());
assertEquals(1L, stats.getHitCount());
assertEquals(2 * numEntries, stats.getMissCount());
assertTrue(stats.getMemorySizeInBytes() > 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard), 10, numEntries, 1, 2 * numEntries, true);
}

// Modified from IndicesQueryCacheTests
Expand Down Expand Up @@ -233,54 +185,18 @@ public void testTwoShards() throws IOException {
s2.setQueryCache(cache);

assertEquals(1, s1.count(new DummyQuery(0)));

QueryCacheStats stats1 = cache.getStats(shard1);
assertEquals(1L, stats1.getCacheSize());
assertEquals(1L, stats1.getCacheCount());
assertEquals(0L, stats1.getHitCount());
assertEquals(2L, stats1.getMissCount());
assertTrue(stats1.getMemorySizeInBytes() >= 0L && stats1.getMemorySizeInBytes() < Long.MAX_VALUE);

QueryCacheStats stats2 = cache.getStats(shard2);
assertEquals(0L, stats2.getCacheSize());
assertEquals(0L, stats2.getCacheCount());
assertEquals(0L, stats2.getHitCount());
assertEquals(0L, stats2.getMissCount());
assertTrue(stats2.getMemorySizeInBytes() >= 0L && stats2.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard1), 1, 1, 0, 2, true);
checkStats(cache.getStats(shard2), 0, 0, 0, 0, false);

assertEquals(1, s2.count(new DummyQuery(0)));

stats1 = cache.getStats(shard1);
assertEquals(1L, stats1.getCacheSize());
assertEquals(1L, stats1.getCacheCount());
assertEquals(0L, stats1.getHitCount());
assertEquals(2L, stats1.getMissCount());
assertTrue(stats1.getMemorySizeInBytes() >= 0L && stats1.getMemorySizeInBytes() < Long.MAX_VALUE);

stats2 = cache.getStats(shard2);
assertEquals(1L, stats2.getCacheSize());
assertEquals(1L, stats2.getCacheCount());
assertEquals(0L, stats2.getHitCount());
assertEquals(2L, stats2.getMissCount());
assertTrue(stats2.getMemorySizeInBytes() >= 0L && stats2.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard1), 1, 1, 0, 2, true);
checkStats(cache.getStats(shard2), 1, 1, 0, 2, true);

for (int i = 0; i < 20; ++i) {
assertEquals(1, s2.count(new DummyQuery(i)));
}

stats1 = cache.getStats(shard1);
//assertEquals(0L, stats1.getCacheSize()); // evicted
assertEquals(1L, stats1.getCacheCount());
assertEquals(0L, stats1.getHitCount());
assertEquals(2L, stats1.getMissCount());
assertTrue(stats1.getMemorySizeInBytes() >= 0L && stats1.getMemorySizeInBytes() < Long.MAX_VALUE);

stats2 = cache.getStats(shard2);
//assertEquals(10L, stats2.getCacheSize());
assertEquals(20L, stats2.getCacheCount());
assertEquals(1L, stats2.getHitCount());
assertEquals(40L, stats2.getMissCount());
assertTrue(stats2.getMemorySizeInBytes() >= 0L && stats2.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard1), 0, 1, 0, 2, false);
checkStats(cache.getStats(shard2), 10, 20, 1, 40, true);

cache.close(); // this triggers some assertions
IOUtils.close(r1, dir1, r2, dir2);
Expand All @@ -302,10 +218,15 @@ public void testBasicsWithLongPointRangeQuery() throws Exception {
threadPool = getThreadPool();
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
// TODO: Adding a bunch of random docs in a >1D space appears to mean Lucene can't search it sublinearly (>1D AND we have Relation.CELL_CROSSES_QUERY).
// TODO: Adding a bunch of random docs in a >1D space appears to mean Lucene can't search it sublinearly (>1D AND we have
// Relation.CELL_CROSSES_QUERY).
// This means we DO actually use the query cache - sublinear queries are not cached for performance reasons since caching is O(N).
// But, idk if this will be consistent. Need to learn more about how it decides where the BKD tree cells have their boundaries.
addRandomDocs(1000, w, Randomness.get(), 2, 0, 100);
Document d = new Document();
d.add(new LongPoint(field, 1, 2)); // this one will always match test query
w.addDocument(d);
w.forceMerge(1); // Force merge down to 1 segment, so we only have one CacheHelper key and can accurately predict stats.
DirectoryReader r = DirectoryReader.open(w);
w.close();
ShardId shard = new ShardId("index", "_na_", 0);
Expand All @@ -316,21 +237,40 @@ public void testBasicsWithLongPointRangeQuery() throws Exception {
TieredQueryCache cache = getQueryCache(getTSCSettings(1000));
s.setQueryCache(cache);

QueryCacheStats stats = cache.getStats(shard);
assertEquals(0L, stats.getCacheSize());
assertEquals(0L, stats.getCacheCount());
assertEquals(0L, stats.getHitCount());
assertEquals(0L, stats.getMissCount());
assertEquals(0L, stats.getMemorySizeInBytes());

assertEquals(1, s.count(getRangeQuery(0, 2)));

stats = cache.getStats(shard);
assertEquals(1L, stats.getCacheSize());
assertEquals(1L, stats.getCacheCount());
assertEquals(0L, stats.getHitCount());
assertEquals(2L, stats.getMissCount());
assertTrue(stats.getMemorySizeInBytes() > 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);
checkStats(cache.getStats(shard), 0, 0, 0, 0, false);

assertTrue(s.count(getRangeQuery(0, 2)) >= 1);
checkStats(cache.getStats(shard), 1, 1, 0, 2, true);

int numEntries = 20;
for (int i = 1; i < numEntries; ++i) {
s.count(getRangeQuery(i, i + 2)); // TODO: this will be flaky as maybe some queries get lucky and are all contained in a BKD
// cell -> skip query cache.
}
checkStats(cache.getStats(shard), 10, numEntries, 0, 2 * numEntries, true);

s.count(getRangeQuery(0, 2)); // The original document should always be present
checkStats(cache.getStats(shard), 10, numEntries, 1, 2 * numEntries, true);

cache.close(); // this triggers some assertions
IOUtils.close(r, dir);
}

private void checkStats(
QueryCacheStats stats,
long expectedSize,
long expectedCount,
long expectedHits,
long expectedMisses,
boolean checkMemoryAboveZero
) {
// assertEquals(expectedSize, stats.getCacheSize());
assertEquals(expectedCount, stats.getCacheCount());
assertEquals(expectedHits, stats.getHitCount());
assertEquals(expectedMisses, stats.getMissCount());
if (checkMemoryAboveZero) {
assertTrue(stats.getMemorySizeInBytes() > 0L && stats.getMemorySizeInBytes() < Long.MAX_VALUE);
}
}

private Settings getTSCSettings(int heapBytes) {
Expand Down Expand Up @@ -381,7 +321,7 @@ private TieredQueryCache getQueryCache(Settings settings) throws IOException {
}

private Query getRangeQuery(long low, long high) {
return LongPoint.newRangeQuery(field, new long[] { low, low+2 }, new long[] { high, high+2 });
return LongPoint.newRangeQuery(field, new long[] { low, low + 2 }, new long[] { high, high + 2 });
}

private static QueryCachingPolicy alwaysCachePolicy() {
Expand Down

0 comments on commit 859d30f

Please sign in to comment.