Skip to content

Commit

Permalink
Bump to CRoaring 4.0, add Bitmap64 statistics (#134)
Browse files Browse the repository at this point in the history
This introduces a breaking change: the deprecated sum_value field is removed from the Statistics type. The plan is to yank the 2.0 release of croaring-rs and publish this as 2.0.1; I wish I had remembered this was coming before releasing 2.0.
  • Loading branch information
Dr-Emann authored Jun 25, 2024
2 parents f1a9af1 + 574c123 commit 0c63c34
Show file tree
Hide file tree
Showing 13 changed files with 278 additions and 65 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 30 additions & 3 deletions croaring-sys/CRoaring/bindgen_bundled_version.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
/* automatically generated by rust-bindgen 0.69.4 */

pub const ROARING_VERSION: &[u8; 6] = b"3.0.1\0";
pub const ROARING_VERSION_MAJOR: _bindgen_ty_1 = 3;
pub const ROARING_VERSION: &[u8; 6] = b"4.0.0\0";
pub const ROARING_VERSION_MAJOR: _bindgen_ty_1 = 4;
pub const ROARING_VERSION_MINOR: _bindgen_ty_1 = 0;
pub const ROARING_VERSION_REVISION: _bindgen_ty_1 = 1;
pub const ROARING_VERSION_REVISION: _bindgen_ty_1 = 0;
pub type _bindgen_ty_1 = ::core::ffi::c_uint;
#[doc = " Roaring arrays are array-based key-value pairs having containers as values\n and 16-bit integer keys. A roaring bitmap might be implemented as such."]
#[repr(C)]
Expand Down Expand Up @@ -45,6 +45,26 @@ pub struct roaring_statistics_s {
}
#[doc = " (For advanced users.)\n The roaring_statistics_t can be used to collect detailed statistics about\n the composition of a roaring bitmap."]
pub type roaring_statistics_t = roaring_statistics_s;
// Rust-side declaration of the C `roaring64_statistics_s` struct, generated by bindgen.
// Every field is a `u64`; this is the record that `roaring64_bitmap_statistics` fills in
// through its `stat` pointer.
#[doc = " (For advanced users.)\n The roaring64_statistics_t can be used to collect detailed statistics about\n the composition of a roaring64 bitmap."]
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct roaring64_statistics_s {
// Number of containers.
pub n_containers: u64,
// Number of array containers.
pub n_array_containers: u64,
// Number of run containers.
pub n_run_containers: u64,
// Number of bitset (bitmap) containers.
pub n_bitset_containers: u64,
// Number of values stored in array containers.
pub n_values_array_containers: u64,
// Number of values stored in run containers.
pub n_values_run_containers: u64,
// Number of values stored in bitset (bitmap) containers.
pub n_values_bitset_containers: u64,
// Number of allocated bytes in array containers.
pub n_bytes_array_containers: u64,
// Number of allocated bytes in run containers.
pub n_bytes_run_containers: u64,
// Number of allocated bytes in bitset (bitmap) containers.
pub n_bytes_bitset_containers: u64,
// The maximal value; undefined if cardinality is zero.
pub max_value: u64,
// The minimal value; undefined if cardinality is zero.
pub min_value: u64,
// Total number of values stored in the bitmap.
pub cardinality: u64,
}
#[doc = " (For advanced users.)\n The roaring64_statistics_t can be used to collect detailed statistics about\n the composition of a roaring64 bitmap."]
pub type roaring64_statistics_t = roaring64_statistics_s;
#[doc = " Roaring-internal type used to iterate within a roaring container."]
#[repr(C)]
#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -973,6 +993,13 @@ extern "C" {
#[doc = " Returns true if the result has at least one run container."]
pub fn roaring64_bitmap_run_optimize(r: *mut roaring64_bitmap_t) -> bool;
}
// FFI declaration of CRoaring's `roaring64_bitmap_statistics`.
//
// `r` is the bitmap to inspect (read-only) and `stat` is the output record.
// The C implementation clears `*stat` with `memset` before filling it in, and it
// uses both pointers without any null check, so callers must pass valid pointers.
extern "C" {
#[doc = " (For advanced users.)\n Collect statistics about the bitmap"]
pub fn roaring64_bitmap_statistics(
r: *const roaring64_bitmap_t,
stat: *mut roaring64_statistics_t,
);
}
extern "C" {
#[doc = " Perform internal consistency checks.\n\n Returns true if the bitmap is consistent. It may be useful to call this\n after deserializing bitmaps from untrusted sources. If\n roaring64_bitmap_internal_validate returns true, then the bitmap is\n consistent and can be trusted not to cause crashes or memory corruption.\n\n If reason is non-null, it will be set to a string describing the first\n inconsistency found if any."]
pub fn roaring64_bitmap_internal_validate(
Expand Down
103 changes: 64 additions & 39 deletions croaring-sys/CRoaring/roaring.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
// Created by amalgamation.sh on 2024-04-02T13:42:32Z
// Created by amalgamation.sh on 2024-05-13T21:29:25Z

/*
* The CRoaring project is under a dual license (Apache/MIT).
Expand Down Expand Up @@ -10141,7 +10141,7 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node,
return art_node256_next_child((art_node256_t *)node, index);
default:
assert(false);
return (art_indexed_child_t){0};
return (art_indexed_child_t){0, 0, 0};
}
}

Expand All @@ -10165,7 +10165,7 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node,
return art_node256_prev_child((art_node256_t *)node, index);
default:
assert(false);
return (art_indexed_child_t){0};
return (art_indexed_child_t){0, 0, 0};
}
}

Expand All @@ -10189,7 +10189,7 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node,
return art_node256_child_at((art_node256_t *)node, index);
default:
assert(false);
return (art_indexed_child_t){0};
return (art_indexed_child_t){0, 0, 0};
}
}

Expand All @@ -10213,7 +10213,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node,
return art_node256_lower_bound((art_node256_t *)node, key_chunk);
default:
assert(false);
return (art_indexed_child_t){0};
return (art_indexed_child_t){0, 0, 0};
}
}

Expand Down Expand Up @@ -10770,7 +10770,7 @@ static bool art_node_iterator_lower_bound(const art_node_t *node,
}

art_iterator_t art_init_iterator(const art_t *art, bool first) {
art_iterator_t iterator = {0};
art_iterator_t iterator = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
if (art->root == NULL) {
return iterator;
}
Expand Down Expand Up @@ -10827,15 +10827,15 @@ bool art_iterator_lower_bound(art_iterator_t *iterator,
}

art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) {
art_iterator_t iterator = {0};
art_iterator_t iterator = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
if (art->root != NULL) {
art_node_iterator_lower_bound(art->root, &iterator, key);
}
return iterator;
}

art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) {
art_iterator_t iterator = {0};
art_iterator_t iterator = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
if (art->root != NULL) {
if (art_node_iterator_lower_bound(art->root, &iterator, key) &&
art_compare_keys(iterator.key, key) == 0) {
Expand Down Expand Up @@ -19469,7 +19469,7 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
// todo: could be greatly optimized but we do not expect this call to ever
// include long lists
roaring_bitmap_t *answer = roaring_bitmap_create();
roaring_bulk_context_t context = {0};
roaring_bulk_context_t context = {0, 0, 0, 0};
va_list ap;
va_start(ap, n_args);
for (size_t i = 0; i < n_args; i++) {
Expand Down Expand Up @@ -19641,20 +19641,6 @@ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
printf("}");
}

typedef struct min_max_sum_s {
uint32_t min;
uint32_t max;
uint64_t sum;
} min_max_sum_t;

static bool min_max_sum_fnc(uint32_t value, void *param) {
min_max_sum_t *mms = (min_max_sum_t *)param;
if (value > mms->max) mms->max = value;
if (value < mms->min) mms->min = value;
mms->sum += value;
return true; // we always process all data points
}

/**
* (For advanced users.)
* Collect statistics about the bitmap
Expand All @@ -19665,15 +19651,8 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,

memset(stat, 0, sizeof(*stat));
stat->n_containers = ra->size;
stat->cardinality = roaring_bitmap_get_cardinality(r);
min_max_sum_t mms;
mms.min = UINT32_C(0xFFFFFFFF);
mms.max = UINT32_C(0);
mms.sum = 0;
roaring_iterate(r, &min_max_sum_fnc, &mms);
stat->min_value = mms.min;
stat->max_value = mms.max;
stat->sum_value = mms.sum;
stat->min_value = roaring_bitmap_minimum(r);
stat->max_value = roaring_bitmap_maximum(r);

for (int i = 0; i < ra->size; ++i) {
uint8_t truetype =
Expand All @@ -19682,6 +19661,7 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,
container_get_cardinality(ra->containers[i], ra->typecodes[i]);
uint32_t sbytes =
container_size_in_bytes(ra->containers[i], ra->typecodes[i]);
stat->cardinality += card;
switch (truetype) {
case BITSET_CONTAINER_TYPE:
stat->n_bitset_containers++;
Expand Down Expand Up @@ -20831,7 +20811,7 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
if (bitmap == NULL) {
return NULL;
}
roaring_bulk_context_t context = {0};
roaring_bulk_context_t context = {0, 0, 0, 0};
for (uint32_t i = 0; i < card; i++) {
// elems may not be aligned, read with memcpy
uint32_t elem;
Expand Down Expand Up @@ -20874,7 +20854,7 @@ roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf,
if (bitmap == NULL) {
return NULL;
}
roaring_bulk_context_t context = {0};
roaring_bulk_context_t context = {0, 0, 0, 0};
for (uint32_t i = 0; i < card; i++) {
// elems may not be aligned, read with memcpy
uint32_t elem;
Expand Down Expand Up @@ -22876,7 +22856,7 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args,

roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) {
roaring64_bitmap_t *r = roaring64_bitmap_create();
roaring64_bulk_context_t context = {0};
roaring64_bulk_context_t context = {0, 0, 0, 0, 0, 0, 0};
va_list ap;
va_start(ap, n_args);
for (size_t i = 0; i < n_args; i++) {
Expand Down Expand Up @@ -22969,7 +22949,7 @@ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args,
return;
}
const uint64_t *end = vals + n_args;
roaring64_bulk_context_t context = {0};
roaring64_bulk_context_t context = {0, 0, 0, 0, 0, 0, 0};
for (const uint64_t *current_val = vals; current_val != end;
current_val++) {
roaring64_bitmap_add_bulk(r, &context, *current_val);
Expand Down Expand Up @@ -23108,7 +23088,8 @@ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r,
uint8_t high48[ART_KEY_BYTES];
uint16_t low16 = split_key(val, high48);

if (context->leaf == NULL || context->high_bytes != high48) {
if (context->leaf == NULL ||
art_compare_keys(context->high_bytes, high48) != 0) {
// We're not positioned anywhere yet or the high bits of the key
// differ.
leaf_t *leaf = (leaf_t *)art_find(&r->art, high48);
Expand Down Expand Up @@ -23292,7 +23273,7 @@ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args,
return;
}
const uint64_t *end = vals + n_args;
roaring64_bulk_context_t context = {0};
roaring64_bulk_context_t context = {0, 0, 0, 0, 0, 0, 0};
for (const uint64_t *current_val = vals; current_val != end;
current_val++) {
roaring64_bitmap_remove_bulk(r, &context, *current_val);
Expand Down Expand Up @@ -23455,6 +23436,50 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) {
return has_run_container;
}

/**
 * (For advanced users.)
 * Collect statistics about the bitmap.
 *
 * `stat` is cleared first and then filled in: the minimum and maximum come
 * from roaring64_bitmap_minimum()/roaring64_bitmap_maximum(), and every other
 * field is accumulated by walking the ART leaves, one container per leaf.
 */
void roaring64_bitmap_statistics(const roaring64_bitmap_t *r,
                                 roaring64_statistics_t *stat) {
    // Start from an all-zero record so the counters below can just add up.
    memset(stat, 0, sizeof(*stat));
    stat->max_value = roaring64_bitmap_maximum(r);
    stat->min_value = roaring64_bitmap_minimum(r);

    // The iterator's value pointer becomes NULL once the walk is finished.
    for (art_iterator_t iter = art_init_iterator(&r->art, /*first=*/true);
         iter.value != NULL; art_iterator_next(&iter)) {
        leaf_t *leaf = (leaf_t *)iter.value;
        uint8_t kind = get_container_type(leaf->container, leaf->typecode);
        uint32_t n_values =
            container_get_cardinality(leaf->container, leaf->typecode);
        uint32_t n_bytes =
            container_size_in_bytes(leaf->container, leaf->typecode);

        stat->n_containers += 1;
        stat->cardinality += n_values;

        // Attribute this container's count, values and bytes to its kind.
        switch (kind) {
            case ARRAY_CONTAINER_TYPE:
                stat->n_array_containers += 1;
                stat->n_values_array_containers += n_values;
                stat->n_bytes_array_containers += n_bytes;
                break;
            case RUN_CONTAINER_TYPE:
                stat->n_run_containers += 1;
                stat->n_values_run_containers += n_values;
                stat->n_bytes_run_containers += n_bytes;
                break;
            case BITSET_CONTAINER_TYPE:
                stat->n_bitset_containers += 1;
                stat->n_values_bitset_containers += n_values;
                stat->n_bytes_bitset_containers += n_bytes;
                break;
            default:
                // No other container kind is expected here.
                assert(false);
                roaring_unreachable;
        }
    }
}

static bool roaring64_leaf_internal_validate(const art_val_t *val,
const char **reason) {
leaf_t *leaf = (leaf_t *)val;
Expand Down Expand Up @@ -24576,7 +24601,7 @@ bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,

void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r,
uint64_t *out) {
roaring64_iterator_t it = {0};
roaring64_iterator_t it; // gets initialized in the next line
roaring64_iterator_init_at(r, &it, /*first=*/true);
roaring64_iterator_read(&it, out, UINT64_MAX);
}
Expand Down
53 changes: 47 additions & 6 deletions croaring-sys/CRoaring/roaring.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
// Created by amalgamation.sh on 2024-04-02T13:42:32Z
// Created by amalgamation.sh on 2024-05-13T21:29:25Z

/*
* The CRoaring project is under a dual license (Apache/MIT).
Expand Down Expand Up @@ -59,11 +59,11 @@
// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
#ifndef ROARING_INCLUDE_ROARING_VERSION
#define ROARING_INCLUDE_ROARING_VERSION
#define ROARING_VERSION "3.0.1"
#define ROARING_VERSION "4.0.0"
enum {
ROARING_VERSION_MAJOR = 3,
ROARING_VERSION_MAJOR = 4,
ROARING_VERSION_MINOR = 0,
ROARING_VERSION_REVISION = 1
ROARING_VERSION_REVISION = 0
};
#endif // ROARING_INCLUDE_ROARING_VERSION
// clang-format on/* end file include/roaring/roaring_version.h */
Expand Down Expand Up @@ -159,14 +159,48 @@ typedef struct roaring_statistics_s {
max_value; /* the maximal value, undefined if cardinality is zero */
uint32_t
min_value; /* the minimal value, undefined if cardinality is zero */
uint64_t sum_value; /* the sum of all values (could be used to compute
average) */
uint64_t sum_value; /* deprecated always zero */

uint64_t cardinality; /* total number of values stored in the bitmap */

// and n_values_arrays, n_values_rle, n_values_bitmap
} roaring_statistics_t;

/**
 * (For advanced users.)
 * The roaring64_statistics_t can be used to collect detailed statistics about
 * the composition of a roaring64 bitmap. It is the output record filled in by
 * roaring64_bitmap_statistics(). All fields are 64-bit counters or values.
 */
typedef struct roaring64_statistics_s {
uint64_t n_containers; /* total number of containers */

uint64_t n_array_containers; /* number of array containers */
uint64_t n_run_containers; /* number of run containers */
uint64_t n_bitset_containers; /* number of bitset (bitmap) containers */

uint64_t
n_values_array_containers; /* number of values in array containers */
uint64_t n_values_run_containers; /* number of values in run containers */
uint64_t
n_values_bitset_containers; /* number of values in bitset (bitmap)
containers */

uint64_t n_bytes_array_containers; /* number of allocated bytes in array
containers */
uint64_t n_bytes_run_containers; /* number of allocated bytes in run
containers */
uint64_t n_bytes_bitset_containers; /* number of allocated bytes in bitset
(bitmap) containers */

uint64_t
max_value; /* the maximal value, undefined if cardinality is zero */
uint64_t
min_value; /* the minimal value, undefined if cardinality is zero */

uint64_t cardinality; /* total number of values stored in the bitmap */

// and n_values_arrays, n_values_rle, n_values_bitmap
} roaring64_statistics_t;

/**
* Roaring-internal type used to iterate within a roaring container.
*/
Expand Down Expand Up @@ -2542,6 +2576,13 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r);
*/
bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r);

/**
 * (For advanced users.)
 * Collect statistics about the bitmap.
 *
 * `r` is only read. `stat` is an output parameter: it is zeroed and then
 * completely overwritten, so it does not need to be initialized by the
 * caller. `min_value` and `max_value` are undefined when the resulting
 * `cardinality` is zero.
 */
void roaring64_bitmap_statistics(const roaring64_bitmap_t *r,
roaring64_statistics_t *stat);

/**
* Perform internal consistency checks.
*
Expand Down
2 changes: 1 addition & 1 deletion croaring-sys/CRoaring/roaring.hh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
// Created by amalgamation.sh on 2024-04-02T13:42:32Z
// Created by amalgamation.sh on 2024-05-13T21:29:25Z

/*
* The CRoaring project is under a dual license (Apache/MIT).
Expand Down
Loading

0 comments on commit 0c63c34

Please sign in to comment.