diff --git a/src/common/dav_v2/heap.c b/src/common/dav_v2/heap.c index 61b7ed2e2b1..5ca8abe625d 100644 --- a/src/common/dav_v2/heap.c +++ b/src/common/dav_v2/heap.c @@ -63,6 +63,8 @@ struct mbrt { struct recycler *recyclers[MAX_ALLOCATION_CLASSES]; bool laf[MAX_ALLOCATION_CLASSES]; /* last allocation failed? */ bool laf_updated; + bool is_global_mbrt; + bool is_evictable; }; enum mb_usage_hint { @@ -78,7 +80,24 @@ enum mb_usage_hint { #define MB_U30 (ZONE_MAX_SIZE * 3 / 10) #define MB_USAGE_DELTA (ZONE_MAX_SIZE / 20) -size_t mb_usage_byhint[MB_UMAX_HINT] = {0, MB_U30 + 1, MB_U75 + 1, MB_U90 + 1}; +size_t mb_usage_byhint[MB_UMAX_HINT] = {1, MB_U30 + 1, MB_U75 + 1, MB_U90 + 1}; + +struct mbrt_qbs { + struct mbrt_q mb_u90; + struct mbrt_q mb_u75; + struct mbrt_q mb_u30; + struct mbrt_q mb_u0; + struct mbrt_q mb_ue; +}; + +#define SOEMB_ACTIVE_CNT 3 + +struct soemb_rt { + struct mbrt *svec[SOEMB_ACTIVE_CNT]; + int cur_idx; + int fur_idx; + struct mbrt_qbs qbs; +}; struct heap_rt { struct alloc_class_collection *alloc_classes; @@ -101,16 +120,13 @@ struct heap_rt { void *mb_create_wq; struct zinfo_vec *zinfo_vec; struct mbrt *default_mb; - struct mbrt **evictable_mbs; + struct mbrt **mbs; struct mbrt *active_evictable_mb; - struct mbrt_q mb_u90; - struct mbrt_q mb_u75; - struct mbrt_q mb_u30; - struct mbrt_q mb_u0; + struct mbrt_qbs emb_qbs; + struct soemb_rt smbrt; + unsigned int soemb_cnt; }; -#define MBRT_NON_EVICTABLE ((struct mbrt *)(-1UL)) - static void heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, uint32_t zone_id); @@ -213,39 +229,230 @@ mbrt_is_laf(struct mbrt *mb, int c_id) return mb->laf[c_id]; } -void -heap_mbrt_setmb_nonevictable(struct palloc_heap *heap, uint32_t zid) +static void +mbrt_qbs_init(struct mbrt_qbs *qb) +{ + TAILQ_INIT(&qb->mb_u90); + TAILQ_INIT(&qb->mb_u75); + TAILQ_INIT(&qb->mb_u30); + TAILQ_INIT(&qb->mb_u0); + TAILQ_INIT(&qb->mb_ue); +} + +static void +mbrt_qbs_fini(struct mbrt_qbs *qb) +{ + /* No op */ +} + +static void +mbrt_qbs_insertmb(struct mbrt_qbs *qb, struct mbrt *mb) +{ + D_ASSERT(mb->qptr == NULL); + + if (mb->space_usage > MB_U90) { + TAILQ_INSERT_TAIL(&qb->mb_u90, mb, mb_link); + mb->qptr = &qb->mb_u90; + } else if (mb->space_usage > MB_U75) { + TAILQ_INSERT_TAIL(&qb->mb_u75, mb, mb_link); + mb->qptr = &qb->mb_u75; + } else if (mb->space_usage > MB_U30) { + TAILQ_INSERT_TAIL(&qb->mb_u30, mb, mb_link); + mb->qptr = &qb->mb_u30; + } else if (mb->space_usage) { + TAILQ_INSERT_TAIL(&qb->mb_u0, mb, mb_link); + mb->qptr = &qb->mb_u0; + } else { + TAILQ_INSERT_TAIL(&qb->mb_ue, mb, mb_link); + mb->qptr = &qb->mb_ue; + } + + mb->prev_usage = mb->space_usage; +} + +static void +mbrt_qbs_insertmb_force(struct mbrt_qbs *qb, struct mbrt *mb, int hint) +{ + D_ASSERT(mb->qptr == NULL); + + switch (hint) { + case MB_U90_HINT: + TAILQ_INSERT_TAIL(&qb->mb_u90, mb, mb_link); + mb->qptr = &qb->mb_u90; + break; + case MB_U75_HINT: + TAILQ_INSERT_TAIL(&qb->mb_u75, mb, mb_link); + mb->qptr = &qb->mb_u75; + break; + case MB_U30_HINT: + TAILQ_INSERT_TAIL(&qb->mb_u30, mb, mb_link); + mb->qptr = &qb->mb_u30; + break; + case MB_U0_HINT: + TAILQ_INSERT_TAIL(&qb->mb_u0, mb, mb_link); + mb->qptr = &qb->mb_u0; + break; + default: + D_ASSERTF(0, "invalid usage hint %d", hint); + break; + } +} + +static int +mbrt_qbs_update_mb(struct mbrt_qbs *qb, struct mbrt *mb) +{ + int hint = MB_UMAX_HINT; + + if (mb->qptr == NULL) + return MB_UMAX_HINT; + + if (mb->space_usage == 0) { + TAILQ_REMOVE(mb->qptr, mb, mb_link); + TAILQ_INSERT_TAIL(&qb->mb_ue, mb, mb_link); 
+ mb->qptr = &qb->mb_ue; + mb->prev_usage = mb->space_usage; + return MB_U0_HINT; + } else if (mb->qptr == &qb->mb_ue) { + TAILQ_REMOVE(mb->qptr, mb, mb_link); + TAILQ_INSERT_TAIL(&qb->mb_u0, mb, mb_link); + mb->qptr = &qb->mb_u0; + } + + if (labs((int64_t)(mb->space_usage - mb->prev_usage)) < MB_USAGE_DELTA) + return MB_UMAX_HINT; + + if (mb->space_usage > MB_U90) { + if (mb->qptr != &qb->mb_u90) { + TAILQ_REMOVE(mb->qptr, mb, mb_link); + TAILQ_INSERT_TAIL(&qb->mb_u90, mb, mb_link); + mb->qptr = &qb->mb_u90; + hint = MB_U90_HINT; + } + } else if (mb->space_usage > MB_U75) { + if (mb->qptr != &qb->mb_u75) { + TAILQ_REMOVE(mb->qptr, mb, mb_link); + TAILQ_INSERT_TAIL(&qb->mb_u75, mb, mb_link); + mb->qptr = &qb->mb_u75; + hint = MB_U75_HINT; + } + } else if (mb->space_usage > MB_U30) { + if (mb->qptr != &qb->mb_u30) { + TAILQ_REMOVE(mb->qptr, mb, mb_link); + TAILQ_INSERT_TAIL(&qb->mb_u30, mb, mb_link); + mb->qptr = &qb->mb_u30; + hint = MB_U30_HINT; + } + } else if (mb->qptr != &qb->mb_u0) { + TAILQ_REMOVE(mb->qptr, mb, mb_link); + TAILQ_INSERT_TAIL(&qb->mb_u0, mb, mb_link); + mb->qptr = &qb->mb_u0; + hint = MB_U0_HINT; + } + mb->prev_usage = mb->space_usage; + return hint; +} + +static struct mbrt * +mbrt_qbs_getmb(struct mbrt_qbs *qb, int force) +{ + struct mbrt *mb = NULL; + + if ((mb = TAILQ_FIRST(&qb->mb_u30)) != NULL) + TAILQ_REMOVE(&qb->mb_u30, mb, mb_link); + else if ((mb = TAILQ_FIRST(&qb->mb_u0)) != NULL) + TAILQ_REMOVE(&qb->mb_u0, mb, mb_link); + else if ((mb = TAILQ_FIRST(&qb->mb_ue)) != NULL) + TAILQ_REMOVE(&qb->mb_ue, mb, mb_link); + + if (mb) { + mb->qptr = NULL; + return mb; + } + + if (!force) + return NULL; + + if ((mb = TAILQ_FIRST(&qb->mb_u75)) != NULL) + TAILQ_REMOVE(&qb->mb_u75, mb, mb_link); + else if ((mb = TAILQ_FIRST(&qb->mb_u90)) != NULL) + TAILQ_REMOVE(&qb->mb_u90, mb, mb_link); + + if (mb) + mb->qptr = NULL; + return mb; +} + +static struct mbrt * +mbrt_qbs_getmb_ue(struct mbrt_qbs *qb) +{ + struct mbrt *mb = NULL; + if ((mb = TAILQ_FIRST(&qb->mb_ue)) != NULL) { + TAILQ_REMOVE(&qb->mb_ue, mb, mb_link); + mb->qptr = NULL; + } + return mb; +} + +static void +soemb_init(struct soemb_rt *smbrt) +{ + memset(smbrt->svec, 0, sizeof(struct mbrt *) * SOEMB_ACTIVE_CNT); + mbrt_qbs_init(&smbrt->qbs); + smbrt->cur_idx = 0; + smbrt->fur_idx = 0; +} + +static void +soemb_fini(struct soemb_rt *smbrt) +{ + mbrt_qbs_fini(&smbrt->qbs); +} + +static void +heap_mbrt_setmb_nonevictable(struct palloc_heap *heap, struct mbrt *mb, uint32_t zid) { D_ASSERT(zid < heap->rt->nzones); - heap->rt->evictable_mbs[zid] = MBRT_NON_EVICTABLE; + D_ASSERT(heap->rt->default_mb != NULL); + + heap->rt->mbs[zid] = mb ? 
mb : heap->rt->default_mb; + if (mb) + mb->is_evictable = false; } -void +static void heap_mbrt_setmb_evictable(struct palloc_heap *heap, struct mbrt *mb) { D_ASSERT((mb->mb_id != 0) && (mb->mb_id < heap->rt->nzones)); - heap->rt->evictable_mbs[mb->mb_id] = mb; + heap->rt->mbs[mb->mb_id] = mb; + mb->is_evictable = true; } -void +static void heap_mbrt_setmb_unused(struct palloc_heap *heap, uint32_t zid) { - D_ASSERT((zid < heap->rt->nzones) && (heap->rt->evictable_mbs[zid] == MBRT_NON_EVICTABLE)); - heap->rt->evictable_mbs[zid] = NULL; + D_ASSERT((zid < heap->rt->nzones) && (heap->rt->mbs[zid]->is_evictable == false)); + heap->rt->mbs[zid] = NULL; } bool heap_mbrt_ismb_evictable(struct palloc_heap *heap, uint32_t zid) { D_ASSERT(zid < heap->rt->nzones); - return (heap->rt->evictable_mbs[zid] != MBRT_NON_EVICTABLE); + return (!heap->rt->mbs[zid] || heap->rt->mbs[zid]->is_evictable); } bool heap_mbrt_ismb_initialized(struct palloc_heap *heap, uint32_t zid) { D_ASSERT(zid < heap->rt->nzones); - return (heap->rt->evictable_mbs[zid] != 0); + return (heap->rt->mbs[zid] != 0); +} + +bool +heap_mbrt_ismb_localrt(struct palloc_heap *heap, uint32_t zid) +{ + D_ASSERT(zid < heap->rt->nzones); + return (heap->rt->mbs[zid] != heap->rt->default_mb); } /* @@ -279,7 +486,7 @@ mbrt_bucket_release(struct bucket *b) /* * heap_mbrt_setup_mb -- (internal) create and initializes a Memory Bucket runtime. */ -struct mbrt * +static struct mbrt * heap_mbrt_setup_mb(struct palloc_heap *heap, uint32_t zid) { struct heap_rt *rt = heap->rt; @@ -383,6 +590,7 @@ heap_mbrt_init(struct palloc_heap *heap) rt->mb_create_wq = NULL; rt->mb_pressure = 0; rt->empty_nemb_cnt = 0; + rt->soemb_cnt = 0; rt->empty_nemb_gcth = HEAP_NEMB_EMPTY_THRESHOLD; d_getenv_uint("DAOS_NEMB_EMPTY_RECYCLE_THRESHOLD", &rt->empty_nemb_gcth); @@ -395,27 +603,24 @@ heap_mbrt_init(struct palloc_heap *heap) goto error; } - D_ALLOC_ARRAY(rt->evictable_mbs, rt->nzones); - if (rt->evictable_mbs == NULL) { + D_ALLOC_ARRAY(rt->mbs, rt->nzones); + if (rt->mbs == NULL) { ret = ENOMEM; goto error; } - TAILQ_INIT(&rt->mb_u90); - TAILQ_INIT(&rt->mb_u75); - TAILQ_INIT(&rt->mb_u30); - TAILQ_INIT(&rt->mb_u0); + mbrt_qbs_init(&rt->emb_qbs); rt->default_mb = heap_mbrt_setup_mb(heap, 0); if (rt->default_mb == NULL) { ret = ENOMEM; goto error_default_mb_setup; } - heap_mbrt_setmb_nonevictable(heap, 0); + heap_mbrt_setmb_nonevictable(heap, NULL, 0); return 0; error_default_mb_setup: - D_FREE(rt->evictable_mbs); + D_FREE(rt->mbs); error: return ret; } @@ -428,15 +633,16 @@ heap_mbrt_fini(struct palloc_heap *heap) struct umem_store *store = heap->layout_info.store; for (i = 0; i < rt->zones_exhausted; i++) { - if (heap_mbrt_ismb_evictable(heap, i)) - heap_mbrt_cleanup_mb(rt->evictable_mbs[i]); + if (heap_mbrt_ismb_localrt(heap, i)) + heap_mbrt_cleanup_mb(rt->mbs[i]); } heap_mbrt_cleanup_mb(rt->default_mb); - D_FREE(rt->evictable_mbs); + mbrt_qbs_fini(&rt->emb_qbs); + D_FREE(rt->mbs); rt->default_mb = NULL; rt->active_evictable_mb = NULL; - rt->evictable_mbs = NULL; + rt->mbs = NULL; D_ASSERT(rt->mb_create_waiters == 0); if (rt->mb_create_wq != NULL) store->stor_ops->so_waitqueue_destroy(rt->mb_create_wq); @@ -450,11 +656,8 @@ heap_mbrt_fini(struct palloc_heap *heap) struct mbrt * heap_mbrt_get_mb(struct palloc_heap *heap, uint32_t zone_id) { - if (!heap_mbrt_ismb_evictable(heap, zone_id)) - return heap->rt->default_mb; - - D_ASSERTF(heap->rt->evictable_mbs[zone_id] != NULL, "zone_id %d is marked unused", zone_id); - return heap->rt->evictable_mbs[zone_id]; + 
D_ASSERTF(heap->rt->mbs[zone_id] != NULL, "zone_id %d is marked unused", zone_id); + return heap->rt->mbs[zone_id]; } void @@ -463,10 +666,8 @@ heap_mbrt_log_alloc_failure(struct palloc_heap *heap, uint32_t zone_id) struct mbrt *mb = heap->rt->active_evictable_mb; if (mb && (mb->mb_id == zone_id)) { - TAILQ_INSERT_TAIL(&heap->rt->mb_u90, mb, mb_link); - mb->qptr = &heap->rt->mb_u90; - mb->prev_usage = mb->space_usage; heap->rt->active_evictable_mb = NULL; + mbrt_qbs_insertmb_force(&heap->rt->emb_qbs, mb, MB_U90_HINT); heap_zinfo_set_usage(heap, zone_id, MB_U90_HINT); } } @@ -474,37 +675,28 @@ heap_mbrt_log_alloc_failure(struct palloc_heap *heap, uint32_t zone_id) void heap_mbrt_setmb_usage(struct palloc_heap *heap, uint32_t zone_id, uint64_t usage) { - struct mbrt *mb = heap->rt->evictable_mbs[zone_id]; + struct mbrt *mb = heap->rt->mbs[zone_id]; D_ASSERT(zone_id < heap->rt->nzones); if (zone_id == 0) { heap->rt->default_mb->space_usage = usage; return; } - if (mb == (struct mbrt *)(-1UL)) + + if (!heap_mbrt_ismb_evictable(heap, zone_id)) { + mbrt_qbs_insertmb(&heap->rt->smbrt.qbs, mb); return; + } mb->space_usage = usage; - if ((heap->rt->active_evictable_mb == mb) || (mb->qptr)) + if (heap->rt->active_evictable_mb == mb) return; - if (mb->space_usage > MB_U90) { - TAILQ_INSERT_TAIL(&heap->rt->mb_u90, mb, mb_link); - mb->qptr = &heap->rt->mb_u90; - } else if (mb->space_usage > MB_U75) { - TAILQ_INSERT_TAIL(&heap->rt->mb_u75, mb, mb_link); - mb->qptr = &heap->rt->mb_u75; - } else if (mb->space_usage > MB_U30) { - TAILQ_INSERT_TAIL(&heap->rt->mb_u30, mb, mb_link); - mb->qptr = &heap->rt->mb_u30; - heap->rt->mb_pressure = 0; - } else { - TAILQ_INSERT_TAIL(&heap->rt->mb_u0, mb, mb_link); - mb->qptr = &heap->rt->mb_u0; - heap->rt->mb_pressure = 0; - } - mb->prev_usage = mb->space_usage; + if (mb->qptr) + mbrt_qbs_update_mb(&heap->rt->emb_qbs, mb); + else + mbrt_qbs_insertmb(&heap->rt->emb_qbs, mb); } int @@ -521,8 +713,8 @@ heap_mbrt_getmb_usage(struct palloc_heap *heap, uint32_t zone_id, uint64_t *allo errno = EINVAL; return -1; } - mb = heap->rt->evictable_mbs[zone_id]; - if (!mb || (mb == (struct mbrt *)(-1UL))) { + mb = heap->rt->mbs[zone_id]; + if (!mb || !heap_mbrt_ismb_evictable(heap, zone_id)) { errno = EINVAL; return -1; } @@ -535,51 +727,31 @@ heap_mbrt_getmb_usage(struct palloc_heap *heap, uint32_t zone_id, uint64_t *allo void heap_mbrt_incrmb_usage(struct palloc_heap *heap, uint32_t zone_id, int size) { - struct mbrt *mb = heap->rt->evictable_mbs[zone_id]; + struct mbrt *mb = heap->rt->mbs[zone_id]; + int hint; - if (mb == (struct mbrt *)(-1UL)) { + if (!heap_mbrt_ismb_evictable(heap, zone_id)) heap->rt->default_mb->space_usage += size; + + if (!heap_mbrt_ismb_localrt(heap, zone_id)) return; - } mb->space_usage += size; - if ((heap->rt->active_evictable_mb == mb) || - (labs((int64_t)(mb->space_usage - mb->prev_usage)) < MB_USAGE_DELTA)) + + if (heap->rt->active_evictable_mb == mb) return; - if (mb->space_usage > MB_U90) { - if (mb->qptr != &heap->rt->mb_u90) { - TAILQ_REMOVE(mb->qptr, mb, mb_link); - TAILQ_INSERT_TAIL(&heap->rt->mb_u90, mb, mb_link); - mb->qptr = &heap->rt->mb_u90; - heap_zinfo_set_usage(heap, zone_id, MB_U90_HINT); - } - } else if (mb->space_usage > MB_U75) { - if (mb->qptr != &heap->rt->mb_u75) { - TAILQ_REMOVE(mb->qptr, mb, mb_link); - TAILQ_INSERT_TAIL(&heap->rt->mb_u75, mb, mb_link); - mb->qptr = &heap->rt->mb_u75; - heap_zinfo_set_usage(heap, zone_id, MB_U75_HINT); - } - } else if (mb->space_usage > MB_U30) { - if (mb->qptr != &heap->rt->mb_u30) { - 
TAILQ_REMOVE(mb->qptr, mb, mb_link); - TAILQ_INSERT_TAIL(&heap->rt->mb_u30, mb, mb_link); - mb->qptr = &heap->rt->mb_u30; - heap_zinfo_set_usage(heap, zone_id, MB_U30_HINT); + if (heap_mbrt_ismb_evictable(heap, zone_id)) { + hint = mbrt_qbs_update_mb(&heap->rt->emb_qbs, mb); + if (hint != MB_UMAX_HINT) + heap_zinfo_set_usage(heap, zone_id, hint); + if (hint <= MB_U30_HINT) heap->rt->mb_pressure = 0; - } - } else if (mb->qptr != &heap->rt->mb_u0) { - TAILQ_REMOVE(mb->qptr, mb, mb_link); - TAILQ_INSERT_TAIL(&heap->rt->mb_u0, mb, mb_link); - mb->qptr = &heap->rt->mb_u0; - heap_zinfo_set_usage(heap, zone_id, MB_U0_HINT); - heap->rt->mb_pressure = 0; - } - mb->prev_usage = mb->space_usage; + } else + hint = mbrt_qbs_update_mb(&heap->rt->smbrt.qbs, mb); } -int +static int heap_mbrt_mb_reclaim_garbage(struct palloc_heap *heap, uint32_t zid) { struct mbrt *mb; @@ -600,6 +772,85 @@ heap_mbrt_mb_reclaim_garbage(struct palloc_heap *heap, uint32_t zid) return 0; } +void +heap_soemb_active_iter_init(struct palloc_heap *heap) +{ + heap->rt->smbrt.cur_idx = 0; +} + +uint32_t +heap_soemb_active_get(struct palloc_heap *heap) +{ + struct soemb_rt *smbrt = &heap->rt->smbrt; + struct mbrt *mb = NULL; + + if (heap->rt->nzones_e == 0) + return 0; + + if (smbrt->cur_idx > smbrt->fur_idx) + smbrt->fur_idx = smbrt->cur_idx; + + if (smbrt->cur_idx < SOEMB_ACTIVE_CNT) { + mb = smbrt->svec[smbrt->cur_idx]; + smbrt->cur_idx++; + } + + if (mb) + return mb->mb_id; + + return 0; +} + +static int +heap_create_soe_mb(struct palloc_heap *heap, uint32_t *mb_id); + +void +heap_soemb_reserve(struct palloc_heap *heap) +{ + int i, ret; + uint32_t mb_id; + struct mbrt *mb; + struct soemb_rt *smbrt = &heap->rt->smbrt; + + if (heap->rt->nzones_e == 0) + return; + + if (smbrt->fur_idx > 1) { + mb = smbrt->svec[0]; + if (mb) + mbrt_qbs_insertmb(&smbrt->qbs, mb); + + for (i = 1; i < SOEMB_ACTIVE_CNT; i++) { + smbrt->svec[i - 1] = smbrt->svec[i]; + } + + smbrt->svec[SOEMB_ACTIVE_CNT - 1] = NULL; + smbrt->fur_idx = 0; + } + + for (i = 0; i < SOEMB_ACTIVE_CNT; i++) { + if (smbrt->svec[i] != NULL) + continue; + mb = mbrt_qbs_getmb(&smbrt->qbs, 0); + if (mb) { + smbrt->svec[i] = mb; + break; + } + ret = heap_create_soe_mb(heap, &mb_id); + if (ret == 0) { + smbrt->svec[i] = heap_mbrt_get_mb(heap, mb_id); + break; + } + mb = mbrt_qbs_getmb(&smbrt->qbs, 1); + if (mb) { + smbrt->svec[i] = mb; + break; + } + break; + } + smbrt->cur_idx = 0; +} + void heap_set_root_ptrs(struct palloc_heap *heap, uint64_t **offp, uint64_t **sizep) { @@ -720,8 +971,7 @@ zone_calc_size_idx(uint32_t zone_id, unsigned max_zone, size_t heap_size) * heap_zone_init -- (internal) writes zone's first chunk and header */ static void -heap_zone_init(struct palloc_heap *heap, uint32_t zone_id, uint32_t first_chunk_id, - bool is_evictable) +heap_zone_init(struct palloc_heap *heap, uint32_t zone_id, uint32_t first_chunk_id, int flags) { struct zone *z = ZID_TO_ZONE(&heap->layout_info, zone_id); uint32_t size_idx = zone_calc_size_idx(zone_id, heap->rt->nzones, heap->size); @@ -734,8 +984,11 @@ heap_zone_init(struct palloc_heap *heap, uint32_t zone_id, uint32_t first_chunk_ }; z->header = nhdr; /* write the entire header at once */ - if (is_evictable) - z->header.flags |= ZONE_EVICTABLE_MB; + + if (flags) { + D_ASSERT((flags == ZONE_EVICTABLE_MB) || (flags == ZONE_SOE_MB)); + z->header.flags = flags; + } mo_wal_persist(&heap->p_ops, &z->header, sizeof(z->header)); memblock_huge_init(heap, first_chunk_id, zone_id, size_idx - first_chunk_id); @@ -996,7 +1249,7 @@ 
heap_reclaim_next_ne(struct palloc_heap *heap, uint32_t *zone_id) heap_zinfo_get(heap, i, &allotted, &evictable); if (!allotted) continue; - if (!evictable) { + if (!evictable && !heap_mbrt_ismb_localrt(heap, i)) { h->zones_nextne_gc = i + 1; *zone_id = i; return 0; @@ -1032,29 +1285,22 @@ heap_get_next_unused_zone(struct palloc_heap *heap, uint32_t *zone_id) return 0; } -static int -heap_mark_zone_used_transient(struct palloc_heap *heap, uint32_t zone_id, bool is_evictable) +static void +heap_mark_zone_used_transient(struct palloc_heap *heap, struct mbrt *mb, uint32_t zone_id, + bool is_evictable) { - struct mbrt *mb; - if (is_evictable) { - mb = heap_mbrt_setup_mb(heap, zone_id); - if (mb == NULL) { - ERR("Failed to setup mbrt for zone %u\n", zone_id); - return -1; - } + D_ASSERT(mb != NULL); heap_mbrt_setmb_evictable(heap, mb); - } else - heap_mbrt_setmb_nonevictable(heap, zone_id); + heap->rt->zones_exhausted_e++; + } else { + heap_mbrt_setmb_nonevictable(heap, mb, zone_id); + heap->rt->zones_exhausted_ne++; + } heap->rt->zones_unused_first = zone_id + 1; if (heap->rt->zones_exhausted < heap->rt->zones_unused_first) heap->rt->zones_exhausted = heap->rt->zones_unused_first; - if (is_evictable) - heap->rt->zones_exhausted_e++; - else - heap->rt->zones_exhausted_ne++; - return 0; } static void @@ -1069,13 +1315,9 @@ heap_mark_zone_used_persist(struct palloc_heap *heap, uint32_t zone_id) static void heap_mark_zone_unused_transient(struct palloc_heap *heap, uint32_t zone_id) { - struct mbrt *mb = heap_mbrt_get_mb(heap, zone_id); - - if (heap_mbrt_ismb_evictable(heap, zone_id)) { - D_ASSERT(mb != NULL); - heap_mbrt_cleanup_mb(mb); + if (heap_mbrt_ismb_evictable(heap, zone_id)) heap->rt->zones_exhausted_e--; - } else + else heap->rt->zones_exhausted_ne--; heap_mbrt_setmb_unused(heap, zone_id); @@ -1086,15 +1328,20 @@ heap_mark_zone_unused_transient(struct palloc_heap *heap, uint32_t zone_id) heap->rt->zones_exhausted = zone_id; } -static void +static int heap_mark_zone_unused(struct palloc_heap *heap, uint32_t zone_id) { struct umem_cache_range rg = {0}; bool is_evictable = heap_mbrt_ismb_evictable(heap, zone_id); int rc; + struct mbrt *mb = heap_mbrt_get_mb(heap, zone_id); D_ASSERT(is_evictable == false); + if (heap_mbrt_ismb_localrt(heap, zone_id)) { + heap->rt->soemb_cnt--; + VALGRIND_DO_DESTROY_MEMPOOL_COND(ZID_TO_ZONE(&heap->layout_info, zone_id)); + } heap_mark_zone_unused_transient(heap, zone_id); rg.cr_off = GET_ZONE_OFFSET(zone_id); rg.cr_size = @@ -1103,10 +1350,13 @@ heap_mark_zone_unused(struct palloc_heap *heap, uint32_t zone_id) if (rc != 0) { rc = daos_der2errno(rc); ERR("Failed to remap zone %d in umem cache as unused rc=%d\n", zone_id, rc); - heap_mark_zone_used_transient(heap, zone_id, is_evictable); + heap_mark_zone_used_transient(heap, mb, zone_id, is_evictable); + VALGRIND_DO_CREATE_MEMPOOL(ZID_TO_ZONE(&heap->layout_info, zone_id), 0, 0); + return -1; } heap_zinfo_set_usage(heap, zone_id, MB_U0_HINT); heap_zinfo_set(heap, zone_id, false, false); + return 0; } int @@ -1115,14 +1365,16 @@ heap_populate_nemb_unused(struct palloc_heap *heap) struct bucket *defb; struct memory_block m = MEMORY_BLOCK_NONE; struct mbrt *mb; + int rc; m.size_idx = MAX_CHUNK; mb = heap_mbrt_get_mb(heap, 0); defb = mbrt_bucket_acquire(mb, DEFAULT_ALLOC_CLASS_ID); while (bucket_alloc_block(defb, &m) == 0) { - heap->rt->empty_nemb_cnt--; - heap_mark_zone_unused(heap, m.zone_id); + rc = heap_mark_zone_unused(heap, m.zone_id); + if (!rc) + heap->rt->empty_nemb_cnt--; m = MEMORY_BLOCK_NONE; m.size_idx = 
MAX_CHUNK; @@ -1163,9 +1415,7 @@ heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket) if (rc) return ENOMEM; - rc = heap_mark_zone_used_transient(heap, zone_id, false); - if (rc) - return ENOMEM; + heap_mark_zone_used_transient(heap, NULL, zone_id, false); /* Create a umem cache map for the new zone */ rg.cr_off = GET_ZONE_OFFSET(zone_id); @@ -1196,13 +1446,12 @@ heap_populate_bucket(struct palloc_heap *heap, struct bucket *bucket) VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(z, sizeof(z->header) + sizeof(z->chunk_headers)); - heap_zone_init(heap, zone_id, 0, false); + heap_zone_init(heap, zone_id, 0, 0); heap_mark_zone_used_persist(heap, zone_id); reclaim_garbage: heap_reclaim_zone_garbage(heap, bucket, zone_id); heap_reclaim_setlast_ne(heap, zone_id); - /* * It doesn't matter that this function might not have found any * free blocks because there is still potential that subsequent calls @@ -1602,7 +1851,7 @@ heap_cleanup(struct palloc_heap *heap) if (On_memcheck) { for (i = 0; i < heap->rt->zones_exhausted; i++) { if (!heap_mbrt_ismb_initialized(heap, i) || - !heap_mbrt_ismb_evictable(heap, i)) + !heap_mbrt_ismb_localrt(heap, i)) continue; if (umem_cache_offisloaded(heap->layout_info.store, GET_ZONE_OFFSET(i))) VALGRIND_DO_DESTROY_MEMPOOL(ZID_TO_ZONE(&heap->layout_info, i)); @@ -1610,6 +1859,7 @@ heap_cleanup(struct palloc_heap *heap) } #endif heap_mbrt_fini(heap); + soemb_fini(&heap->rt->smbrt); D_FREE(rt); heap->rt = NULL; @@ -1686,7 +1936,7 @@ heap_ensure_zone0_initialized(struct palloc_heap *heap) struct bucket *b; int rc = 0; - heap_mbrt_setmb_nonevictable(heap, 0); + heap_mbrt_setmb_nonevictable(heap, NULL, 0); if (heap->layout_info.zone0->header.magic != ZONE_HEADER_MAGIC) { /* If not magic the content should be zero, indicating new file */ D_ASSERT(heap->layout_info.zone0->header.magic == 0); @@ -1772,6 +2022,9 @@ heap_boot(struct palloc_heap *heap, void *mmap_base, uint64_t heap_size, uint64_ h->nlocks = On_valgrind ? 
MAX_RUN_LOCKS_VG : MAX_RUN_LOCKS; for (unsigned i = 0; i < h->nlocks; ++i) util_mutex_init(&h->run_locks[i]); + + soemb_init(&h->smbrt); + heap->rt = h; heap->p_ops = *p_ops; @@ -1855,6 +2108,7 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) struct zone *z; struct umem_pin_handle *pin_handle = NULL; struct umem_store *store = heap->layout_info.store; + struct mbrt *mb; D_ASSERT(heap->rt->active_evictable_mb == NULL); @@ -1879,13 +2133,16 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) goto out; } - rc = heap_mark_zone_used_transient(heap, zone_id, true); - if (rc) { + mb = heap_mbrt_setup_mb(heap, zone_id); + if (mb == NULL) { + ERR("Failed to setup mbrt for zone %u\n", zone_id); rc = 1; errno = ENOMEM; goto out; } + heap_mark_zone_used_transient(heap, mb, zone_id, true); + /* Create a umem cache map for the new zone */ rg.cr_off = GET_ZONE_OFFSET(zone_id); rg.cr_size = @@ -1921,7 +2178,7 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) if (rc) goto error; - heap_zone_init(heap, zone_id, 0, true); + heap_zone_init(heap, zone_id, 0, ZONE_EVICTABLE_MB); rc = heap_mbrt_mb_reclaim_garbage(heap, zone_id); if (rc) { ERR("Failed to initialize evictable zone %u", zone_id); @@ -1940,6 +2197,7 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) if (pin_handle) umem_cache_unpin(heap->layout_info.store, pin_handle); heap_mark_zone_unused_transient(heap, zone_id); + heap_mbrt_cleanup_mb(mb); rc = -1; out: @@ -1952,6 +2210,84 @@ heap_create_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) return rc; } +static int +heap_create_soe_mb(struct palloc_heap *heap, uint32_t *mb_id) +{ + uint32_t zone_id; + struct umem_cache_range rg = {0}; + int rc; + struct zone *z; + struct mbrt *mb; + + if (heap->rt->zones_exhausted_ne >= heap->rt->nzones_ne) + return -1; + + rc = heap_get_next_unused_zone(heap, &zone_id); + if (rc) { + D_ERROR("Failed to obtain free zone for evictable mb"); + rc = 1; + errno = ENOMEM; + goto out; + } + + mb = heap_mbrt_setup_mb(heap, zone_id); + if (mb == NULL) { + ERR("Failed to setup mbrt for zone %u\n", zone_id); + rc = 1; + errno = ENOMEM; + goto out; + } + + heap_mark_zone_used_transient(heap, mb, zone_id, false); + + /* Create a umem cache map for the new zone */ + rg.cr_off = GET_ZONE_OFFSET(zone_id); + rg.cr_size = + ((heap->size - rg.cr_off) > ZONE_MAX_SIZE) ? 
ZONE_MAX_SIZE : heap->size - rg.cr_off; + + rc = umem_cache_map(heap->layout_info.store, &rg, 1); + if (rc != 0) { + ERR("Failed to map zone %u to umem cache\n", zone_id); + errno = daos_der2errno(rc); + goto error; + } + + D_DEBUG(DB_TRACE, "Creating evictable zone %d\n", zone_id); + + z = ZID_TO_ZONE(&heap->layout_info, zone_id); + VALGRIND_DO_CREATE_MEMPOOL(z, 0, 0); + VALGRIND_DO_MAKE_MEM_UNDEFINED(z, rg.cr_size); + if (rg.cr_size != ZONE_MAX_SIZE) + VALGRIND_DO_MAKE_MEM_NOACCESS(z + rg.cr_size, (ZONE_MAX_SIZE - rg.cr_size)); + + memset(z, 0, rg.cr_size); + + /* ignore zone and chunk headers */ + VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(z, sizeof(z->header) + sizeof(z->chunk_headers)); + + heap_zone_init(heap, zone_id, 0, ZONE_SOE_MB); + rc = heap_mbrt_mb_reclaim_garbage(heap, zone_id); + if (rc) { + ERR("Failed to initialize evictable zone %u", zone_id); + goto error; + } + heap_mark_zone_used_persist(heap, zone_id); + + *mb_id = zone_id; + rc = 0; + heap_incr_empty_nemb_cnt(heap); + heap->rt->soemb_cnt++; + goto out; + +error: + heap_mark_zone_unused_transient(heap, zone_id); + heap_mbrt_cleanup_mb(mb); + rc = -1; + +out: + return rc; +} + int heap_get_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) { @@ -1971,34 +2307,33 @@ heap_get_evictable_mb(struct palloc_heap *heap, uint32_t *mb_id) } heap->rt->mb_pressure = 0; - if ((mb = TAILQ_FIRST(&heap->rt->mb_u30)) != NULL) - TAILQ_REMOVE(&heap->rt->mb_u30, mb, mb_link); - else if ((mb = TAILQ_FIRST(&heap->rt->mb_u0)) != NULL) - TAILQ_REMOVE(&heap->rt->mb_u0, mb, mb_link); - else if ((ret = heap_create_evictable_mb(heap, mb_id)) >= 0) { + mb = mbrt_qbs_getmb(&heap->rt->emb_qbs, 0); + if (mb) + goto out; + + if ((ret = heap_create_evictable_mb(heap, mb_id)) >= 0) { if (ret) goto retry; mb = heap_mbrt_get_mb(heap, *mb_id); D_ASSERT(mb != NULL); if (heap->rt->active_evictable_mb) { - TAILQ_INSERT_HEAD(&heap->rt->mb_u0, mb, mb_link); - mb->qptr = &heap->rt->mb_u0; + mbrt_qbs_insertmb(&heap->rt->emb_qbs, mb); *mb_id = heap->rt->active_evictable_mb->mb_id; return 0; } - } else if ((mb = TAILQ_FIRST(&heap->rt->mb_u75)) != NULL) { - TAILQ_REMOVE(&heap->rt->mb_u75, mb, mb_link); - heap->rt->mb_pressure = 1; - } else if ((mb = TAILQ_FIRST(&heap->rt->mb_u90)) != NULL) { - TAILQ_REMOVE(&heap->rt->mb_u90, mb, mb_link); - heap->rt->mb_pressure = 1; - } else { + goto out; + } + mb = mbrt_qbs_getmb(&heap->rt->emb_qbs, 1); + + heap->rt->mb_pressure = 1; + + if (mb == NULL) { D_ERROR("Failed to get an evictable MB"); *mb_id = 0; return 0; } +out: heap->rt->active_evictable_mb = mb; - mb->qptr = NULL; *mb_id = mb->mb_id; return 0; } @@ -2008,7 +2343,7 @@ heap_off2mbid(struct palloc_heap *heap, uint64_t offset) { struct memory_block m = memblock_from_offset_opt(heap, offset, 0); - if (heap_mbrt_ismb_evictable(heap, m.zone_id)) + if (heap_mbrt_ismb_localrt(heap, m.zone_id)) return m.zone_id; else return 0; @@ -2044,7 +2379,7 @@ heap_update_mbrt_zinfo(struct palloc_heap *heap, bool init) continue; } if (!evictable) { - heap_mbrt_setmb_nonevictable(heap, i); + heap_mbrt_setmb_nonevictable(heap, NULL, i); nemb_cnt++; } else { mb = heap_mbrt_setup_mb(heap, i); @@ -2078,9 +2413,10 @@ heap_update_mbrt_zinfo(struct palloc_heap *heap, bool init) int heap_load_nonevictable_zones(struct palloc_heap *heap) { - int i, rc; - bool allotted, evictable; + int i, rc; + bool allotted, evictable; struct zone *zone; + struct mbrt *mb; for (i = 1; i < heap->rt->zones_exhausted; i++) { heap_zinfo_get(heap, i, &allotted, &evictable); @@ -2091,10 +2427,20 @@ 
heap_load_nonevictable_zones(struct palloc_heap *heap) if (rc) return rc; zone = ZID_TO_ZONE(&heap->layout_info, i); + D_ASSERT((zone->header.flags & ZONE_EVICTABLE_MB) == 0); + if (zone->header.flags & ZONE_SOE_MB) { + mb = heap_mbrt_setup_mb(heap, i); + if (mb == NULL) { + D_ERROR("failed to load soe mb"); + return ENOMEM; + } + heap_mbrt_setmb_nonevictable(heap, mb, i); + mbrt_qbs_insertmb(&heap->rt->smbrt.qbs, mb); + heap->rt->soemb_cnt++; + } if (!zone->header.sp_usage) heap_incr_empty_nemb_cnt(heap); - else - heap_mbrt_incrmb_usage(heap, 0, zone->header.sp_usage); + heap_mbrt_incrmb_usage(heap, i, zone->header.sp_usage); } } return 0; @@ -2291,6 +2637,57 @@ heap_decr_empty_nemb_cnt(struct palloc_heap *heap) return heap->rt->empty_nemb_cnt ? --heap->rt->empty_nemb_cnt : 0; } +static void +heap_recycle_soembs(struct palloc_heap *heap) +{ + struct mbrt *mb; + struct bucket *defb, *b; + struct memory_block m = MEMORY_BLOCK_NONE; + int i, rc; + + for (i = 0; i < SOEMB_ACTIVE_CNT; i++) { + mb = heap->rt->smbrt.svec[i]; + if (mb && (mb->space_usage == 0)) { + mbrt_qbs_insertmb(&heap->rt->smbrt.qbs, mb); + heap->rt->smbrt.svec[i] = NULL; + } + } + + while ((mb = mbrt_qbs_getmb_ue(&heap->rt->smbrt.qbs)) != NULL) { + defb = mbrt_bucket_acquire(mb, DEFAULT_ALLOC_CLASS_ID); + if (!mb->garbage_reclaimed) { + heap_reclaim_zone_garbage(heap, defb, mb->mb_id); + mb->garbage_reclaimed = 1; + } + mbrt_bucket_release(defb); + for (i = 0; i < MAX_ALLOCATION_CLASSES; i++) { + if (mb->buckets[i] == NULL) + continue; + b = bucket_acquire(mb->buckets[i]); + heap_detach_and_try_discard_run(heap, b); + mbrt_bucket_release(b); + } + defb = mbrt_bucket_acquire(mb, DEFAULT_ALLOC_CLASS_ID); + heap_reclaim_garbage(heap, defb); + m = MEMORY_BLOCK_NONE; + m.size_idx = MAX_CHUNK; + if (bucket_alloc_block(defb, &m) == 0) { + rc = heap_mark_zone_unused(heap, m.zone_id); + if (rc) + mbrt_qbs_insertmb_force(&heap->rt->smbrt.qbs, mb, MB_U0_HINT); + else + heap->rt->empty_nemb_cnt--; + mbrt_bucket_release(defb); + heap_mbrt_cleanup_mb(mb); + } else { + mbrt_bucket_release(defb); + mbrt_qbs_insertmb_force(&heap->rt->smbrt.qbs, mb, MB_U0_HINT); + } + } + + return; +} + int heap_force_recycle(struct palloc_heap *heap) { @@ -2311,6 +2708,7 @@ heap_force_recycle(struct palloc_heap *heap) } } + heap_recycle_soembs(heap); defb = mbrt_bucket_acquire(mb, DEFAULT_ALLOC_CLASS_ID); while (heap_reclaim_next_ne(heap, &zone_id) == 0) { @@ -2325,10 +2723,6 @@ heap_force_recycle(struct palloc_heap *heap) heap_populate_nemb_unused(heap); mb->prev_usage = mb->space_usage; - if (max_reclaim && (heap->rt->empty_nemb_cnt >= heap->rt->empty_nemb_gcth)) - D_WARN("Force GC failed to free up enough nembs, cnt = %d", - heap->rt->empty_nemb_cnt); - return 0; } @@ -2395,7 +2789,7 @@ heap_vg_open(struct palloc_heap *heap, object_callback cb, void *arg, int object if (!heap_mbrt_ismb_initialized(heap, i)) continue; - if (heap_mbrt_ismb_evictable(heap, i)) + if (heap_mbrt_ismb_localrt(heap, i)) VALGRIND_DO_CREATE_MEMPOOL(ZID_TO_ZONE(&heap->layout_info, i), 0, 0); heap_vg_zone_open(heap, i, cb, arg, objects); diff --git a/src/common/dav_v2/heap.h b/src/common/dav_v2/heap.h index f2e710b4ce9..a5cc76d6ba6 100644 --- a/src/common/dav_v2/heap.h +++ b/src/common/dav_v2/heap.h @@ -44,25 +44,21 @@ heap_mbrt_update_alloc_class_buckets(struct palloc_heap *heap, struct mbrt *mb, struct alloc_class *c); int heap_extend(struct palloc_heap *heap, struct bucket *defb, size_t size); -void -heap_mbrt_setmb_evictable(struct palloc_heap *heap, struct mbrt *mb); -bool 
-heap_mbrt_ismb_initialized(struct palloc_heap *heap, uint32_t zone_id); bool heap_mbrt_ismb_evictable(struct palloc_heap *heap, uint32_t zone_id); void -heap_mbrt_setmb_nonevictable(struct palloc_heap *heap, uint32_t zone_id); -void heap_mbrt_setmb_usage(struct palloc_heap *heap, uint32_t zone_id, uint64_t usage); int heap_mbrt_getmb_usage(struct palloc_heap *heap, uint32_t zone_id, uint64_t *allotted, uint64_t *maxsz); void heap_mbrt_incrmb_usage(struct palloc_heap *heap, uint32_t zone_id, int size); -struct mbrt * -heap_mbrt_setup_mb(struct palloc_heap *heap, uint32_t zone_id); -int -heap_mbrt_mb_reclaim_garbage(struct palloc_heap *heap, uint32_t zid); +void +heap_soemb_active_iter_init(struct palloc_heap *heap); +uint32_t +heap_soemb_active_get(struct palloc_heap *heap); +void +heap_soemb_reserve(struct palloc_heap *heap); int heap_ensure_zone0_initialized(struct palloc_heap *heap); int diff --git a/src/common/dav_v2/heap_layout.h b/src/common/dav_v2/heap_layout.h index fa65846921e..098cb752de8 100644 --- a/src/common/dav_v2/heap_layout.h +++ b/src/common/dav_v2/heap_layout.h @@ -82,8 +82,9 @@ enum chunk_type { MAX_CHUNK_TYPE }; -/* zone header flags */ +/* zone header bit flags */ #define ZONE_EVICTABLE_MB 0x0001 +#define ZONE_SOE_MB 0x0002 struct chunk { uint8_t data[CHUNKSIZE]; diff --git a/src/common/dav_v2/palloc.c b/src/common/dav_v2/palloc.c index a82c887f5b1..80ed26fad98 100644 --- a/src/common/dav_v2/palloc.c +++ b/src/common/dav_v2/palloc.c @@ -212,6 +212,8 @@ palloc_reservation_create(struct palloc_heap *heap, size_t size, palloc_constr c return -1; } + heap_soemb_active_iter_init(heap); + retry: mb = heap_mbrt_get_mb(heap, mb_id); if (mb == NULL) { @@ -288,7 +290,7 @@ palloc_reservation_create(struct palloc_heap *heap, size_t size, palloc_constr c */ if ((mb_id != 0) && (err == ENOMEM)) { heap_mbrt_log_alloc_failure(heap, mb_id); - mb_id = 0; + mb_id = heap_soemb_active_get(heap); goto retry; } diff --git a/src/common/dav_v2/tx.c b/src/common/dav_v2/tx.c index 98e6d6d314e..85bde8c16dc 100644 --- a/src/common/dav_v2/tx.c +++ b/src/common/dav_v2/tx.c @@ -559,6 +559,7 @@ dav_tx_begin_v2(dav_obj_t *pop, jmp_buf env, ...) 
sizeof(struct tx_range_def)); tx->first_snapshot = 1; tx->pop = pop; + heap_soemb_reserve(pop->do_heap); } else { FATAL("Invalid stage %d to begin new transaction", tx->stage); } diff --git a/src/common/tests/umem_test_bmem.c b/src/common/tests/umem_test_bmem.c index cd745c48dc8..a3c54d11e37 100644 --- a/src/common/tests/umem_test_bmem.c +++ b/src/common/tests/umem_test_bmem.c @@ -31,6 +31,7 @@ #define POOL_SIZE ((256 * 1024 * 1024ULL)) #define NEMB_RATIO (0.8) #define MB_SIZE (16 * 1024 * 1024) +#define MIN_SOEMB_CNT 3 struct test_arg { struct utest_context *ta_utx; @@ -183,7 +184,7 @@ struct umem_store_ops _store_ops_v2 = { .so_wal_id_cmp = wal_id_cmp, }; -struct umem_store ustore_v2 = {.stor_size = POOL_SIZE * 2, +struct umem_store ustore_v2 = {.stor_size = POOL_SIZE * 3, .stor_ops = &_store_ops_v2, .store_type = DAOS_MD_BMEM_V2, .stor_priv = (void *)(UINT64_MAX)}; @@ -219,7 +220,7 @@ setup_pmem_internal(void **state, struct umem_store *store) return 1; } - rc = utest_pmem_create(arg->ta_pool_name, POOL_SIZE, sizeof(*arg->ta_root), store, + rc = utest_pmem_create(arg->ta_pool_name, POOL_SIZE * 2, sizeof(*arg->ta_root), store, &arg->ta_utx); if (rc != 0) { perror("Could not create pmem context"); @@ -244,7 +245,24 @@ setup_pmem(void **state) static int setup_pmem_v2(void **state) { - return setup_pmem_internal(state, &ustore_v2); + struct test_arg *arg; + struct umem_instance *umm; + int rc, i; + + rc = setup_pmem_internal(state, &ustore_v2); + + arg = *state; + umm = utest_utx2umm(arg->ta_utx); + /* + * Do soemb reservations before the test begins. + */ + if (!rc) { + for (i = 0; i < MIN_SOEMB_CNT; i++) { + umem_tx_begin(umm, NULL); + umem_tx_commit(umm); + } + } + return rc; } static int @@ -2239,7 +2257,7 @@ test_tx_alloc_from_multimb(void **state) uint32_t id; int i; - for (i = 0; i < 10; i++) { + for (i = 0; i < 8; i++) { /* Create an MB and fill it with allocs */ ainfo[i].mb_id = umem_allot_mb_evictable(umm, 0); ainfo[i].num_allocs = 0; @@ -2257,11 +2275,9 @@ test_tx_alloc_from_multimb(void **state) /* Free 15% of space for MB 5 */ free_bucket_by_pct(umm, &ainfo[5], 20); /* 75-90 */ /* Free 10% of space for MB 6 */ - free_bucket_by_pct(umm, &ainfo[6], 18); /* 75-90 */ - /* Free 50% of space for MB 7 */ - free_bucket_by_pct(umm, &ainfo[7], 50); /* 30-75 */ + free_bucket_by_pct(umm, &ainfo[6], 50); /* 30-75 */ /* Free 90% of space for MB 8 */ - free_bucket_by_pct(umm, &ainfo[8], 90); /* 0-30 */ + free_bucket_by_pct(umm, &ainfo[7], 90); /* 0-30 */ /* Allocator should return mb with utilization 30%-75% */ id = umem_allot_mb_evictable(umm, 0); @@ -2269,9 +2285,9 @@ test_tx_alloc_from_multimb(void **state) assert_true(id == ainfo[3].mb_id); alloc_bucket_to_full(umm, &ainfo[3]); id = umem_allot_mb_evictable(umm, 0); - print_message("obtained id %d, expected is %d\n", id, ainfo[7].mb_id); - assert_true(id == ainfo[7].mb_id); - alloc_bucket_to_full(umm, &ainfo[7]); + print_message("obtained id %d, expected is %d\n", id, ainfo[6].mb_id); + assert_true(id == ainfo[6].mb_id); + alloc_bucket_to_full(umm, &ainfo[6]); /* Next preference should be 0%-30% */ id = umem_allot_mb_evictable(umm, 0); @@ -2279,13 +2295,13 @@ test_tx_alloc_from_multimb(void **state) assert_true(id == ainfo[4].mb_id); alloc_bucket_to_full(umm, &ainfo[4]); id = umem_allot_mb_evictable(umm, 0); - print_message("obtained id %d, expected is %d\n", id, ainfo[8].mb_id); - assert_true(id == ainfo[8].mb_id); - alloc_bucket_to_full(umm, &ainfo[8]); + print_message("obtained id %d, expected is %d\n", id, ainfo[7].mb_id); + 
assert_true(id == ainfo[7].mb_id); + alloc_bucket_to_full(umm, &ainfo[7]); /* Next is to create a new memory bucket. */ id = umem_allot_mb_evictable(umm, 0); - for (i = 0; i < 10; i++) + for (i = 0; i < 8; i++) assert_true(id != ainfo[i].mb_id); print_message("obtained id %d\n", id); @@ -2384,7 +2400,7 @@ test_umempobj_create_smallsize(void **state) static void test_umempobj_nemb_usage(void **state) { - int num = 0; + int num = 0, i; char *name; struct umem_store ustore_tmp = {.stor_size = 256 * 1024 * 1024, .stor_ops = &_store_ops_v2, @@ -2399,13 +2415,21 @@ test_umempobj_nemb_usage(void **state) /* Create a heap and cache of size 256MB and 249MB (16 & 15 zones) respectively */ D_ASPRINTF(name, "/mnt/daos/umem-test-tmp-%d", 0); assert_true(name != NULL); - uma.uma_pool = umempobj_create(name, "invalid_pool", UMEMPOBJ_ENABLE_STATS, - 240 * 1024 * 1024, 0666, &ustore_tmp); + uma.uma_pool = umempobj_create(name, "valid_pool", UMEMPOBJ_ENABLE_STATS, 240 * 1024 * 1024, + 0666, &ustore_tmp); assert_ptr_not_equal(uma.uma_pool, NULL); umem_class_init(&uma, &umm); - /* Do allocation and verify that only 13 zones allotted to non evictable MBs */ + /* Do the SOEMB reservation before the actual test. */ + for (i = 0; i < MIN_SOEMB_CNT; i++) { + umem_tx_begin(&umm, NULL); + umem_tx_commit(&umm); + } + + /* Do allocation and verify that only 10 zones allotted to non evictable MBs + * 3 zones are reserved for soemb + */ for (num = 0;; num++) { /* do an allocation that takes more than half the zone size */ umoff = umem_atomic_alloc(&umm, alloc_size, UMEM_TYPE_ANY); @@ -2416,7 +2440,7 @@ test_umempobj_nemb_usage(void **state) prev_umoff = umoff; } /* 80% nemb when heap size greater than cache size */ - assert_int_equal(num, 13); + assert_int_equal(num, 13 - MIN_SOEMB_CNT); print_message("Number of allocations is %d\n", num); for (--num;; num--) { @@ -2436,12 +2460,18 @@ test_umempobj_nemb_usage(void **state) /* Create a heap and cache of size 256MB (16 zones) each */ D_ASPRINTF(name, "/mnt/daos/umem-test-tmp-%d", 1); assert_true(name != NULL); - uma.uma_pool = umempobj_create(name, "invalid_pool", UMEMPOBJ_ENABLE_STATS, - 256 * 1024 * 1024, 0666, &ustore_tmp); + uma.uma_pool = umempobj_create(name, "valid_pool", UMEMPOBJ_ENABLE_STATS, 256 * 1024 * 1024, + 0666, &ustore_tmp); assert_ptr_not_equal(uma.uma_pool, NULL); umem_class_init(&uma, &umm); + /* Do the SOEMB reservation before the actual test. 
*/ + for (i = 0; i < MIN_SOEMB_CNT; i++) { + umem_tx_begin(&umm, NULL); + umem_tx_commit(&umm); + } + /* Do allocation and verify that all 16 zones are allotted to non evictable MBs */ for (num = 0;; num++) { /* do an allocation that takes more than half the zone size */ @@ -2472,7 +2502,7 @@ test_umempobj_nemb_usage(void **state) static void test_umempobj_heap_mb_stats(void **state) { - int num = 0, count, rc; + int num = 0, count, rc, i; char *name; uint64_t scm_size = 128 * 1024 * 1024; uint64_t meta_size = 256 * 1024 * 1024; @@ -2484,7 +2514,8 @@ test_umempobj_heap_mb_stats(void **state) struct umem_instance umm; umem_off_t umoff, *ptr = NULL, prev_umoff = UMOFF_NULL; size_t alloc_size = 128; - uint64_t allocated, allocated0, allocated1, maxsz, maxsz_exp; + uint64_t allocated, allocated0, allocated1; + uint64_t maxsz, maxsz_exp, maxsz_alloc; uint32_t mb_id; uma.uma_id = umempobj_backend_type2class_id(ustore_tmp.store_type); @@ -2494,7 +2525,8 @@ test_umempobj_heap_mb_stats(void **state) uma.uma_pool = umempobj_create(name, "invalid_pool", UMEMPOBJ_ENABLE_STATS, scm_size, 0666, &ustore_tmp); assert_ptr_not_equal(uma.uma_pool, NULL); - maxsz_exp = (uint64_t)(scm_size / MB_SIZE * NEMB_RATIO) * MB_SIZE; + maxsz_exp = (uint64_t)(scm_size / MB_SIZE * NEMB_RATIO) * MB_SIZE; + maxsz_alloc = ((uint64_t)(((scm_size / MB_SIZE) * NEMB_RATIO)) - MIN_SOEMB_CNT) * MB_SIZE; umem_class_init(&uma, &umm); @@ -2504,6 +2536,12 @@ test_umempobj_heap_mb_stats(void **state) assert_int_equal(rc, 0); assert_int_equal(maxsz, maxsz_exp); + /* Do the SOEMB reservation before the actual test. */ + for (i = 0; i < MIN_SOEMB_CNT; i++) { + umem_tx_begin(&umm, NULL); + umem_tx_commit(&umm); + } + /* allocate and consume all of the space */ for (num = 0;; num++) { umoff = umem_atomic_alloc(&umm, alloc_size, UMEM_TYPE_ANY); @@ -2516,7 +2554,7 @@ test_umempobj_heap_mb_stats(void **state) rc = umempobj_get_mbusage(umm.umm_pool, 0, &allocated1, &maxsz); print_message("NE usage max_size = %lu allocated = %lu\n", maxsz, allocated1); assert_int_equal(rc, 0); - assert_true(allocated1 * 100 / maxsz >= 99); + assert_true(allocated1 * 100 / maxsz_alloc >= 99); assert_int_equal(maxsz, maxsz_exp); for (count = num; count > num / 2; count--) { @@ -2530,7 +2568,8 @@ test_umempobj_heap_mb_stats(void **state) rc = umempobj_get_mbusage(umm.umm_pool, 0, &allocated, &maxsz); print_message("NE usage max_size = %lu allocated = %lu\n", maxsz, allocated); assert_int_equal(rc, 0); - assert_true(allocated < allocated1 / 2); + assert_true(allocated < ((allocated1 / 2) + alloc_size)); + assert_true((allocated + alloc_size) > (allocated1 / 2)); assert_int_equal(maxsz, maxsz_exp); for (;;) { umoff = *ptr; diff --git a/src/vos/tests/vts_wal.c b/src/vos/tests/vts_wal.c index 83303ad213d..ad79f0c4819 100644 --- a/src/vos/tests/vts_wal.c +++ b/src/vos/tests/vts_wal.c @@ -629,20 +629,22 @@ setup_wal_io(void **state) static struct io_test_args test_args; -#define MDTEST_META_BLOB_SIZE (256 * 1024 * 1024UL) -#define MDTEST_VOS_SIZE (160 * 1024 * 1024UL) +#define MDTEST_MIN_SOEMB_CNT 3 +#define MDTEST_MAX_NEMB_CNT 9 +#define MDTEST_MAX_EMB_CNT 8 #define MDTEST_MB_SIZE (16 * 1024 * 1024UL) -#define MDTEST_MB_CNT (MDTEST_META_BLOB_SIZE / MDTEST_MB_SIZE) -#define MDTEST_MB_VOS_CNT (MDTEST_VOS_SIZE / MDTEST_MB_SIZE) -#define MDTEST_MAX_NEMB_CNT (MDTEST_MB_VOS_CNT * 8 / 10) -#define MDTEST_MAX_EMB_CNT (MDTEST_MB_CNT - MDTEST_MAX_NEMB_CNT) +#define MDTEST_META_BLOB_SIZE \ + ((MDTEST_MIN_SOEMB_CNT + MDTEST_MAX_NEMB_CNT + MDTEST_MAX_EMB_CNT) * MDTEST_MB_SIZE) 
+#define MDTEST_VOS_SIZE ((MDTEST_MIN_SOEMB_CNT + MDTEST_MAX_NEMB_CNT) * 10 / 8 * MDTEST_MB_SIZE) +#define MDTEST_MB_VOS_CNT ((int)(MDTEST_VOS_SIZE / MDTEST_MB_SIZE)) +#define MDTEST_MB_CNT ((int)(MDTEST_META_BLOB_SIZE / MDTEST_MB_SIZE)) static int setup_mb_io(void **state) { int rc; - d_setenv("DAOS_NEMB_EMPTY_RECYCLE_THRESHOLD", "4", true); + d_setenv("DAOS_NEMB_EMPTY_RECYCLE_THRESHOLD", "2", true); memset(&test_args, 0, sizeof(test_args)); rc = vts_ctx_init_ex(&test_args.ctx, MDTEST_VOS_SIZE, MDTEST_META_BLOB_SIZE); *state = (void *)&test_args; @@ -1345,6 +1347,7 @@ struct bucket_alloc_info { uint32_t num_allocs; uint32_t mb_id; uint32_t alloc_size; + bool allow_spill; }; #define CHECKPOINT_FREQ 10000 @@ -1387,7 +1390,8 @@ alloc_bucket_to_full(struct umem_instance *umm, struct bucket_alloc_info *ainfo, ainfo->start_umoff = umem_alloc_from_bucket(umm, alloc_size, id); assert_false(UMOFF_IS_NULL(ainfo->start_umoff)); ainfo->num_allocs++; - assert_true(umem_get_mb_from_offset(umm, ainfo->start_umoff) == id); + if (!ainfo->allow_spill) + assert_true(umem_get_mb_from_offset(umm, ainfo->start_umoff) == id); prev_umoff = ainfo->start_umoff; ptr = (umem_off_t *)umem_off2ptr(umm, prev_umoff); *ptr = UMOFF_NULL; @@ -1407,7 +1411,8 @@ alloc_bucket_to_full(struct umem_instance *umm, struct bucket_alloc_info *ainfo, umem_tx_begin(umm, NULL); umoff = umem_alloc_from_bucket(umm, alloc_size, id); - if (UMOFF_IS_NULL(umoff) || (umem_get_mb_from_offset(umm, umoff) != id)) { + if (UMOFF_IS_NULL(umoff) || + (!ainfo->allow_spill && (umem_get_mb_from_offset(umm, umoff) != id))) { umem_tx_abort(umm, 1); break; } @@ -1452,7 +1457,8 @@ free_bucket_by_pct(struct umem_instance *umm, struct bucket_alloc_info *ainfo, i umoff = ainfo->start_umoff; for (i = 0; i < num_free; i++) { - assert_true(umem_get_mb_from_offset(umm, umoff) == ainfo->mb_id); + if (!ainfo->allow_spill) + assert_true(umem_get_mb_from_offset(umm, umoff) == ainfo->mb_id); ptr = (umem_off_t *)umem_off2ptr(umm, umoff); next_umoff = *ptr; umem_atomic_free(umm, umoff); @@ -1470,6 +1476,35 @@ free_bucket_by_pct(struct umem_instance *umm, struct bucket_alloc_info *ainfo, i ainfo->start_umoff, ainfo->num_allocs); } +static void +traverse_bucket(struct umem_instance *umm, struct bucket_alloc_info *ainfo) +{ + int num_elems = ainfo->num_allocs; + umem_off_t umoff, *ptr; + struct umem_pin_handle *p_hdl; + struct umem_cache_range rg = {0}; + int i, rc; + + if (UMOFF_IS_NULL(ainfo->start_umoff)) + return; + rg.cr_off = umem_get_mb_base_offset(umm, ainfo->mb_id); + rg.cr_size = 1; + rc = umem_cache_pin(&umm->umm_pool->up_store, &rg, 1, 0, &p_hdl); + assert_true(rc == 0); + + umoff = ainfo->start_umoff; + for (i = 1; i < num_elems * 2; i++) { + ptr = (umem_off_t *)umem_off2ptr(umm, umoff); + umoff = *ptr; + if (UMOFF_IS_NULL(umoff)) + break; + } + umem_cache_unpin(&umm->umm_pool->up_store, p_hdl); + print_message("Traverse: Bucket %d, start off %lu num_allocation %d actual_found %d\n", + ainfo->mb_id, ainfo->start_umoff, ainfo->num_allocs, i); + assert_true(i == num_elems); +} + static void wal_mb_utilization_tests(void **state) { @@ -1491,6 +1526,7 @@ wal_mb_utilization_tests(void **state) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 0; + ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); } @@ -1545,6 +1581,7 @@ wal_mb_utilization_tests(void **state) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 0; + 
ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); @@ -1592,6 +1629,7 @@ wal_mb_emb_evicts_emb(void **state) ainfo[0].num_allocs = 0; ainfo[0].start_umoff = UMOFF_NULL; ainfo[0].alloc_size = 0; + ainfo[0].allow_spill = 0; alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); /* @@ -1620,6 +1658,7 @@ wal_mb_emb_evicts_emb(void **state) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 0; + ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); } @@ -1658,6 +1697,7 @@ wal_mb_nemb_evicts_emb(void **state) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 0; + ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); } @@ -1669,6 +1709,7 @@ wal_mb_nemb_evicts_emb(void **state) ainfo[0].num_allocs = 0; ainfo[0].start_umoff = UMOFF_NULL; ainfo[0].alloc_size = 0; + ainfo[0].allow_spill = 0; alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); /* @@ -1706,6 +1747,7 @@ wal_mb_nemb_pct(void **state) ainfo[0].num_allocs = 0; ainfo[0].start_umoff = UMOFF_NULL; ainfo[0].alloc_size = 2048; + ainfo[0].allow_spill = 0; alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); assert_true(rc == 0); @@ -1741,6 +1783,7 @@ wal_mb_nemb_pct(void **state) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 2048; + ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); } @@ -1772,6 +1815,7 @@ nemb_unused(void **state) ainfo[0].num_allocs = 0; ainfo[0].start_umoff = UMOFF_NULL; ainfo[0].alloc_size = 512 * 1024; + ainfo[0].allow_spill = 0; alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); rc = umempobj_get_mbusage(umm->umm_pool, 0, &nemb_full_size, &maxsz); assert_true(rc == 0); @@ -1795,11 +1839,13 @@ nemb_unused(void **state) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 512 * 1024; + ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); } /* Make sure that we can only create MDTEST_MAX_EMB_CNT evictable MBs */ id = umem_allot_mb_evictable(umm, 0); + print_message("Got id %d\n", id); for (j = 1; j <= MDTEST_MAX_EMB_CNT; j++) { if (id == ainfo[j].mb_id) break; @@ -1810,7 +1856,7 @@ nemb_unused(void **state) if (umem_cache_offisloaded(&umm->umm_pool->up_store, ainfo[j].start_umoff)) found++; print_message("phase3: Found %d evictable MBs loaded\n", found); - D_ASSERT(found > (MDTEST_MB_VOS_CNT - MDTEST_MAX_NEMB_CNT)); + D_ASSERT(found > (MDTEST_MB_VOS_CNT - MDTEST_MAX_NEMB_CNT - MDTEST_MIN_SOEMB_CNT)); for (i = 1; i <= MDTEST_MAX_EMB_CNT; i++) free_bucket_by_pct(umm, &ainfo[i], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); @@ -1833,7 +1879,7 @@ nemb_unused(void **state) cont = vos_hdl2cont(arg->ctx.tc_co_hdl); umm = vos_cont2umm(cont); - /* Force GC */ + /* After a restart gc may not recalim all of the free MBs. 
Hence rerun it multiple times */ umem_heap_gc(umm); umem_heap_gc(umm); @@ -1859,7 +1905,7 @@ nemb_unused(void **state) found++; print_message("phase7: Found %d evictable MBs loaded\n", found); - D_ASSERT(found > (MDTEST_MB_VOS_CNT - MDTEST_MAX_NEMB_CNT)); + D_ASSERT(found > (MDTEST_MB_VOS_CNT - MDTEST_MAX_NEMB_CNT - MDTEST_MIN_SOEMB_CNT)); alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); @@ -1868,6 +1914,264 @@ nemb_unused(void **state) assert_true(nemb_full_size == cur_allocated); } +static void +free_bucket_soembtest(struct umem_instance *umm, struct bucket_alloc_info *ainfo, int hint, + void (*chkpt_fn)(void *arg), void *arg) +{ + int num_free; + umem_off_t umoff, *ptr, next_umoff, *prev_ptr, baseoffset; + struct umem_pin_handle *p_hdl; + struct umem_cache_range rg = {0}; + int rc, pg_id, npg_id; + int free_incr = hint / 10 + 1; + int tfree = 0; + + if (UMOFF_IS_NULL(ainfo->start_umoff)) + return; + print_message("SOEMB Free BEFORE: Bucket %d, start off %lu num_allocation %d\n", + ainfo->mb_id, ainfo->start_umoff, ainfo->num_allocs); + + rg.cr_off = umem_get_mb_base_offset(umm, ainfo->mb_id); + rg.cr_size = 1; + rc = umem_cache_pin(&umm->umm_pool->up_store, &rg, 1, 0, &p_hdl); + assert_true(rc == 0); + + baseoffset = umem_get_mb_base_offset(umm, 0); + umoff = ainfo->start_umoff; + num_free = tfree; + pg_id = (umoff - baseoffset) / MDTEST_MB_SIZE; + prev_ptr = &ainfo->start_umoff; + while (ainfo->num_allocs) { + ptr = (umem_off_t *)umem_off2ptr(umm, umoff); + next_umoff = *ptr; + if (num_free && pg_id) { + umem_tx_begin(umm, NULL); + umem_free(umm, umoff); + if (prev_ptr != &ainfo->start_umoff) + umem_tx_add_ptr(umm, prev_ptr, sizeof(umoff)); + *prev_ptr = next_umoff; + umem_tx_commit(umm); + num_free--; + if (((ainfo->num_allocs-- % CHECKPOINT_FREQ) == 0) && (chkpt_fn != NULL)) + chkpt_fn(arg); + } else + prev_ptr = ptr; + umoff = next_umoff; + if (UMOFF_IS_NULL(umoff)) + break; + npg_id = (umoff - baseoffset) / MDTEST_MB_SIZE; + if (npg_id != pg_id) { + print_message("Freed %d blocks from page %d\n", tfree - num_free, pg_id); + pg_id = npg_id; + if (pg_id) + tfree += free_incr; + num_free = tfree; + } + } + if (chkpt_fn != NULL) + chkpt_fn(arg); + umem_cache_unpin(&umm->umm_pool->up_store, p_hdl); + print_message("SOEMB Free AFTER: Bucket %d, start off %lu num_allocation %d\n", + ainfo->mb_id, ainfo->start_umoff, ainfo->num_allocs); +} + +static void +soemb_test(void **state) +{ + struct io_test_args *arg = *state; + struct vos_container *cont; + struct umem_instance *umm; + int rc, soemb_num_allocs; + struct bucket_alloc_info ainfo[MDTEST_MB_CNT + 1]; + daos_size_t maxsz, cur_allocated, cur_allocated1, pg_alloc_sz; + daos_size_t cur_allocated2; + + cont = vos_hdl2cont(arg->ctx.tc_co_hdl); + umm = vos_cont2umm(cont); + + /* + * Obtain number of allocation possible per bucket. + */ + ainfo[2].mb_id = umem_allot_mb_evictable(umm, 0); + ainfo[2].num_allocs = 0; + ainfo[2].start_umoff = UMOFF_NULL; + ainfo[2].alloc_size = 512 * 1024; + ainfo[2].allow_spill = 0; + assert_true(ainfo[2].mb_id != 0); + alloc_bucket_to_full(umm, &ainfo[2], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, ainfo[2].mb_id, &pg_alloc_sz, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu pg_alloc_sz = %lu\n", maxsz, pg_alloc_sz); + + /* + * Validate that the allocation to default bucket does not spill over to soe buckets. 
+ */ + print_message("Stage 1\n"); + ainfo[0].mb_id = 0; + ainfo[0].num_allocs = 0; + ainfo[0].start_umoff = UMOFF_NULL; + ainfo[0].alloc_size = 512 * 1024; + ainfo[0].allow_spill = 0; + alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated); + assert_true(rc == 0); + assert_true(cur_allocated > (pg_alloc_sz * MDTEST_MAX_NEMB_CNT)); + + /* + * After a spill to soe buckets, the total utilization should be near full. + */ + print_message("Stage 2\n"); + ainfo[1].mb_id = umem_allot_mb_evictable(umm, 0); + ainfo[1].num_allocs = 0; + ainfo[1].start_umoff = UMOFF_NULL; + ainfo[1].alloc_size = 512 * 1024; + ainfo[1].allow_spill = 1; + assert_true(ainfo[1].mb_id != 0); + alloc_bucket_to_full(umm, &ainfo[1], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated); + assert_true(cur_allocated > (pg_alloc_sz * (MDTEST_MAX_NEMB_CNT + MDTEST_MIN_SOEMB_CNT))); + + /* + * Free the allocations in default bucket. Now further allocation in evictable bucket + * should cause soe buckets to extend stealing the freed non-evictable buckets. + */ + print_message("Stage 3\n"); + free_bucket_by_pct(umm, &ainfo[0], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); + print_message("Triggering gc\n"); + umem_heap_gc(umm); + umem_heap_gc(umm); + umem_heap_gc(umm); + soemb_num_allocs = ainfo[1].num_allocs; + alloc_bucket_to_full(umm, &ainfo[1], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated1, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated1); + assert_true(cur_allocated == cur_allocated1); + assert_true(soemb_num_allocs < ainfo[1].num_allocs); + + /* + * Do a 50% free from the evictable + soe buckets, trigger gc and do allocation in + * default bucket. The non-evictable buckets should extend using the freed soe buckets. 
+ */ + print_message("Stage 4\n"); + free_bucket_by_pct(umm, &ainfo[1], 50, checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated1, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated1); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + /* 50% includes the evictable MB, hence cur_allocated1 is not exactly cur_allocated/2 */ + assert_true(cur_allocated1 < (cur_allocated / 2 + MDTEST_MB_SIZE)); + + print_message("Triggering gc\n"); + umem_heap_gc(umm); + umem_heap_gc(umm); + umem_heap_gc(umm); + + /* Now the MBs marked unused should be available for NEMB allocation */ + ainfo[0].mb_id = 0; + ainfo[0].num_allocs = 0; + ainfo[0].start_umoff = UMOFF_NULL; + ainfo[0].alloc_size = 512 * 1024; + ainfo[0].allow_spill = 0; + alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); + + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated2, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated2); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(cur_allocated2 > (cur_allocated1 + 2 * pg_alloc_sz)); + + /* + * Restart the pool and check whether all of the soe buckets are accessible + * without explicit pin. + */ + print_message("Stage 5\n"); + wal_pool_refill(arg); + cont = vos_hdl2cont(arg->ctx.tc_co_hdl); + umm = vos_cont2umm(cont); + traverse_bucket(umm, &ainfo[1]); + + /* + * Check whether gc of soe buckets works post restart. + */ + print_message("Stage 6\n"); + free_bucket_by_pct(umm, &ainfo[1], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated1, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated1); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + + print_message("Triggering gc\n"); + umem_heap_gc(umm); + umem_heap_gc(umm); + umem_heap_gc(umm); + + alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); + + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated2, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated2); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(cur_allocated2 > (pg_alloc_sz * MDTEST_MAX_NEMB_CNT)); + + /* + * Similarly check whether freed non-evictable buckets will be + * reused as soemb post gc. 
+ */ + print_message("Stage 7\n"); + free_bucket_by_pct(umm, &ainfo[0], 100, checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated1, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated1); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(cur_allocated1 < MDTEST_MB_SIZE); + + print_message("Triggering gc\n"); + umem_heap_gc(umm); + umem_heap_gc(umm); + umem_heap_gc(umm); + + alloc_bucket_to_full(umm, &ainfo[1], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated2, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated2); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(cur_allocated2 > (pg_alloc_sz * MDTEST_MAX_NEMB_CNT)); + + /* + * Selectively varying free blocks from different soe buckets and do + * reallocation post gc. Validate that all of the soe buckets are reused. + */ + print_message("Stage 8\n"); + free_bucket_soembtest(umm, &ainfo[1], ainfo[2].num_allocs, checkpoint_fn, + &arg->ctx.tc_po_hdl); + + print_message("Triggering gc\n"); + umem_heap_gc(umm); + umem_heap_gc(umm); + umem_heap_gc(umm); + + alloc_bucket_to_full(umm, &ainfo[1], checkpoint_fn, &arg->ctx.tc_po_hdl); + rc = umempobj_get_mbusage(umm->umm_pool, 0, &cur_allocated2, &maxsz); + assert_true(rc == 0); + print_message("non-evictable MBs max_size = %lu current utilization = %lu\n", maxsz, + cur_allocated2); + assert_true(maxsz == MDTEST_VOS_SIZE * 80 / 100); + assert_true(cur_allocated2 > (pg_alloc_sz * MDTEST_MAX_NEMB_CNT)); +} + static int umoff_in_freelist(umem_off_t *free_list, int cnt, umem_off_t umoff, bool clear) { @@ -1910,6 +2214,7 @@ wal_umempobj_block_reuse_internal(void **state, int restart) ainfo[0].num_allocs = 0; ainfo[0].start_umoff = UMOFF_NULL; ainfo[0].alloc_size = 512; + ainfo[0].allow_spill = 0; alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); /* Allocate from Evictable Buckets. */ @@ -1919,6 +2224,7 @@ wal_umempobj_block_reuse_internal(void **state, int restart) ainfo[i].num_allocs = 0; ainfo[i].start_umoff = UMOFF_NULL; ainfo[i].alloc_size = 512; + ainfo[i].allow_spill = 0; assert_true(ainfo[i].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[i], checkpoint_fn, &arg->ctx.tc_po_hdl); } @@ -2022,6 +2328,7 @@ wal_umempobj_block_reuse_internal(void **state, int restart) /* Allocate from E Buckets and it should reuse the previous freed blocks */ for (i = 1; i <= MDTEST_MAX_EMB_CNT; i++) { + print_message("Allocating from bucket %d\n", ainfo[i].mb_id); rg.cr_off = umem_get_mb_base_offset(umm, ainfo[i].mb_id); rg.cr_size = 1; rc = umem_cache_pin(&umm->umm_pool->up_store, &rg, 1, 0, &p_hdl); @@ -2121,13 +2428,14 @@ wal_umempobj_mbusage_test(void **state) cont = vos_hdl2cont(arg->ctx.tc_co_hdl); umm = vos_cont2umm(cont); - maxsz_exp = MDTEST_MAX_NEMB_CNT * MDTEST_MB_SIZE; + maxsz_exp = (MDTEST_MAX_NEMB_CNT + MDTEST_MIN_SOEMB_CNT) * MDTEST_MB_SIZE; /* Allocate from NE Buckets. 
It should use 80% 360M i.e, 16 buckets */ ainfo[0].mb_id = 0; ainfo[0].num_allocs = 0; ainfo[0].start_umoff = UMOFF_NULL; ainfo[0].alloc_size = 512; + ainfo[0].allow_spill = 0; alloc_bucket_to_full(umm, &ainfo[0], checkpoint_fn, &arg->ctx.tc_po_hdl); /* Create an MB and fill it with allocs */ @@ -2135,6 +2443,7 @@ wal_umempobj_mbusage_test(void **state) ainfo[1].num_allocs = 0; ainfo[1].start_umoff = UMOFF_NULL; ainfo[1].alloc_size = 512; + ainfo[1].allow_spill = 0; assert_true(ainfo[1].mb_id != 0); alloc_bucket_to_full(umm, &ainfo[1], checkpoint_fn, &arg->ctx.tc_po_hdl); free_bucket_by_pct(umm, &ainfo[1], 50, checkpoint_fn, &arg->ctx.tc_po_hdl); @@ -2314,7 +2623,7 @@ p2_basic_test(void **state) daos_epoch_t epoch = 1; daos_size_t io_size = 512; struct vos_object *obj; - uint32_t bkt_id = 1, missed, loaded; + uint32_t bkt_id = 1 + MDTEST_MIN_SOEMB_CNT, missed, loaded; uint64_t used[2], ne_init; int rc; @@ -2635,6 +2944,7 @@ static const struct CMUnitTest wal_MB_tests[] = { {"WAL39: P2 fill evictable buckets", p2_fill_test, setup_mb_io, teardown_mb_io}, {"WAL40: nemb pct test", wal_mb_nemb_pct, setup_mb_io_nembpct, teardown_mb_io_nembpct}, {"WAL41: nemb unused test", nemb_unused, setup_mb_io, teardown_mb_io}, + {"WAL42: soemb test", soemb_test, setup_mb_io, teardown_mb_io}, }; int @@ -2652,14 +2962,12 @@ run_wal_tests(const char *cfg) dts_create_config(test_name, "WAL Pool and container tests %s", cfg); D_PRINT("Running %s\n", test_name); - rc = cmocka_run_group_tests_name(test_name, wal_tests, setup_wal_test, - teardown_wal_test); + rc = cmocka_run_group_tests_name(test_name, wal_tests, setup_wal_test, teardown_wal_test); dts_create_config(test_name, "WAL Basic SV and EV IO tests %s", cfg); D_PRINT("Running %s\n", test_name); otype = 0; - rc += cmocka_run_group_tests_name(test_name, wal_kv_basic_tests, - setup_wal_io, teardown_io); + rc += cmocka_run_group_tests_name(test_name, wal_kv_basic_tests, setup_wal_io, teardown_io); for (i = 0; i < (sizeof(type_list) / sizeof(int)); i++) { otype = type_list[i]; @@ -2675,13 +2983,13 @@ run_wal_tests(const char *cfg) cfg); test_name[3] = '1'; D_PRINT("Running %s\n", test_name); - rc += cmocka_run_group_tests_name(test_name, wal_io_tests, - setup_wal_io, teardown_io); + rc += + cmocka_run_group_tests_name(test_name, wal_io_tests, setup_wal_io, teardown_io); if (otype == DAOS_OT_MULTI_UINT64) { test_name[3] = '2'; D_PRINT("Running %s\n", test_name); - rc += cmocka_run_group_tests_name(test_name, wal_io_int_tests, - setup_wal_io, teardown_io); + rc += cmocka_run_group_tests_name(test_name, wal_io_int_tests, setup_wal_io, + teardown_io); } }