From b6d60ca4913e586e0ddefa09d8dc1f5035c4bc46 Mon Sep 17 00:00:00 2001 From: Thilina Rathnayake Date: Tue, 23 Mar 2021 09:13:06 -0500 Subject: [PATCH] Fix element redistribution issue (#37) --- src/genmap-impl.h | 9 +- src/genmap-laplacian.c | 28 +++--- src/genmap-load-balance.c | 31 +++++++ src/genmap-rcb.c | 181 +++++++++++++++++++------------------ src/genmap-rsb.c | 3 + src/genmap-statistics.c | 7 +- src/genmap.h | 4 +- src/parRSB.c | 14 +-- src/parRSB.h | 16 ++-- tests/t210-levels.c | 2 +- tests/t220-interpolation.c | 2 +- tests/t230-flex-cg.c | 2 +- tests/t235-project-pf.c | 2 +- 13 files changed, 170 insertions(+), 131 deletions(-) diff --git a/src/genmap-impl.h b/src/genmap-impl.h index 2c8c0c66..f1d6b625 100644 --- a/src/genmap-impl.h +++ b/src/genmap-impl.h @@ -27,7 +27,7 @@ /* rcb_element is used for rcb and rib */ struct rcb_element { - unsigned char type; + int type; GenmapInt proc; GenmapInt origin; GenmapInt seq; @@ -38,7 +38,7 @@ struct rcb_element { /* rsb_element should be a superset of rcb_element */ struct rsb_element { - unsigned char type; + int type; GenmapInt proc; GenmapInt origin; GenmapInt seq; @@ -99,12 +99,15 @@ typedef enum { FIEDLER, NFIEDLER, FIEDLERSORT, - BISECT, + BISECTANDREPAIR, LANCZOS, NLANCZOS, WEIGHTEDLAPLACIAN, TQLI, LAPLACIANSETUP, + FINDNBRS, + CSRMATSETUP, + CSRTOPSETUP, PRECONDSETUP, RQI, NRQI, diff --git a/src/genmap-laplacian.c b/src/genmap-laplacian.c index 8d660b5b..17a8860a 100644 --- a/src/genmap-laplacian.c +++ b/src/genmap-laplacian.c @@ -2,8 +2,8 @@ #define min(a, b) ((b) < (a) ? (b) : (a)) -static void GenmapFindNeighbors(struct array *nbrs, genmap_handle h, - struct comm *cc) { +static void genmap_find_neighbors(struct array *nbrs, genmap_handle h, + struct comm *cc) { sint lelt = genmap_get_nel(h); sint nv = genmap_get_nvertices(h); @@ -38,9 +38,7 @@ static void GenmapFindNeighbors(struct array *nbrs, genmap_handle h, size = vertices.n; vertex *vPtr = vertices.ptr; - buffer buf; - buffer_init(&buf, 1024); - sarray_sort(vertex, vPtr, size, vertexId, 1, &buf); + sarray_sort(vertex, vPtr, size, vertexId, 1, &h->buf); struct array a; array_init(csr_entry, &a, 10); @@ -67,14 +65,13 @@ static void GenmapFindNeighbors(struct array *nbrs, genmap_handle h, sarray_transfer(csr_entry, &a, proc, 1, &cr); // TODO: Check if the last line is redundant - sarray_sort_2(csr_entry, a.ptr, a.n, r, 1, c, 1, &buf); - sarray_sort(csr_entry, a.ptr, a.n, r, 1, &buf); + sarray_sort_2(csr_entry, a.ptr, a.n, r, 1, c, 1, &h->buf); + // sarray_sort(csr_entry, a.ptr, a.n, r, 1, &h->buf); array_init(entry, nbrs, lelt); if (a.n == 0) { crystal_free(&cr); - buffer_free(&buf); array_free(&vertices); array_free(&a); } @@ -96,21 +93,30 @@ static void GenmapFindNeighbors(struct array *nbrs, genmap_handle h, } } - sarray_sort_2(entry, nbrs->ptr, nbrs->n, r, 1, c, 1, &buf); + sarray_sort_2(entry, nbrs->ptr, nbrs->n, r, 1, c, 1, &h->buf); crystal_free(&cr); - buffer_free(&buf); array_free(&vertices); array_free(&a); } int GenmapInitLaplacian(genmap_handle h, struct comm *c) { struct array entries; - GenmapFindNeighbors(&entries, h, c); + + metric_tic(c, FINDNBRS); + genmap_find_neighbors(&entries, h, c); + metric_toc(c, FINDNBRS); + + metric_tic(c, CSRMATSETUP); csr_mat_setup(&entries, c, &h->M); + metric_toc(c, CSRMATSETUP); + array_free(&entries); + metric_toc(c, CSRTOPSETUP); h->gsh = get_csr_top(h->M, c); + metric_toc(c, CSRTOPSETUP); + GenmapRealloc(h->M->row_off[h->M->rn], &h->b); #if defined(GENMAP_DEBUG) diff --git a/src/genmap-load-balance.c b/src/genmap-load-balance.c index dc09d97c..3d0b312a 100644 --- a/src/genmap-load-balance.c +++ b/src/genmap-load-balance.c @@ -68,3 +68,34 @@ void genmap_load_balance(struct array *eList, uint nel, int nv, double *coord, free(element); } + +void genmap_restore_original(int *part, int *seq, struct crystal *cr, + struct array *eList, buffer *bfr) { + struct rcb_element *element = eList->ptr; + size_t unit_size; + if (element->type == GENMAP_RSB_ELEMENT) // RSB + unit_size = sizeof(struct rsb_element); + else + unit_size = sizeof(struct rcb_element); + + sarray_transfer_(eList, unit_size, offsetof(struct rcb_element, origin), 1, + cr); + + uint nel = eList->n; + if (element->type == GENMAP_RSB_ELEMENT) // RSB + sarray_sort(struct rsb_element, eList->ptr, nel, globalId, 1, bfr); + else + sarray_sort(struct rcb_element, eList->ptr, nel, globalId, 1, bfr); + + int e; + for (e = 0; e < nel; e++) { + element = eList->ptr + e * unit_size; + part[e] = element->origin; // element[e].origin; + } + + if (seq != NULL) + for (e = 0; e < nel; e++) { + element = eList->ptr + e * unit_size; + seq[e] = element->seq; // element[e].seq; + } +} diff --git a/src/genmap-rcb.c b/src/genmap-rcb.c index 365a8cdb..bc3a5ca8 100644 --- a/src/genmap-rcb.c +++ b/src/genmap-rcb.c @@ -2,169 +2,170 @@ #include #include -void get_rcb_axis_local(double *min, double *max, void *elems, uint nel, - int ndim) { +void get_rcb_axis_local(double *min, double *max, struct rcb_element *elems, + uint nel, int ndim) { // TODO: Get rid of this size_t unit_size; - unsigned char *type = elems; - if (*type == GENMAP_RCB_ELEMENT) { + if (elems->type == GENMAP_RCB_ELEMENT) { unit_size = sizeof(struct rcb_element); - } else if (*type == GENMAP_RSB_ELEMENT) { + } else if (elems->type == GENMAP_RSB_ELEMENT) { unit_size = sizeof(struct rsb_element); } sint i; - for (i = 0; i < ndim; i++) - min[i] = DBL_MAX, max[i] = -DBL_MAX; + for (i = 0; i < ndim; i++) { + min[i] = DBL_MAX; + max[i] = -DBL_MAX; + } - struct rcb_element *elem; + struct rcb_element *ei; for (i = 0; i < nel; i++) { - elem = (struct rcb_element *)((char *)elems + i * unit_size); - if (elem->coord[0] < min[0]) - min[0] = elem->coord[0]; - if (elem->coord[0] > max[0]) - max[0] = elem->coord[0]; - - if (elem->coord[1] < min[1]) - min[1] = elem->coord[1]; - if (elem->coord[1] > max[1]) - max[1] = elem->coord[1]; + ei = (struct rcb_element *)((char *)elems + i * unit_size); + if (ei->coord[0] < min[0]) + min[0] = ei->coord[0]; + if (ei->coord[0] > max[0]) + max[0] = ei->coord[0]; + + if (ei->coord[1] < min[1]) + min[1] = ei->coord[1]; + if (ei->coord[1] > max[1]) + max[1] = ei->coord[1]; } if (ndim == 3) { for (i = 0; i < nel; i++) { - elem = (struct rcb_element *)((char *)elems + i * unit_size); - if (elem->coord[2] < min[2]) - min[2] = elem->coord[2]; - if (elem->coord[2] > max[2]) - max[2] = elem->coord[2]; + ei = (struct rcb_element *)((char *)elems + i * unit_size); + if (ei->coord[2] < min[2]) + min[2] = ei->coord[2]; + if (ei->coord[2] > max[2]) + max[2] = ei->coord[2]; } } } -// TODO: Get rid of this -void get_rcb_axis(double *length, struct array *a, struct comm *c, int ndim) { - double min[MAXDIM], max[MAXDIM], buf[MAXDIM]; +void rcb_local(struct array *a, uint start, uint end, int ndim, buffer *buf) { + sint size = end - start; + assert(size >= 0); - get_rcb_axis_local(min, max, a->ptr, a->n, ndim); - comm_allreduce(c, gs_double, gs_min, min, MAXDIM, buf); - comm_allreduce(c, gs_double, gs_max, max, MAXDIM, buf); + if (size <= 2) + return; + size_t unit_size; + struct rsb_element *elem = a->ptr; + if (elem->type == GENMAP_RCB_ELEMENT) + unit_size = sizeof(struct rcb_element); + else if (elem->type == GENMAP_RSB_ELEMENT) + unit_size = sizeof(struct rsb_element); + + double min[3], max[3]; + char *st = (char *)a->ptr + unit_size * start; + get_rcb_axis_local(min, max, (struct rcb_element *)st, size, ndim); + + double length[3]; sint i; for (i = 0; i < ndim; i++) length[i] = max[i] - min[i]; -} - -int rcb_level(struct comm *c, struct array *a, int ndim, buffer *bfr) { - if (c->np == 1) - return 0; - - double length[MAXDIM]; - get_rcb_axis(length, a, c, ndim); - - int axis1 = 0, d; - for (d = 1; d < ndim; d++) - if (length[d] > length[axis1]) - axis1 = d; + int axis = 0; + if (fabs(length[axis]) < fabs(length[1])) + axis = 1; + if (ndim == 3) + if (fabs(length[axis]) < fabs(length[2])) + axis = 2; - unsigned char *type = a->ptr; - if (*type == GENMAP_RCB_ELEMENT) { - switch (axis1) { + if (elem->type == GENMAP_RCB_ELEMENT) { + switch (axis) { case 0: - parallel_sort(struct rcb_element, a, coord[0], gs_double, 0, 1, c, bfr); + sarray_sort(struct rcb_element, st, size, coord[0], 3, buf); break; case 1: - parallel_sort(struct rcb_element, a, coord[1], gs_double, 0, 1, c, bfr); + sarray_sort(struct rcb_element, st, size, coord[1], 3, buf); break; case 2: - parallel_sort(struct rcb_element, a, coord[2], gs_double, 0, 1, c, bfr); + sarray_sort(struct rcb_element, st, size, coord[2], 3, buf); break; default: break; } - } else if (*type == GENMAP_RSB_ELEMENT) { - switch (axis1) { + } else if (elem->type == GENMAP_RSB_ELEMENT) { + switch (axis) { case 0: - parallel_sort(struct rsb_element, a, coord[0], gs_double, 0, 1, c, bfr); + sarray_sort(struct rsb_element, st, size, coord[0], 3, buf); break; case 1: - parallel_sort(struct rsb_element, a, coord[1], gs_double, 0, 1, c, bfr); + sarray_sort(struct rsb_element, st, size, coord[1], 3, buf); break; case 2: - parallel_sort(struct rsb_element, a, coord[2], gs_double, 0, 1, c, bfr); + sarray_sort(struct rsb_element, st, size, coord[2], 3, buf); break; default: break; } } - return 0; + uint mid = (start + end) / 2; + rcb_local(a, start, mid, ndim, buf); + rcb_local(a, mid, end, ndim, buf); } -void rcb_local(struct array *a, uint start, uint end, int ndim, buffer *buf) { - sint size = end - start; - assert(size >= 0); - - if (size <= 2) - return; - - size_t unit_size; - unsigned char *type = a->ptr; - if (*type == GENMAP_RCB_ELEMENT) { - unit_size = sizeof(struct rcb_element); - } else if (*type == GENMAP_RSB_ELEMENT) { - unit_size = sizeof(struct rsb_element); - } +// TODO: Get rid of this +void get_rcb_axis(double *length, struct array *a, struct comm *c, int ndim) { + double min[MAXDIM], max[MAXDIM], buf[MAXDIM]; - void *st = (void *)a->ptr + unit_size * start; - double length[3], min[3], max[3]; - get_rcb_axis_local(min, max, st, size, ndim); + get_rcb_axis_local(min, max, a->ptr, a->n, ndim); + comm_allreduce(c, gs_double, gs_min, min, MAXDIM, buf); + comm_allreduce(c, gs_double, gs_max, max, MAXDIM, buf); sint i; for (i = 0; i < ndim; i++) length[i] = max[i] - min[i]; +} - int axis = 0; - if (fabs(length[axis]) < fabs(length[1])) - axis = 1; - if (ndim == 3) - if (fabs(length[axis]) < fabs(length[2])) - axis = 2; +int rcb_level(struct comm *c, struct array *a, int ndim, buffer *bfr) { + if (c->np == 1) + return 0; - if (*type == GENMAP_RCB_ELEMENT) { - switch (axis) { + double length[MAXDIM]; + + get_rcb_axis(length, a, c, ndim); + + int axis1 = 0, d; + for (d = 1; d < ndim; d++) + if (length[d] > length[axis1]) + axis1 = d; + + struct rsb_element *elem = a->ptr; + if (elem->type == GENMAP_RCB_ELEMENT) { + switch (axis1) { case 0: - sarray_sort(struct rcb_element, st, size, coord[0], 3, buf); + parallel_sort(struct rcb_element, a, coord[0], gs_double, 0, 1, c, bfr); break; case 1: - sarray_sort(struct rcb_element, st, size, coord[1], 3, buf); + parallel_sort(struct rcb_element, a, coord[1], gs_double, 0, 1, c, bfr); break; case 2: - sarray_sort(struct rcb_element, st, size, coord[2], 3, buf); + parallel_sort(struct rcb_element, a, coord[2], gs_double, 0, 1, c, bfr); break; default: break; } - } else if (*type == GENMAP_RSB_ELEMENT) { - switch (axis) { + } else if (elem->type == GENMAP_RSB_ELEMENT) { + switch (axis1) { case 0: - sarray_sort(struct rsb_element, st, size, coord[0], 3, buf); + parallel_sort(struct rsb_element, a, coord[0], gs_double, 0, 1, c, bfr); break; case 1: - sarray_sort(struct rsb_element, st, size, coord[1], 3, buf); + parallel_sort(struct rsb_element, a, coord[1], gs_double, 0, 1, c, bfr); break; case 2: - sarray_sort(struct rsb_element, st, size, coord[2], 3, buf); + parallel_sort(struct rsb_element, a, coord[2], gs_double, 0, 1, c, bfr); break; default: break; } } - uint mid = (start + end) / 2; - rcb_local(a, start, mid, ndim, buf); - rcb_local(a, mid, end, ndim, buf); + return 0; } int rcb(struct comm *ci, struct array *elements, int ndim, buffer *bfr) { diff --git a/src/genmap-rsb.c b/src/genmap-rsb.c index 817d32e7..e32d1064 100644 --- a/src/genmap-rsb.c +++ b/src/genmap-rsb.c @@ -117,7 +117,10 @@ int genmap_rsb(genmap_handle h) { metric_toc(lc, FIEDLERSORT); /* Bisect */ + double t = comm_time(); split_and_repair_partitions(h, lc, level); + t = comm_time() - t; + metric_acc(BISECTANDREPAIR, t); genmap_comm_scan(h, lc); metric_push_level(); diff --git a/src/genmap-statistics.c b/src/genmap-statistics.c index cdfc32c3..826935e1 100644 --- a/src/genmap-statistics.c +++ b/src/genmap-statistics.c @@ -94,6 +94,9 @@ void metric_print(struct comm *c) { printf(" NFIEDLER : %g/%g/%g\n", SUMMARY(i, NFIEDLER)); printf(" LAPLACIANSETUP : %g/%g/%g\n", SUMMARY(i, LAPLACIANSETUP)); + printf(" FINDNBRS : %g/%g/%g\n", SUMMARY(i, FINDNBRS)); + printf(" CSRMATSETUP : %g/%g/%g\n", SUMMARY(i, CSRMATSETUP)); + printf(" CSRTOPSETUP : %g/%g/%g\n", SUMMARY(i, CSRTOPSETUP)); printf(" PRECONDSETUP : %g/%g/%g\n", SUMMARY(i, PRECONDSETUP)); printf(" RQI : %g/%g/%g\n", SUMMARY(i, RQI)); printf(" NRQI : %g/%g/%g\n", SUMMARY(i, NRQI)); @@ -106,7 +109,9 @@ void metric_print(struct comm *c) { printf(" LAPLACIAN : %g/%g/%g\n", SUMMARY(i, LAPLACIAN)); printf(" PROJECT : %g/%g/%g\n", SUMMARY(i, PROJECT)); printf(" GRAMMIAN : %g/%g/%g\n", SUMMARY(i, GRAMMIAN)); - printf(" BISECT : %g/%g/%g\n", SUMMARY(i, BISECT)); + printf(" FIEDLERSORT : %g/%g/%g\n", SUMMARY(i, FIEDLERSORT)); + printf(" BISECTANDREPAIR : %g/%g/%g\n", + SUMMARY(i, BISECTANDREPAIR)); } } diff --git a/src/genmap.h b/src/genmap.h index 5a1677a9..3073f2aa 100644 --- a/src/genmap.h +++ b/src/genmap.h @@ -22,7 +22,6 @@ genmap_comm genmap_global_comm(genmap_handle h); void genmap_set_nvertices(genmap_handle h, int nv); int genmap_get_nvertices(genmap_handle h); - GenmapULong genmap_get_partition_nel(genmap_handle h); void genmap_set_partition_nel(genmap_handle h, GenmapULong globalElements); @@ -90,6 +89,9 @@ int genmap_rsb(genmap_handle h); int genmap_rcb(genmap_handle h); int genmap_rib(genmap_handle h); +void genmap_restore_original(int *part, int *seq, struct crystal *cr, + struct array *eList, buffer *bfr); + /* Misc */ double GenmapGetMaxRss(); void GenmapPrintStack(); diff --git a/src/parRSB.c b/src/parRSB.c index 04ea4670..e39a46e3 100644 --- a/src/parRSB.c +++ b/src/parRSB.c @@ -115,19 +115,7 @@ int parRSB_partMesh(int *part, int *seq, long long *vtx, double *coord, int nel, comm_barrier(&c); double time4 = comm_time(); - /* Restore original input */ - sarray_transfer(struct rsb_element, &eList, origin, 1, &cr); - nel = eList.n; - sarray_sort(struct rsb_element, eList.ptr, nel, globalId, 1, &bfr); - - struct rsb_element *e_ptr = eList.ptr; - int e; - for (e = 0; e < nel; e++) - part[e] = e_ptr[e].origin; - - if (seq != NULL) - for (e = 0; e < nel; e++) - seq[e] = e_ptr[e].seq; + genmap_restore_original(part, seq, &cr, &eList, &bfr); double time5 = comm_time(); comm_barrier(&c); diff --git a/src/parRSB.h b/src/parRSB.h index 9f9bcac1..bc3ef18b 100644 --- a/src/parRSB.h +++ b/src/parRSB.h @@ -5,16 +5,16 @@ typedef struct { /* General options */ - int global_partitioner; // -1 - None, 0 - RSB, 1 - RCB, 2 - RIB (Default 0) - int local_partitioner; // -1 - None, 0 - RSB, 1 - RCB, 2 - RIB (Default -1) - int debug_level; // 0, 1, 2, .. etc (Default 0) - int print_timing_info; // 0 or 1 (Default 0) + int global_partitioner; // -1 - None, 0 - RSB, 1 - RCB, 2 - RIB (Default: 0) + int local_partitioner; // -1 - None, 0 - RSB, 1 - RCB, 2 - RIB (Default: -1) + int debug_level; // 0, 1, 2, .. etc (Default: 0) + int print_timing_info; // 0 or 1 (Default: 0) /* RSB specific */ - int rsb_algo; // 0 - Lanczos, 1 - MG (Default 1) - int rsb_prepartition; // 0 - None, 1 - RCB , 2 - RIB (Default 1) - int rsb_grammian; // 0 or 1 (Default 1) - int rsb_paul; // 0 or 1 (Default 1) + int rsb_algo; // 0 - Lanczos, 1 - MG (Default: 0) + int rsb_prepartition; // 0 - None, 1 - RCB , 2 - RIB (Default: 1) + int rsb_grammian; // 0 or 1 (Default: 1) + int rsb_paul; // 0 or 1 (Default: 1) } parRSB_options; extern parRSB_options parrsb_default_options; diff --git a/tests/t210-levels.c b/tests/t210-levels.c index 02f9df60..1c167614 100644 --- a/tests/t210-levels.c +++ b/tests/t210-levels.c @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { /* Setup CSR on fine level */ genmap_comm c = genmap_global_comm(gh); - struct array *entries = GenmapFindNeighbors(gh, c); + struct array *entries = genmap_find_neighbors(gh, c); csr_mat M; csr_mat_setup(entries, &c->gsc, &M); array_free(entries); diff --git a/tests/t220-interpolation.c b/tests/t220-interpolation.c index 430ec68f..6b9e5ba2 100644 --- a/tests/t220-interpolation.c +++ b/tests/t220-interpolation.c @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) { /* Setup CSR on fine level */ genmap_comm c = genmap_global_comm(gh); - struct array *entries = GenmapFindNeighbors(gh, c); + struct array *entries = genmap_find_neighbors(gh, c); csr_mat M; csr_mat_setup(entries, &c->gsc, &M); array_free(entries); diff --git a/tests/t230-flex-cg.c b/tests/t230-flex-cg.c index 85ed2663..ba90e111 100644 --- a/tests/t230-flex-cg.c +++ b/tests/t230-flex-cg.c @@ -142,7 +142,7 @@ int main(int argc, char *argv[]) { /* Setup CSR on fine level */ genmap_comm c = genmap_global_comm(gh); - struct array *entries = GenmapFindNeighbors(gh, c); + struct array *entries = genmap_find_neighbors(gh, c); csr_mat M; csr_mat_setup(entries, &c->gsc, &M); array_free(entries); diff --git a/tests/t235-project-pf.c b/tests/t235-project-pf.c index f9de1edb..60d95888 100644 --- a/tests/t235-project-pf.c +++ b/tests/t235-project-pf.c @@ -142,7 +142,7 @@ int main(int argc, char *argv[]) { /* Setup CSR on fine level */ genmap_comm c = genmap_global_comm(gh); - struct array *entries = GenmapFindNeighbors(gh, c); + struct array *entries = genmap_find_neighbors(gh, c); csr_mat M; csr_mat_setup(entries, &c->gsc, &M); array_free(entries);