Skip to content

Commit 618a982

Browse files
authored
Merge branch 'Sandia-OpenSHMEM:main' into pequiet
2 parents 9b92130 + 8f925b0 commit 618a982

File tree

5 files changed

+91
-29
lines changed

5 files changed

+91
-29
lines changed

.github/workflows/ci.yml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ env:
2121

2222
jobs:
2323
OFI:
24-
runs-on: ubuntu-20.04
24+
runs-on: ubuntu-24.04
2525
strategy:
2626
fail-fast: false
2727
matrix:
@@ -260,7 +260,7 @@ jobs:
260260
${SOS_PM_POST}
261261
262262
# PMIx:
263-
# runs-on: ubuntu-20.04
263+
# runs-on: ubuntu-24.04
264264
# strategy:
265265
# fail-fast: false
266266
# matrix:
@@ -477,13 +477,13 @@ jobs:
477477
# ${{ matrix.sos_pm_post }}
478478
#
479479
UCX:
480-
runs-on: ubuntu-20.04
480+
runs-on: ubuntu-24.04
481481
strategy:
482482
fail-fast: false
483483
matrix:
484484
include:
485-
- config_name: ucx-1.9.0
486-
ucx_version: v1.9.0
485+
- config_name: ucx-1.18.0
486+
ucx_version: v1.18.0
487487
xpmem_version: master
488488
sos_config: [--enable-pmi-simple --disable-fortran,
489489
--with-cma --enable-error-checking --enable-profiling
@@ -563,7 +563,7 @@ jobs:
563563
cat modules/tests-sos/test/unit/hello.log
564564
565565
Portals4:
566-
runs-on: ubuntu-20.04
566+
runs-on: ubuntu-24.04
567567
strategy:
568568
fail-fast: false
569569
matrix:
@@ -635,6 +635,7 @@ jobs:
635635
run: |
636636
cd repos/portals4
637637
./autogen.sh
638+
sed -i 's/^struct ptl_abort_state abort_state;/static struct ptl_abort_state abort_state;/' src/ib/ptl_gbl.h
638639
mkdir build; cd build
639640
../configure --prefix=${PORTALS4_INSTALL_DIR} --enable-zero-mrs --enable-reliable-udp --disable-pmi-from-portals
640641
make -j
@@ -650,13 +651,14 @@ jobs:
650651
make -j
651652
make install
652653
- name: Test SOS (${{ matrix.name }})
654+
continue-on-error: true
653655
run: |
654656
cd build
655657
make check TESTS= -j
656658
${SOS_PM} -np 1 modules/tests-sos/test/unit/hello
657659
658660
XPMEM_Only:
659-
runs-on: ubuntu-20.04
661+
runs-on: ubuntu-24.04
660662
strategy:
661663
fail-fast: false
662664
matrix:

README

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,15 @@ options.
263263
Disable multirail functionality. Enabling this will restrict all
264264
communications to occur over a single NIC per system.
265265

266+
SHMEM_OFI_DISABLE_SINGLE_EP (default: off)
267+
Disable the single endpoint resource optimization. Setting this (to any
268+
value) makes each PE use at least two separate endpoints: one for
269+
transmission on the default context and one as the target of
270+
communication. If unset, the default context and the target endpoint
271+
are merged to conserve context resources. Regardless of this
272+
parameter, each PE consumes another endpoint for each OpenSHMEM user
273+
context that is created.
274+
266275
Team Environment variables:
267276

268277
SHMEM_TEAMS_MAX (default: 10)

src/shmem_env_defs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ SHMEM_INTERNAL_ENV_DEF(OFI_STX_DISABLE_PRIVATE, bool, false, SHMEM_INTERNAL_ENV_
109109
"Disallow private contexts from having exclusive STX access")
110110
SHMEM_INTERNAL_ENV_DEF(OFI_DISABLE_MULTIRAIL, bool, false, SHMEM_INTERNAL_ENV_CAT_TRANSPORT,
111111
"Disable usage of multirail functionality")
112+
SHMEM_INTERNAL_ENV_DEF(OFI_DISABLE_SINGLE_EP, bool, false, SHMEM_INTERNAL_ENV_CAT_TRANSPORT,
113+
"Disable single endpoint resource optimization (enable separate Tx and Rx EPs)")
112114
#endif
113115

114116
#ifdef USE_UCX

src/transport_ofi.c

Lines changed: 64 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ shmem_internal_mutex_t shmem_transport_ofi_lock;
114114
pthread_mutex_t shmem_transport_ofi_progress_lock = PTHREAD_MUTEX_INITIALIZER;
115115
#endif /* ENABLE_THREADS */
116116

117+
int shmem_transport_ofi_single_ep;
118+
117119
/* Temporarily redefine SHM_INTERNAL integer types to their FI counterparts to
118120
* translate the DTYPE_* types (defined by autoconf according to system ABI)
119121
* into FI types in the table below */
@@ -205,6 +207,8 @@ struct shmem_internal_tid shmem_transport_ofi_gettid(void)
205207
return tid;
206208
}
207209

210+
#define SHMEM_TRANSPORT_OFI_PROV_SOCKETS "sockets"
211+
208212
static struct fabric_info shmem_transport_ofi_info = {0};
209213

210214
static size_t shmem_transport_ofi_grow_size = 128;
@@ -622,15 +626,25 @@ int bind_enable_ep_resources(shmem_transport_ctx_t *ctx)
622626
* removed below. However, there aren't currently any cases where removing
623627
* FI_RECV significantly improves performance or resource usage. */
624628

625-
ret = fi_ep_bind(ctx->ep, &ctx->cq->fid,
626-
FI_SELECTIVE_COMPLETION | FI_TRANSMIT | FI_RECV);
627-
OFI_CHECK_RETURN_STR(ret, "fi_ep_bind CQ to endpoint failed");
629+
if (ctx->ep != shmem_transport_ofi_target_ep) {
630+
ret = fi_ep_bind(ctx->ep, &ctx->cq->fid,
631+
FI_SELECTIVE_COMPLETION | FI_TRANSMIT | FI_RECV);
632+
OFI_CHECK_RETURN_STR(ret, "fi_ep_bind CQ to endpoint failed");
628633

629-
ret = fi_ep_bind(ctx->ep, &shmem_transport_ofi_avfd->fid, 0);
630-
OFI_CHECK_RETURN_STR(ret, "fi_ep_bind AV to endpoint failed");
634+
ret = fi_ep_bind(ctx->ep, &shmem_transport_ofi_avfd->fid, 0);
635+
OFI_CHECK_RETURN_STR(ret, "fi_ep_bind AV to endpoint failed");
631636

632-
ret = fi_enable(ctx->ep);
633-
OFI_CHECK_RETURN_STR(ret, "fi_enable on endpoint failed");
637+
ret = fi_enable(ctx->ep);
638+
OFI_CHECK_RETURN_STR(ret, "fi_enable on endpoint failed");
639+
} /* In single-endpoint mode, the sockets provider requires re-enabling the EP, but other
640+
providers require NOT re-enabling the EP (e.g. as of v2.1.0, tcp, verbs, and opx) */
641+
else if (shmem_transport_ofi_info.p_info->fabric_attr->prov_name != NULL &&
642+
strncmp(shmem_transport_ofi_info.p_info->fabric_attr->prov_name,
643+
SHMEM_TRANSPORT_OFI_PROV_SOCKETS,
644+
strlen(SHMEM_TRANSPORT_OFI_PROV_SOCKETS)) == 0) {
645+
ret = fi_enable(ctx->ep);
646+
OFI_CHECK_RETURN_STR(ret, "fi_enable on endpoint failed");
647+
}
634648

635649
return ret;
636650
}
@@ -1668,6 +1682,9 @@ static int shmem_transport_ofi_target_ep_init(void)
16681682
struct fabric_info* info = &shmem_transport_ofi_info;
16691683
info->p_info->ep_attr->tx_ctx_cnt = 0;
16701684
info->p_info->caps = FI_RMA | FI_ATOMIC | FI_REMOTE_READ | FI_REMOTE_WRITE;
1685+
if (shmem_transport_ofi_single_ep) {
1686+
info->p_info->caps |= FI_WRITE | FI_READ | FI_RECV;
1687+
}
16711688
#if ENABLE_TARGET_CNTR
16721689
info->p_info->caps |= FI_RMA_EVENT;
16731690
#endif
@@ -1693,7 +1710,7 @@ static int shmem_transport_ofi_target_ep_init(void)
16931710
OFI_CHECK_RETURN_MSG(ret, "cq_open failed (%s)\n", fi_strerror(errno));
16941711

16951712
ret = fi_ep_bind(shmem_transport_ofi_target_ep,
1696-
&shmem_transport_ofi_target_cq->fid, FI_TRANSMIT | FI_RECV);
1713+
&shmem_transport_ofi_target_cq->fid, FI_SELECTIVE_COMPLETION | FI_TRANSMIT | FI_RECV);
16971714
OFI_CHECK_RETURN_STR(ret, "fi_ep_bind CQ to target endpoint failed");
16981715

16991716
ret = fi_enable(shmem_transport_ofi_target_ep);
@@ -1756,15 +1773,20 @@ static int shmem_transport_ofi_ctx_init(shmem_transport_ctx_t *ctx, int id)
17561773
&ctx->get_cntr, NULL);
17571774
OFI_CHECK_RETURN_MSG(ret, "get_cntr creation failed (%s)\n", fi_strerror(errno));
17581775

1759-
ret = fi_cq_open(shmem_transport_ofi_domainfd, &cq_attr, &ctx->cq, NULL);
1760-
if (ret && errno == FI_EMFILE) {
1761-
DEBUG_STR("Context creation failed because of open files limit, consider increasing with 'ulimit' command");
1762-
}
1763-
OFI_CHECK_RETURN_MSG(ret, "cq_open failed (%s)\n", fi_strerror(errno));
1776+
if (shmem_transport_ofi_single_ep && id == SHMEM_TRANSPORT_CTX_DEFAULT_ID) {
1777+
ctx->cq = shmem_transport_ofi_target_cq;
1778+
ctx->ep = shmem_transport_ofi_target_ep;
1779+
} else {
1780+
ret = fi_cq_open(shmem_transport_ofi_domainfd, &cq_attr, &ctx->cq, NULL);
1781+
if (ret && errno == FI_EMFILE) {
1782+
DEBUG_STR("Context creation failed because of open files limit, consider increasing with 'ulimit' command");
1783+
}
1784+
OFI_CHECK_RETURN_MSG(ret, "cq_open failed (%s)\n", fi_strerror(errno));
17641785

1765-
ret = fi_endpoint(shmem_transport_ofi_domainfd,
1766-
info->p_info, &ctx->ep, NULL);
1767-
OFI_CHECK_RETURN_MSG(ret, "ep creation failed (%s)\n", fi_strerror(errno));
1786+
ret = fi_endpoint(shmem_transport_ofi_domainfd,
1787+
info->p_info, &ctx->ep, NULL);
1788+
OFI_CHECK_RETURN_MSG(ret, "ep creation failed (%s)\n", fi_strerror(errno));
1789+
}
17681790

17691791
/* TODO: Fill in TX attr */
17701792

@@ -1819,6 +1841,13 @@ int shmem_transport_init(void)
18191841
else
18201842
shmem_transport_ofi_info.domain_name = NULL;
18211843

1844+
/* Unless SHMEM_OFI_DISABLE_SINGLE_EP env var is set, each PE opens a single libfabric endpoint
1845+
* for both transmission (on the default context) and as the target of communication */
1846+
if (shmem_internal_params.OFI_DISABLE_SINGLE_EP_provided)
1847+
shmem_transport_ofi_single_ep = 0;
1848+
else
1849+
shmem_transport_ofi_single_ep = 1;
1850+
18221851
/* Check STX resource settings */
18231852
if ((shmem_internal_thread_level == SHMEM_THREAD_SINGLE ||
18241853
shmem_internal_thread_level == SHMEM_THREAD_FUNNELED ) &&
@@ -2050,6 +2079,7 @@ int shmem_transport_ctx_create(struct shmem_internal_team_t *team, long options,
20502079
void shmem_transport_ctx_destroy(shmem_transport_ctx_t *ctx)
20512080
{
20522081
int ret;
2082+
bool close_default_ctx = false;
20532083

20542084
if (ctx == NULL)
20552085
return;
@@ -2075,7 +2105,12 @@ void shmem_transport_ctx_destroy(shmem_transport_ctx_t *ctx)
20752105
SHMEM_TRANSPORT_OFI_CTX_UNLOCK(ctx);
20762106
}
20772107

2078-
if (ctx->ep) {
2108+
/* In single-endpoint mode, do not close the default context's endpoint, CQ, or counters here:
2109+
* its endpoint and CQ are the target endpoint's, and all of these are closed later in shmem_transport_fini(). */
2110+
if (!shmem_transport_ofi_single_ep || ctx->id != SHMEM_TRANSPORT_CTX_DEFAULT_ID)
2111+
close_default_ctx = true;
2112+
2113+
if (ctx->ep && close_default_ctx) {
20792114
ret = fi_close(&ctx->ep->fid);
20802115
OFI_CHECK_ERROR_MSG(ret, "Context endpoint close failed (%s)\n", fi_strerror(errno));
20812116
}
@@ -2112,17 +2147,17 @@ void shmem_transport_ctx_destroy(shmem_transport_ctx_t *ctx)
21122147
SHMEM_MUTEX_UNLOCK(shmem_transport_ofi_lock);
21132148
}
21142149

2115-
if (ctx->put_cntr) {
2150+
if (ctx->put_cntr && close_default_ctx) {
21162151
ret = fi_close(&ctx->put_cntr->fid);
21172152
OFI_CHECK_ERROR_MSG(ret, "Context put CNTR close failed (%s)\n", fi_strerror(errno));
21182153
}
21192154

2120-
if (ctx->get_cntr) {
2155+
if (ctx->get_cntr && close_default_ctx) {
21212156
ret = fi_close(&ctx->get_cntr->fid);
21222157
OFI_CHECK_ERROR_MSG(ret, "Context get CNTR close failed (%s)\n", fi_strerror(errno));
21232158
}
21242159

2125-
if (ctx->cq) {
2160+
if (ctx->cq && close_default_ctx) {
21262161
ret = fi_close(&ctx->cq->fid);
21272162
OFI_CHECK_ERROR_MSG(ret, "Context CQ close failed (%s)\n", fi_strerror(errno));
21282163
}
@@ -2213,6 +2248,15 @@ int shmem_transport_fini(void)
22132248
ret = fi_close(&shmem_transport_ofi_target_ep->fid);
22142249
OFI_CHECK_ERROR_MSG(ret, "Target endpoint close failed (%s)\n", fi_strerror(errno));
22152250

2251+
/* In single-endpoint mode, shmem_transport_ctx_destroy() leaves the default context's put and get counters open, so close them here */
2252+
if (shmem_transport_ofi_single_ep) {
2253+
ret = fi_close(&shmem_transport_ctx_default.put_cntr->fid);
2254+
OFI_CHECK_ERROR_MSG(ret, "Default EP put CNTR close failed (%s)\n", fi_strerror(errno));
2255+
2256+
ret = fi_close(&shmem_transport_ctx_default.get_cntr->fid);
2257+
OFI_CHECK_ERROR_MSG(ret, "Default EP get CNTR close failed (%s)\n", fi_strerror(errno));
2258+
}
2259+
22162260
ret = fi_close(&shmem_transport_ofi_target_cq->fid);
22172261
OFI_CHECK_ERROR_MSG(ret, "Target CQ close failed (%s)\n", fi_strerror(errno));
22182262

src/transport_ofi.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ extern long shmem_transport_ofi_max_bounce_buffers;
7373

7474
extern pthread_mutex_t shmem_transport_ofi_progress_lock;
7575

76+
extern int shmem_transport_ofi_single_ep;
77+
7678
#ifndef MIN
7779
#define MIN(a,b) (((a)<(b))?(a):(b))
7880
#endif
@@ -390,8 +392,11 @@ void shmem_transport_probe(void)
390392
if (0 == pthread_mutex_trylock(&shmem_transport_ofi_progress_lock)) {
391393
# endif
392394
struct fi_cq_entry buf;
393-
int ret = fi_cq_read(shmem_transport_ofi_target_cq, &buf, 1);
394-
if (ret == 1)
395+
/* In single-endpoint mode, request zero CQ entries so the call only makes progress. */
396+
/* The target EP and default ctx share this CQ in that mode, so a pending CQ entry is expected, not an error */
397+
int ret = fi_cq_read(shmem_transport_ofi_target_cq, (void *)&buf,
398+
!shmem_transport_ofi_single_ep);
399+
if (!shmem_transport_ofi_single_ep && ret == 1)
395400
RAISE_WARN_STR("Unexpected event");
396401
# ifdef USE_THREAD_COMPLETION
397402
pthread_mutex_unlock(&shmem_transport_ofi_progress_lock);

0 commit comments

Comments
 (0)