@@ -114,6 +114,8 @@ shmem_internal_mutex_t shmem_transport_ofi_lock;
114
114
pthread_mutex_t shmem_transport_ofi_progress_lock = PTHREAD_MUTEX_INITIALIZER ;
115
115
#endif /* ENABLE_THREADS */
116
116
117
+ int shmem_transport_ofi_single_ep ; /* Nonzero: the default context reuses the target endpoint and CQ instead of opening its own. Set to 0 in shmem_transport_init() when OFI_DISABLE_SINGLE_EP is provided, and to 1 otherwise. */
118
+
117
119
/* Temporarily redefine SHM_INTERNAL integer types to their FI counterparts to
118
120
* translate the DTYPE_* types (defined by autoconf according to system ABI)
119
121
* into FI types in the table below */
@@ -205,6 +207,8 @@ struct shmem_internal_tid shmem_transport_ofi_gettid(void)
205
207
return tid ;
206
208
}
207
209
210
+ #define SHMEM_TRANSPORT_OFI_PROV_SOCKETS "sockets" /* Provider-name prefix: compared with strncmp() against fabric_attr->prov_name to detect the sockets provider, for which the shared endpoint is passed to fi_enable() again. */
211
+
208
212
static struct fabric_info shmem_transport_ofi_info = {0 };
209
213
210
214
static size_t shmem_transport_ofi_grow_size = 128 ;
@@ -622,15 +626,25 @@ int bind_enable_ep_resources(shmem_transport_ctx_t *ctx)
622
626
* removed below. However, there aren't currently any cases where removing
623
627
* FI_RECV significantly improves performance or resource usage. */
624
628
625
- ret = fi_ep_bind (ctx -> ep , & ctx -> cq -> fid ,
626
- FI_SELECTIVE_COMPLETION | FI_TRANSMIT | FI_RECV );
627
- OFI_CHECK_RETURN_STR (ret , "fi_ep_bind CQ to endpoint failed" );
629
+ if (ctx -> ep != shmem_transport_ofi_target_ep ) {
630
+ ret = fi_ep_bind (ctx -> ep , & ctx -> cq -> fid ,
631
+ FI_SELECTIVE_COMPLETION | FI_TRANSMIT | FI_RECV );
632
+ OFI_CHECK_RETURN_STR (ret , "fi_ep_bind CQ to endpoint failed" );
628
633
629
- ret = fi_ep_bind (ctx -> ep , & shmem_transport_ofi_avfd -> fid , 0 );
630
- OFI_CHECK_RETURN_STR (ret , "fi_ep_bind AV to endpoint failed" );
634
+ ret = fi_ep_bind (ctx -> ep , & shmem_transport_ofi_avfd -> fid , 0 );
635
+ OFI_CHECK_RETURN_STR (ret , "fi_ep_bind AV to endpoint failed" );
631
636
632
- ret = fi_enable (ctx -> ep );
633
- OFI_CHECK_RETURN_STR (ret , "fi_enable on endpoint failed" );
637
+ ret = fi_enable (ctx -> ep );
638
+ OFI_CHECK_RETURN_STR (ret , "fi_enable on endpoint failed" );
639
+ } /* In single-endpoint mode, the sockets provider requires re-enabling the EP, but other
640
+ providers require NOT re-enabling the EP (e.g. as of v2.1.0, tcp, verbs, and opx) */
641
+ else if (shmem_transport_ofi_info .p_info -> fabric_attr -> prov_name != NULL &&
642
+ strncmp (shmem_transport_ofi_info .p_info -> fabric_attr -> prov_name ,
643
+ SHMEM_TRANSPORT_OFI_PROV_SOCKETS ,
644
+ strlen (SHMEM_TRANSPORT_OFI_PROV_SOCKETS )) == 0 ) {
645
+ ret = fi_enable (ctx -> ep );
646
+ OFI_CHECK_RETURN_STR (ret , "fi_enable on endpoint failed" );
647
+ }
634
648
635
649
return ret ;
636
650
}
@@ -1668,6 +1682,9 @@ static int shmem_transport_ofi_target_ep_init(void)
1668
1682
struct fabric_info * info = & shmem_transport_ofi_info ;
1669
1683
info -> p_info -> ep_attr -> tx_ctx_cnt = 0 ;
1670
1684
info -> p_info -> caps = FI_RMA | FI_ATOMIC | FI_REMOTE_READ | FI_REMOTE_WRITE ;
1685
+ if (shmem_transport_ofi_single_ep ) {
1686
+ info -> p_info -> caps |= FI_WRITE | FI_READ | FI_RECV ;
1687
+ }
1671
1688
#if ENABLE_TARGET_CNTR
1672
1689
info -> p_info -> caps |= FI_RMA_EVENT ;
1673
1690
#endif
@@ -1693,7 +1710,7 @@ static int shmem_transport_ofi_target_ep_init(void)
1693
1710
OFI_CHECK_RETURN_MSG (ret , "cq_open failed (%s)\n" , fi_strerror (errno ));
1694
1711
1695
1712
ret = fi_ep_bind (shmem_transport_ofi_target_ep ,
1696
- & shmem_transport_ofi_target_cq -> fid , FI_TRANSMIT | FI_RECV );
1713
+ & shmem_transport_ofi_target_cq -> fid , FI_SELECTIVE_COMPLETION | FI_TRANSMIT | FI_RECV );
1697
1714
OFI_CHECK_RETURN_STR (ret , "fi_ep_bind CQ to target endpoint failed" );
1698
1715
1699
1716
ret = fi_enable (shmem_transport_ofi_target_ep );
@@ -1756,15 +1773,20 @@ static int shmem_transport_ofi_ctx_init(shmem_transport_ctx_t *ctx, int id)
1756
1773
& ctx -> get_cntr , NULL );
1757
1774
OFI_CHECK_RETURN_MSG (ret , "get_cntr creation failed (%s)\n" , fi_strerror (errno ));
1758
1775
1759
- ret = fi_cq_open (shmem_transport_ofi_domainfd , & cq_attr , & ctx -> cq , NULL );
1760
- if (ret && errno == FI_EMFILE ) {
1761
- DEBUG_STR ("Context creation failed because of open files limit, consider increasing with 'ulimit' command" );
1762
- }
1763
- OFI_CHECK_RETURN_MSG (ret , "cq_open failed (%s)\n" , fi_strerror (errno ));
1776
+ if (shmem_transport_ofi_single_ep && id == SHMEM_TRANSPORT_CTX_DEFAULT_ID ) {
1777
+ ctx -> cq = shmem_transport_ofi_target_cq ;
1778
+ ctx -> ep = shmem_transport_ofi_target_ep ;
1779
+ } else {
1780
+ ret = fi_cq_open (shmem_transport_ofi_domainfd , & cq_attr , & ctx -> cq , NULL );
1781
+ if (ret && errno == FI_EMFILE ) {
1782
+ DEBUG_STR ("Context creation failed because of open files limit, consider increasing with 'ulimit' command" );
1783
+ }
1784
+ OFI_CHECK_RETURN_MSG (ret , "cq_open failed (%s)\n" , fi_strerror (errno ));
1764
1785
1765
- ret = fi_endpoint (shmem_transport_ofi_domainfd ,
1766
- info -> p_info , & ctx -> ep , NULL );
1767
- OFI_CHECK_RETURN_MSG (ret , "ep creation failed (%s)\n" , fi_strerror (errno ));
1786
+ ret = fi_endpoint (shmem_transport_ofi_domainfd ,
1787
+ info -> p_info , & ctx -> ep , NULL );
1788
+ OFI_CHECK_RETURN_MSG (ret , "ep creation failed (%s)\n" , fi_strerror (errno ));
1789
+ }
1768
1790
1769
1791
/* TODO: Fill in TX attr */
1770
1792
@@ -1819,6 +1841,13 @@ int shmem_transport_init(void)
1819
1841
else
1820
1842
shmem_transport_ofi_info .domain_name = NULL ;
1821
1843
1844
+ /* Unless SHMEM_OFI_DISABLE_SINGLE_EP env var is set, each PE opens a single libfabric endpoint
1845
+ * for both transmission (on the default context) and as the target of communication */
1846
+ if (shmem_internal_params .OFI_DISABLE_SINGLE_EP_provided )
1847
+ shmem_transport_ofi_single_ep = 0 ;
1848
+ else
1849
+ shmem_transport_ofi_single_ep = 1 ;
1850
+
1822
1851
/* Check STX resource settings */
1823
1852
if ((shmem_internal_thread_level == SHMEM_THREAD_SINGLE ||
1824
1853
shmem_internal_thread_level == SHMEM_THREAD_FUNNELED ) &&
@@ -2050,6 +2079,7 @@ int shmem_transport_ctx_create(struct shmem_internal_team_t *team, long options,
2050
2079
void shmem_transport_ctx_destroy (shmem_transport_ctx_t * ctx )
2051
2080
{
2052
2081
int ret ;
2082
+ bool close_default_ctx = false;
2053
2083
2054
2084
if (ctx == NULL )
2055
2085
return ;
@@ -2075,7 +2105,12 @@ void shmem_transport_ctx_destroy(shmem_transport_ctx_t *ctx)
2075
2105
SHMEM_TRANSPORT_OFI_CTX_UNLOCK (ctx );
2076
2106
}
2077
2107
2078
- if (ctx -> ep ) {
2108
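+ /* In single-endpoint mode, do not close the default context's endpoint, CQ, or counters here:
2109
+ * its endpoint and CQ are the target endpoint and CQ, and all of these are closed later in shmem_transport_fini(). Despite its name, close_default_ctx is true when this context's resources SHOULD be closed here. */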
2110
+ if (!shmem_transport_ofi_single_ep || ctx -> id != SHMEM_TRANSPORT_CTX_DEFAULT_ID )
2111
+ close_default_ctx = true;
2112
+
2113
+ if (ctx -> ep && close_default_ctx ) {
2079
2114
ret = fi_close (& ctx -> ep -> fid );
2080
2115
OFI_CHECK_ERROR_MSG (ret , "Context endpoint close failed (%s)\n" , fi_strerror (errno ));
2081
2116
}
@@ -2112,17 +2147,17 @@ void shmem_transport_ctx_destroy(shmem_transport_ctx_t *ctx)
2112
2147
SHMEM_MUTEX_UNLOCK (shmem_transport_ofi_lock );
2113
2148
}
2114
2149
2115
- if (ctx -> put_cntr ) {
2150
+ if (ctx -> put_cntr && close_default_ctx ) {
2116
2151
ret = fi_close (& ctx -> put_cntr -> fid );
2117
2152
OFI_CHECK_ERROR_MSG (ret , "Context put CNTR close failed (%s)\n" , fi_strerror (errno ));
2118
2153
}
2119
2154
2120
- if (ctx -> get_cntr ) {
2155
+ if (ctx -> get_cntr && close_default_ctx ) {
2121
2156
ret = fi_close (& ctx -> get_cntr -> fid );
2122
2157
OFI_CHECK_ERROR_MSG (ret , "Context get CNTR close failed (%s)\n" , fi_strerror (errno ));
2123
2158
}
2124
2159
2125
- if (ctx -> cq ) {
2160
+ if (ctx -> cq && close_default_ctx ) {
2126
2161
ret = fi_close (& ctx -> cq -> fid );
2127
2162
OFI_CHECK_ERROR_MSG (ret , "Context CQ close failed (%s)\n" , fi_strerror (errno ));
2128
2163
}
@@ -2213,6 +2248,15 @@ int shmem_transport_fini(void)
2213
2248
ret = fi_close (& shmem_transport_ofi_target_ep -> fid );
2214
2249
OFI_CHECK_ERROR_MSG (ret , "Target endpoint close failed (%s)\n" , fi_strerror (errno ));
2215
2250
2251
+ /* In single-endpoint mode, shmem_transport_ctx_destroy() skips the default context's put and get counters, so close them here */
2252
+ if (shmem_transport_ofi_single_ep ) {
2253
+ ret = fi_close (& shmem_transport_ctx_default .put_cntr -> fid );
2254
+ OFI_CHECK_ERROR_MSG (ret , "Default EP put CNTR close failed (%s)\n" , fi_strerror (errno ));
2255
+
2256
+ ret = fi_close (& shmem_transport_ctx_default .get_cntr -> fid );
2257
+ OFI_CHECK_ERROR_MSG (ret , "Default EP get CNTR close failed (%s)\n" , fi_strerror (errno ));
2258
+ }
2259
+
2216
2260
ret = fi_close (& shmem_transport_ofi_target_cq -> fid );
2217
2261
OFI_CHECK_ERROR_MSG (ret , "Target CQ close failed (%s)\n" , fi_strerror (errno ));
2218
2262
0 commit comments