
Commit 70e3bf2

Author: Minh Quan Ho (committed)
OPAL: re-enable async progress thread
- The SW-based async progress thread was planned a long time ago in 683efcb, but has never been enabled or implemented since.
- This commit enables spawning an async progress thread to execute the _opal_progress() routine when the feature is enabled at both compile time and run time (--enable-progress-threads, default: enabled, and the MCA variable opal_async_progress or mpi_async_progress).
- Fix a minor typo in the opal_progress.h doxygen comment.

Signed-off-by: Minh Quan Ho <minh-quan.ho@sipearl.com>
1 parent cd5fd1d · commit 70e3bf2

File tree: 13 files changed, +204 −43 lines

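For orientation, a rough usage sketch based only on the configure option and MCA variable names quoted in the commit message above; the mpirun --mca and OMPI_MCA_* forms are the usual Open MPI ways of setting MCA variables, and the application name is just a placeholder:

    # Build time: progress-thread support is compiled in by default;
    # pass --disable-progress-threads to compile it out.
    ./configure --enable-progress-threads ...
    make && make install

    # Run time: even when compiled in, the thread is not spawned unless requested.
    mpirun --mca opal_async_progress 1 ./my_mpi_app
    # equivalently, via the mpi_* synonym or the environment:
    OMPI_MCA_mpi_async_progress=1 mpirun ./my_mpi_app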

config/opal_configure_options.m4 (17 additions, 5 deletions)

@@ -544,9 +544,21 @@ fi
 AC_DEFINE_UNQUOTED([OPAL_ENABLE_GETPWUID], [$opal_want_getpwuid],
                    [Disable getpwuid support (default: enabled)])

-dnl We no longer support the old OPAL_ENABLE_PROGRESS_THREADS. At
-dnl some point, this should die.
-AC_DEFINE([OPAL_ENABLE_PROGRESS_THREADS],
-          [0],
-          [Whether we want BTL progress threads enabled])
+#
+# Disable progress threads
+#
+AC_MSG_CHECKING([if want asynchronous progress threads])
+AC_ARG_ENABLE([progress_threads],
+    [AS_HELP_STRING([--disable-progress-threads],
+        [Disable asynchronous progress threads. Note that when enabled, for performance-related reasons, the progress thread is still not spawned by default. User must enable MCA variables 'opal_async_progress' or 'mpi_async_progress' to have the progress thread spawned at runtime. (default: enabled)])])
+if test "$enable_progress_threads" = "no"; then
+    AC_MSG_RESULT([no])
+    opal_want_progress_threads=0
+else
+    AC_MSG_RESULT([yes])
+    opal_want_progress_threads=1
+fi
+AC_DEFINE_UNQUOTED([OPAL_ENABLE_PROGRESS_THREADS], [$opal_want_progress_threads],
+                   [Disable BTL asynchronous progress threads (default: enabled)])
+
 ])dnl

ompi/instance/instance.c (3 additions, 3 deletions)

@@ -512,7 +512,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
        ddt_init, but before mca_coll_base_open, since some collective
        modules (e.g., the hierarchical coll component) may need ops in
        their query function. */
-    if (OMPI_SUCCESS != (ret = ompi_op_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+    if (OMPI_SUCCESS != (ret = ompi_op_base_find_available (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
         return ompi_instance_print_error ("ompi_op_base_find_available() failed", ret);
     }

@@ -532,7 +532,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
         return ompi_instance_print_error ("mca_smsc_base_select() failed", ret);
     }

-    if (OMPI_SUCCESS != (ret = mca_pml_base_select (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+    if (OMPI_SUCCESS != (ret = mca_pml_base_select (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
         return ompi_instance_print_error ("mca_pml_base_select() failed", ret);
     }

@@ -617,7 +617,7 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
         return ompi_instance_print_error ("mca_pml_base_bsend_init() failed", ret);
     }

-    if (OMPI_SUCCESS != (ret = mca_coll_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
+    if (OMPI_SUCCESS != (ret = mca_coll_base_find_available (opal_async_progress_thread_spawned, ompi_mpi_thread_multiple))) {
         return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret);
     }

ompi/mpi/c/request_get_status.c.in (3 additions, 7 deletions)

@@ -38,9 +38,7 @@
 PROTOTYPE ERROR_CLASS request_get_status(REQUEST request, INT_OUT flag,
                                          STATUS_OUT status)
 {
-#if OPAL_ENABLE_PROGRESS_THREADS == 0
     int do_it_once = 0;
-#endif

     MEMCHECKER(
         memchecker_request(&request);
@@ -56,9 +54,7 @@ PROTOTYPE ERROR_CLASS request_get_status(REQUEST request, INT_OUT flag,
         }
     }

-#if OPAL_ENABLE_PROGRESS_THREADS == 0
  recheck_request_status:
-#endif
     opal_atomic_mb();
     if( (request == MPI_REQUEST_NULL) || (request->req_state == OMPI_REQUEST_INACTIVE) ) {
         *flag = true;
@@ -80,16 +76,16 @@ PROTOTYPE ERROR_CLASS request_get_status(REQUEST request, INT_OUT flag,
         }
         return MPI_SUCCESS;
     }
-#if OPAL_ENABLE_PROGRESS_THREADS == 0
-    if( 0 == do_it_once ) {
+
+    if( 0 == do_it_once && !opal_async_progress_thread_spawned ) {
         /* If we run the opal_progress then check the status of the
            request before leaving. We will call the opal_progress only
            once per call. */
         opal_progress();
         do_it_once++;
         goto recheck_request_status;
     }
-#endif
+
     *flag = false;
     return MPI_SUCCESS;
 }

ompi/request/req_test.c (12 additions, 14 deletions)

@@ -32,11 +32,9 @@ int ompi_request_default_test(ompi_request_t ** rptr,
 {
     ompi_request_t *request = *rptr;

-#if OPAL_ENABLE_PROGRESS_THREADS == 0
     int do_it_once = 0;

 recheck_request_status:
-#endif
     if( request->req_state == OMPI_REQUEST_INACTIVE ) {
         *completed = true;
         if (MPI_STATUS_IGNORE != status) {
@@ -81,8 +79,8 @@ int ompi_request_default_test(ompi_request_t ** rptr,
         return MPI_ERR_PROC_FAILED_PENDING;
     }
 #endif
-#if OPAL_ENABLE_PROGRESS_THREADS == 0
-    if( 0 == do_it_once ) {
+
+    if( 0 == do_it_once && !opal_async_progress_thread_spawned ) {
         /**
          * If we run the opal_progress then check the status of the request before
          * leaving. We will call the opal_progress only once per call.
@@ -92,7 +90,7 @@ int ompi_request_default_test(ompi_request_t ** rptr,
             goto recheck_request_status;
         }
     }
-#endif
+
     *completed = false;
     return OMPI_SUCCESS;
 }
@@ -163,9 +161,9 @@ int ompi_request_default_test_any(
     *index = MPI_UNDEFINED;
     if(num_requests_null_inactive != count) {
         *completed = false;
-#if OPAL_ENABLE_PROGRESS_THREADS == 0
-        opal_progress();
-#endif
+        if (!opal_async_progress_thread_spawned) {
+            opal_progress();
+        }
     } else {
         *completed = true;
         if (MPI_STATUS_IGNORE != status) {
@@ -208,16 +206,16 @@ int ompi_request_default_test_all(
             return MPI_ERR_PROC_FAILED_PENDING;
         }
 #endif /* OPAL_ENABLE_FT_MPI */
-#if OPAL_ENABLE_PROGRESS_THREADS == 0
-        if (0 == do_it_once) {
+
+        if (0 == do_it_once && !opal_async_progress_thread_spawned) {
             ++do_it_once;
             if (0 != opal_progress()) {
                 /* continue walking the list, retest the current request */
                 --i;
                 continue;
             }
         }
-#endif /* OPAL_ENABLE_PROGRESS_THREADS */
+
         /* short-circuit */
         break;
     }
@@ -353,9 +351,9 @@ int ompi_request_default_test_some(
     *outcount = num_requests_done;

     if (num_requests_done == 0) {
-#if OPAL_ENABLE_PROGRESS_THREADS == 0
-        opal_progress();
-#endif
+        if (!opal_async_progress_thread_spawned) {
+            opal_progress();
+        }
         return OMPI_SUCCESS;
     }


ompi/runtime/ompi_mpi_finalize.c (4 additions, 3 deletions)

@@ -193,9 +193,10 @@ int ompi_mpi_finalize(void)
     opal_atomic_swap_32(&ompi_mpi_state,
                         OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);

-#if OPAL_ENABLE_PROGRESS_THREADS == 0
-    opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK);
-#endif
+    /* shutdown async progress thread before tearing down further services */
+    if (opal_async_progress_thread_spawned) {
+        opal_progress_shutdown_async_progress_thread();
+    }

     /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
        *all* connected processes. This only means that all processes

ompi/runtime/ompi_mpi_init.c (4 additions, 4 deletions)

@@ -559,16 +559,16 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
        time if so, then start the clock again */
     OMPI_TIMING_NEXT("barrier");

-#if OPAL_ENABLE_PROGRESS_THREADS == 0
     /* Start setting up the event engine for MPI operations. Don't
        block in the event library, so that communications don't take
        forever between procs in the dynamic code. This will increase
        CPU utilization for the remainder of MPI_INIT when we are
        blocking on RTE-level events, but may greatly reduce non-TCP
        latency. */
-    int old_event_flags = opal_progress_set_event_flag(0);
-    opal_progress_set_event_flag(old_event_flags | OPAL_EVLOOP_NONBLOCK);
-#endif
+    if (!opal_async_progress_thread_spawned) {
+        int old_event_flags = opal_progress_set_event_flag(0);
+        opal_progress_set_event_flag(old_event_flags | OPAL_EVLOOP_NONBLOCK);
+    }

     /* wire up the mpi interface, if requested. Do this after the
        non-block switch for non-TCP performance. Do before the

ompi/runtime/ompi_mpi_params.c (7 additions, 0 deletions)

@@ -351,6 +351,13 @@ int ompi_mpi_register_params(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &ompi_async_mpi_finalize);

+#if OPAL_ENABLE_PROGRESS_THREADS == 1
+    value = mca_base_var_find ("opal", "opal", NULL, "async_progress");
+    if (0 <= value) {
+        (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "async_progress", 0);
+    }
+#endif
+
     value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
     if (0 <= value) {
         (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",

opal/mca/btl/smcuda/btl_smcuda.c (1 addition, 1 deletion)

@@ -487,7 +487,7 @@ static struct mca_btl_base_endpoint_t *create_sm_endpoint(int local_proc, struct
     OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
 #if OPAL_ENABLE_PROGRESS_THREADS == 1
     sprintf(path, "%s" OPAL_PATH_SEP "sm_fifo.%lu", opal_process_info.job_session_dir,
-            (unsigned long) proc->proc_name);
+            (unsigned long) proc->proc_name.vpid);
     ep->fifo_fd = open(path, O_WRONLY);
     if (ep->fifo_fd < 0) {
         opal_output(0, "mca_btl_smcuda_add_procs: open(%s) failed with errno=%d\n", path, errno);

opal/mca/btl/smcuda/btl_smcuda_component.c (1 addition, 1 deletion)

@@ -860,7 +860,7 @@ mca_btl_smcuda_component_init(int *num_btls, bool enable_progress_threads, bool
 #if OPAL_ENABLE_PROGRESS_THREADS == 1
     /* create a named pipe to receive events */
     sprintf(mca_btl_smcuda_component.sm_fifo_path, "%s" OPAL_PATH_SEP "sm_fifo.%lu",
-            opal_process_info.job_session_dir, (unsigned long) OPAL_PROC_MY_NAME->vpid);
+            opal_process_info.job_session_dir, (unsigned long) OPAL_PROC_MY_NAME.vpid);
     if (mkfifo(mca_btl_smcuda_component.sm_fifo_path, 0660) < 0) {
         opal_output(0, "mca_btl_smcuda_component_init: mkfifo failed with errno=%d\n", errno);
         return NULL;

opal/runtime/opal_params_core.c (12 additions, 0 deletions)

@@ -90,6 +90,10 @@ int opal_abort_delay = 0;

 int opal_max_thread_in_progress = 1;

+#if OPAL_ENABLE_PROGRESS_THREADS == 1
+bool opal_async_progress = false;
+#endif
+
 static bool opal_register_util_done = false;

 static char *opal_var_dump_color_string = NULL;
@@ -416,6 +420,14 @@ int opal_register_util_params(void)
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_8,
                                  MCA_BASE_VAR_SCOPE_READONLY, &opal_max_thread_in_progress);

+#if OPAL_ENABLE_PROGRESS_THREADS == 1
+    /* Spawn a dedicated software progress-thread to execute opal_progress() */
+    (void) mca_base_var_register("opal", "opal", NULL, "async_progress",
+                                 "Spawn a dedicated software progress-thread. Default: false",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY, &opal_async_progress);
+#endif
+
     /* Use sync_memops functionality with accelerator codes or deploy
        alternative path using IPC events to ensure consistency */
     opal_accelerator_use_sync_memops = true;
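With the registration above in place, a quick sanity check (assuming an installation built from this commit with progress threads enabled, and ompi_info from that installation on PATH) is to ask ompi_info for the variable; it should list opal_async_progress along with its mpi_async_progress synonym:

    ompi_info --all | grep async_progress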
