@@ -463,23 +463,17 @@ function addprocs(manager::ClusterManager; kwargs...)
463463
464464 cluster_mgmt_from_master_check()
465465
466+ # Call worker-starting callbacks
467+ warning_interval = params[:callback_warning_interval]
468+ _run_callbacks_concurrently(" worker-starting" , worker_starting_callbacks,
469+ warning_interval, [(manager, kwargs)])
470+
471+ # Add new workers
466472 new_workers = @lock worker_lock addprocs_locked(manager:: ClusterManager , params)
467473
468- callback_tasks = Dict{Any, Task}()
469- for worker in new_workers
470- for (name, callback) in worker_added_callbacks
471- callback_tasks[name] = Threads. @spawn callback(worker)
472- end
473- end
474-
475- running_callbacks = () -> [" '$(key) '" for (key, task) in callback_tasks if ! istaskdone(task)]
476- while timedwait(() -> isempty(running_callbacks()), params[:callback_warning_interval]) === :timed_out
477- callbacks_str = join(running_callbacks(), " , " )
478- @warn " Waiting for these worker-added callbacks to finish: $(callbacks_str) "
479- end
480-
481- # Wait on the tasks so that exceptions bubble up
482- wait.(values(callback_tasks))
474+ # Call worker-started callbacks
475+ _run_callbacks_concurrently(" worker-started" , worker_started_callbacks,
476+ warning_interval, new_workers)
483477
484478 return new_workers
485479end
@@ -870,7 +864,8 @@ const HDR_COOKIE_LEN=16
870864const map_pid_wrkr = Dict{Int, Union{Worker, LocalProcess}}()
871865const map_sock_wrkr = IdDict()
872866const map_del_wrkr = Set{Int}()
873- const worker_added_callbacks = Dict{Any, Base. Callable}()
867+ const worker_starting_callbacks = Dict{Any, Base. Callable}()
868+ const worker_started_callbacks = Dict{Any, Base. Callable}()
874869const worker_exiting_callbacks = Dict{Any, Base. Callable}()
875870const worker_exited_callbacks = Dict{Any, Base. Callable}()
876871
882877
883878# Callbacks
884879
885- function _add_callback(f, key, dict)
886- if ! hasmethod(f, Tuple{Int})
887- throw(ArgumentError(" Callback function is invalid, it must be able to accept a single Int argument" ))
# Run every callback in `callbacks_dict` once per entry of `arglist`, concurrently.
#
# - `callbacks_name`: human-readable name of the callback group, only used in the
#   warning message (e.g. "worker-started").
# - `callbacks_dict`: maps a callback key to the callable to invoke.
# - `warning_interval`: timeout passed to `timedwait`; each time it elapses while
#   tasks are still running, a warning listing the unfinished callbacks is logged.
# - `arglist`: iterable of argument sets; each entry is splatted into the callback,
#   so an entry may be a tuple of arguments or a single value such as a worker ID.
#
# Blocks until every spawned task has finished. If a callback threw, the failure
# propagates to the caller through `wait`. Returns `nothing`.
function _run_callbacks_concurrently(callbacks_name, callbacks_dict, warning_interval, arglist)
    # Track one task per (callback, argument set) pair. A collection keyed only by
    # callback name would keep just the task for the last entry of `arglist`, so
    # earlier tasks would be neither waited on nor have their exceptions surfaced.
    callback_tasks = Pair{Any, Task}[]
    for args in arglist
        for (name, callback) in callbacks_dict
            task = Threads.@spawn callback(args...)
            push!(callback_tasks, name => task)
        end
    end

    # Keys of the callbacks that still have at least one unfinished task
    running_callbacks = () -> unique!(
        ["'$(name)'" for (name, task) in callback_tasks if !istaskdone(task)]
    )
    while timedwait(() -> isempty(running_callbacks()), warning_interval) === :timed_out
        callbacks_str = join(running_callbacks(), ", ")
        @warn "Waiting for these $(callbacks_name) callbacks to finish: $(callbacks_str)"
    end

    # Wait on the tasks so that exceptions bubble up
    for (_, task) in callback_tasks
        wait(task)
    end
    return nothing
end
897+
898+ function _add_callback(f, key, dict; arg_types= Tuple{Int})
899+ desired_signature = " f(" * join([" ::$(t) " for t in arg_types. types], " , " ) * " )"
900+
901+ if ! hasmethod(f, arg_types)
902+ throw(ArgumentError(" Callback function is invalid, it must be able to be called with these argument types: $(desired_signature) " ))
888903 elseif haskey(dict, key)
889904 throw(ArgumentError(" A callback function with key '$(key) ' already exists" ))
890905 end
@@ -900,29 +915,58 @@ end
# Delete the callback registered under `key` from `dict` and return `dict`.
function _remove_callback(key, dict)
    return delete!(dict, key)
end
901916
"""
    add_worker_starting_callback(f::Base.Callable; key=nothing)

Register a callback to be called on the master process immediately before new
workers are started. The callback `f` will be called with the `ClusterManager`
instance that is being used and a dictionary of parameters related to adding
workers, i.e. `f(manager, params)`. The `params` dictionary is specific to the
`manager` type. Note that the `LocalManager` and `SSHManager` cluster managers
in DistributedNext are not fully documented yet, see the
[managers.jl](https://github.com/JuliaParallel/DistributedNext.jl/blob/master/src/managers.jl)
file for their definitions. Chooses and returns a unique key for the callback if
`key` is not specified.

!!! warning
    Adding workers can fail so it is not guaranteed that the workers requested
    will exist.

The worker-starting callbacks will be executed concurrently. If one throws an
exception it will not be caught and will bubble up through [`addprocs`](@ref).

Keep in mind that the callbacks will add to the time taken to launch workers; so
try to either keep the callbacks fast to execute, or do the actual work
asynchronously by spawning a task in the callback (beware of race conditions if
you do this).
"""
add_worker_starting_callback(f::Base.Callable; key=nothing) = _add_callback(f, key, worker_starting_callbacks;
                                                                            arg_types=Tuple{ClusterManager, Dict})
943+
"""
    remove_worker_starting_callback(key)

Remove the callback for `key` that was added with [`add_worker_starting_callback()`](@ref).
"""
remove_worker_starting_callback(key) = _remove_callback(key, worker_starting_callbacks)
945+
"""
    add_worker_started_callback(f::Base.Callable; key=nothing)

Register `f` as a callback that runs on the master process each time a worker is
added. It is invoked with the ID of the added worker, e.g. `f(w::Int)`. When
`key` is not given, a unique key is chosen for the callback and returned.

All worker-started callbacks are executed concurrently. An exception thrown by
one of them is not caught and will propagate out of [`addprocs()`](@ref).

Because the callbacks add to the time taken to launch workers, either keep them
quick or have the callback spawn a task that performs the real initialization
asynchronously (watch out for race conditions in that case).
"""
function add_worker_started_callback(f::Base.Callable; key=nothing)
    return _add_callback(f, key, worker_started_callbacks)
end
919963
"""
    remove_worker_started_callback(key)

Unregister the callback stored under `key` by [`add_worker_started_callback()`](@ref).
"""
function remove_worker_started_callback(key)
    return _remove_callback(key, worker_started_callbacks)
end
926970
927971"""
928972 add_worker_exiting_callback(f::Base.Callable; key=nothing)
0 commit comments