@@ -99,10 +99,10 @@ mutable struct Worker
9999 del_msgs:: Array{Any,1} # XXX : Could del_msgs and add_msgs be Channels?
100100 add_msgs:: Array{Any,1}
101101 @atomic gcflag:: Bool
102- state:: WorkerState
103- c_state:: Condition # wait for state changes
104- ct_time:: Float64 # creation time
105- conn_func:: Any # used to setup connections lazily
102+ @atomic state:: WorkerState
103+ c_state:: Threads. Condition # wait for state changes, lock for state
104+ ct_time:: Float64 # creation time
105+ conn_func:: Any # used to setup connections lazily
106106
107107 r_stream:: IO
108108 w_stream:: IO
@@ -134,7 +134,7 @@ mutable struct Worker
134134 if haskey(map_pid_wrkr, id)
135135 return map_pid_wrkr[id]
136136 end
137- w= new(id, Threads. ReentrantLock(), [], [], false , W_CREATED, Condition(), time(), conn_func)
137+ w= new(id, Threads. ReentrantLock(), [], [], false , W_CREATED, Threads . Condition(), time(), conn_func)
138138 w. initialized = Event()
139139 register_worker(w)
140140 w
@@ -144,8 +144,10 @@ mutable struct Worker
144144end
145145
146146function set_worker_state(w, state)
147- w. state = state
148- notify(w. c_state; all= true )
147+ lock(w. c_state) do
148+ @atomic w. state = state
149+ notify(w. c_state; all= true )
150+ end
149151end
150152
151153function check_worker_state(w:: Worker )
@@ -161,15 +163,16 @@ function check_worker_state(w::Worker)
161163 else
162164 w. ct_time = time()
163165 if myid() > w. id
164- t = @async exec_conn_func(w)
166+ t = Threads . @spawn Threads . threadpool() exec_conn_func(w)
165167 else
166168 # route request via node 1
167- t = @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1 , w. id, myid())
169+ t = Threads . @spawn Threads . threadpool() remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1 , w. id, myid())
168170 end
169171 errormonitor(t)
170172 wait_for_conn(w)
171173 end
172174 end
175+ return nothing
173176end
174177
175178exec_conn_func(id:: Int ) = exec_conn_func(worker_from_id(id):: Worker )
@@ -191,9 +194,17 @@ function wait_for_conn(w)
191194 timeout = worker_timeout() - (time() - w. ct_time)
192195 timeout <= 0 && error(" peer $(w. id) has not connected to $(myid()) " )
193196
194- @async (sleep(timeout); notify(w. c_state; all= true ))
195- wait(w. c_state)
196- w. state === W_CREATED && error(" peer $(w. id) didn't connect to $(myid()) within $timeout seconds" )
197+ T = Threads. @spawn Threads. threadpool() begin
198+ sleep($ timeout)
199+ lock(w. c_state) do
200+ notify(w. c_state; all= true )
201+ end
202+ end
203+ errormonitor(T)
204+ lock(w. c_state) do
205+ wait(w. c_state)
206+ w. state === W_CREATED && error(" peer $(w. id) didn't connect to $(myid()) within $timeout seconds" )
207+ end
197208 end
198209 nothing
199210end
@@ -247,7 +258,7 @@ function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_std
247258 else
248259 sock = listen(interface, LPROC. bind_port)
249260 end
250- errormonitor(@async while isopen(sock)
261+ errormonitor(Threads . @spawn while isopen(sock)
251262 client = accept(sock)
252263 process_messages(client, client, true )
253264 end )
279290
280291
281292function redirect_worker_output(ident, stream)
282- t = @async while ! eof(stream)
293+ t = Threads . @spawn while ! eof(stream)
283294 line = readline(stream)
284295 if startswith(line, " From worker " )
285296 # stdout's of "additional" workers started from an initial worker on a host are not available
@@ -318,7 +329,7 @@ function read_worker_host_port(io::IO)
318329 leader = String[]
319330 try
320331 while ntries > 0
321- readtask = @async readline(io)
332+ readtask = Threads . @spawn Threads . threadpool() readline(io)
322333 yield()
323334 while ! istaskdone(readtask) && ((time_ns() - t0) < timeout)
324335 sleep(0.05 )
@@ -419,7 +430,7 @@ if launching workers programmatically, execute `addprocs` in its own task.
419430
420431```julia
421432# On busy clusters, call `addprocs` asynchronously
422- t = @async addprocs(...)
433+ t = Threads.@spawn addprocs(...)
423434```
424435
425436```julia
@@ -485,20 +496,23 @@ function addprocs_locked(manager::ClusterManager; kwargs...)
485496 # call manager's `launch` is a separate task. This allows the master
486497 # process initiate the connection setup process as and when workers come
487498 # online
488- t_launch = @async launch(manager, params, launched, launch_ntfy)
499+ t_launch = Threads . @spawn Threads . threadpool() launch(manager, params, launched, launch_ntfy)
489500
490501 @sync begin
491502 while true
492503 if isempty(launched)
493504 istaskdone(t_launch) && break
494- @async (sleep(1 ); notify(launch_ntfy))
505+ Threads. @spawn Threads. threadpool() begin
506+ sleep(1 )
507+ notify(launch_ntfy)
508+ end
495509 wait(launch_ntfy)
496510 end
497511
498512 if ! isempty(launched)
499513 wconfig = popfirst!(launched)
500514 let wconfig= wconfig
501- @async setup_launched_worker(manager, wconfig, launched_q)
515+ Threads . @spawn Threads . threadpool() setup_launched_worker(manager, wconfig, launched_q)
502516 end
503517 end
504518 end
@@ -578,7 +592,7 @@ function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launch
578592 wconfig. port = port
579593
580594 let wconfig= wconfig
581- @async begin
595+ Threads . @spawn Threads . threadpool() begin
582596 pid = create_worker(manager, wconfig)
583597 remote_do(redirect_output_from_additional_worker, frompid, pid, port)
584598 push!(launched_q, pid)
@@ -645,7 +659,12 @@ function create_worker(manager, wconfig)
645659 # require the value of config.connect_at which is set only upon connection completion
646660 for jw in PGRP. workers
647661 if (jw. id != 1 ) && (jw. id < w. id)
648- (jw. state === W_CREATED) && wait(jw. c_state)
662+ # wait for wl to join
663+ if jw. state === W_CREATED
664+ lock(jw. c_state) do
665+ wait(jw. c_state)
666+ end
667+ end
649668 push!(join_list, jw)
650669 end
651670 end
@@ -668,7 +687,12 @@ function create_worker(manager, wconfig)
668687 end
669688
670689 for wl in wlist
671- (wl. state === W_CREATED) && wait(wl. c_state)
690+ lock(wl. c_state) do
691+ if wl. state === W_CREATED
692+ # wait for wl to join
693+ wait(wl. c_state)
694+ end
695+ end
672696 push!(join_list, wl)
673697 end
674698 end
@@ -727,23 +751,21 @@ function redirect_output_from_additional_worker(pid, port)
727751end
728752
729753function check_master_connect()
730- timeout = worker_timeout() * 1e9
731754 # If we do not have at least process 1 connect to us within timeout
732755 # we log an error and exit, unless we're running on valgrind
733756 if ccall(:jl_running_on_valgrind,Cint,()) != 0
734757 return
735758 end
736- @async begin
737- start = time_ns()
738- while ! haskey(map_pid_wrkr, 1 ) && (time_ns() - start) < timeout
739- sleep(1.0 )
740- end
741759
742- if ! haskey(map_pid_wrkr, 1 )
743- print(stderr , " Master process (id 1) could not connect within $(timeout/ 1e9 ) seconds.\n exiting.\n " )
744- exit(1 )
760+ errormonitor(
761+ Threads. @spawn begin
762+ timeout = worker_timeout()
763+ if timedwait(() -> ! haskey(map_pid_wrkr, 1 ), timeout) === :timed_out
764+ print(stderr , " Master process (id 1) could not connect within $(timeout) seconds.\n exiting.\n " )
765+ exit(1 )
766+ end
745767 end
746- end
768+ )
747769end
748770
749771
@@ -1028,13 +1050,13 @@ function rmprocs(pids...; waitfor=typemax(Int))
10281050
10291051 pids = vcat(pids... )
10301052 if waitfor == 0
1031- t = @async _rmprocs(pids, typemax(Int))
1053+ t = Threads . @spawn Threads . threadpool() _rmprocs(pids, typemax(Int))
10321054 yield()
10331055 return t
10341056 else
10351057 _rmprocs(pids, waitfor)
10361058 # return a dummy task object that user code can wait on.
1037- return @async nothing
1059+ return Threads . @spawn Threads . threadpool() nothing
10381060 end
10391061end
10401062
@@ -1217,7 +1239,7 @@ function interrupt(pids::AbstractVector=workers())
12171239 @assert myid() == 1
12181240 @sync begin
12191241 for pid in pids
1220- @async interrupt(pid)
1242+ Threads . @spawn Threads . threadpool() interrupt(pid)
12211243 end
12221244 end
12231245end
@@ -1288,18 +1310,16 @@ end
12881310
12891311using Random: randstring
12901312
1291- let inited = false
1292- # do initialization that's only needed when there is more than 1 processor
1293- global function init_multi()
1294- if ! inited
1295- inited = true
1296- push!(Base. package_callbacks, _require_callback)
1297- atexit(terminate_all_workers)
1298- init_bind_addr()
1299- cluster_cookie(randstring(HDR_COOKIE_LEN))
1300- end
1301- return nothing
1313+ # do initialization that's only needed when there is more than 1 processor
1314+ const inited = Threads. Atomic{Bool}(false )
1315+ function init_multi()
1316+ if ! Threads. atomic_cas!(inited, false , true )
1317+ push!(Base. package_callbacks, _require_callback)
1318+ atexit(terminate_all_workers)
1319+ init_bind_addr()
1320+ cluster_cookie(randstring(HDR_COOKIE_LEN))
13021321 end
1322+ return nothing
13031323end
13041324
13051325function init_parallel()
0 commit comments