Skip to content

Commit 0f5a139

Browse files
committed
Refresh content
1 parent cf1fff6 commit 0f5a139

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+496
-688
lines changed

exercise-notebooks/deploy_notebooks.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ using Literate
22
## include Literate scripts starting with following letters in the deploy
33
incl = "lecture10_"
44
## Set `sol=true` to produce output with solutions contained and hints stripped. Otherwise, it is the other way around.
5-
sol = true
5+
sol = false
66
##
77

88
function replace_string(str)

exercise-notebooks/notebooks/lecture10_ex1.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@
3636
"file_extension": ".jl",
3737
"mimetype": "application/julia",
3838
"name": "julia",
39-
"version": "1.10.3"
39+
"version": "1.10.5"
4040
},
4141
"kernelspec": {
4242
"name": "julia-1.10",
43-
"display_name": "Julia 1.10.3",
43+
"display_name": "Julia 1.10.5",
4444
"language": "julia"
4545
}
4646
},

exercise-notebooks/notebooks/lecture10_ex2.ipynb

Lines changed: 28 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,15 @@
193193
"outputs": [],
194194
"cell_type": "code",
195195
"source": [
196+
"# solution\n",
196197
"max_threads = attribute(device(),CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)\n",
197198
"thread_count = []\n",
198199
"throughputs = []\n",
199200
"for pow = 0:Int(log2(max_threads/32))\n",
200201
" threads = (32, 2^pow)\n",
201-
" blocks = (nx÷threads[1], ny÷threads[2])\n",
202-
" t_it = @belapsed begin @cuda blocks=$blocks threads=$threads update_temperature!($T2, $T, $Ci, $lam, $dt, $_dx, $_dy); synchronize() end\n",
203-
" T_eff = (2*1+1)*1/1e9*nx*ny*sizeof(Float64)/t_it\n",
202+
" blocks = #...\n",
203+
" t_it = @belapsed begin @cuda #...\n",
204+
" T_eff = #...\n",
204205
" push!(thread_count, prod(threads))\n",
205206
" push!(throughputs, T_eff)\n",
206207
" println(\"(threads=$threads) T_eff = $(T_eff)\")\n",
@@ -295,16 +296,16 @@
295296
"outputs": [],
296297
"cell_type": "code",
297298
"source": [
298-
"# solution\n",
299+
"# hint\n",
299300
"function update_temperature!(T2, T, Ci, lam, dt, _dx, _dy)\n",
300301
" ix = (blockIdx().x-1) * blockDim().x + threadIdx().x\n",
301302
" iy = (blockIdx().y-1) * blockDim().y + threadIdx().y\n",
302-
" tx = threadIdx().x\n",
303-
" ty = threadIdx().y\n",
304-
" T_l = @cuDynamicSharedMem(eltype(T), (blockDim().x, blockDim().y))\n",
305-
" @inbounds T_l[tx,ty] = T[ix,iy]\n",
303+
" tx = # local thread id, x dimension\n",
304+
" ty = # local thread id, y dimension\n",
305+
" T_l = # allocation of a block-local temperature array (in shared memory)\n",
306+
" @inbounds T_l[tx,ty] = # read the values of the temperature array `T` into shared memory\n",
306307
" if (ix>1 && ix<size(T2,1) && iy>1 && iy<size(T2,2))\n",
307-
" @inbounds T2[ix,iy] = T_l[tx,ty] + dt*Ci[ix,iy]\n",
308+
" @inbounds T2[ix,iy] = #=read temperature values from shared memory=# + dt*Ci[ix,iy]\n",
308309
" end\n",
309310
" return\n",
310311
"end"
@@ -326,9 +327,7 @@
326327
"outputs": [],
327328
"cell_type": "code",
328329
"source": [
329-
"# solution\n",
330-
"t_it = @belapsed begin @cuda blocks=$blocks threads=$threads shmem=prod($threads)*sizeof(Float64) update_temperature!($T2, $T, $Ci, $lam, $dt, $_dx, $_dy); synchronize() end\n",
331-
"T_eff = (2*1+1)*1/1e9*nx*ny*sizeof(Float64)/t_it"
330+
"# solution"
332331
],
333332
"metadata": {},
334333
"execution_count": null
@@ -356,21 +355,21 @@
356355
"outputs": [],
357356
"cell_type": "code",
358357
"source": [
359-
"# solution\n",
358+
"# hint\n",
360359
"function update_temperature!(T2, T, Ci, lam, dt, _dx, _dy)\n",
361360
" ix = (blockIdx().x-1) * blockDim().x + threadIdx().x\n",
362361
" iy = (blockIdx().y-1) * blockDim().y + threadIdx().y\n",
363-
" tx = threadIdx().x+1\n",
364-
" ty = threadIdx().y+1\n",
365-
" T_l = @cuDynamicSharedMem(eltype(T), (blockDim().x+2, blockDim().y+2))\n",
362+
" tx = # adjust the local thread id in x dimension\n",
363+
" ty = # adjust the local thread id in y dimension\n",
364+
" T_l = # adjust the shared memory allocation\n",
366365
" @inbounds T_l[tx,ty] = T[ix,iy]\n",
367366
" if (ix>1 && ix<size(T2,1) && iy>1 && iy<size(T2,2))\n",
368367
" @inbounds T2[ix,iy] = T_l[tx,ty] + dt*Ci[ix,iy]\n",
369368
" end\n",
370369
" return\n",
371370
"end\n",
372371
"\n",
373-
"t_it = @belapsed begin @cuda blocks=$blocks threads=$threads shmem=prod($threads.+2)*sizeof(Float64) update_temperature!($T2, $T, $Ci, $lam, $dt, $_dx, $_dy); synchronize() end\n",
372+
"t_it = @belapsed begin @cuda blocks=$blocks threads=$threads shmem=#=adjust the shared memory=# update_temperature!($T2, $T, $Ci, $lam, $dt, $_dx, $_dy); synchronize() end\n",
374373
"T_eff = (2*1+1)*1/1e9*nx*ny*sizeof(Float64)/t_it"
375374
],
376375
"metadata": {},
@@ -397,7 +396,7 @@
397396
"outputs": [],
398397
"cell_type": "code",
399398
"source": [
400-
"# solution\n",
399+
"# hint\n",
401400
"function update_temperature!(T2, T, Ci, lam, dt, _dx, _dy)\n",
402401
" ix = (blockIdx().x-1) * blockDim().x + threadIdx().x\n",
403402
" iy = (blockIdx().y-1) * blockDim().y + threadIdx().y\n",
@@ -406,10 +405,10 @@
406405
" T_l = @cuDynamicSharedMem(eltype(T), (blockDim().x+2, blockDim().y+2))\n",
407406
" @inbounds T_l[tx,ty] = T[ix,iy]\n",
408407
" if (ix>1 && ix<size(T2,1) && iy>1 && iy<size(T2,2))\n",
409-
" @inbounds if (threadIdx().x == 1) T_l[tx-1,ty] = T[ix-1,iy] end\n",
410-
" @inbounds if (threadIdx().x == blockDim().x) T_l[tx+1,ty] = T[ix+1,iy] end\n",
411-
" @inbounds if (threadIdx().y == 1) T_l[tx,ty-1] = T[ix,iy-1] end\n",
412-
" @inbounds if (threadIdx().y == blockDim().y) T_l[tx,ty+1] = T[ix,iy+1] end\n",
408+
" @inbounds if (threadIdx().x == 1) #=read the required values to the left halo of `T_l`=# end\n",
409+
" @inbounds if (threadIdx().x == blockDim().x) #=read the required values to the right halo of `T_l`=# end\n",
410+
" @inbounds if #=read the required values to the bottom halo of `T_l`=# end\n",
411+
" @inbounds if #=read the required values to the top halo of `T_l`=# end\n",
413412
" @inbounds T2[ix,iy] = T_l[tx,ty] + dt*Ci[ix,iy]\n",
414413
" end\n",
415414
" return\n",
@@ -443,7 +442,7 @@
443442
"outputs": [],
444443
"cell_type": "code",
445444
"source": [
446-
"# solution\n",
445+
"# hint\n",
447446
"function update_temperature!(T2, T, Ci, lam, dt, _dx, _dy)\n",
448447
" ix = (blockIdx().x-1) * blockDim().x + threadIdx().x\n",
449448
" iy = (blockIdx().y-1) * blockDim().y + threadIdx().y\n",
@@ -458,28 +457,15 @@
458457
" @inbounds if (threadIdx().y == blockDim().y) T_l[tx,ty+1] = T[ix,iy+1] end\n",
459458
" sync_threads()\n",
460459
" @inbounds T2[ix,iy] = T_l[tx,ty] + dt*Ci[ix,iy]*(\n",
461-
" - ((-lam*(T_l[tx+1,ty] - T_l[tx,ty])*_dx) - (-lam*(T_l[tx,ty] - T_l[tx-1,ty])*_dx))*_dx\n",
462-
" - ((-lam*(T_l[tx,ty+1] - T_l[tx,ty])*_dy) - (-lam*(T_l[tx,ty] - T_l[tx,ty-1])*_dy))*_dy\n",
460+
" # add the computation of the derivatives\n",
461+
" # ...\n",
463462
" )\n",
464463
" end\n",
465464
" return\n",
466465
"end\n",
467466
"\n",
468-
"function diffusion2D_step!(T2, T, Ci, lam, dt, _dx, _dy)\n",
469-
" threads = (32, 8)\n",
470-
" blocks = (size(T2,1)÷threads[1], size(T2,2)÷threads[2])\n",
471-
" @cuda blocks=blocks threads=threads shmem=prod(threads.+2)*sizeof(Float64) update_temperature!(T2, T, Ci, lam, dt, _dx, _dy); synchronize()\n",
472-
"end\n",
467+
"diffusion2D()\n",
473468
"\n",
474-
"diffusion2D()"
475-
],
476-
"metadata": {},
477-
"execution_count": null
478-
},
479-
{
480-
"outputs": [],
481-
"cell_type": "code",
482-
"source": [
483469
"t_it = @belapsed begin @cuda blocks=$blocks threads=$threads shmem=prod($threads.+2)*sizeof(Float64) update_temperature!($T2, $T, $Ci, $lam, $dt, $_dx, $_dy); synchronize() end\n",
484470
"T_eff = (2*1+1)*1/1e9*nx*ny*sizeof(Float64)/t_it"
485471
],
@@ -513,9 +499,7 @@
513499
"outputs": [],
514500
"cell_type": "code",
515501
"source": [
516-
"# solution\n",
517-
"T_peak = 561 # Peak memory throughput of the Tesla P100 GPU\n",
518-
"T_eff/T_peak"
502+
"# solution"
519503
],
520504
"metadata": {},
521505
"execution_count": null
@@ -534,11 +518,11 @@
534518
"file_extension": ".jl",
535519
"mimetype": "application/julia",
536520
"name": "julia",
537-
"version": "1.10.3"
521+
"version": "1.10.5"
538522
},
539523
"kernelspec": {
540524
"name": "julia-1.10",
541-
"display_name": "Julia 1.10.3",
525+
"display_name": "Julia 1.10.5",
542526
"language": "julia"
543527
}
544528
},

0 commit comments

Comments
 (0)