Skip to content

Commit bfd826c

Browse files
Merge pull request #1634 from devitocodes/clang-omp-workaround
compiler: Work around clang[10,11,?] omp-offloading bug
2 parents 1e619c0 + 0f27388 commit bfd826c

File tree

6 files changed, with 30 additions and 31 deletions.

devito/ir/iet/nodes.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -66,10 +66,15 @@ class Node(Signer):
6666

6767
def __new__(cls, *args, **kwargs):
6868
obj = super(Node, cls).__new__(cls)
69-
argnames = inspect.getfullargspec(cls.__init__).args
69+
argnames, _, _, defaultvalues, _, _, _ = inspect.getfullargspec(cls.__init__)
70+
try:
71+
defaults = dict(zip(argnames[-len(defaultvalues):], defaultvalues))
72+
except TypeError:
73+
# No default kwarg values
74+
defaults = {}
7075
obj._args = {k: v for k, v in zip(argnames[1:], args)}
7176
obj._args.update(kwargs.items())
72-
obj._args.update({k: None for k in argnames[1:] if k not in obj._args})
77+
obj._args.update({k: defaults.get(k) for k in argnames[1:] if k not in obj._args})
7378
return obj
7479

7580
def _rebuild(self, *args, **kwargs):
@@ -789,9 +794,10 @@ class PointerCast(ExprStmt, Node):
789794

790795
is_PointerCast = True
791796

792-
def __init__(self, function, obj=None):
797+
def __init__(self, function, obj=None, alignment=True):
793798
self.function = function
794799
self.obj = obj
800+
self.alignment = alignment
795801

796802
def __repr__(self):
797803
return "<PointerCast(%s)>" % self.function

devito/ir/iet/visitors.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -218,9 +218,9 @@ def visit_PointerCast(self, o):
218218
f._C_field_data)
219219
else:
220220
rvalue = '(%s (*)%s) %s' % (f._C_typedata, shape, obj)
221-
lvalue = c.AlignedAttribute(f._data_alignment,
222-
c.Value(f._C_typedata,
223-
'(*restrict %s)%s' % (f.name, shape)))
221+
lvalue = c.Value(f._C_typedata, '(*restrict %s)%s' % (f.name, shape))
222+
if o.alignment:
223+
lvalue = c.AlignedAttribute(f._data_alignment, lvalue)
224224
return c.Initializer(lvalue, rvalue)
225225

226226
def visit_Dereference(self, o):

devito/passes/iet/definitions.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
import cgen as c
1111

12-
from devito.ir import (EntryFunction, List, LocalExpression, PointerCast, FindSymbols,
12+
from devito.ir import (EntryFunction, List, LocalExpression, FindSymbols,
1313
MapExprStmts, Transformer)
1414
from devito.passes.iet.engine import iet_pass
1515
from devito.passes.iet.langbase import LangBB
@@ -284,7 +284,7 @@ def place_casts(self, iet):
284284
symbol_names = {i.name for i in FindSymbols('free-symbols').visit(iet)}
285285
need_cast = {i for i in need_cast if i.name in symbol_names}
286286

287-
casts = tuple(PointerCast(i) for i in iet.parameters if i in need_cast)
287+
casts = tuple(self.lang.PointerCast(i) for i in iet.parameters if i in need_cast)
288288
if casts:
289289
casts = (List(body=casts, footer=c.Line()),)
290290

devito/passes/iet/langbase.py

Lines changed: 4 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import cgen as c
55

66
from devito.ir import (DummyEq, Call, Conditional, List, Prodder, ParallelIteration,
7-
ParallelBlock, EntryFunction, LocalExpression)
7+
ParallelBlock, PointerCast, EntryFunction, LocalExpression)
88
from devito.mpi.distributed import MPICommObject
99
from devito.passes.iet.engine import iet_pass
1010
from devito.symbolics import Byref, CondNe
@@ -34,27 +34,13 @@ class LangBB(object, metaclass=LangMeta):
3434
Abstract base class for Language Building Blocks.
3535
"""
3636

37-
# Note: below dummy values are used, so a subclass should override them
38-
37+
# NOTE: a subclass may want to override the values below, which represent
38+
# IET node types used in the various lowering and/or transformation passes
3939
Region = ParallelBlock
40-
"""
41-
The IET node type to be used to construct a parallel region.
42-
"""
43-
4440
HostIteration = ParallelIteration
45-
"""
46-
The IET node type to be used to construct a host-parallel Iteration.
47-
"""
48-
4941
DeviceIteration = ParallelIteration
50-
"""
51-
The IET node type to be used to construct a device-parallel Iteration.
52-
"""
53-
5442
Prodder = Prodder
55-
"""
56-
The IET node type to be used to construct asynchronous prodders.
57-
"""
43+
PointerCast = PointerCast
5844

5945
@classmethod
6046
def _map_to(cls, f, imask=None, queueid=None):

devito/passes/iet/languages/openmp.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from devito.arch import AMDGPUX, NVIDIAX
55
from devito.ir import (Block, Call, Conditional, List, Prodder, ParallelIteration,
6-
ParallelBlock, While, FindNodes, Transformer)
6+
ParallelBlock, PointerCast, While, FindNodes, Transformer)
77
from devito.mpi.routines import IrecvCall, IsendCall
88
from devito.passes.iet.definitions import DataManager, DeviceAwareDataManager
99
from devito.passes.iet.engine import iet_pass
@@ -151,6 +151,13 @@ class OmpBB(PragmaLangBB):
151151
Prodder = ThreadedProdder
152152

153153

154+
class DeviceOmpBB(OmpBB):
155+
156+
# NOTE: Work around clang>=10 issue concerning offloading arrays declared
157+
# with an `__attribute__((aligned(...)))` qualifier
158+
PointerCast = lambda *args: PointerCast(*args, alignment=False)
159+
160+
154161
class SimdOmpizer(PragmaSimdTransformer):
155162
lang = OmpBB
156163

@@ -161,7 +168,7 @@ class Ompizer(PragmaShmTransformer):
161168

162169
class DeviceOmpizer(PragmaDeviceAwareTransformer):
163170

164-
lang = OmpBB
171+
lang = DeviceOmpBB
165172

166173
@iet_pass
167174
def make_gpudirect(self, iet):
@@ -181,8 +188,8 @@ class OmpDataManager(DataManager):
181188

182189

183190
class DeviceOmpDataManager(DeviceAwareDataManager):
184-
lang = OmpBB
191+
lang = DeviceOmpBB
185192

186193

187194
class OmpOrchestrator(Orchestrator):
188-
lang = OmpBB
195+
lang = DeviceOmpBB

examples/performance/01_gpu.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,7 @@
285285
" }\n",
286286
" /* End of OpenMP setup */\n",
287287
"\n",
288-
" float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;\n",
288+
" float (*restrict u)[u_vec->size[1]][u_vec->size[2]] = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;\n",
289289
"\n",
290290
" #pragma omp target enter data map(to: u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]])\n",
291291
"\n",

0 commit comments

Comments
 (0)