From 7aeb2646bff41973f03d4a928ecd7be6ca4a019e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 18 Nov 2025 08:22:25 -0800 Subject: [PATCH 01/57] [ASan] Make most tests run under internal shell on Darwin This patch fixes most of the ASan tests that were failing on Darwin when running under the internal shell. There are still a couple left that are more interesting cases that I'll do in a follow up patch. The tests that still need to be done: ``` TestCases/Darwin/duplicate_os_log_reports.cpp TestCases/Darwin/dyld_insert_libraries_reexec.cpp TestCases/Darwin/interface_symbols_darwin.cpp ``` Reviewers: thetruestblue, fhahn, vitalybuka, DanBlackwell, ndrewh Reviewed By: DanBlackwell Pull Request: https://github.com/llvm/llvm-project/pull/168545 --- .../Darwin/atos-symbolizer-dyld-root-path.cpp | 3 ++- .../asan/TestCases/Darwin/atos-symbolizer.cpp | 3 ++- .../Darwin/dyld_insert_libraries_reexec.cpp | 3 ++- .../Darwin/dyld_insert_libraries_remove.cpp | 26 +++++++++---------- .../asan/TestCases/Darwin/init_for_dlopen.cpp | 2 +- .../Darwin/malloc_zone-protected.cpp | 3 +-- .../Darwin/llvm_interface_symbols.cpp | 3 ++- 7 files changed, 22 insertions(+), 21 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp index 664471b6987a8..4201d49df4d74 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cpp @@ -1,6 +1,7 @@ // Check that when having a DYLD_ROOT_PATH set, the symbolizer still works. 
// RUN: %clangxx_asan -O0 %s -o %t -// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=$(which atos) \ +// RUN: which atos | tr -d '\n' > %t.symbolizer_path +// RUN: %env_asan_opts=verbosity=2 DYLD_ROOT_PATH="/" ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} \ // RUN: not %run %t 2>&1 | FileCheck %s // // Due to a bug in atos, this only works on x86_64. diff --git a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp index bab4e4f3765c2..7487bd4cb40e6 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/atos-symbolizer.cpp @@ -1,7 +1,8 @@ // Check that the `atos` symbolizer works. // RUN: %clangxx_asan -O0 %s -o %t -// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=$(which atos) not %run %t 2>&1 | FileCheck %s +// RUN: which atos | tr -d '\n' > %t.symbolizer_path +// RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=%{readfile:%t.symbolizer_path} not %run %t 2>&1 | FileCheck %s // Path returned by `which atos` is invalid on iOS. 
// UNSUPPORTED: ios, i386-darwin diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp index 0fec18b89411a..145e162a21c0e 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cpp @@ -4,7 +4,8 @@ // UNSUPPORTED: ios // RUN: rm -rf %t && mkdir -p %t -// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \ +// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name +// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \ // RUN: %t/libclang_rt.asan_osx_dynamic.dylib // RUN: %clangxx_asan %s -o %t/a.out diff --git a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp index 0672e064a1904..872848d075eaf 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cpp @@ -5,29 +5,27 @@ // UNSUPPORTED: ios // RUN: rm -rf %t && mkdir -p %t -// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \ +// RUN: %clang_asan -print-file-name=lib | tr -d '\n' > %t.lib_name +// RUN: cp %{readfile:%t.lib_name}/darwin/libclang_rt.asan_osx_dynamic.dylib \ // RUN: %t/libclang_rt.asan_osx_dynamic.dylib // RUN: %clangxx_asan %s -o %t/a.out // RUN: %clangxx -DSHARED_LIB %s \ // RUN: -dynamiclib -o %t/dummy-so.dylib -// RUN: ( cd %t && \ -// RUN: DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s || exit 1 +// RUN: cd %t +// RUN: env DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s -// RUN: ( cd %t && \ -// RUN: 
DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s || exit 1 +// RUN: env DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s -// RUN: ( cd %t && \ -// RUN: %env_asan_opts=strip_env=0 \ -// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s --check-prefix=CHECK-KEEP || exit 1 +// RUN: %env_asan_opts=strip_env=0 \ +// RUN: DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s --check-prefix=CHECK-KEEP -// RUN: ( cd %t && \ -// RUN: DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ -// RUN: %run ./a.out 2>&1 ) | FileCheck %s || exit 1 +// RUN: env DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \ +// RUN: %run ./a.out 2>&1 | FileCheck %s #if !defined(SHARED_LIB) #include diff --git a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp index 3bf8e99703a08..9bb652cc79438 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/init_for_dlopen.cpp @@ -5,7 +5,7 @@ // - By default the lit config sets this but we don't want this // test to implicitly depend on this. // - It avoids requiring `--crash` to be passed to `not`. 
-// RUN: APPLE_ASAN_INIT_FOR_DLOPEN=0 %env_asan_opts=abort_on_error=0 not \ +// RUN: %env_asan_opts=abort_on_error=0 APPLE_ASAN_INIT_FOR_DLOPEN=0 not \ // RUN: %run %t %shared_libasan 2>&1 | \ // RUN: FileCheck -check-prefix=CHECK-DL-OPEN-FAIL %s // RUN: env -u APPLE_ASAN_INIT_FOR_DLOPEN %env_asan_opts=abort_on_error=0 not \ diff --git a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp index 125b544724d3f..ac3c5898f271a 100644 --- a/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp +++ b/compiler-rt/test/asan/TestCases/Darwin/malloc_zone-protected.cpp @@ -3,8 +3,7 @@ #include // RUN: %clangxx_asan %s -o %t -// RUN: ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s - +// RUN: env ASAN_OPTIONS="abort_on_error=1" not --crash %run %t 2>&1 | FileCheck %s void *pwn(malloc_zone_t *unused_zone, size_t unused_size) { printf("PWNED\n"); diff --git a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp index 5da18aa971d43..ba7b5e5815bd6 100644 --- a/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp +++ b/compiler-rt/test/asan_abi/TestCases/Darwin/llvm_interface_symbols.cpp @@ -24,7 +24,8 @@ // RUN: diff %t.imports-sorted %t.exports-sorted // Ensure that there is no dynamic dylib linked. -// RUN: otool -L %t | (! grep -q "dynamic.dylib") +// RUN: otool -L %t > %t.libs +// RUN: not grep -q "dynamic.dylib" < %t.libs // UNSUPPORTED: ios From 38c1a58605e8347afd05e31360d3bfd5c4c19ced Mon Sep 17 00:00:00 2001 From: Tarun Prabhu Date: Tue, 18 Nov 2025 09:22:43 -0700 Subject: [PATCH 02/57] [flang][NFC] Strip trailing whitespace from tests (6 of N) Only the fortran source files in flang/test/Lower/PowerPC and some in flang/test/Lower have been modified. 
The other files in the directory will be cleaned up in subsequent commits --- flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 | 4 ++-- flang/test/Lower/PowerPC/ppc-vec-sel.f90 | 2 +- .../test/Lower/PowerPC/ppc-vec-store-elem-order.f90 | 4 ++-- flang/test/Lower/PowerPC/ppc-vec-store.f90 | 12 ++++++------ flang/test/Lower/allocatable-assignment.f90 | 12 ++++++------ flang/test/Lower/allocatable-globals.f90 | 2 +- flang/test/Lower/allocatable-polymorphic.f90 | 8 ++++---- flang/test/Lower/allocated.f90 | 1 - flang/test/Lower/array-elemental-calls-2.f90 | 2 +- flang/test/Lower/array-elemental-calls.f90 | 2 +- flang/test/Lower/array-expression-assumed-size.f90 | 8 ++++---- flang/test/Lower/array-substring.f90 | 2 +- flang/test/Lower/array-wide-char.f90 | 2 +- flang/test/Lower/array.f90 | 2 +- flang/test/Lower/forall-pointer-assignment.f90 | 2 +- flang/test/Lower/forall/forall-2.f90 | 4 ++-- flang/test/Lower/forall/forall-ranked.f90 | 2 +- flang/test/Lower/forall/forall-where-2.f90 | 10 +++++----- flang/test/Lower/forall/forall-where.f90 | 2 +- 19 files changed, 41 insertions(+), 42 deletions(-) diff --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 index 355fd6c3a742a..b17c3f1bdc4e7 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 @@ -394,7 +394,7 @@ subroutine vec_xl_testi8a(arg1, arg2, res) vector(integer(1)) :: res res = vec_xl(arg1, arg2) - + ! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]] ! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1 @@ -481,7 +481,7 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res) vector(integer(1)) :: res res = vec_xl_be(arg1, arg2) - + ! LLVMIR: %4 = load i8, ptr %0, align 1 ! LLVMIR: %5 = getelementptr i8, ptr %1, i8 %4 ! 
LLVMIR: %6 = load <16 x i8>, ptr %5, align 1 diff --git a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 b/flang/test/Lower/PowerPC/ppc-vec-sel.f90 index c3de8ba9c1444..93641d1461a99 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-sel.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-sel.f90 @@ -136,7 +136,7 @@ subroutine vec_sel_testu8(arg1, arg2, arg3) vector(unsigned(8)) :: arg1, arg2, r vector(unsigned(8)) :: arg3 r = vec_sel(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load <2 x i64>, ptr %{{.*}}, align 16 diff --git a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 index caf6d5463a833..947c8b1c7eb2c 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 @@ -14,7 +14,7 @@ subroutine vec_st_test(arg1, arg2, arg3) ! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]] -! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> +! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32> ! LLVMIR: %[[shf:.*]] = shufflevector <4 x i32> %[[bc]], <4 x i32> undef, <4 x i32> ! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[shf]], ptr %[[addr]]) end subroutine vec_st_test @@ -28,7 +28,7 @@ subroutine vec_ste_test(arg1, arg2, arg3) integer(4) :: arg2 real(4) :: arg3 call vec_ste(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16 ! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! 
LLVMIR: %[[addr]] = getelementptr i8, ptr %2, i32 %[[arg2]] diff --git a/flang/test/Lower/PowerPC/ppc-vec-store.f90 b/flang/test/Lower/PowerPC/ppc-vec-store.f90 index c25cc8b07cf79..1c3ab9638f117 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-store.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-store.f90 @@ -300,7 +300,7 @@ subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3) real(4) :: arg3 call vec_xst(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] @@ -432,7 +432,7 @@ subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i) ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]] ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 -! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 +! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]] ! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> ! LLVMIR: store <4 x i32> %[[src]], ptr %[[gep2]], align 16 @@ -449,7 +449,7 @@ subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3) real(4) :: arg3 call vec_xstd2(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] @@ -509,7 +509,7 @@ subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i) ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]] ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 -! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 +! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]] ! 
LLVMIR: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64> ! LLVMIR: store <2 x i64> %[[src]], ptr %[[gep2]], align 16 @@ -526,7 +526,7 @@ subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3) real(4) :: arg3 call vec_xstw4(arg1, arg2, arg3) - + ! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16 ! LLVMIR: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2 ! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]] @@ -584,7 +584,7 @@ subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i) ! LLVMIR: %[[iadd:.*]] = add nsw i64 %[[imul2]], 0 ! LLVMIR: %[[gep1:.*]] = getelementptr <4 x i32>, ptr %2, i64 %[[iadd]] ! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16 -! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 +! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4 ! LLVMIR: %[[gep2:.*]] = getelementptr i8, ptr %[[gep1]], i32 %[[arg2]] ! LLVMIR: store <4 x i32> %[[arg1]], ptr %[[gep2]], align 16 end subroutine vec_xstw4_test_vi4i4vai4 diff --git a/flang/test/Lower/allocatable-assignment.f90 b/flang/test/Lower/allocatable-assignment.f90 index 3c220232104a5..b6b2f7b6c77b9 100644 --- a/flang/test/Lower/allocatable-assignment.f90 +++ b/flang/test/Lower/allocatable-assignment.f90 @@ -283,14 +283,14 @@ subroutine test_dyn_char(x, n, c) ! CHECK: hlfir.assign %[[VAL_8]]#0 to %[[VAL_14]]#0 realloc keep_lhs_len : !fir.box>>, !fir.ref>>>> subroutine test_derived_with_init(x, y) - type t + type t integer, allocatable :: a(:) - end type - type(t), allocatable :: x - type(t) :: y + end type + type(t), allocatable :: x + type(t) :: y ! The allocatable component of `x` need to be initialized ! during the automatic allocation (setting its rank and allocation - ! status) before it is assigned with the component of `y` + ! status) before it is assigned with the component of `y` x = y end subroutine ! CHECK-LABEL: func.func @_QMalloc_assignPtest_derived_with_init( @@ -357,7 +357,7 @@ end function elt ! real :: y(2, 3) = reshape([1,2,3,4,5,6], [2,3]) ! 
real, allocatable :: x (:, :) ! allocate(x(2,2)) -! call test_with_lbounds(x, y) +! call test_with_lbounds(x, y) ! print *, x(10, 20) ! print *, x !end diff --git a/flang/test/Lower/allocatable-globals.f90 b/flang/test/Lower/allocatable-globals.f90 index 9d386688f8881..8b7420ab32391 100644 --- a/flang/test/Lower/allocatable-globals.f90 +++ b/flang/test/Lower/allocatable-globals.f90 @@ -12,7 +12,7 @@ module mod_allocatables character(10), allocatable :: c(:) end module - + ! CHECK-LABEL: func @_QPtest_mod_allocatables() subroutine test_mod_allocatables() use mod_allocatables, only: c diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index 27cdf2839767d..d528fd8e546ff 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -460,7 +460,7 @@ subroutine test_allocate_with_mold() ! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]](%{{.*}}) {uniq_name = "_QMpolyFtest_allocate_with_moldEx"} : (!fir.ref,c:i32}>>>, !fir.shape<1>) -> (!fir.ref,c:i32}>>>, !fir.ref,c:i32}>>>) ! CHECK: %[[EMBOX_X:.*]] = fir.embox %[[X_DECL]]#0(%{{.*}}) : (!fir.ref,c:i32}>>>, !fir.shape<1>) -> !fir.box,c:i32}>>> -! CHECK: %[[RANK:.*]] = arith.constant 1 : i32 +! CHECK: %[[RANK:.*]] = arith.constant 1 : i32 ! CHECK: %[[P_BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref>>>>) -> !fir.ref> ! CHECK: %[[X_BOX_NONE:.*]] = fir.convert %[[EMBOX_X]] : (!fir.box,c:i32}>>>) -> !fir.box ! CHECK: fir.call @_FortranAPointerApplyMold(%[[P_BOX_NONE]], %[[X_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref>, !fir.box, i32) -> () @@ -614,10 +614,10 @@ program test_alloc ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]] ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 -! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 +! 
LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %[[TMP:.*]] -! LLVM: call void %{{.*}}(ptr %{{.*}}) +! LLVM: call void %{{.*}}(ptr %{{.*}}) ! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 @@ -628,7 +628,7 @@ program test_alloc ! LLVM: %[[TYPE_CODE:.*]] = load i8, ptr %[[TYPE_CODE_GEP]] ! LLVM-NEXT: %[[EXT_TYPE_CODE:.*]] = sext i8 %[[TYPE_CODE]] to i32 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 -! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 +! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[EXT_TYPE_CODE]] to i8 ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, i8 %[[TRUNC_TYPE_CODE]], 4 ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %{{.*}} ! LLVM: call void %{{.*}}(ptr %{{.*}}) diff --git a/flang/test/Lower/allocated.f90 b/flang/test/Lower/allocated.f90 index 6e8420fc7d79a..11e856fd67bad 100644 --- a/flang/test/Lower/allocated.f90 +++ b/flang/test/Lower/allocated.f90 @@ -15,4 +15,3 @@ subroutine allocated_test(scalar, array) ! 
CHECK: cmpi ne, %[[addrToInt1]], %c0{{.*}} print *, allocated(array) end subroutine - \ No newline at end of file diff --git a/flang/test/Lower/array-elemental-calls-2.f90 b/flang/test/Lower/array-elemental-calls-2.f90 index 2674b07dece17..60c9257a19822 100644 --- a/flang/test/Lower/array-elemental-calls-2.f90 +++ b/flang/test/Lower/array-elemental-calls-2.f90 @@ -172,7 +172,7 @@ subroutine check_parentheses_logical() subroutine check_parentheses_derived(a) type t integer :: i - end type + end type interface integer elemental function elem_func_derived(x) import :: t diff --git a/flang/test/Lower/array-elemental-calls.f90 b/flang/test/Lower/array-elemental-calls.f90 index 853807bcb3e6c..93d2979ec9383 100644 --- a/flang/test/Lower/array-elemental-calls.f90 +++ b/flang/test/Lower/array-elemental-calls.f90 @@ -57,7 +57,7 @@ elemental impure integer function impure_func(j) integer, intent(in) :: j end function end interface - + i = 42 + pure_func(j) i = 42 + impure_func(j) end subroutine diff --git a/flang/test/Lower/array-expression-assumed-size.f90 b/flang/test/Lower/array-expression-assumed-size.f90 index a498148d07fc7..b51dc00c20e28 100644 --- a/flang/test/Lower/array-expression-assumed-size.f90 +++ b/flang/test/Lower/array-expression-assumed-size.f90 @@ -16,8 +16,8 @@ end subroutine assumed_size_forall_test ! CHECK-LABEL: func @_QPassumed_size_test( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>{{.*}}) { -! CHECK: %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index -! CHECK: %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index +! CHECK: %[[VAL_1A:.*]] = fir.convert %c10{{.*}} : (i64) -> index +! CHECK: %[[VAL_1B:.*]] = arith.cmpi sgt, %[[VAL_1A]], %c0{{.*}} : index ! CHECK: %[[VAL_1:.*]] = arith.select %[[VAL_1B]], %[[VAL_1A]], %c0{{.*}} : index ! CHECK: %[[VAL_2:.*]] = fir.assumed_size_extent : index ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : index @@ -79,8 +79,8 @@ end subroutine assumed_size_forall_test ! 
CHECK-LABEL: func @_QPassumed_size_forall_test( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>{{.*}}) { ! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {adapt.valuebyref, bindc_name = "i"} -! CHECK: %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index -! CHECK: %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index +! CHECK: %[[VAL_2A:.*]] = fir.convert %c10{{.*}} : (i64) -> index +! CHECK: %[[VAL_2B:.*]] = arith.cmpi sgt, %[[VAL_2A]], %c0{{.*}} : index ! CHECK: %[[VAL_2:.*]] = arith.select %[[VAL_2B]], %[[VAL_2A]], %c0{{.*}} : index ! CHECK: %[[VAL_3:.*]] = fir.assumed_size_extent : index ! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90 index 7544fbb989627..0ede04f0bb2f8 100644 --- a/flang/test/Lower/array-substring.f90 +++ b/flang/test/Lower/array-substring.f90 @@ -46,5 +46,5 @@ function test(C) logical :: test(1) character*12 C(1) - test = C(1:1)(1:8) == (/'ABCDabcd'/) + test = C(1:1)(1:8) == (/'ABCDabcd'/) end function test diff --git a/flang/test/Lower/array-wide-char.f90 b/flang/test/Lower/array-wide-char.f90 index 8bad280d0f056..44fcd45519d85 100644 --- a/flang/test/Lower/array-wide-char.f90 +++ b/flang/test/Lower/array-wide-char.f90 @@ -2,7 +2,7 @@ character(LEN=128, KIND=4), PARAMETER :: conarr(3) = & [ character(128,4) :: "now is the time", "for all good men to come", & - "to the aid of the country" ] + "to the aid of the country" ] character(LEN=10, KIND=4) :: arr(3) = & [ character(10,4) :: "good buddy", "best buddy", " " ] call action_on_char4(conarr) diff --git a/flang/test/Lower/array.f90 b/flang/test/Lower/array.f90 index 710175739b3a8..cd12d7f851e67 100644 --- a/flang/test/Lower/array.f90 +++ b/flang/test/Lower/array.f90 @@ -93,7 +93,7 @@ subroutine s(i,j,k,ii,jj,kk,a1,a2,a3,a4,a5,a6,a7) ! CHECK: fir.coordinate_of %[[a7]], %[[t7]] : ! CHECK-LABEL: EndIoStatement print *, a7(kk, jj, ii) - + end subroutine s ! 
CHECK-LABEL: range diff --git a/flang/test/Lower/forall-pointer-assignment.f90 b/flang/test/Lower/forall-pointer-assignment.f90 index d89fb3ed5cb57..62184a77addf5 100644 --- a/flang/test/Lower/forall-pointer-assignment.f90 +++ b/flang/test/Lower/forall-pointer-assignment.f90 @@ -1,4 +1,4 @@ -! Test lower of FORALL pointer assignment +! Test lower of FORALL pointer assignment ! RUN: bbc -emit-fir %s -o - | FileCheck %s diff --git a/flang/test/Lower/forall/forall-2.f90 b/flang/test/Lower/forall/forall-2.f90 index cdafb4f3d49e7..c6a20f5859497 100644 --- a/flang/test/Lower/forall/forall-2.f90 +++ b/flang/test/Lower/forall/forall-2.f90 @@ -16,7 +16,7 @@ subroutine implied_iters_allocatable(thing, a1) end type t type(t) :: thing(:) integer :: i - + forall (i=5:13) ! commenting out this test for the moment (hits assert) ! thing(i)%arr = a1 @@ -32,7 +32,7 @@ subroutine conflicting_allocatable(thing, lo, hi) end type t type(t) :: thing(:) integer :: i - + forall (i = lo:hi) ! commenting out this test for the moment (hits assert) ! thing(i)%arr = thing(hi-i)%arr diff --git a/flang/test/Lower/forall/forall-ranked.f90 b/flang/test/Lower/forall/forall-ranked.f90 index 9e56be926e78e..f508c67468212 100644 --- a/flang/test/Lower/forall/forall-ranked.f90 +++ b/flang/test/Lower/forall/forall-ranked.f90 @@ -68,7 +68,7 @@ end function f integer :: arr(11) end type t type(t) :: a(10,10) - + forall (i=1:5) a(i,:)%arr(i+4) = f(i) end forall diff --git a/flang/test/Lower/forall/forall-where-2.f90 b/flang/test/Lower/forall/forall-where-2.f90 index c075508bef561..85aab87559c3c 100644 --- a/flang/test/Lower/forall/forall-where-2.f90 +++ b/flang/test/Lower/forall/forall-where-2.f90 @@ -6,7 +6,7 @@ ! Test a FORALL construct with a nested WHERE construct where the mask ! contains temporary array expressions. 
-subroutine test_nested_forall_where_with_temp_in_mask(a,b) +subroutine test_nested_forall_where_with_temp_in_mask(a,b) interface function temp_foo(i, j) integer :: i, j @@ -28,10 +28,10 @@ function temp_foo(i, j) ! CHECK: func @_QPtest_nested_forall_where_with_temp_in_mask({{.*}}) { ! CHECK: %[[tempResultBox:.*]] = fir.alloca !fir.box>> {bindc_name = ".result"} - ! Where condition pre-evaluation + ! Where condition pre-evaluation ! CHECK: fir.do_loop {{.*}} { ! CHECK: fir.do_loop {{.*}} { - ! Evaluation of mask for iteration (i,j) into ragged array temp + ! Evaluation of mask for iteration (i,j) into ragged array temp ! CHECK: %[[tempResult:.*]] = fir.call @_QPtemp_foo ! CHECK: fir.save_result %[[tempResult]] to %[[tempResultBox]] : !fir.box>>, !fir.ref>>> ! CHECK: fir.if {{.*}} { @@ -52,7 +52,7 @@ function temp_foo(i, j) ! CHECK: fir.do_loop {{.*}} { ! Array assignment at iteration (i, j) ! CHECK: fir.do_loop {{.*}} { -! CHECK: fir.if {{.*}} { +! CHECK: fir.if {{.*}} { ! CHECK: arith.divf ! CHECK: } else { ! CHECK: } @@ -64,7 +64,7 @@ function temp_foo(i, j) ! CHECK: fir.do_loop {{.*}} { ! Array assignment at iteration (i, j) ! CHECK: fir.do_loop {{.*}} { -! CHECK: fir.if {{.*}} { +! CHECK: fir.if {{.*}} { ! CHECK: } else { ! CHECK: arith.negf ! CHECK: } diff --git a/flang/test/Lower/forall/forall-where.f90 b/flang/test/Lower/forall/forall-where.f90 index 54ff2bd4c3f16..3202edbaec808 100644 --- a/flang/test/Lower/forall/forall-where.f90 +++ b/flang/test/Lower/forall/forall-where.f90 @@ -6,7 +6,7 @@ ! This has both an explicit and implicit iteration space. The WHERE construct ! makes the assignments conditional and the where mask evaluation must happen ! prior to evaluating the array assignment statement. 
-subroutine test_nested_forall_where(a,b) +subroutine test_nested_forall_where(a,b) type t real data(100) end type t From 67d5c14ad66f022d689cbcb0709df690938e5b6d Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Tue, 18 Nov 2025 11:31:45 -0500 Subject: [PATCH 03/57] [llvm][AddressSanitizer] option for applying AddressSanitizer to specific address spaces (#167770) For some backends, e.g., BPF, it is desirable to only sanitize memory belonging to specific address spaces. More specifically, it is sometimes desirable to only apply address sanitization for arena memory belonging to address space 1. However, AddressSanitizer currently does not support selectively sanitizing address spaces. Add a new option to select which address spaces to apply AddressSanitizer to. No functional change for existing targets (namely AMD GPU) that hardcode which address spaces to sanitize --- .../Instrumentation/AddressSanitizer.cpp | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 3a14ee5addc2f..c9f249a8733ac 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -441,6 +442,15 @@ static cl::opt ClOverrideDestructorKind( "Use global destructors")), cl::init(AsanDtorKind::Invalid), cl::Hidden); +static SmallSet SrcAddrSpaces; +static cl::list ClAddrSpaces( + "asan-instrument-address-spaces", + cl::desc("Only instrument variables in the specified address spaces."), + cl::Hidden, cl::CommaSeparated, cl::ZeroOrMore, + cl::callback([](const unsigned &AddrSpace) { + SrcAddrSpaces.insert(AddrSpace); + 
})); + // Debug flags. static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -1363,11 +1373,25 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { static bool isUnsupportedAMDGPUAddrspace(Value *Addr) { Type *PtrTy = cast(Addr->getType()->getScalarType()); unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + // Globals in address space 1 and 4 are supported for AMDGPU. if (AddrSpace == 3 || AddrSpace == 5) return true; return false; } +static bool isSupportedAddrspace(const Triple &TargetTriple, Value *Addr) { + Type *PtrTy = cast(Addr->getType()->getScalarType()); + unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + + if (!SrcAddrSpaces.empty()) + return SrcAddrSpaces.count(AddrSpace); + + if (TargetTriple.isAMDGPU()) + return !isUnsupportedAMDGPUAddrspace(Addr); + + return AddrSpace == 0; +} + Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -1431,10 +1455,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { } bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { - // Instrument accesses from different address spaces only for AMDGPU. - Type *PtrTy = cast(Ptr->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0 && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr))) + // Check whether the target supports sanitizing the address space + // of the pointer. + if (!isSupportedAddrspace(TargetTriple, Ptr)) return true; // Ignore swifterror addresses. @@ -2097,9 +2120,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; - // Globals in address space 1 and 4 are supported for AMDGPU. 
- if (G->getAddressSpace() && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G))) + if (!isSupportedAddrspace(TargetTriple, G)) return false; if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. // Two problems with thread-locals: From 1fb8e3d76e87a6c6f0d8fc7aa4e7ed75e3641fee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matej=20Ko=C5=A1=C3=ADk?= Date: Tue, 18 Nov 2025 17:40:31 +0100 Subject: [PATCH 04/57] [lldb] Support integer registers with more than 64 bits. (#166363) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this PR we are proposing to change LLDB codebase so that LLDB is able to print values of integer registers that have more than 64-bits (even if the number of bits is not equal to 128). --------- Co-authored-by: Matej Košík Co-authored-by: Jonas Devlieghere --- lldb/include/lldb/Utility/RegisterValue.h | 11 +- lldb/source/Utility/DataExtractor.cpp | 4 - lldb/source/Utility/RegisterValue.cpp | 48 ++- lldb/unittests/Utility/RegisterValueTest.cpp | 298 +++++++++++++++++-- 4 files changed, 308 insertions(+), 53 deletions(-) diff --git a/lldb/include/lldb/Utility/RegisterValue.h b/lldb/include/lldb/Utility/RegisterValue.h index 49aaf68be17fc..baf984cbcb052 100644 --- a/lldb/include/lldb/Utility/RegisterValue.h +++ b/lldb/include/lldb/Utility/RegisterValue.h @@ -46,7 +46,8 @@ class RegisterValue { eTypeUInt16, eTypeUInt32, eTypeUInt64, - eTypeUInt128, + eTypeUIntN, /// < This value is used when the (integer) register is larger + /// than 64-bits. 
eTypeFloat, eTypeDouble, eTypeLongDouble, @@ -69,7 +70,7 @@ class RegisterValue { m_scalar = inst; } - explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUInt128) { + explicit RegisterValue(llvm::APInt inst) : m_type(eTypeUIntN) { m_scalar = llvm::APInt(std::move(inst)); } @@ -178,7 +179,7 @@ class RegisterValue { } void operator=(llvm::APInt uint) { - m_type = eTypeUInt128; + m_type = eTypeUIntN; m_scalar = llvm::APInt(std::move(uint)); } @@ -217,8 +218,8 @@ class RegisterValue { m_scalar = uint; } - void SetUInt128(llvm::APInt uint) { - m_type = eTypeUInt128; + void SetUIntN(llvm::APInt uint) { + m_type = eTypeUIntN; m_scalar = std::move(uint); } diff --git a/lldb/source/Utility/DataExtractor.cpp b/lldb/source/Utility/DataExtractor.cpp index e9be0cba81f0c..a9aea168acf41 100644 --- a/lldb/source/Utility/DataExtractor.cpp +++ b/lldb/source/Utility/DataExtractor.cpp @@ -662,10 +662,6 @@ size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length, const uint8_t *src = PeekData(offset, length); if (src) { if (dst_byte_order != GetByteOrder()) { - // Validate that only a word- or register-sized dst is byte swapped - assert(length == 1 || length == 2 || length == 4 || length == 8 || - length == 10 || length == 16 || length == 32); - for (uint32_t i = 0; i < length; ++i) (static_cast(dst))[i] = src[length - i - 1]; } else diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp index 8b2af4e3d4f0e..c28c9e2d4d106 100644 --- a/lldb/source/Utility/RegisterValue.cpp +++ b/lldb/source/Utility/RegisterValue.cpp @@ -127,7 +127,7 @@ bool RegisterValue::GetScalarValue(Scalar &scalar) const { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -180,8 +180,6 @@ Status RegisterValue::SetValueFromData(const RegisterInfo ®_info, if (src_len > reg_info.byte_size) src_len = reg_info.byte_size; - type128 int128; - m_type = eTypeInvalid; 
switch (reg_info.encoding) { case eEncodingInvalid: @@ -196,17 +194,15 @@ Status RegisterValue::SetValueFromData(const RegisterInfo ®_info, SetUInt32(src.GetMaxU32(&src_offset, src_len)); else if (reg_info.byte_size <= 8) SetUInt64(src.GetMaxU64(&src_offset, src_len)); - else if (reg_info.byte_size <= 16) { - uint64_t data1 = src.GetU64(&src_offset); - uint64_t data2 = src.GetU64(&src_offset); - if (src.GetByteOrder() == eByteOrderLittle) { - int128.x[0] = data1; - int128.x[1] = data2; - } else { - int128.x[0] = data2; - int128.x[1] = data1; - } - SetUInt128(llvm::APInt(128, int128.x)); + else { + std::vector native_endian_src(src_len, 0); + src.ExtractBytes(src_offset, src_len, + llvm::sys::IsLittleEndianHost ? eByteOrderLittle + : eByteOrderBig, + native_endian_src.data()); + llvm::APInt uint = llvm::APInt::getZero(src_len * 8); + llvm::LoadIntFromMemory(uint, native_endian_src.data(), src_len); + SetUIntN(uint); } break; case eEncodingIEEE754: @@ -442,7 +438,7 @@ bool RegisterValue::SignExtend(uint32_t sign_bitpos) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: return m_scalar.SignExtend(sign_bitpos); case eTypeFloat: case eTypeDouble: @@ -465,7 +461,7 @@ bool RegisterValue::CopyValue(const RegisterValue &rhs) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -581,7 +577,7 @@ llvm::APInt RegisterValue::GetAsUInt128(const llvm::APInt &fail_value, case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -616,7 +612,7 @@ float RegisterValue::GetAsFloat(float fail_value, bool *success_ptr) const { break; case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -636,7 +632,7 @@ double RegisterValue::GetAsDouble(double fail_value, bool 
*success_ptr) const { case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -657,7 +653,7 @@ long double RegisterValue::GetAsLongDouble(long double fail_value, case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -676,7 +672,7 @@ const void *RegisterValue::GetBytes() const { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -698,7 +694,7 @@ uint32_t RegisterValue::GetByteSize() const { return 2; case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -721,7 +717,7 @@ bool RegisterValue::SetUInt(uint64_t uint, uint32_t byte_size) { } else if (byte_size <= 8) { SetUInt64(uint); } else if (byte_size <= 16) { - SetUInt128(llvm::APInt(128, uint)); + SetUIntN(llvm::APInt(128, uint)); } else return false; return true; @@ -749,7 +745,7 @@ bool RegisterValue::operator==(const RegisterValue &rhs) const { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: case eTypeFloat: case eTypeDouble: case eTypeLongDouble: @@ -774,7 +770,7 @@ bool RegisterValue::ClearBit(uint32_t bit) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: if (bit < (GetByteSize() * 8)) { return m_scalar.ClearBit(bit); } @@ -814,7 +810,7 @@ bool RegisterValue::SetBit(uint32_t bit) { case eTypeUInt16: case eTypeUInt32: case eTypeUInt64: - case eTypeUInt128: + case eTypeUIntN: if (bit < (GetByteSize() * 8)) { return m_scalar.SetBit(bit); } diff --git a/lldb/unittests/Utility/RegisterValueTest.cpp b/lldb/unittests/Utility/RegisterValueTest.cpp index 6239dbe21634a..7b27e841cbec5 100644 --- a/lldb/unittests/Utility/RegisterValueTest.cpp +++ 
b/lldb/unittests/Utility/RegisterValueTest.cpp @@ -57,13 +57,12 @@ TEST(RegisterValueTest, GetScalarValue) { APInt(128, 0x7766554433221100))); } -static const Scalar etalon128(APInt(128, 0xffeeddccbbaa9988ull) << 64 | - APInt(128, 0x7766554433221100ull)); - -void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) { - RegisterInfo ri{"uint128_register", +void TestSetValueFromData(const Scalar &etalon, void *src, size_t src_byte_size, + const lldb::ByteOrder endianness, + const RegisterValue::Type register_value_type) { + RegisterInfo ri{"test", nullptr, - 16, + static_cast(src_byte_size), 0, lldb::Encoding::eEncodingUint, lldb::Format::eFormatDefault, @@ -71,26 +70,289 @@ void TestSetValueFromData128(void *src, const lldb::ByteOrder endianness) { nullptr, nullptr, nullptr}; - DataExtractor src_extractor(src, 16, endianness, 8); + DataExtractor src_extractor(src, src_byte_size, endianness, 8); RegisterValue rv; EXPECT_TRUE(rv.SetValueFromData(ri, src_extractor, 0, false).Success()); Scalar s; EXPECT_TRUE(rv.GetScalarValue(s)); - EXPECT_EQ(s, etalon128); + EXPECT_EQ(rv.GetType(), register_value_type); + EXPECT_EQ(s, etalon); +} + +static const Scalar etalon7(APInt(32, 0x0000007F)); + +TEST(RegisterValueTest, SetValueFromData_7_le) { + uint8_t src[] = {0x7F}; + TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt8); +} + +TEST(RegisterValueTest, SetValueFromData_7_be) { + uint8_t src[] = {0x7F}; + TestSetValueFromData(etalon7, src, 1, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt8); +} + +static const Scalar etalon8(APInt(32, 0x000000FE)); + +TEST(RegisterValueTest, SetValueFromData_8_le) { + uint8_t src[] = {0xFE}; + TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt8); +} + +TEST(RegisterValueTest, SetValueFromData_8_be) { + uint8_t src[] = {0xFE}; + TestSetValueFromData(etalon8, src, 1, lldb::ByteOrder::eByteOrderBig, + 
RegisterValue::eTypeUInt8); +} + +static const Scalar etalon9(APInt(32, 0x000001FE)); + +TEST(RegisterValueTest, SetValueFromData_9_le) { + uint8_t src[] = {0xFE, 0x01}; + TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt16); +} + +TEST(RegisterValueTest, SetValueFromData_9_be) { + uint8_t src[] = {0x01, 0xFE}; + TestSetValueFromData(etalon9, src, 2, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt16); +} + +static const Scalar etalon15(APInt(32, 0x00007FED)); + +TEST(RegisterValueTest, SetValueFromData_15_le) { + uint8_t src[] = {0xED, 0x7F}; + TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt16); +} + +TEST(RegisterValueTest, SetValueFromData_15_be) { + uint8_t src[] = {0x7F, 0xED}; + TestSetValueFromData(etalon15, src, 2, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt16); +} + +static const Scalar etalon16(APInt(32, 0x0000FEDC)); + +TEST(RegisterValueTest, SetValueFromData_16_le) { + uint8_t src[] = {0xDC, 0xFE}; + TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt16); +} + +TEST(RegisterValueTest, SetValueFromData_16_be) { + uint8_t src[] = {0xFE, 0xDC}; + TestSetValueFromData(etalon16, src, 2, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt16); +} + +static const Scalar etalon17(APInt(32, 0x0001FEDC)); + +TEST(RegisterValueTest, SetValueFromData_17_le) { + uint8_t src[] = {0xDC, 0xFE, 0x01}; + TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); +} + +TEST(RegisterValueTest, SetValueFromData_17_be) { + uint8_t src[] = {0x01, 0xFE, 0xDC}; + TestSetValueFromData(etalon17, src, 3, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon24(APInt(32, 0x00FEDCBA)); + +TEST(RegisterValueTest, SetValueFromData_24_le) { + uint8_t src[] = {0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon24, src, 3, 
lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); +} + +TEST(RegisterValueTest, SetValueFromData_24_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA}; + TestSetValueFromData(etalon24, src, 3, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon31(APInt(32, 0x7EDCBA98)); + +TEST(RegisterValueTest, SetValueFromData_31_le) { + uint8_t src[] = {0x98, 0xBA, 0xDC, 0x7E}; + TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); +} + +TEST(RegisterValueTest, SetValueFromData_31_be) { + uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98}; + TestSetValueFromData(etalon31, src, 4, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon32(APInt(32, 0xFEDCBA98)); + +TEST(RegisterValueTest, SetValueFromData_32_le) { + uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt32); } -// Test that the "RegisterValue::SetValueFromData" method works correctly -// with 128-bit little-endian data that represents an integer. 
+TEST(RegisterValueTest, SetValueFromData_32_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98}; + TestSetValueFromData(etalon32, src, 4, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt32); +} + +static const Scalar etalon33(APInt(64, 0x00000001FEDCBA98)); + +TEST(RegisterValueTest, SetValueFromData_33_le) { + uint8_t src[] = {0x98, 0xBA, 0xDC, 0xFE, 0x01}; + TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_33_be) { + uint8_t src[] = {0x01, 0xFE, 0xDC, 0xBA, 0x98}; + TestSetValueFromData(etalon33, src, 5, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon40(APInt(64, 0x000000FEDCBA9876)); + +TEST(RegisterValueTest, SetValueFromData_40_le) { + uint8_t src[] = {0x76, 0x98, 0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_40_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76}; + TestSetValueFromData(etalon40, src, 5, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon63(APInt(64, 0x7EDCBA9876543210)); + +TEST(RegisterValueTest, SetValueFromData_63_le) { + uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0x7E}; + TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + +TEST(RegisterValueTest, SetValueFromData_63_be) { + uint8_t src[] = {0x7E, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10}; + TestSetValueFromData(etalon63, src, 8, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon64(APInt(64, 0xFEDCBA9876543210)); + +TEST(RegisterValueTest, SetValueFromData_64_le) { + uint8_t src[] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE}; + TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUInt64); +} + 
+TEST(RegisterValueTest, SetValueFromData_64_be) { + uint8_t src[] = {0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10}; + TestSetValueFromData(etalon64, src, 8, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUInt64); +} + +static const Scalar etalon65(APInt(72, 0x0000000000000001ull) << 1 * 64 | + APInt(72, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_65_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x01}; + TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_65_be) { + uint8_t src[] = {0x01, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon65, src, 9, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon127(APInt(128, 0x7f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(128, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_127_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x7f}; + TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_127_be) { + uint8_t src[] = {0x7f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon127, src, 16, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon128(APInt(128, 0x0f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(128, 0x0706050403020100ull) << 0 * 64); + TEST(RegisterValueTest, SetValueFromData_128_le) { - uint8_t src[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; - TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderLittle); + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; + 
TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); } -// Test that the "RegisterValue::SetValueFromData" method works correctly -// with 128-bit big-endian data that represents an integer. TEST(RegisterValueTest, SetValueFromData_128_be) { - uint8_t src[] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88, - 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; - TestSetValueFromData128(src, lldb::ByteOrder::eByteOrderBig); + uint8_t src[] = {0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon128, src, 16, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon256(APInt(256, 0x1f1e1d1c1b1a1918ull) << 3 * 64 | + APInt(256, 0x1716151413121110ull) << 2 * 64 | + APInt(256, 0x0f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(256, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, SetValueFromData_256_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f}; + TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_256_be) { + uint8_t src[] = {0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon256, src, 32, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); +} + +static const Scalar etalon257(APInt(512, 0x0000000000000001ull) << 4 * 64 | + APInt(512, 0x1f1e1d1c1b1a1918ull) << 3 * 64 | + APInt(512, 0x1716151413121110ull) << 2 * 64 | + APInt(512, 0x0f0e0d0c0b0a0908ull) << 1 * 64 | + APInt(512, 0x0706050403020100ull) << 0 * 64); + +TEST(RegisterValueTest, 
SetValueFromData_257_le) { + uint8_t src[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, + 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x01}; + TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderLittle, + RegisterValue::eTypeUIntN); +} + +TEST(RegisterValueTest, SetValueFromData_257_be) { + uint8_t src[] = {0x01, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, + 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, + 0x05, 0x04, 0x03, 0x02, 0x01, 0x00}; + TestSetValueFromData(etalon257, src, 33, lldb::ByteOrder::eByteOrderBig, + RegisterValue::eTypeUIntN); } From 93a8ca8fc738379333242ee51d9e080fbb283c6e Mon Sep 17 00:00:00 2001 From: Nathan Corbyn Date: Tue, 18 Nov 2025 16:42:58 +0000 Subject: [PATCH 05/57] [AArch64][GISel] Don't crash in known-bits when copying from vectors to non-vectors (#168081) Updates the demanded elements before recursing through copies in case the type of the source register changes from a non-vector register to a vector register. Fixes #167842. 
--- .../CodeGen/GlobalISel/GISelValueTracking.cpp | 11 +++- .../GlobalISel/knownbits-copy-vector-crash.ll | 56 +++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index c1fb8b6d78ff8..ecba323f8d6bf 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -247,6 +247,7 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { const MachineOperand &Src = MI.getOperand(Idx); Register SrcReg = Src.getReg(); + LLT SrcTy = MRI.getType(SrcReg); // Look through trivial copies and phis but don't look through trivial // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits // analysis is currently unable to determine the bit width of a @@ -255,9 +256,15 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, // We can't use NoSubRegister by name as it's defined by each target but // it's always defined to be 0 by tablegen. if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && - MRI.getType(SrcReg).isValid()) { + SrcTy.isValid()) { + // In case we're forwarding from a vector register to a non-vector + // register we need to update the demanded elements to reflect this + // before recursing. + APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector() + ? APInt::getAllOnes(SrcTy.getNumElements()) + : DemandedElts; // Known to be APInt(1, 1) // For COPYs we don't do anything, don't increase the depth. 
- computeKnownBitsImpl(SrcReg, Known2, DemandedElts, + computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts, Depth + (Opcode != TargetOpcode::COPY)); Known2 = Known2.anyextOrTrunc(BitWidth); Known = Known.intersectWith(Known2); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll new file mode 100644 index 0000000000000..f15253682c336 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-copy-vector-crash.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -o - %s | FileCheck %s + +target triple = "aarch64-unknown-unknown" + +; Check we don't crash here when computing known bits. + +define <4 x i32> @test(<8 x i16> %in, i1 %continue) { +; CHECK-LABEL: test: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: mov w12, wzr +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: mov w10, #0 // =0x0 +; CHECK-NEXT: .LBB0_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov w11, w12 +; CHECK-NEXT: mov w12, w12 +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: and x12, x12, #0x7 +; CHECK-NEXT: umull x12, w12, w9 +; CHECK-NEXT: ldrb w12, [x8, x12] +; CHECK-NEXT: cmp w12, #0 +; CHECK-NEXT: cset w12, eq +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: mov v1.b[1], w10 +; CHECK-NEXT: mov v1.b[2], w10 +; CHECK-NEXT: mov v1.b[3], w10 +; CHECK-NEXT: fmov w12, s1 +; CHECK-NEXT: tbz w0, #0, .LBB0_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: mov v0.s[1], wzr +; CHECK-NEXT: mov v0.s[2], wzr +; CHECK-NEXT: mov v0.s[3], wzr +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +entry: + br label %loop + +exit: + %result = insertelement <4 x i32> zeroinitializer, i32 %index, i64 0 + ret <4 x i32> %result + +loop: + %index = phi i32 [ 0, %entry ], [ 
%insert.bitcast, %loop ] + %extracted = extractelement <8 x i16> %in, i32 %index + %masked = and i16 %extracted, 255 + %maskedIsZero = icmp eq i16 %masked, 0 + %maskedIsZero.zext = zext i1 %maskedIsZero to i8 + %insert = insertelement <4 x i8> zeroinitializer, i8 %maskedIsZero.zext, i64 0 + %insert.bitcast = bitcast <4 x i8> %insert to i32 + br i1 %continue, label %exit, label %loop +} From 2675dcd72d02ee1ac2472b7d2914bfe601ff33d4 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Tue, 18 Nov 2025 08:46:40 -0800 Subject: [PATCH 06/57] [lldb] update lldb-server platform help parsing (attempt 3) (#164904) * original change #162730 * with windows fix #164843 * remove timeout that was pointed out in the comment above * Remove test that starts and listens on a socket to avoid timeout issues --- ...s.test => TestGdbserverErrorMessages.test} | 0 .../TestPlatformErrorMessages.test | 25 ++ .../Shell/lldb-server/TestPlatformHelp.test | 40 +++ lldb/tools/lldb-server/CMakeLists.txt | 5 + lldb/tools/lldb-server/PlatformOptions.td | 75 +++++ lldb/tools/lldb-server/lldb-platform.cpp | 265 +++++++++++------- 6 files changed, 301 insertions(+), 109 deletions(-) rename lldb/test/Shell/lldb-server/{TestErrorMessages.test => TestGdbserverErrorMessages.test} (100%) create mode 100644 lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test create mode 100644 lldb/test/Shell/lldb-server/TestPlatformHelp.test create mode 100644 lldb/tools/lldb-server/PlatformOptions.td diff --git a/lldb/test/Shell/lldb-server/TestErrorMessages.test b/lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test similarity index 100% rename from lldb/test/Shell/lldb-server/TestErrorMessages.test rename to lldb/test/Shell/lldb-server/TestGdbserverErrorMessages.test diff --git a/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test new file mode 100644 index 0000000000000..7d3b37aa5fc39 --- /dev/null +++ 
b/lldb/test/Shell/lldb-server/TestPlatformErrorMessages.test @@ -0,0 +1,25 @@ +RUN: %platformserver 2>&1 | FileCheck --check-prefixes=NO_LISTEN,ALL %s +NO_LISTEN: error: either --listen or --child-platform-fd is required + +RUN: %lldb-server platform --listen 2>&1 | FileCheck --check-prefixes=LISTEN_MISSING,ALL %s +LISTEN_MISSING: error: --listen: missing argument + +RUN: %lldb-server p --bogus 2>&1 | FileCheck --check-prefixes=BOGUS,ALL %s +BOGUS: error: unknown argument '--bogus' + +RUN: %platformserver --gdbserver-port 2>&1 | FileCheck --check-prefixes=GDBPORT_MISSING,ALL %s +GDBPORT_MISSING: error: --gdbserver-port: missing argument + +RUN: %platformserver --gdbserver-port notanumber --listen :1234 2>&1 | FileCheck --check-prefixes=GDBPORT_INVALID %s +GDBPORT_INVALID: error: invalid --gdbserver-port value + +RUN: %platformserver --socket-file 2>&1 | FileCheck --check-prefixes=SOCKETFILE_MISSING,ALL %s +SOCKETFILE_MISSING: error: --socket-file: missing argument + +RUN: %platformserver --log-file 2>&1 | FileCheck --check-prefixes=LOGFILE_MISSING,ALL %s +LOGFILE_MISSING: error: --log-file: missing argument + +RUN: %platformserver --log-channels 2>&1 | FileCheck --check-prefixes=LOGCHANNELS_MISSING,ALL %s +LOGCHANNELS_MISSING: error: --log-channels: missing argument + +ALL: Use 'lldb-server{{(\.exe)?}} {{p|platform}} --help' for a complete list of options. 
diff --git a/lldb/test/Shell/lldb-server/TestPlatformHelp.test b/lldb/test/Shell/lldb-server/TestPlatformHelp.test new file mode 100644 index 0000000000000..c5ced8a318100 --- /dev/null +++ b/lldb/test/Shell/lldb-server/TestPlatformHelp.test @@ -0,0 +1,40 @@ +RUN: %platformserver --help 2>&1 | FileCheck %s +RUN: %platformserver -h 2>&1 | FileCheck %s +RUN: %lldb-server p --help 2>&1 | FileCheck %s +RUN: %lldb-server p -h 2>&1 | FileCheck %s +RUN: %lldb-server platform --help 2>&1 | FileCheck %s +RUN: %lldb-server platform -h 2>&1 | FileCheck %s + +CHECK: OVERVIEW: lldb-server{{(\.exe)?}} platform + +CHECK: USAGE: lldb-server{{(\.exe)?}} {{p|platform}} [options] --listen <[host]:port> {{\[}}[--] program args...] + +CHECK: CONNECTION OPTIONS: +CHECK: --gdbserver-port +CHECK-SAME: Short form: -P +CHECK: --listen <[host]:port> +CHECK-SAME: Short form: -L +CHECK: --socket-file +CHECK-SAME: Short form: -f + +CHECK: GENERAL OPTIONS: +CHECK: --help +CHECK: --log-channels +CHECK: Short form: -c +CHECK: --log-file +CHECK-SAME: Short form: -l +CHECK: --server + +CHECK: OPTIONS: +CHECK: -- program args + +CHECK: DESCRIPTION +CHECK: Acts as a platform server for remote debugging + +CHECK: EXAMPLES +CHECK: # Listen on port 1234, exit after first connection +CHECK: lldb-server{{(\.exe)?}} platform --listen tcp://0.0.0.0:1234 +CHECK: # Listen on port 5555, accept multiple connections +CHECK: lldb-server{{(\.exe)?}} platform --server --listen tcp://localhost:5555 +CHECK: # Listen on Unix domain socket +CHECK: lldb-server{{(\.exe)?}} platform --listen unix:///tmp/lldb-server.sock diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index 1d8dc72a3f872..fb55c64936121 100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS LLGSOptions.td) tablegen(LLVM LLGSOptions.inc -gen-opt-parser-defs) add_public_tablegen_target(LLGSOptionsTableGen) +set(LLVM_TARGET_DEFINITIONS 
PlatformOptions.td) +tablegen(LLVM PlatformOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(PlatformOptionsTableGen) + set(LLDB_PLUGINS) if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") @@ -67,6 +71,7 @@ add_lldb_tool(lldb-server add_dependencies(lldb-server LLGSOptionsTableGen + PlatformOptionsTableGen ${tablegen_deps} ) target_include_directories(lldb-server PRIVATE "${LLDB_SOURCE_DIR}/source") diff --git a/lldb/tools/lldb-server/PlatformOptions.td b/lldb/tools/lldb-server/PlatformOptions.td new file mode 100644 index 0000000000000..eedd1d8c35343 --- /dev/null +++ b/lldb/tools/lldb-server/PlatformOptions.td @@ -0,0 +1,75 @@ +include "llvm/Option/OptParser.td" + +class F: Flag<["--", "-"], name>; +class R prefixes, string name> + : Option; + +multiclass SJ { + def NAME: Separate<["--", "-"], name>, + HelpText; + def NAME # _eq: Joined<["--", "-"], name # "=">, + Alias(NAME)>; +} + +def grp_connect : OptionGroup<"connection">, HelpText<"CONNECTION OPTIONS">; + +defm listen: SJ<"listen", "Host and port to listen on. Format: [host]:port or protocol://[host]:port (e.g., tcp://localhost:1234, unix:///path/to/socket). Short form: -L">, + MetaVarName<"<[host]:port>">, + Group; +def: Separate<["-"], "L">, Alias, + Group; + +defm socket_file: SJ<"socket-file", "Write listening socket information (port number for TCP or path for Unix domain sockets) to the specified file. Short form: -f">, + MetaVarName<"">, + Group; +def: Separate<["-"], "f">, Alias, + Group; + +defm gdbserver_port: SJ<"gdbserver-port", "Port to use for spawned gdbserver instances. If 0 or unspecified, a port will be chosen automatically. 
Short form: -P">, + MetaVarName<"">, + Group; +def: Separate<["-"], "P">, Alias, + Group; + +defm child_platform_fd: SJ<"child-platform-fd", "File descriptor for communication with parent platform process (internal use only).">, + MetaVarName<"">, + Group, + Flags<[HelpHidden]>; + +def grp_general : OptionGroup<"general options">, HelpText<"GENERAL OPTIONS">; + +def server: F<"server">, + HelpText<"Run in server mode, accepting multiple client connections sequentially. Without this flag, the server exits after handling the first connection.">, + Group; + +defm log_channels: SJ<"log-channels", "Channels to log. A colon-separated list of entries. Each entry starts with a channel followed by a space-separated list of categories. Common channels: lldb, gdb-remote, platform, process. Short form: -c">, + MetaVarName<"">, + Group; +def: Separate<["-"], "c">, Alias, + Group; + +defm log_file: SJ<"log-file", "Destination file to log to. If empty, log to stderr. Short form: -l">, + MetaVarName<"">, + Group; +def: Separate<["-"], "l">, Alias, + Group; + +def debug: F<"debug">, + HelpText<"(Unused, kept for backward compatibility)">, + Group, + Flags<[HelpHidden]>; + +def verbose: F<"verbose">, + HelpText<"(Unused, kept for backward compatibility)">, + Group, + Flags<[HelpHidden]>; + +def help: F<"help">, + HelpText<"Display this help message and exit.">, + Group; +def: Flag<["-"], "h">, Alias, + Group; + +def REM : R<["--"], "">, + HelpText<"Arguments to pass to launched gdbserver instances.">, + MetaVarName<"program args">; diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index 0bd928507ba89..59b1eb419bc2b 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -21,6 +21,9 @@ #include #include +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" #include 
"llvm/Support/WithColor.h" @@ -56,22 +59,69 @@ using namespace llvm; // of target CPUs. For now, let's just use 100. static const int backlog = 100; static const int socket_error = -1; -static int g_debug = 0; -static int g_verbose = 0; -static int g_server = 0; - -// option descriptors for getopt_long_only() -static struct option g_long_options[] = { - {"debug", no_argument, &g_debug, 1}, - {"verbose", no_argument, &g_verbose, 1}, - {"log-file", required_argument, nullptr, 'l'}, - {"log-channels", required_argument, nullptr, 'c'}, - {"listen", required_argument, nullptr, 'L'}, - {"gdbserver-port", required_argument, nullptr, 'P'}, - {"socket-file", required_argument, nullptr, 'f'}, - {"server", no_argument, &g_server, 1}, - {"child-platform-fd", required_argument, nullptr, 2}, - {nullptr, 0, nullptr, 0}}; + +namespace { +using namespace llvm::opt; + +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), +#include "PlatformOptions.inc" +#undef OPTION +}; + +#define OPTTABLE_STR_TABLE_CODE +#include "PlatformOptions.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "PlatformOptions.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE + +static constexpr opt::OptTable::Info InfoTable[] = { +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), +#include "PlatformOptions.inc" +#undef OPTION +}; + +class PlatformOptTable : public opt::GenericOptTable { +public: + PlatformOptTable() + : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} + + void PrintHelp(llvm::StringRef Name) { + std::string Usage = + (Name + " [options] --listen <[host]:port> [[--] program args...]") + .str(); + + std::string Title = "lldb-server platform"; + + OptTable::printHelp(llvm::outs(), Usage.c_str(), Title.c_str()); + + llvm::outs() << R"( +DESCRIPTION + Acts as a platform server for remote debugging. 
When LLDB clients connect, + the platform server handles platform operations (file transfers, process + launching) and spawns debug server instances (lldb-server gdbserver) to + handle actual debugging sessions. + + By default, the server exits after handling one connection. Use --server + to keep running and accept multiple connections sequentially. + +EXAMPLES + # Listen on port 1234, exit after first connection + lldb-server platform --listen tcp://0.0.0.0:1234 + + # Listen on port 5555, accept multiple connections + lldb-server platform --server --listen tcp://localhost:5555 + + # Listen on Unix domain socket + lldb-server platform --listen unix:///tmp/lldb-server.sock + +)"; + } +}; +} // namespace #if defined(__APPLE__) #define LOW_PORT (IPPORT_RESERVED) @@ -97,12 +147,11 @@ static void signal_handler(int signo) { } #endif -static void display_usage(const char *progname, const char *subcommand) { - fprintf(stderr, "Usage:\n %s %s [--log-file log-file-name] [--log-channels " - "log-channel-list] [--port-file port-file-path] --server " - "--listen port\n", - progname, subcommand); - exit(0); +static void display_usage(PlatformOptTable &Opts, const char *progname, + const char *subcommand) { + std::string Name = + (llvm::sys::path::filename(progname) + " " + subcommand).str(); + Opts.PrintHelp(Name); } static Status parse_listen_host_port(Socket::SocketProtocol &protocol, @@ -261,7 +310,8 @@ static Status spawn_process(const char *progname, const FileSpec &prog, const Socket *conn_socket, uint16_t gdb_port, const lldb_private::Args &args, const std::string &log_file, - const StringRef log_channels, MainLoop &main_loop) { + const StringRef log_channels, MainLoop &main_loop, + bool multi_client) { Status error; SharedSocket shared_socket(conn_socket, error); if (error.Fail()) @@ -297,9 +347,12 @@ static Status spawn_process(const char *progname, const FileSpec &prog, launch_info.SetLaunchInSeparateProcessGroup(false); - if (g_server) + // Set up process monitor 
callback based on whether we're in server mode. + if (multi_client) + // In server mode: empty callback (don't terminate when child exits). launch_info.SetMonitorProcessCallback([](lldb::pid_t, int, int) {}); else + // In single-client mode: terminate main loop when child exits. launch_info.SetMonitorProcessCallback([&main_loop](lldb::pid_t, int, int) { main_loop.AddPendingCallback( [](MainLoopBase &loop) { loop.RequestTermination(); }); @@ -371,107 +424,101 @@ int main_platform(int argc, char *argv[]) { signal(SIGPIPE, SIG_IGN); signal(SIGHUP, signal_handler); #endif - int long_option_index = 0; - Status error; - std::string listen_host_port; - int ch; - std::string log_file; - StringRef - log_channels; // e.g. "lldb process threads:gdb-remote default:linux all" + // Special handling for 'help' as first argument. + if (argc > 0 && strcmp(argv[0], "help") == 0) { + PlatformOptTable Opts; + display_usage(Opts, progname, subcommand); + return EXIT_SUCCESS; + } + Status error; shared_fd_t fd = SharedSocket::kInvalidFD; - uint16_t gdbserver_port = 0; - FileSpec socket_file; - bool show_usage = false; - int option_error = 0; - std::string short_options(OptionParser::GetShortOptionString(g_long_options)); + PlatformOptTable Opts; + BumpPtrAllocator Alloc; + StringSaver Saver(Alloc); + bool HasError = false; -#if __GLIBC__ - optind = 0; -#else - optreset = 1; - optind = 1; -#endif + opt::InputArgList Args = + Opts.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](llvm::StringRef Msg) { + WithColor::error() << Msg << "\n"; + HasError = true; + }); - while ((ch = getopt_long_only(argc, argv, short_options.c_str(), - g_long_options, &long_option_index)) != -1) { - switch (ch) { - case 0: // Any optional that auto set themselves will return 0 - break; + std::string Name = + (llvm::sys::path::filename(progname) + " " + subcommand).str(); + std::string HelpText = + "Use '" + Name + " --help' for a complete list of options.\n"; - case 'L': - listen_host_port.append(optarg); - break; 
+ if (HasError) { + llvm::errs() << HelpText; + return EXIT_FAILURE; + } - case 'l': // Set Log File - if (optarg && optarg[0]) - log_file.assign(optarg); - break; + if (Args.hasArg(OPT_help)) { + display_usage(Opts, progname, subcommand); + return EXIT_SUCCESS; + } - case 'c': // Log Channels - if (optarg && optarg[0]) - log_channels = StringRef(optarg); - break; + // Parse arguments. + std::string listen_host_port = Args.getLastArgValue(OPT_listen).str(); + std::string log_file = Args.getLastArgValue(OPT_log_file).str(); + StringRef log_channels = Args.getLastArgValue(OPT_log_channels); + bool multi_client = Args.hasArg(OPT_server); + [[maybe_unused]] bool debug = Args.hasArg(OPT_debug); + [[maybe_unused]] bool verbose = Args.hasArg(OPT_verbose); + + if (Args.hasArg(OPT_socket_file)) { + socket_file.SetFile(Args.getLastArgValue(OPT_socket_file), + FileSpec::Style::native); + } - case 'f': // Socket file - if (optarg && optarg[0]) - socket_file.SetFile(optarg, FileSpec::Style::native); - break; + if (Args.hasArg(OPT_gdbserver_port)) { + if (!llvm::to_integer(Args.getLastArgValue(OPT_gdbserver_port), + gdbserver_port)) { + WithColor::error() << "invalid --gdbserver-port value\n"; + return EXIT_FAILURE; + } + } - case 'P': - case 'm': - case 'M': { - uint16_t portnum; - if (!llvm::to_integer(optarg, portnum)) { - WithColor::error() << "invalid port number string " << optarg << "\n"; - option_error = 2; - break; - } - // Note the condition gdbserver_port > HIGH_PORT is valid in case of using - // --child-platform-fd. Check gdbserver_port later. 
- if (ch == 'P') - gdbserver_port = portnum; - else if (gdbserver_port == 0) - gdbserver_port = portnum; - } break; - - case 2: { - uint64_t _fd; - if (!llvm::to_integer(optarg, _fd)) { - WithColor::error() << "invalid fd " << optarg << "\n"; - option_error = 6; - } else - fd = (shared_fd_t)_fd; - } break; - - case 'h': /* fall-through is intentional */ - case '?': - show_usage = true; - break; + if (Args.hasArg(OPT_child_platform_fd)) { + uint64_t _fd; + if (!llvm::to_integer(Args.getLastArgValue(OPT_child_platform_fd), _fd)) { + WithColor::error() << "invalid --child-platform-fd value\n"; + return EXIT_FAILURE; } + fd = (shared_fd_t)_fd; } if (!LLDBServerUtilities::SetupLogging(log_file, log_channels, 0)) return -1; // Print usage and exit if no listening port is specified. - if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) - show_usage = true; + if (listen_host_port.empty() && fd == SharedSocket::kInvalidFD) { + WithColor::error() << "either --listen or --child-platform-fd is required\n" + << HelpText; + return EXIT_FAILURE; + } - if (show_usage || option_error) { - display_usage(progname, subcommand); - exit(option_error); + // Get remaining arguments for inferior. + std::vector Inputs; + for (opt::Arg *Arg : Args.filtered(OPT_INPUT)) + Inputs.push_back(Arg->getValue()); + if (opt::Arg *Arg = Args.getLastArg(OPT_REM)) { + for (const char *Val : Arg->getValues()) + Inputs.push_back(Val); } - // Skip any options we consumed with getopt_long_only. 
- argc -= optind; - argv += optind; lldb_private::Args inferior_arguments; - inferior_arguments.SetArguments(argc, const_cast(argv)); + if (!Inputs.empty()) { + std::vector args_ptrs; + for (const auto &Input : Inputs) + args_ptrs.push_back(Input.data()); + inferior_arguments.SetArguments(args_ptrs.size(), args_ptrs.data()); + } FileSpec debugserver_path = GetDebugserverPath(); if (!debugserver_path) { @@ -514,7 +561,7 @@ int main_platform(int argc, char *argv[]) { platform.SetConnection( std::make_unique(std::move(socket))); client_handle(platform, inferior_arguments); - return 0; + return EXIT_SUCCESS; } if (gdbserver_port != 0 && @@ -522,7 +569,7 @@ int main_platform(int argc, char *argv[]) { WithColor::error() << llvm::formatv("Port number {0} is not in the " "valid user port range of {1} - {2}\n", gdbserver_port, LOW_PORT, HIGH_PORT); - return 1; + return EXIT_FAILURE; } Socket::SocketProtocol protocol = Socket::ProtocolUnixDomain; @@ -559,7 +606,7 @@ int main_platform(int argc, char *argv[]) { if (error.Fail()) { fprintf(stderr, "failed to write socket id to %s: %s\n", socket_file.GetPath().c_str(), error.AsCString()); - return 1; + return EXIT_FAILURE; } } @@ -577,22 +624,22 @@ int main_platform(int argc, char *argv[]) { llvm::Expected> platform_handles = platform_sock->Accept( main_loop, [progname, gdbserver_port, &inferior_arguments, log_file, - log_channels, &main_loop, + log_channels, &main_loop, multi_client, &platform_handles](std::unique_ptr sock_up) { printf("Connection established.\n"); Status error = spawn_process( progname, HostInfo::GetProgramFileSpec(), sock_up.get(), gdbserver_port, inferior_arguments, log_file, log_channels, - main_loop); + main_loop, multi_client); if (error.Fail()) { Log *log = GetLog(LLDBLog::Platform); LLDB_LOGF(log, "spawn_process failed: %s", error.AsCString()); WithColor::error() << "spawn_process failed: " << error.AsCString() << "\n"; - if (!g_server) + if (!multi_client) main_loop.RequestTermination(); } - if 
(!g_server) + if (!multi_client) platform_handles->clear(); }); if (!platform_handles) { @@ -616,5 +663,5 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "lldb-server exiting...\n"); - return 0; + return EXIT_SUCCESS; } From c7d2ed43648ebd9076ee290928d7bc805906882d Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 18 Nov 2025 11:50:19 -0500 Subject: [PATCH 07/57] Reland [Support][Jobserver][Tests] Simplify default executor init (#168165) and make (#165264) Truely recover Executor::getDefaultExecutor. The previous change missed std::unique_ptr, which is needed in a normal program exit, since only with that ThreadPoolExecutor destructor will be called in a normal program exit, where it ensures the executor has been stopped and waits for worker threads to finish. The wait is important as it prevents intermittent crashes on Windows when the process is doing a full exit. --- llvm/lib/Support/Parallel.cpp | 21 +----- llvm/unittests/Support/JobserverTest.cpp | 81 +++++++++++++++++++----- 2 files changed, 68 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 8e0c724accb36..ab220b8f2ceba 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -193,16 +193,7 @@ class ThreadPoolExecutor : public Executor { JobserverClient *TheJobserver = nullptr; }; -// A global raw pointer to the executor. Lifetime is managed by the -// objects created within createExecutor(). -static Executor *TheExec = nullptr; -static std::once_flag Flag; - -// This function will be called exactly once to create the executor. -// It contains the necessary platform-specific logic. Since functions -// called by std::call_once cannot return value, we have to set the -// executor as a global variable. 
-void createExecutor() { +Executor *Executor::getDefaultExecutor() { #ifdef _WIN32 // The ManagedStatic enables the ThreadPoolExecutor to be stopped via // llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This @@ -226,22 +217,16 @@ void createExecutor() { ThreadPoolExecutor::Deleter> ManagedExec; static std::unique_ptr Exec(&(*ManagedExec)); - TheExec = Exec.get(); + return Exec.get(); #else // ManagedStatic is not desired on other platforms. When `Exec` is destroyed // by llvm_shutdown(), worker threads will clean up and invoke TLS // destructors. This can lead to race conditions if other threads attempt to // access TLS objects that have already been destroyed. static ThreadPoolExecutor Exec(strategy); - TheExec = &Exec; + return &Exec; #endif } - -Executor *Executor::getDefaultExecutor() { - // Use std::call_once to lazily and safely initialize the executor. - std::call_once(Flag, createExecutor); - return TheExec; -} } // namespace } // namespace detail diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp index d27445897db0a..1917145704608 100644 --- a/llvm/unittests/Support/JobserverTest.cpp +++ b/llvm/unittests/Support/JobserverTest.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Parallel.h" +#include "llvm/Support/Program.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" @@ -40,8 +41,14 @@ using namespace llvm; +// Provided by the unit test main to locate the current test binary. +extern const char *TestMainArgv0; + namespace { +// Unique anchor whose address helps locate the current test binary. +static int JobserverTestAnchor = 0; + // RAII helper to set an environment variable for the duration of a test. 
class ScopedEnvironment { std::string Name; @@ -382,51 +389,93 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) { EXPECT_EQ(CompletedTasks, NumTasks); } -TEST_F(JobserverStrategyTest, ParallelForIsLimited) { +// Parent-side driver that spawns a fresh process to run the child test which +// validates that parallelFor respects the jobserver limit when it is the first +// user of the default executor in that process. +TEST_F(JobserverStrategyTest, ParallelForIsLimited_Subprocess) { + // Mark child execution. + setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1); + + // Find the current test binary and build args to run only the child test. + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor); + ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path"; + SmallVector Args{Executable, + "--gtest_filter=JobserverStrategyTest." + "ParallelForIsLimited_SubprocessChild"}; + + std::string Error; + bool ExecFailed = false; + int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error, + &ExecFailed); + unsetenv("LLVM_JOBSERVER_TEST_CHILD"); + ASSERT_FALSE(ExecFailed) << Error; + ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC; +} + +// Child-side test: create FIFO and make-proxy in this process, set the +// jobserver strategy, and then run parallelFor. +TEST_F(JobserverStrategyTest, ParallelForIsLimited_SubprocessChild) { + if (!getenv("LLVM_JOBSERVER_TEST_CHILD")) + GTEST_SKIP() << "Not running in child mode"; + // This test verifies that llvm::parallelFor respects the jobserver limit. const int NumExplicitJobs = 3; const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit const int NumTasks = 20; - LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs - << " jobs.\n"); startMakeProxy(NumExplicitJobs); - LLVM_DEBUG(dbgs() << "MakeProxy is running.\n"); - // Set the global strategy. parallelFor will use this. 
+ // Set the global strategy before any default executor is created. parallel::strategy = jobserver_concurrency(); std::atomic ActiveTasks{0}; std::atomic MaxActiveTasks{0}; - parallelFor(0, NumTasks, [&](int i) { + parallelFor(0, NumTasks, [&]([[maybe_unused]] int i) { int CurrentActive = ++ActiveTasks; - LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive - << "\n"); int OldMax = MaxActiveTasks.load(); while (CurrentActive > OldMax) MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive); - std::this_thread::sleep_for(std::chrono::milliseconds(20)); --ActiveTasks; }); - LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was " - << MaxActiveTasks << ".\n"); EXPECT_LE(MaxActiveTasks, ConcurrencyLimit); } -TEST_F(JobserverStrategyTest, ParallelSortIsLimited) { - // This test serves as an integration test to ensure parallelSort completes - // correctly when running under the jobserver strategy. It doesn't directly - // measure concurrency but verifies correctness. +// Parent-side driver for parallelSort child test. +TEST_F(JobserverStrategyTest, ParallelSortIsLimited_Subprocess) { + setenv("LLVM_JOBSERVER_TEST_CHILD", "1", 1); + + std::string Executable = + sys::fs::getMainExecutable(TestMainArgv0, &JobserverTestAnchor); + ASSERT_FALSE(Executable.empty()) << "Failed to get main executable path"; + SmallVector Args{Executable, + "--gtest_filter=JobserverStrategyTest." + "ParallelSortIsLimited_SubprocessChild"}; + + std::string Error; + bool ExecFailed = false; + int RC = sys::ExecuteAndWait(Executable, Args, std::nullopt, {}, 0, 0, &Error, + &ExecFailed); + unsetenv("LLVM_JOBSERVER_TEST_CHILD"); + ASSERT_FALSE(ExecFailed) << Error; + ASSERT_EQ(RC, 0) << "Executable failed with exit code " << RC; +} + +// Child-side test: ensure parallelSort runs and completes correctly under the +// jobserver strategy when it owns default executor initialization. 
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited_SubprocessChild) { + if (!getenv("LLVM_JOBSERVER_TEST_CHILD")) + GTEST_SKIP() << "Not running in child mode"; + const int NumExplicitJobs = 3; startMakeProxy(NumExplicitJobs); parallel::strategy = jobserver_concurrency(); std::vector V(1024); - // Fill with random data std::mt19937 randEngine; std::uniform_int_distribution dist; for (int &i : V) From 727ee7e2f169ec60797004dfb9b29ef7ea7cc47a Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 18 Nov 2025 16:54:12 +0000 Subject: [PATCH 08/57] [APInt] Introduce carry-less multiply primitives (#168527) In line with a std proposal to introduce std::clmul, and in preparation to introduce a clmul intrinsic, implement carry-less multiply primitives for APIntOps, clmul[rh]. Ref: https://isocpp.org/files/papers/P3642R3.html --- llvm/include/llvm/ADT/APInt.h | 21 ++++++++ llvm/lib/Support/APInt.cpp | 22 ++++++++- llvm/unittests/ADT/APIntTest.cpp | 83 ++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index fdb3b84b73a1f..7e73cc1957c05 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -2440,6 +2440,27 @@ LLVM_ABI APInt fshl(const APInt &Hi, const APInt &Lo, const APInt &Shift); /// (4) fshr(i8 255, i8 0, i8 9) = fshr(i8 255, i8 0, i8 1) // 9 % 8 LLVM_ABI APInt fshr(const APInt &Hi, const APInt &Lo, const APInt &Shift); +/// Perform a carry-less multiply, also known as XOR multiplication, and return +/// low-bits. All arguments and result have the same bitwidth. +/// +/// Examples: +/// (1) clmul(i4 1, i4 2) = 2 +/// (2) clmul(i4 5, i4 6) = 14 +/// (3) clmul(i4 -4, i4 2) = -8 +/// (4) clmul(i4 -4, i4 -5) = 4 +LLVM_ABI APInt clmul(const APInt &LHS, const APInt &RHS); + +/// Perform a reversed carry-less multiply. 
+/// +/// clmulr(a, b) = bitreverse(clmul(bitreverse(a), bitreverse(b))) +LLVM_ABI APInt clmulr(const APInt &LHS, const APInt &RHS); + +/// Perform a carry-less multiply, and return high-bits. All arguments and +/// result have the same bitwidth. +/// +/// clmulh(a, b) = clmulr(a, b) >> 1 +LLVM_ABI APInt clmulh(const APInt &LHS, const APInt &RHS); + } // namespace APIntOps // See friend declaration above. This additional declaration is required in diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index f6fd5f9ddd633..673cd867f0e45 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -15,10 +15,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" -#include "llvm/Config/llvm-config.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -3187,3 +3187,23 @@ APInt llvm::APIntOps::fshr(const APInt &Hi, const APInt &Lo, return Lo; return Hi.shl(Hi.getBitWidth() - ShiftAmt) | Lo.lshr(ShiftAmt); } + +APInt llvm::APIntOps::clmul(const APInt &LHS, const APInt &RHS) { + assert(LHS.getBitWidth() == RHS.getBitWidth()); + unsigned BW = LHS.getBitWidth(); + APInt Result(BW, 0); + for (unsigned I : seq(BW)) + if (RHS[I]) + Result ^= LHS.shl(I); + return Result; +} + +APInt llvm::APIntOps::clmulr(const APInt &LHS, const APInt &RHS) { + assert(LHS.getBitWidth() == RHS.getBitWidth()); + return clmul(LHS.reverseBits(), RHS.reverseBits()).reverseBits(); +} + +APInt llvm::APIntOps::clmulh(const APInt &LHS, const APInt &RHS) { + assert(LHS.getBitWidth() == RHS.getBitWidth()); + return clmulr(LHS, RHS).lshr(1); +} diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index ca9f9f17ee112..4cb537da72e87 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -3823,4 
+3823,87 @@ TEST(APIntTest, Fshr) { -8193); } +TEST(APIntTest, clmul) { + EXPECT_EQ(APIntOps::clmul(APInt(4, 1), APInt(4, 2)).getZExtValue(), 2U); + EXPECT_EQ(APIntOps::clmul(APInt(4, 5), APInt(4, 6)).getZExtValue(), 14U); + EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned*/ true), + APInt(4, 2, /*isSigned*/ false)) + .getSExtValue(), + -8); + EXPECT_EQ(APIntOps::clmul(APInt(4, -4, /*isSigned*/ true), + APInt(4, -5, /*isSigned*/ true)) + .getSExtValue(), + 4); + EXPECT_EQ(APIntOps::clmul(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmul(APInt(8, 15), APInt(8, 15)).getZExtValue(), 85U); + EXPECT_EQ(APIntOps::clmul(APInt(8, 1), APInt(8, 2)).getZExtValue(), 2U); + EXPECT_EQ(APIntOps::clmul(APInt(64, 0, /*isSigned*/ true), + APInt(64, 9223372036854775807, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmul(APInt(64, 1, /*isSigned*/ true), + APInt(64, 2, /*isSigned*/ true)) + .getSExtValue(), + 2); + EXPECT_EQ(APIntOps::clmul(APInt(16, -2, /*isSigned*/ true), + APInt(16, -1, /*isSigned*/ true)) + .getSExtValue(), + -21846); +} + +TEST(APIntTest, clmulr) { + EXPECT_EQ(APIntOps::clmulr(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(4, 5), APInt(4, 6)).getZExtValue(), 3U); + EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned*/ true), + APInt(4, 2, /*isSigned*/ false)) + .getSExtValue(), + 3); + EXPECT_EQ(APIntOps::clmulr(APInt(4, -4, /*isSigned*/ true), + APInt(4, -5, /*isSigned*/ true)) + .getSExtValue(), + -2); + EXPECT_EQ(APIntOps::clmulr(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulr(APInt(64, 0, /*isSigned*/ true), + APInt(64, 9223372036854775807, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulr(APInt(64, 1, /*isSigned*/ true), + APInt(64, 2, /*isSigned*/ true)) + 
.getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulr(APInt(16, -2, /*isSigned*/ true), + APInt(16, -1, /*isSigned*/ true)) + .getSExtValue(), + -21845); +} + +TEST(APIntTest, clmulh) { + EXPECT_EQ(APIntOps::clmulh(APInt(4, 1), APInt(4, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(4, 5), APInt(4, 6)).getZExtValue(), 1U); + EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned*/ true), + APInt(4, 2, /*isSigned*/ false)) + .getSExtValue(), + 1); + EXPECT_EQ(APIntOps::clmulh(APInt(4, -4, /*isSigned*/ true), + APInt(4, -5, /*isSigned*/ true)) + .getSExtValue(), + 7); + EXPECT_EQ(APIntOps::clmulh(APInt(8, 0), APInt(8, 255)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(8, 15), APInt(8, 15)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(8, 1), APInt(8, 2)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::clmulh(APInt(64, 0, /*isSigned*/ true), + APInt(64, 9223372036854775807, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulh(APInt(64, 1, /*isSigned*/ true), + APInt(64, 2, /*isSigned*/ true)) + .getSExtValue(), + 0); + EXPECT_EQ(APIntOps::clmulh(APInt(16, -2, /*isSigned*/ true), + APInt(16, -1, /*isSigned*/ true)) + .getSExtValue(), + 21845); +} } // end anonymous namespace From cb5812982d96e4c6a07ab77dfa969192d201bd20 Mon Sep 17 00:00:00 2001 From: vangthao95 Date: Tue, 18 Nov 2025 09:00:57 -0800 Subject: [PATCH 09/57] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_IS_FPCLASS (#167575) --- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 + .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 169 +++++++--- llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll | 312 +++++++++++++----- 3 files changed, 347 insertions(+), 142 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index b81a08de383d9..e36c57ad59bfd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -960,6 +960,14 @@ 
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat) .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat); + addRulesForGOpcs({G_IS_FPCLASS}) + .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}}) + .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}}) + .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}}) + .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}}) + .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}}) + .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}}); + using namespace Intrinsic; addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}}); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index dd2cffd7bd161..dd19ba17bb292 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1,16 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | 
FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s + +; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and +; moving those SGPRs into VGPRs. 
define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f16: @@ -34,48 +37,98 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff +; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3 ; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00 -; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 -; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm ; -; GFX8CHECK-LABEL: sgpr_isnan_f16: -; GFX8CHECK: ; %bb.0: -; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX8CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 -; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 -; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] -; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 -; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 -; GFX8CHECK-NEXT: s_endpgm -; -; GFX9CHECK-LABEL: sgpr_isnan_f16: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f16: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_clause 0x1 -; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, 
s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm +; GFX8SELDAG-LABEL: sgpr_isnan_f16: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f16: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9SELDAG-LABEL: sgpr_isnan_f16: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f16: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f16_e64 s[2:3], s2, 3 +; 
GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f16: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_clause 0x1 +; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s2, s2, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f16: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f16_e64 s2, s0, 3 +; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm ; ; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16: ; GFX11SELDAG-TRUE16: ; %bb.0: @@ -103,26 +156,36 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) { ; ; GFX11GLISEL-TRUE16-LABEL: sgpr_isnan_f16: ; GFX11GLISEL-TRUE16: ; %bb.0: -; GFX11GLISEL-TRUE16-NEXT: s_clause 0x1 -; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX11GLISEL-TRUE16-NEXT: 
v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 0 ; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s2, v0.l -; GFX11GLISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11GLISEL-TRUE16-NEXT: v_cmp_class_f16_e32 vcc_lo, s0, v0.l +; GFX11GLISEL-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 vcc_lo, 0 +; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-TRUE16-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11GLISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11GLISEL-TRUE16-NEXT: s_endpgm ; ; GFX11GLISEL-FAKE16-LABEL: sgpr_isnan_f16: ; GFX11GLISEL-FAKE16: ; %bb.0: -; GFX11GLISEL-FAKE16-NEXT: s_clause 0x1 -; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s0, 3 ; GFX11GLISEL-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-FAKE16-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-FAKE16-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11GLISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GLISEL-FAKE16-NEXT: v_cmp_class_f16_e64 s2, s2, 3 -; GFX11GLISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11GLISEL-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11GLISEL-FAKE16-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3) %sext = sext i1 %result to i32 diff --git 
a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll index 4f5432a202058..0a9fe10874c38 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll @@ -1,14 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11CHECK %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn 
-mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s + +; FIXME: There are code size regressions in GlobalISel due to use of SGPRs and +; moving those SGPRs into VGPRs. define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) { ; GFX7SELDAG-LABEL: sgpr_isnan_f32: @@ -30,58 +33,132 @@ define amdgpu_kernel void @sgpr_isnan_f32(ptr addrspace(1) %out, float %x) { ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX7GLISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 3 -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 1 +; GFX7GLISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm ; -; GFX8CHECK-LABEL: sgpr_isnan_f32: -; GFX8CHECK: ; %bb.0: -; GFX8CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX8CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX8CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 -; GFX8CHECK-NEXT: v_mov_b32_e32 v0, s0 -; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] -; GFX8CHECK-NEXT: v_mov_b32_e32 v1, s1 -; GFX8CHECK-NEXT: flat_store_dword v[0:1], v2 -; GFX8CHECK-NEXT: s_endpgm -; -; GFX9CHECK-LABEL: sgpr_isnan_f32: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX9CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; 
GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; GFX10CHECK-LABEL: sgpr_isnan_f32: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_clause 0x1 -; GFX10CHECK-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX10CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm -; -; GFX11CHECK-LABEL: sgpr_isnan_f32: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_clause 0x1 -; GFX11CHECK-NEXT: s_load_b32 s2, s[4:5], 0x2c -; GFX11CHECK-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f32_e64 s2, s2, 3 -; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX8SELDAG-LABEL: sgpr_isnan_f32: +; GFX8SELDAG: ; %bb.0: +; GFX8SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8SELDAG-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX8SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8SELDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX8SELDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8SELDAG-NEXT: s_endpgm +; +; GFX8GLISEL-LABEL: sgpr_isnan_f32: +; GFX8GLISEL: ; %bb.0: +; GFX8GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX8GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GLISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; 
GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GLISEL-NEXT: s_endpgm +; +; GFX9SELDAG-LABEL: sgpr_isnan_f32: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f32: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX9GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s2, 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f32: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_clause 0x1 +; GFX10SELDAG-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX10SELDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: 
sgpr_isnan_f32: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dword s0, s[4:5], 0x2c +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f32_e64 s2, s0, 3 +; GFX10GLISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm +; +; GFX11SELDAG-LABEL: sgpr_isnan_f32: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_clause 0x1 +; GFX11SELDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX11SELDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-NEXT: v_cmp_class_f32_e64 s2, s2, 3 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-NEXT: s_endpgm +; +; GFX11GLISEL-LABEL: sgpr_isnan_f32: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: v_cmp_class_f32_e64 s2, s0, 3 +; GFX11GLISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; 
GFX11GLISEL-NEXT: s_endpgm %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4 @@ -106,9 +183,14 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3] +; GFX7GLISEL-NEXT: v_cmp_class_f64_e64 s[4:5], s[2:3], 3 ; GFX7GLISEL-NEXT: s_mov_b32 s2, -1 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0 +; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 1 +; GFX7GLISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX7GLISEL-NEXT: s_cselect_b32 s3, -1, 0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, s3 ; GFX7GLISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX7GLISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7GLISEL-NEXT: s_endpgm @@ -131,40 +213,92 @@ define amdgpu_kernel void @sgpr_isnan_f64(ptr addrspace(1) %out, double %x) { ; GFX8GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 ; GFX8GLISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8GLISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[2:3] +; GFX8GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX8GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX8GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX8GLISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GLISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GLISEL-NEXT: s_endpgm ; -; GFX9CHECK-LABEL: sgpr_isnan_f64: -; GFX9CHECK: ; %bb.0: -; GFX9CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX9CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX9CHECK-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 -; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] -; GFX9CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9CHECK-NEXT: s_endpgm -; -; 
GFX10CHECK-LABEL: sgpr_isnan_f64: -; GFX10CHECK: ; %bb.0: -; GFX10CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX10CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX10CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 -; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX10CHECK-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10CHECK-NEXT: s_endpgm -; -; GFX11CHECK-LABEL: sgpr_isnan_f64: -; GFX11CHECK: ; %bb.0: -; GFX11CHECK-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 0 -; GFX11CHECK-NEXT: s_waitcnt lgkmcnt(0) -; GFX11CHECK-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 -; GFX11CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 -; GFX11CHECK-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX11CHECK-NEXT: s_endpgm +; GFX9SELDAG-LABEL: sgpr_isnan_f64: +; GFX9SELDAG: ; %bb.0: +; GFX9SELDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9SELDAG-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[2:3] +; GFX9SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9SELDAG-NEXT: s_endpgm +; +; GFX9GLISEL-LABEL: sgpr_isnan_f64: +; GFX9GLISEL: ; %bb.0: +; GFX9GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GLISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 3 +; GFX9GLISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9GLISEL-NEXT: s_endpgm +; +; GFX10SELDAG-LABEL: sgpr_isnan_f64: +; GFX10SELDAG: ; %bb.0: +; GFX10SELDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX10SELDAG-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX10SELDAG-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX10SELDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10SELDAG-NEXT: s_endpgm +; +; GFX10GLISEL-LABEL: sgpr_isnan_f64: +; GFX10GLISEL: ; %bb.0: +; GFX10GLISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GLISEL-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX10GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10GLISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GLISEL-NEXT: s_endpgm +; +; GFX11SELDAG-LABEL: sgpr_isnan_f64: +; GFX11SELDAG: ; %bb.0: +; GFX11SELDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX11SELDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11SELDAG-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11SELDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11SELDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, s2 +; GFX11SELDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX11SELDAG-NEXT: s_endpgm +; +; GFX11GLISEL-LABEL: sgpr_isnan_f64: +; GFX11GLISEL: ; %bb.0: +; GFX11GLISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11GLISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX11GLISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11GLISEL-NEXT: v_cmp_class_f64_e64 s2, s[2:3], 3 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX11GLISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11GLISEL-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11GLISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11GLISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX11GLISEL-NEXT: 
s_endpgm %result = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan %sext = sext i1 %result to i32 store i32 %sext, ptr addrspace(1) %out, align 4 From 6d3971d97f362c02a0dd3f148b6e82f61810d025 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 18 Nov 2025 09:14:01 -0800 Subject: [PATCH 10/57] [AsmParser] Use a range-based for loop (NFC) (#168488) Identified with modernize-loop-convert. --- llvm/lib/AsmParser/LLParser.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 921462e28a467..799234a0b491d 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -315,11 +315,10 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { return error(NT.second.second, "use of undefined type '%" + Twine(NT.first) + "'"); - for (StringMap >::iterator I = - NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) - if (I->second.second.isValid()) - return error(I->second.second, - "use of undefined type named '" + I->getKey() + "'"); + for (const auto &[Name, TypeInfo] : NamedTypes) + if (TypeInfo.second.isValid()) + return error(TypeInfo.second, + "use of undefined type named '" + Name + "'"); if (!ForwardRefComdats.empty()) return error(ForwardRefComdats.begin()->second, From 58cffea94a31e52d6492ce7103e04c6b073dee16 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Tue, 18 Nov 2025 12:15:26 -0500 Subject: [PATCH 11/57] [InstCombine] Canonicalize signed saturated additions (#153053) https://alive2.llvm.org/ce/z/YGT5SN https://alive2.llvm.org/ce/z/PVDxCw https://alive2.llvm.org/ce/z/8buR2N This is tricky because with positive numbers, we only go up, so we can in fact always hit the signed_max boundary. This is important because the intrinsic we use has the behavior of going the OTHER way, aka clamp to INT_MIN if it goes in that direction. And the range checking we do only works for positive numbers. 
Because of this issue, we can only do this for constants as well. --- .../InstCombine/InstCombineSelect.cpp | 95 +++++- .../InstCombine/canonicalize-const-to-bop.ll | 3 +- .../InstCombine/saturating-add-sub.ll | 320 ++++++++++++++++++ 3 files changed, 412 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9572f9d702e1b..e7dc366b13798 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1027,10 +1027,9 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, return Result; } -static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, - InstCombiner::BuilderTy &Builder) { - if (!Cmp->hasOneUse()) - return nullptr; +static Value * +canonicalizeSaturatedAddUnsigned(ICmpInst *Cmp, Value *TVal, Value *FVal, + InstCombiner::BuilderTy &Builder) { // Match unsigned saturated add with constant. Value *Cmp0 = Cmp->getOperand(0); @@ -1130,6 +1129,94 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, return nullptr; } +static Value *canonicalizeSaturatedAddSigned(ICmpInst *Cmp, Value *TVal, + Value *FVal, + InstCombiner::BuilderTy &Builder) { + // Match saturated add with constant. + Value *Cmp0 = Cmp->getOperand(0); + Value *Cmp1 = Cmp->getOperand(1); + ICmpInst::Predicate Pred = Cmp->getPredicate(); + Value *X; + const APInt *C; + + // Canonicalize INT_MAX to true value of the select. + if (match(FVal, m_MaxSignedValue())) { + std::swap(TVal, FVal); + Pred = CmpInst::getInversePredicate(Pred); + } + + if (!match(TVal, m_MaxSignedValue())) + return nullptr; + + // sge maximum signed value is canonicalized to eq maximum signed value and + // requires special handling (a == INT_MAX) ? 
INT_MAX : a + 1 -> sadd.sat(a, + // 1) + if (Pred == ICmpInst::ICMP_EQ) { + if (match(FVal, m_Add(m_Specific(Cmp0), m_One())) && Cmp1 == TVal) { + return Builder.CreateBinaryIntrinsic( + Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), 1)); + } + return nullptr; + } + + // (X > Y) ? INT_MAX : (X + C) --> sadd.sat(X, C) + // (X >= Y) ? INT_MAX : (X + C) --> sadd.sat(X, C) + // where Y is INT_MAX - C or INT_MAX - C - 1, and C > 0 + if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) && + match(FVal, m_Add(m_Specific(Cmp0), m_StrictlyPositive(C)))) { + APInt IntMax = + APInt::getSignedMaxValue(Cmp1->getType()->getScalarSizeInBits()); + + // For SGE, try to flip to SGT to normalize the comparison constant. + if (Pred == ICmpInst::ICMP_SGE) { + if (auto Flipped = getFlippedStrictnessPredicateAndConstant( + Pred, cast(Cmp1))) { + Pred = Flipped->first; + Cmp1 = Flipped->second; + } + } + + // Check the pattern: X > INT_MAX - C or X > INT_MAX - C - 1 + if (Pred == ICmpInst::ICMP_SGT && + (match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C)) || + match(Cmp1, m_SpecificIntAllowPoison(IntMax - *C - 1)))) + return Builder.CreateBinaryIntrinsic( + Intrinsic::sadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), *C)); + } + + // Canonicalize predicate to less-than or less-or-equal-than. + if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { + std::swap(Cmp0, Cmp1); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SLE) + return nullptr; + + if (match(Cmp0, m_NSWSub(m_MaxSignedValue(), m_Value(X))) && + match(FVal, m_c_Add(m_Specific(X), m_Specific(Cmp1)))) { + // (INT_MAX - X s< Y) ? INT_MAX : (X + Y) --> sadd.sat(X, Y) + // (INT_MAX - X s< Y) ? 
INT_MAX : (Y + X) --> sadd.sat(X, Y) + return Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, X, Cmp1); + } + + return nullptr; +} + +static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, + InstCombiner::BuilderTy &Builder) { + if (!Cmp->hasOneUse()) + return nullptr; + + if (Value *V = canonicalizeSaturatedAddUnsigned(Cmp, TVal, FVal, Builder)) + return V; + + if (Value *V = canonicalizeSaturatedAddSigned(Cmp, TVal, FVal, Builder)) + return V; + + return nullptr; +} + /// Try to match patterns with select and subtract as absolute difference. static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal, InstCombiner::BuilderTy &Builder) { diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll index b3093a92624ae..f0e40f4ede161 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll @@ -123,8 +123,7 @@ define i8 @udiv_slt_exact(i8 %x) { define i8 @canonicalize_icmp_operands(i8 %x) { ; CHECK-LABEL: define i8 @canonicalize_icmp_operands( ; CHECK-SAME: i8 [[X:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 119) -; CHECK-NEXT: [[S:%.*]] = add nsw i8 [[TMP1]], 8 +; CHECK-NEXT: [[S:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 8) ; CHECK-NEXT: ret i8 [[S]] ; %add = add nsw i8 %x, 8 diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index cfd679c0cc592..c0ad5818e448a 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -2351,3 +2351,323 @@ define i8 @fold_add_umax_to_usub_multiuse(i8 %a) { } declare void @usei8(i8) + +define i8 @sadd_sat_uge_int_max(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_uge_int_max( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 
[[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_ugt_int_max(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_ugt_int_max( +; CHECK-NEXT: [[R:%.*]] = add i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sgt i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_eq_int_max(i8 %x) { +; CHECK-LABEL: @sadd_sat_eq_int_max( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 1) +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp eq i8 %x, 127 + %add = add i8 %x, 1 + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_constant(i8 %x) { +; CHECK-LABEL: @sadd_sat_constant( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 10) +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 118 + %add = add i8 %x, 10 + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_negative_no_fold(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_negative_no_fold( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_wrong_predicate(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_wrong_predicate( +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp slt i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_wrong_constant(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_wrong_constant( 
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 125 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 126 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define <2 x i8> @sadd_sat_vector(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @sadd_sat_vector( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127) +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %cmp = icmp sge <2 x i8> %x, + %add = add <2 x i8> %x, %y + %r = select <2 x i1> %cmp, <2 x i8> , <2 x i8> %add + ret <2 x i8> %r +} + +define <2 x i8> @sadd_sat_vector_constant(<2 x i8> %x) { +; CHECK-LABEL: @sadd_sat_vector_constant( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %cmp = icmp sge <2 x i8> %x, + %add = add <2 x i8> %x, + %r = select <2 x i1> %cmp, <2 x i8> , <2 x i8> %add + ret <2 x i8> %r +} + +define i8 @sadd_sat_int_max_minus_x(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_commuted(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[Y:%.*]], [[SUB]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: 
[[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sgt i8 %y, %sub + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_nonstrict(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_nonstrict( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp sgt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sle i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_commuted_nonstrict(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_commuted_nonstrict( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp slt i8 [[Y:%.*]], [[SUB]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_NOT]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sge i8 %y, %sub + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_wrong_constant(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_constant( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 126, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 126, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_wrong_predicate(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_wrong_predicate( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 
[[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp sgt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define <2 x i8> @sadd_sat_int_max_minus_x_vector(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_vector( +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> splat (i8 127), [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 127), <2 x i8> [[ADD]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %sub = sub <2 x i8> , %x + %cmp = icmp slt <2 x i8> %sub, %y + %add = add <2 x i8> %x, %y + %r = select <2 x i1> %cmp, <2 x i8> , <2 x i8> %add + ret <2 x i8> %r +} + +define i8 @sadd_sat_commuted_select(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_commuted_select( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127 +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %x, %y + %r = select i1 %cmp, i8 %add, i8 127 + ret i8 %r +} + +define i8 @sadd_sat_commuted_add(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_commuted_add( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], [[X]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %y, %x + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_commuted_both(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_commuted_both( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 127 +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y:%.*]], 127 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 [[ADD]], i8 127 +; CHECK-NEXT: 
ret i8 [[R]] +; + %cmp = icmp sge i8 %x, 127 + %add = add i8 %y, %x + %r = select i1 %cmp, i8 %add, i8 127 + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_nsw_slt(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_slt( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub nsw i8 127, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_nsw_sge_commuted(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_nsw_sge_commuted( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub nsw i8 127, %x + %cmp = icmp sge i8 %y, %sub + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @sadd_sat_int_max_minus_x_no_nsw_neg(i8 %x, i8 %y) { +; CHECK-LABEL: @sadd_sat_int_max_minus_x_no_nsw_neg( +; CHECK-NEXT: [[SUB:%.*]] = sub i8 127, [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SUB]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i8 127, i8 [[ADD]] +; CHECK-NEXT: ret i8 [[R]] +; + %sub = sub i8 127, %x + %cmp = icmp slt i8 %sub, %y + %add = add i8 %x, %y + %r = select i1 %cmp, i8 127, i8 %add + ret i8 %r +} + +define i8 @neg_no_nsw(i8 %x, i8 %y) { +; CHECK-LABEL: @neg_no_nsw( +; CHECK-NEXT: [[ADD:%.*]] = sub i8 127, [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], [[ADD]] +; CHECK-NEXT: [[D:%.*]] = add i8 [[X]], [[Y]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i8 127, i8 [[D]] +; CHECK-NEXT: ret i8 [[S]] +; + %add = sub i8 127, %y + %cmp = icmp sgt i8 %x, %add + %d = add i8 %x, %y + %s = select i1 %cmp, i8 127, i8 %d + ret i8 %s +} + +define i8 @neg_neg_constant(i8 %x, i8 %y) { +; CHECK-LABEL: @neg_neg_constant( +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1) +; CHECK-NEXT: [[S:%.*]] = 
and i8 [[TMP1]], 127 +; CHECK-NEXT: ret i8 [[S]] +; + %cmp = icmp sgt i8 %x, -2 + %d = add i8 %x, -128 + %s = select i1 %cmp, i8 127, i8 %d + ret i8 %s +} From b53371210fcf1f23d1f87e5727fdf1e9aefa674f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 18 Nov 2025 09:31:55 -0800 Subject: [PATCH 12/57] [CI] Only run normal check targets if requested (#168412) When building just the runtimes (eg a patch only touches compiler-rt), we do not actually run any normal check targets. This ends up causing an empty ninja invocation, which builds more targets than necessary. Gate the ninja build for normal check-* targets under an if statement to fix this. --- .ci/monolithic-linux.sh | 8 +++++--- .ci/monolithic-windows.sh | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index 4a8418d7baa8c..ca619aa7e98a1 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -64,9 +64,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ start-group "ninja" -# Targets are not escaped as they are passed as separate arguments. -ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log -cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +if [[ "${targets}" != "" ]]; then + # Targets are not escaped as they are passed as separate arguments. + ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log + cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +fi if [[ "${runtime_targets}" != "" ]]; then start-group "ninja Runtimes" diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index 7b926b87f3623..99e7758ce8d79 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -51,9 +51,11 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ start-group "ninja" -# Targets are not escaped as they are passed as separate arguments. 
-ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log -cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +if [[ "${targets}" != "" ]]; then + # Targets are not escaped as they are passed as separate arguments. + ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log + cp ${BUILD_DIR}/.ninja_log ninja.ninja_log +fi if [[ "${runtimes_targets}" != "" ]]; then start-group "ninja runtimes" From 94e9bfb80365de0c9c71303418b33ceb767f7cf9 Mon Sep 17 00:00:00 2001 From: Marius Kamp Date: Tue, 18 Nov 2025 18:36:18 +0100 Subject: [PATCH 13/57] [AArch64] Reorder Comparison Trees to Facilitate CSE (#168064) The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. The implementation before this change checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because we can then use the same instruction for the `SUB` node and the comparison. To achieve this, this commit comprises the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so), we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes #149685. 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 41 ++++-- llvm/test/CodeGen/AArch64/ccmp-cse.ll | 139 ++++++++++++++++++ 2 files changed, 170 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/ccmp-cse.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d21e19b2ecd46..8f41f230b5521 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3886,22 +3886,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, /// \param MustBeFirst Set to true if this subtree needs to be negated and we /// cannot do the negation naturally. We are required to /// emit the subtree first in this case. +/// \param PreferFirst Set to true if processing this subtree first may +/// result in more efficient code. /// \param WillNegate Is true if are called when the result of this /// subexpression must be negated. This happens when the /// outer expression is an OR. We can use this fact to know /// that we have a double negation (or (or ...) ...) that /// can be implemented for free. -static bool canEmitConjunction(const SDValue Val, bool &CanNegate, - bool &MustBeFirst, bool WillNegate, +static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, + bool &CanNegate, bool &MustBeFirst, + bool &PreferFirst, bool WillNegate, unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); if (Opcode == ISD::SETCC) { - if (Val->getOperand(0).getValueType() == MVT::f128) + EVT VT = Val->getOperand(0).getValueType(); + if (VT == MVT::f128) return false; CanNegate = true; MustBeFirst = false; + // Designate this operation as a preferred first operation if the result + // of a SUB operation can be reused. + PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT), + {Val->getOperand(0), Val->getOperand(1)}); return true; } // Protect against exponential runtime and stack overflow. 
@@ -3913,11 +3921,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, SDValue O1 = Val->getOperand(1); bool CanNegateL; bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) + bool PreferFirstL; + if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL, + IsOR, Depth + 1)) return false; bool CanNegateR; bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) + bool PreferFirstR; + if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR, + IsOR, Depth + 1)) return false; if (MustBeFirstL && MustBeFirstR) @@ -3940,6 +3952,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate, CanNegate = false; MustBeFirst = MustBeFirstL || MustBeFirstR; } + PreferFirst = PreferFirstL || PreferFirstR; return true; } return false; @@ -4001,19 +4014,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, SDValue LHS = Val->getOperand(0); bool CanNegateL; bool MustBeFirstL; - bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); + bool PreferFirstL; + bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL, + PreferFirstL, IsOR); assert(ValidL && "Valid conjunction/disjunction tree"); (void)ValidL; SDValue RHS = Val->getOperand(1); bool CanNegateR; bool MustBeFirstR; - bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); + bool PreferFirstR; + bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR, + PreferFirstR, IsOR); assert(ValidR && "Valid conjunction/disjunction tree"); (void)ValidR; - // Swap sub-tree that must come first to the right side. - if (MustBeFirstL) { + bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR; + + // Swap sub-tree that must or should come first to the right side. 
+ if (MustBeFirstL || ShouldFirstL) { assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); std::swap(LHS, RHS); std::swap(CanNegateL, CanNegateR); @@ -4069,7 +4088,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC) { bool DummyCanNegate; bool DummyMustBeFirst; - if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) + bool DummyPreferFirst; + if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst, + DummyPreferFirst, false)) return SDValue(); return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll new file mode 100644 index 0000000000000..657498172a04c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %cmp.match, %or.nomatch + %sub.reuse = sub nuw i64 %y, 
%x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_two_ors_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, hs +; CHECK-NEXT: ccmp x2, x0, #2, hs +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %or.cond = or i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_and: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x2, x1 +; CHECK-NEXT: ccmp x2, x0, #0, lo +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %and.cond = and i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_single_or_sub_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: subs x8, x1, x2 +; CHECK-NEXT: ccmp x2, x0, #2, ls +; CHECK-NEXT: csel x0, xzr, x8, hi +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch = icmp ugt i64 %y, %unrelated + %or.cond = or i1 %cmp.match, %cmp.nomatch + %sub.reuse = sub nuw i64 %x, %y + %res = select i1 %or.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: We must negate the or operation, hence this must come first. 
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind { +; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: sub x8, x2, x1 +; CHECK-NEXT: ccmp x0, x1, #0, ls +; CHECK-NEXT: ccmp x2, x1, #2, lo +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: ret + %cmp.match = icmp ult i64 %y, %x + %cmp.nomatch1 = icmp ult i64 %unrelated, %x + %cmp.nomatch2 = icmp ugt i64 %y, %unrelated + %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2 + %and.cond = and i1 %or.nomatch, %cmp.match + %sub.reuse = sub nuw i64 %y, %x + %res = select i1 %and.cond, i64 0, i64 %sub.reuse + ret i64 %res +} + +; Negative test: There is no analogue of SUBS for floating point. +define float @test_negative_float(float %unrelated, float %x, float %y) nounwind { +; CHECK-LABEL: test_negative_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s2, s0 +; CHECK-NEXT: fsub s0, s2, s1 +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: fccmp s2, s1, #8, le +; CHECK-NEXT: fcsel s0, s3, s0, mi +; CHECK-NEXT: ret + %cmp.nomatch1 = fcmp olt float %y, %x + %cmp.nomatch2 = fcmp ogt float %y, %unrelated + %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2 + %sub.noreuse = fsub float %y, %x + %res = select i1 %or.cond, float 0.0, float %sub.noreuse + ret float %res +} + +; Negative test: If both operands match a sub, do not reorder them. 
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind { +; CHECK-LABEL: test_prefer_right_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x2, x0 +; CHECK-NEXT: ccmp x2, x1, #0, ls +; CHECK-NEXT: csel x8, x0, x1, lo +; CHECK-NEXT: sub x0, x2, x8 +; CHECK-NEXT: ret + %cmp.match1 = icmp ult i64 %z, %y + %cmp.match2 = icmp ugt i64 %z, %x + %or.cond = or i1 %cmp.match1, %cmp.match2 + %sub.reuse1 = sub nuw i64 %z, %y + %sub.reuse2 = sub nuw i64 %z, %x + %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1 + ret i64 %res +} From 3cf1f0c127bcc11185a5f8f6a295ce678827b923 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Tue, 18 Nov 2025 20:40:40 +0300 Subject: [PATCH 14/57] [ARM] Pattern match Low Overhead Loops pseudos (NFC) (#168209) Pull Request: https://github.com/llvm/llvm-project/pull/168209 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 36 ------------------------- llvm/lib/Target/ARM/ARMInstrThumb2.td | 31 ++++++++++++++++----- 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 847b7af5a9b11..26b5e5a22386e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; // Other cases are autogenerated. 
break; - case ARMISD::WLSSETUP: { - SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, - N->getOperand(0)); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } - case ARMISD::WLS: { - SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, - N->getOperand(1), N->getOperand(2), - N->getOperand(0)); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } - case ARMISD::LE: { - SDValue Ops[] = { N->getOperand(1), - N->getOperand(2), - N->getOperand(0) }; - unsigned Opc = ARM::t2LoopEnd; - SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::LDRD: { if (Subtarget->isThumb2()) break; // TableGen handles isel in this case. @@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } - case ARMISD::LOOP_DEC: { - SDValue Ops[] = { N->getOperand(1), - N->getOperand(2), - N->getOperand(0) }; - SDNode *Dec = - CurDAG->getMachineNode(ARM::t2LoopDec, dl, - CurDAG->getVTList(MVT::i32, MVT::Other), Ops); - ReplaceUses(N, Dec); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::BRCOND: { // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c229c8e4491df..911d7ebfba141 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5581,6 +5581,20 @@ class t2LOL let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } +def arm_wlssetup + : SDNode<"ARMISD::WLSSETUP", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>; + +def arm_wls : SDNode<"ARMISD::WLS", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + +def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; + +def arm_le : SDNode<"ARMISD::LE", + SDTypeProfile<0, 2, [SDTCisInt<0>, 
SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + let isNotDuplicable = 1 in { def t2WLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn, wlslabel_u11:$label), @@ -5651,15 +5665,17 @@ def t2DoLoopStartTP : // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations // into a t2WhileLoopStartLR (or expanded). def t2WhileLoopSetup : - t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>; + t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, + [(set i32:$lr, (arm_wlssetup i32:$tc))]>; // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and // t2LoopEnd together represent a LE instruction. Ideally these are converted // to a t2LoopEndDec which is lowered as a single instruction. let hasSideEffects = 0 in def t2LoopDec : - t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), - 4, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, + [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>, + Sched<[WriteBr]>; let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned @@ -5667,8 +5683,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { def t2WhileLoopStart : t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 4, IIC_Br, []>, - Sched<[WriteBr]>; + 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It // is lowered in the ARMLowOverheadLoops pass providing the branches are within @@ -5690,8 +5706,9 @@ def t2WhileLoopStartTP : // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair. 
def t2LoopEnd : - t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 8, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), + 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // The combination of a t2LoopDec and t2LoopEnd, performing both the LR // decrement and branch as a single instruction. Is lowered to a LE or From 0b82415c59c57c40beb072a716675293e7007a65 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Tue, 18 Nov 2025 18:41:04 +0100 Subject: [PATCH 15/57] [AMDGPU] Consider FLAT instructions for VMEM hazard detection (#137170) In general, "Flat instructions look at the per-workitem address and determine for each work item if the target memory address is in global, private or scratch memory." (RDNA2 ISA) That means that FLAT instructions need to be considered for VMEM hazards even without "specific segment". Also, LDS DMA should be considered for LDS hazard detection. See also #137148 --- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 10 ++++------ llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir | 5 +++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 7a2f84a2f73eb..29d22f27a2d8e 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1502,9 +1502,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF, bool HasVmem = false; for (auto &MBB : MF) { for (auto &MI : MBB) { - HasLds |= SIInstrInfo::isDS(MI); - HasVmem |= (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) || - SIInstrInfo::isSegmentSpecificFLAT(MI); + HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI); + HasVmem |= SIInstrInfo::isVMEM(MI); if (HasLds && HasVmem) return true; } @@ -1526,10 +1525,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) { assert(!ST.hasExtendedWaitCounts()); auto 
IsHazardInst = [](const MachineInstr &MI) { - if (SIInstrInfo::isDS(MI)) + if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI)) return 1; - if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) || - SIInstrInfo::isSegmentSpecificFLAT(MI)) + if (SIInstrInfo::isVMEM(MI)) return 2; return 0; }; diff --git a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir index 86e657093b5b2..ab4077d8f5b68 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir @@ -269,11 +269,12 @@ body: | S_ENDPGM 0 ... -# GCN-LABEL: name: no_hazard_lds_branch_flat +# GCN-LABEL: name: hazard_lds_branch_flat # GCN: bb.1: +# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 # GCN-NEXT: FLAT_LOAD_DWORD --- -name: no_hazard_lds_branch_flat +name: hazard_lds_branch_flat body: | bb.0: successors: %bb.1 From c88ae6eb21201ee3c699a76ba424cbe42ae2e7b1 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 18 Nov 2025 11:44:03 -0600 Subject: [PATCH 16/57] [flang][OpenMP] Move two utilities from Semantics to Parser, NFC (#168549) Move `GetInnermostExecPart` and `IsStrictlyStructuredBlock` from Semantics/openmp-utils.* to Parser/openmp-utils.*. These two only depend on the AST contents and properties. 
--- flang/include/flang/Parser/openmp-utils.h | 2 ++ flang/include/flang/Semantics/openmp-utils.h | 2 -- flang/lib/Parser/openmp-utils.cpp | 28 ++++++++++++++++++++ flang/lib/Semantics/check-omp-atomic.cpp | 2 ++ flang/lib/Semantics/openmp-utils.cpp | 28 -------------------- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index 8fa4a84aff06d..36556f8dd7f4a 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -137,6 +137,8 @@ const T *GetFirstArgument(const OmpDirectiveSpecification &spec) { const BlockConstruct *GetFortranBlockConstruct( const ExecutionPartConstruct &epc); +const Block &GetInnermostExecPart(const Block &block); +bool IsStrictlyStructuredBlock(const Block &block); const OmpCombinerExpression *GetCombinerExpr( const OmpReductionSpecifier &rspec); diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index 14a4f0e93bda5..f5739ab16d643 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -97,8 +97,6 @@ const SomeExpr *HasStorageOverlap( const SomeExpr &base, llvm::ArrayRef exprs); bool IsAssignment(const parser::ActionStmt *x); bool IsPointerAssignment(const evaluate::Assignment &x); -const parser::Block &GetInnermostExecPart(const parser::Block &block); -bool IsStrictlyStructuredBlock(const parser::Block &block); } // namespace omp } // namespace Fortran::semantics diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index b9d3763cdd06d..2424828293c73 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ b/flang/lib/Parser/openmp-utils.cpp @@ -93,6 +93,34 @@ const BlockConstruct *GetFortranBlockConstruct( return nullptr; } +/// parser::Block is a list of executable constructs, parser::BlockConstruct +/// is Fortran's BLOCK/ENDBLOCK construct. 
+/// Strip the outermost BlockConstructs, return the reference to the Block +/// in the executable part of the innermost of the stripped constructs. +/// Specifically, if the given `block` has a single entry (it's a list), and +/// the entry is a BlockConstruct, get the Block contained within. Repeat +/// this step as many times as possible. +const Block &GetInnermostExecPart(const Block &block) { + const Block *iter{&block}; + while (iter->size() == 1) { + const ExecutionPartConstruct &ep{iter->front()}; + if (auto *bc{GetFortranBlockConstruct(ep)}) { + iter = &std::get(bc->t); + } else { + break; + } + } + return *iter; +} + +bool IsStrictlyStructuredBlock(const Block &block) { + if (block.size() == 1) { + return GetFortranBlockConstruct(block.front()) != nullptr; + } else { + return false; + } +} + const OmpCombinerExpression *GetCombinerExpr( const OmpReductionSpecifier &rspec) { return addr_if(std::get>(rspec.t)); diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index ec03e6fe2d920..b9e34ca6e74df 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -19,6 +19,7 @@ #include "flang/Evaluate/rewrite.h" #include "flang/Evaluate/tools.h" #include "flang/Parser/char-block.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-utils.h" #include "flang/Semantics/symbol.h" @@ -41,6 +42,7 @@ namespace Fortran::semantics { +using namespace Fortran::parser::omp; using namespace Fortran::semantics::omp; namespace operation = Fortran::evaluate::operation; diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index 4a40d6eec17bb..18a37d64a3b5a 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -496,32 +496,4 @@ bool IsPointerAssignment(const evaluate::Assignment &x) { return std::holds_alternative(x.u) || std::holds_alternative(x.u); } - -/// 
parser::Block is a list of executable constructs, parser::BlockConstruct -/// is Fortran's BLOCK/ENDBLOCK construct. -/// Strip the outermost BlockConstructs, return the reference to the Block -/// in the executable part of the innermost of the stripped constructs. -/// Specifically, if the given `block` has a single entry (it's a list), and -/// the entry is a BlockConstruct, get the Block contained within. Repeat -/// this step as many times as possible. -const parser::Block &GetInnermostExecPart(const parser::Block &block) { - const parser::Block *iter{&block}; - while (iter->size() == 1) { - const parser::ExecutionPartConstruct &ep{iter->front()}; - if (auto *bc{GetFortranBlockConstruct(ep)}) { - iter = &std::get(bc->t); - } else { - break; - } - } - return *iter; -} - -bool IsStrictlyStructuredBlock(const parser::Block &block) { - if (block.size() == 1) { - return GetFortranBlockConstruct(block.front()) != nullptr; - } else { - return false; - } -} } // namespace Fortran::semantics::omp From bd8c94177537ba30c6a160afa6dd1b8b8fc1e813 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 18 Nov 2025 17:47:18 +0000 Subject: [PATCH 17/57] Reapply "[Github] Update PR labeller to v6.0.1 (#167246)" This reverts commit b3d62645158cd6f463f2e1c878f6d63b9dc4b164. This broke the workflow because the sync-labels flag was set to a zero-length string to work around an issue. The underlying issue has been fixed and the value is now required to be a boolean. We can just drop the value because we want the default behavior anyways. This should be the last remaining breaking change from v5 that we need to migrate. 
--- .github/new-prs-labeler.yml | 1942 +++++++++++++++++++-------------- .github/workflows/new-prs.yml | 4 +- 2 files changed, 1131 insertions(+), 815 deletions(-) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index efdc42d349195..bb0eef5842b0f 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,1131 +1,1449 @@ BOLT: - - bolt/**/* + - changed-files: + - any-glob-to-any-file: + - bolt/**/* ClangIR: - - clang/include/clang/CIR/**/* - - clang/lib/CIR/**/* - - clang/tools/cir-*/**/* - - clang/test/CIR/**/* + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/CIR/**/* + - clang/lib/CIR/**/* + - clang/tools/cir-*/**/* + - clang/test/CIR/**/* clang:bytecode: - - clang/docs/ConstantInterpreter.rst - - clang/lib/AST/ByteCode/**/* - - clang/test/AST/ByteCode/**/* - - clang/unittests/AST/ByteCode/**/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/ConstantInterpreter.rst + - clang/lib/AST/ByteCode/**/* + - clang/test/AST/ByteCode/**/* + - clang/unittests/AST/ByteCode/**/* clang:dataflow: - - clang/include/clang/Analysis/FlowSensitive/**/* - - clang/lib/Analysis/FlowSensitive/**/* - - clang/unittests/Analysis/FlowSensitive/**/* - - clang/docs/DataFlowAnalysisIntro.md - - clang/docs/DataFlowAnalysisIntroImages/**/* + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/FlowSensitive/**/* + - clang/lib/Analysis/FlowSensitive/**/* + - clang/unittests/Analysis/FlowSensitive/**/* + - clang/docs/DataFlowAnalysisIntro.md + - clang/docs/DataFlowAnalysisIntroImages/**/* clang:frontend: - - clang/lib/AST/**/* - - clang/include/clang/AST/**/* - - clang/lib/Basic/**/* - - clang/include/clang/Basic/**/* - - clang/lib/Interpreter/**/* - - clang/include/clang/Interpreter/**/* - - clang/lib/Lex/**/* - - clang/include/clang/Lex/**/* - - clang/lib/Parse/**/* - - clang/include/clang/Parse/**/* - - clang/lib/Sema/**/* - - clang/include/clang/Sema/**/* + - changed-files: + - 
any-glob-to-any-file: + - clang/lib/AST/**/* + - clang/include/clang/AST/**/* + - clang/lib/Basic/**/* + - clang/include/clang/Basic/**/* + - clang/lib/Interpreter/**/* + - clang/include/clang/Interpreter/**/* + - clang/lib/Lex/**/* + - clang/include/clang/Lex/**/* + - clang/lib/Parse/**/* + - clang/include/clang/Parse/**/* + - clang/lib/Sema/**/* + - clang/include/clang/Sema/**/* clang:headers: - - clang/lib/Headers/**/* + - changed-files: + - any-glob-to-any-file: + - clang/lib/Headers/**/* compiler-rt: - - compiler-rt/**/* + - changed-files: + - any-glob-to-any-file: + - compiler-rt/**/* flang: - - flang/**/* + - changed-files: + - any-glob-to-any-file: + - flang/**/* flang:frontend: - - flang/Parser/**/* - - flang/Evaluate/**/* - - flang/Semantics/**/* + - changed-files: + - any-glob-to-any-file: + - flang/Parser/**/* + - flang/Evaluate/**/* + - flang/Semantics/**/* libclc: - - libclc/** + - changed-files: + - any-glob-to-any-file: + - libclc/** HLSL: - - clang/*HLSL*/**/* - - clang/**/*HLSL* - - llvm/**/Frontend/HLSL/**/* + - changed-files: + - any-glob-to-any-file: + - clang/*HLSL*/**/* + - clang/**/*HLSL* + - llvm/**/Frontend/HLSL/**/* lld: - - lld/**/* + - changed-files: + - any-glob-to-any-file: + - lld/**/* llvm-lit: - - llvm/utils/lit/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/utils/lit/**/* PGO: - - llvm/**/ProfileData/**/* - - llvm/**/SampleProfile* - - llvm/**/CodeGen/MIRSampleProfile* - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/**/* - - llvm/test/Transforms/PGOProfile/**/* - - llvm/test/Transforms/SampleProfile/**/* - - llvm/**/llvm-profdata/**/* - - llvm/**/llvm-profgen/**/* + - changed-files: 
+ - any-glob-to-any-file: + - llvm/**/ProfileData/**/* + - llvm/**/SampleProfile* + - llvm/**/CodeGen/MIRSampleProfile* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/**/* + - llvm/test/Transforms/PGOProfile/**/* + - llvm/test/Transforms/SampleProfile/**/* + - llvm/**/llvm-profdata/**/* + - llvm/**/llvm-profgen/**/* vectorizers: - - llvm/lib/Transforms/Vectorize/**/* - - llvm/include/llvm/Transforms/Vectorize/**/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Vectorize/**/* + - llvm/include/llvm/Transforms/Vectorize/**/* # IMPORTED FROM CODEOWNERS LTO: - - llvm/*/LTO/** - - llvm/*/Linker/** - - llvm/*/ThinLTO/** - - llvm/lib/Transforms/*/FunctionImport* - - llvm/tools/gold/** + - changed-files: + - any-glob-to-any-file: + - llvm/*/LTO/** + - llvm/*/Linker/** + - llvm/*/ThinLTO/** + - llvm/lib/Transforms/*/FunctionImport* + - llvm/tools/gold/** clang:driver: - - clang/*/Driver/** + - changed-files: + - any-glob-to-any-file: + - clang/*/Driver/** compiler-rt:asan: - - compiler-rt/lib/asan/** - - compiler-rt/include/sanitizer/asan_interface.h - - compiler-rt/test/asan/** - - compiler-rt/lib/asan_abi/** - - compiler-rt/test/asan_abi/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/asan/** + - compiler-rt/include/sanitizer/asan_interface.h + - compiler-rt/test/asan/** + - compiler-rt/lib/asan_abi/** + - compiler-rt/test/asan_abi/** compiler-rt:builtins: - - compiler-rt/lib/builtins/** - - compiler-rt/test/builtins/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/builtins/** + - compiler-rt/test/builtins/** compiler-rt:cfi: - - compiler-rt/lib/cfi/** - - 
compiler-rt/test/cfi/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/cfi/** + - compiler-rt/test/cfi/** compiler-rt:fuzzer: - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** compiler-rt:hwasan: - - compiler-rt/lib/hwasan/** - - compiler-rt/include/sanitizer/hwasan_interface.h - - compiler-rt/test/hwasan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/hwasan/** + - compiler-rt/include/sanitizer/hwasan_interface.h + - compiler-rt/test/hwasan/** compiler-rt:lsan: - - compiler-rt/lib/lsan/** - - compiler-rt/include/sanitizer/lsan_interface.h - - compiler-rt/test/lsan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/lsan/** + - compiler-rt/include/sanitizer/lsan_interface.h + - compiler-rt/test/lsan/** compiler-rt:msan: - - compiler-rt/lib/msan/** - - compiler-rt/include/sanitizer/msan_interface.h - - compiler-rt/test/msan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/msan/** + - compiler-rt/include/sanitizer/msan_interface.h + - compiler-rt/test/msan/** compiler-rt:sanitizer: - - llvm/lib/Transforms/Instrumentation/*Sanitizer* - - compiler-rt/lib/interception/** - - compiler-rt/lib/*san*/** - - compiler-rt/include/sanitizer/** - - compiler-rt/test/*san*/** - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Instrumentation/*Sanitizer* + - compiler-rt/lib/interception/** + - compiler-rt/lib/*san*/** + - compiler-rt/include/sanitizer/** + - compiler-rt/test/*san*/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:scudo: - - 
compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:tsan: - - compiler-rt/lib/tsan/** - - compiler-rt/include/sanitizer/tsan_interface.h - - compiler-rt/include/sanitizer/tsan_interface_atomic.h - - compiler-rt/test/tsan/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/tsan/** + - compiler-rt/include/sanitizer/tsan_interface.h + - compiler-rt/include/sanitizer/tsan_interface_atomic.h + - compiler-rt/test/tsan/** compiler-rt:ubsan: - - compiler-rt/lib/ubsan/** - - compiler-rt/include/sanitizer/ubsan_interface.h - - compiler-rt/test/ubsan/** - - compiler-rt/lib/ubsan_minimal/** - - compiler-rt/test/ubsan_minimal/** + - changed-files: + - any-glob-to-any-file: + - compiler-rt/lib/ubsan/** + - compiler-rt/include/sanitizer/ubsan_interface.h + - compiler-rt/test/ubsan/** + - compiler-rt/lib/ubsan_minimal/** + - compiler-rt/test/ubsan_minimal/** xray: - - llvm/tools/llvm-xray/** - - compiler-rt/*/xray/** - - clang/include/clang/Basic/XRay* - - clang/lib/Basic/XRay* - - compiler-rt/*/xray/** - - llvm/include/llvm/XRay/** - - llvm/lib/XRay/** - - llvm/tools/llvm-xray/** - - llvm/unittests/XRay/** - - compiler-rt/*/xray/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-xray/** + - compiler-rt/*/xray/** + - clang/include/clang/Basic/XRay* + - clang/lib/Basic/XRay* + - compiler-rt/*/xray/** + - llvm/include/llvm/XRay/** + - llvm/lib/XRay/** + - llvm/tools/llvm-xray/** + - llvm/unittests/XRay/** + - compiler-rt/*/xray/** clang:codegen: - - clang/lib/CodeGen/** - - clang/include/clang/CodeGen/** + - changed-files: + - any-glob-to-any-file: + - clang/lib/CodeGen/** + - clang/include/clang/CodeGen/** mlir: - - mlir/** + - changed-files: + - any-glob-to-any-file: + - mlir/** mlir:core: - - mlir/include/mlir/Support/** - - mlir/lib/Support/** - - mlir/include/mlir/Parser/** - - mlir/lib/Parser/** - - mlir/include/mlir/IR/** - - 
mlir/lib/IR/** - - mlir/include/mlir/Bytecode/** - - mlir/lib/Bytecode/** - - mlir/include/mlir/AsmParser/** - - mlir/lib/AsmParser/** - - mlir/include/mlir/Pass/** - - mlir/lib/Pass/** - - mlir/include/mlir/Tools/** - - mlir/lib/Tools/** - - mlir/include/mlir/Reducer/** - - mlir/lib/Reducer/** - - mlir/include/mlir/Transforms/** - - mlir/lib/Transforms/** - - mlir/include/mlir/Debug/** - - mlir/lib/Debug/** - - mlir/tools/** + - changed-files: + - any-glob-to-any-file: + - mlir/include/mlir/Support/** + - mlir/lib/Support/** + - mlir/include/mlir/Parser/** + - mlir/lib/Parser/** + - mlir/include/mlir/IR/** + - mlir/lib/IR/** + - mlir/include/mlir/Bytecode/** + - mlir/lib/Bytecode/** + - mlir/include/mlir/AsmParser/** + - mlir/lib/AsmParser/** + - mlir/include/mlir/Pass/** + - mlir/lib/Pass/** + - mlir/include/mlir/Tools/** + - mlir/lib/Tools/** + - mlir/include/mlir/Reducer/** + - mlir/lib/Reducer/** + - mlir/include/mlir/Transforms/** + - mlir/lib/Transforms/** + - mlir/include/mlir/Debug/** + - mlir/lib/Debug/** + - mlir/tools/** mlir:ods: - - mlir/TableGen/** - - mlir/tblgen/** - - mlir/include/mlir/IR/*.td + - changed-files: + - any-glob-to-any-file: + - mlir/TableGen/** + - mlir/tblgen/** + - mlir/include/mlir/IR/*.td mlir:bindings: - - mlir/Bindings/** + - changed-files: + - any-glob-to-any-file: + - mlir/Bindings/** mlir:gpu: - - mlir/**/*GPU*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*GPU*/** mlir:amdgpu: - - mlir/**/AMDGPU/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/AMDGPU/** mlir:amx: - - mlir/**/AMX/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/AMX/** mlir:affine: - - mlir/**/Affine/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Affine/** mlir:arith: - - mlir/**/Arith/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Arith/** mlir:neon: - - mlir/**/ArmNeon/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmNeon/** mlir:sme: - - mlir/**/ArmSME/** + - changed-files: + - 
any-glob-to-any-file: + - mlir/**/ArmSME/** mlir:sve: - - mlir/**/ArmSVE/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ArmSVE/** mlir:async: - - mlir/**/Async/** - - mlir/**/Async/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Async/** + - mlir/**/Async/** mlir:bufferization: - - mlir/**/Bufferization/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Bufferization/** mlir:complex: - - mlir/**/Complex/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Complex/** mlir:cf: - - mlir/**/ControlFlow/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ControlFlow/** mlir:dlti: - - mlir/**/DLTI/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/DLTI/** mlir:emitc: - - mlir/**/*EmitC*/** - - mlir/lib/Target/Cpp/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*EmitC*/** + - mlir/lib/Target/Cpp/** mlir:func: - - mlir/**/Func/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Func/** mlir:irdl: - - mlir/**/IRDL/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/IRDL/** mlir:index: - - mlir/**/Index/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Index/** mlir:llvm: - - mlir/**/LLVM* - - mlir/**/LLVM*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/LLVM* + - mlir/**/LLVM*/** mlir:linalg: - - mlir/**/*linalg/** - - mlir/**/*Linalg/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*linalg/** + - mlir/**/*Linalg/** mlir:mlprogram: - - mlir/**/MLProgram/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/MLProgram/** mlir:math: - - mlir/**/Math/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Math/** mlir:memref: - - mlir/**/MemRef/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/MemRef/** mlir:nvgpu: - - mlir/**/NVGPU/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/NVGPU/** mlir:openacc: - - mlir/**/*OpenACC* - - mlir/**/*OpenACC*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*OpenACC* + - 
mlir/**/*OpenACC*/** mlir:openmp: - - mlir/**/*OpenMP* - - mlir/**/*OpenMP*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*OpenMP* + - mlir/**/*OpenMP*/** mlir:pdl: - - mlir/**/PDL/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/PDL/** mlir:quant: - - mlir/**/Quant/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Quant/** mlir:scf: - - mlir/**/SCF/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/SCF/** mlir:spirv: - - mlir/**/SPIRV/** - - mlir/**/SPIRVTo*/** - - mlir/**/*ToSPIRV/** - - mlir/tools/mlir-spirv-cpu-runner/** - - mlir/tools/mlir-vulkan-runner/** - - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp + - changed-files: + - any-glob-to-any-file: + - mlir/**/SPIRV/** + - mlir/**/SPIRVTo*/** + - mlir/**/*ToSPIRV/** + - mlir/tools/mlir-spirv-cpu-runner/** + - mlir/tools/mlir-vulkan-runner/** + - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp mlir:shape: - - mlir/**/Shape/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Shape/** mlir:sparse: - - mlir/**/SparseTensor/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/SparseTensor/** mlir:tensor: - - mlir/**/Tensor/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/Tensor/** mlir:tosa: - - mlir/**/*Tosa*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Tosa*/** mlir:ub: - - mlir/**/UB/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/UB/** mlir:vector: - - mlir/**/*Vector/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Vector/** mlir:execution-engine: - - mlir/**/ExecutionEngine/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/ExecutionEngine/** mlir:presburger: - - mlir/**/*Presburger*/** + - changed-files: + - any-glob-to-any-file: + - mlir/**/*Presburger*/** mlir:python: - - mlir/python/**/* + - changed-files: + - any-glob-to-any-file: + - mlir/python/**/* mlir:vectorops: - - mlir/**/Vector/**/* + - changed-files: + - any-glob-to-any-file: + - mlir/**/Vector/**/* coroutines: - - 
clang/docs/DebuggingCoroutines.rst - - clang/lib/Sema/SemaCoroutine.cpp - - clang/lib/CodeGen/CGCoroutine.cpp - - clang/test/CodeGenCoroutines/** - - llvm/docs/Coroutines.rst - - llvm/include/llvm/Transforms/Coroutines/** - - llvm/lib/Transforms/Coroutines/** - - llvm/test/Transforms/Coroutines/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/DebuggingCoroutines.rst + - clang/lib/Sema/SemaCoroutine.cpp + - clang/lib/CodeGen/CGCoroutine.cpp + - clang/test/CodeGenCoroutines/** + - llvm/docs/Coroutines.rst + - llvm/include/llvm/Transforms/Coroutines/** + - llvm/lib/Transforms/Coroutines/** + - llvm/test/Transforms/Coroutines/* clang:modules: - - clang/docs/StandardCPlusPlusModules.rst - - clang/include/clang/AST/AbstractBasicReader.h - - clang/include/clang/AST/AbstractBasicWriter.h - - clang/include/clang/AST/AbstractTypeReader.h - - clang/include/clang/AST/AbstractTypeWriter.h - - clang/include/clang/AST/PropertiesBase.td - - clang/include/clang/AST/ODRHash.h - - clang/include/clang/AST/TypeProperties.td - - clang/include/clang/Basic/Module.h - - clang/include/clang/Frontend/PrecompiledPreamble.h - - clang/include/clang/Lex/ModuleLoader.h - - clang/include/clang/Lex/ModuleMap.h - - clang/include/clang/Serialization/** - - clang/lib/AST/ODRHash.cpp - - clang/lib/AST/StmtProfile.cpp - - clang/lib/Basic/Module.cpp - - clang/lib/Frontend/ModuleDependencyCollector.cpp - - clang/lib/Frontend/PrecompiledPreamble.cpp - - clang/lib/Lex/ModuleMap.cpp - - clang/lib/Sema/SemaModule.cpp - - clang/lib/Serialization/** - - clang/test/CXX/module/** - - clang/test/Modules/** - - clang/unittests/Serialization/* + - changed-files: + - any-glob-to-any-file: + - clang/docs/StandardCPlusPlusModules.rst + - clang/include/clang/AST/AbstractBasicReader.h + - clang/include/clang/AST/AbstractBasicWriter.h + - clang/include/clang/AST/AbstractTypeReader.h + - clang/include/clang/AST/AbstractTypeWriter.h + - clang/include/clang/AST/PropertiesBase.td + - 
clang/include/clang/AST/ODRHash.h + - clang/include/clang/AST/TypeProperties.td + - clang/include/clang/Basic/Module.h + - clang/include/clang/Frontend/PrecompiledPreamble.h + - clang/include/clang/Lex/ModuleLoader.h + - clang/include/clang/Lex/ModuleMap.h + - clang/include/clang/Serialization/** + - clang/lib/AST/ODRHash.cpp + - clang/lib/AST/StmtProfile.cpp + - clang/lib/Basic/Module.cpp + - clang/lib/Frontend/ModuleDependencyCollector.cpp + - clang/lib/Frontend/PrecompiledPreamble.cpp + - clang/lib/Lex/ModuleMap.cpp + - clang/lib/Sema/SemaModule.cpp + - clang/lib/Serialization/** + - clang/test/CXX/module/** + - clang/test/Modules/** + - clang/unittests/Serialization/* clang-tidy: - - clang-tools-extra/clang-tidy/** - - clang-tools-extra/docs/clang-tidy/** - - clang-tools-extra/test/clang-tidy/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/clang-tidy/** + - clang-tools-extra/docs/clang-tidy/** + - clang-tools-extra/test/clang-tidy/** clang-tools-extra: - - clang-tools-extra/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/** tools:llvm-mca: - - llvm/tools/llvm-mca/** - - llvm/include/llvm/MCA/** - - llvm/lib/MCA/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-mca/** + - llvm/include/llvm/MCA/** + - llvm/lib/MCA/** clang: - - any: - - clang/** - - '!clang/**/Format/**' - - '!clang/tools/clang-format/**' + - changed-files: + - all-globs-to-all-file: + - clang/** + - '!clang/**/Format/**' + - '!clang/tools/clang-format/**' testing-tools: - - llvm/include/llvm/FileCheck/** - - llvm/lib/FileCheck/** - - llvm/test/FileCheck/** - - llvm/unittests/FileCheck/** - - llvm/utils/lit/** - - llvm/utils/split-file/** - - llvm/utils/not/** - - llvm/utils/count/** - - llvm/utils/FileCheck/** - - llvm/docs/CommandGuide/FileCheck.rst - - llvm/docs/CommandGuide/lit.rst - - llvm/docs/TestingGuide.rst - - llvm/test/Other/FileCheck-space.txt - - llvm/utils/UpdateTestChecks/** - - llvm/utils/update*_test_checks.py + - 
changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/FileCheck/** + - llvm/lib/FileCheck/** + - llvm/test/FileCheck/** + - llvm/unittests/FileCheck/** + - llvm/utils/lit/** + - llvm/utils/split-file/** + - llvm/utils/not/** + - llvm/utils/count/** + - llvm/utils/FileCheck/** + - llvm/docs/CommandGuide/FileCheck.rst + - llvm/docs/CommandGuide/lit.rst + - llvm/docs/TestingGuide.rst + - llvm/test/Other/FileCheck-space.txt + - llvm/utils/UpdateTestChecks/** + - llvm/utils/update*_test_checks.py debuginfo: - - clang/lib/CodeGen/CGDebugInfo.* - - llvm/include/llvm/BinaryFormat/Dwarf.* - - llvm/include/llvm/CodeGen/*Debug*.* - - llvm/include/llvm/DebugInfo/** - - llvm/include/llvm/Debuginfod/** - - llvm/include/llvm/Frontend/Debug/** - - llvm/include/llvm/IR/Debug*.* - - llvm/include/llvm/Object/*Debug*.* - - llvm/include/llvm/ObjectYAML/*Debug*.* - - llvm/include/llvm/Transforms/Utils/*Debug*.* - - llvm/include/llvm-c/DebugInfo.h - - llvm/lib/BinaryFormat/Dwarf.cpp - - llvm/lib/CodeGen/AsmPrinter/*Debug*.* - - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* - - llvm/lib/CodeGen/AsmPrinter/DIE*.* - - llvm/lib/CodeGen/LiveDebugValues/** - - llvm/lib/CodeGen/*Debug*.* - - llvm/lib/CodeGen/DwarfEHPrepare.cpp - - llvm/lib/DebugInfo/** - - llvm/lib/Debuginfod/** - - llvm/lib/DWARFLinkerParallel/** - - llvm/lib/IR/Debug*.cpp - - llvm/lib/MC/MCDwarf.cpp - - llvm/lib/Transforms/Utils/*Debug*.* - - llvm/test/DebugInfo/** - - llvm/test/tools/dsymutil/** - - llvm/test/tools/llvm-debuginfo-analyzer/** - - llvm/test/tools/llvm-debuginfod/** - - llvm/test/tools/llvm-debuginfod-find/** - - llvm/test/tools/llvm-dwarfdump/** - - llvm/test/tools/llvm-dwarfutil/** - - llvm/test/tools/llvm-dwp/** - - llvm/test/tools/llvm-gsymutil/** - - llvm/test/tools/llvm-pdbuti/** - - llvm/tools/dsymutil/** - - llvm/tools/llvm-debuginfo-analyzer/** - - llvm/tools/llvm-debuginfod/** - - llvm/tools/llvm-debuginfod-find/** - - llvm/tools/llvm-dwarfdump/** - - llvm/tools/llvm-dwarfutil/** - - 
llvm/tools/llvm-dwp/** - - llvm/tools/llvm-gsymutil/** - - llvm/tools/llvm-pdbutil/** + - changed-files: + - any-glob-to-any-file: + - clang/lib/CodeGen/CGDebugInfo.* + - llvm/include/llvm/BinaryFormat/Dwarf.* + - llvm/include/llvm/CodeGen/*Debug*.* + - llvm/include/llvm/DebugInfo/** + - llvm/include/llvm/Debuginfod/** + - llvm/include/llvm/Frontend/Debug/** + - llvm/include/llvm/IR/Debug*.* + - llvm/include/llvm/Object/*Debug*.* + - llvm/include/llvm/ObjectYAML/*Debug*.* + - llvm/include/llvm/Transforms/Utils/*Debug*.* + - llvm/include/llvm-c/DebugInfo.h + - llvm/lib/BinaryFormat/Dwarf.cpp + - llvm/lib/CodeGen/AsmPrinter/*Debug*.* + - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* + - llvm/lib/CodeGen/AsmPrinter/DIE*.* + - llvm/lib/CodeGen/LiveDebugValues/** + - llvm/lib/CodeGen/*Debug*.* + - llvm/lib/CodeGen/DwarfEHPrepare.cpp + - llvm/lib/DebugInfo/** + - llvm/lib/Debuginfod/** + - llvm/lib/DWARFLinkerParallel/** + - llvm/lib/IR/Debug*.cpp + - llvm/lib/MC/MCDwarf.cpp + - llvm/lib/Transforms/Utils/*Debug*.* + - llvm/test/DebugInfo/** + - llvm/test/tools/dsymutil/** + - llvm/test/tools/llvm-debuginfo-analyzer/** + - llvm/test/tools/llvm-debuginfod/** + - llvm/test/tools/llvm-debuginfod-find/** + - llvm/test/tools/llvm-dwarfdump/** + - llvm/test/tools/llvm-dwarfutil/** + - llvm/test/tools/llvm-dwp/** + - llvm/test/tools/llvm-gsymutil/** + - llvm/test/tools/llvm-pdbuti/** + - llvm/tools/dsymutil/** + - llvm/tools/llvm-debuginfo-analyzer/** + - llvm/tools/llvm-debuginfod/** + - llvm/tools/llvm-debuginfod-find/** + - llvm/tools/llvm-dwarfdump/** + - llvm/tools/llvm-dwarfutil/** + - llvm/tools/llvm-dwp/** + - llvm/tools/llvm-gsymutil/** + - llvm/tools/llvm-pdbutil/** github:workflow: - - .github/workflows/** + - changed-files: + - any-glob-to-any-file: + - .github/workflows/** cmake: - - cmake/** - - llvm/cmake/** - - runtimes/** + - changed-files: + - any-glob-to-any-file: + - cmake/** + - llvm/cmake/** + - runtimes/** flang:driver: - - flang/tools/flang-driver/** - - 
flang/unittests/Frontend/** - - flang/lib/FrontendTool/** - - flang/lib/Frontend/** - - flang/include/flang/Frontend/** - - flang/include/flang/FrontendTool/** - - flang/test/Driver/** + - changed-files: + - any-glob-to-any-file: + - flang/tools/flang-driver/** + - flang/unittests/Frontend/** + - flang/lib/FrontendTool/** + - flang/lib/Frontend/** + - flang/include/flang/Frontend/** + - flang/include/flang/FrontendTool/** + - flang/test/Driver/** backend:m68k: - - llvm/lib/Target/M68k/** - - clang/lib/Basic/Targets/M68k.* - - clang/lib/CodeGen/Targets/M68k.cpp - - llvm/test/CodeGen/M68k/** - - llvm/test/MC/Disassembler/M68k/** - - llvm/test/MC/M68k/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/M68k/** + - clang/lib/Basic/Targets/M68k.* + - clang/lib/CodeGen/Targets/M68k.cpp + - llvm/test/CodeGen/M68k/** + - llvm/test/MC/Disassembler/M68k/** + - llvm/test/MC/M68k/** libc++: - - libcxx/** - - .github/workflows/libcxx-* + - changed-files: + - any-glob-to-any-file: + - libcxx/** + - .github/workflows/libcxx-* libc++abi: - - libcxxabi/** + - changed-files: + - any-glob-to-any-file: + - libcxxabi/** libunwind: - - libunwind/** + - changed-files: + - any-glob-to-any-file: + - libunwind/** objectyaml: - - llvm/include/llvm/ObjectYAML/** - - llvm/lib/ObjectYAML/** - - llvm/test/tools/obj2yaml/** - - llvm/test/tools/yaml2obj/** - - llvm/tools/obj2yaml/** - - llvm/tools/yaml2obj/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/ObjectYAML/** + - llvm/lib/ObjectYAML/** + - llvm/test/tools/obj2yaml/** + - llvm/test/tools/yaml2obj/** + - llvm/tools/obj2yaml/** + - llvm/tools/yaml2obj/** clang:analysis: - - clang/include/clang/Analysis/** - - clang/lib/Analysis/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/** + - clang/lib/Analysis/** clang:static analyzer: - - clang/include/clang/StaticAnalyzer/** - - clang/lib/StaticAnalyzer/** - - clang/tools/scan-build/** - - clang/utils/analyzer/** - - 
clang/docs/analyzer/** - - clang/test/Analysis/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/StaticAnalyzer/** + - clang/lib/StaticAnalyzer/** + - clang/tools/scan-build/** + - clang/utils/analyzer/** + - clang/docs/analyzer/** + - clang/test/Analysis/** pgo: - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/** - - llvm/test/Transforms/PGOProfile/** - - compiler-rt/lib/profile/** - - compiler-rt/lib/memprof/** - - compiler-rt/test/profile/** - - compiler-rt/test/memprof/** - - llvm/tools/llvm-profdata/** - - llvm/tools/llvm-profgen/** - - llvm/test/tools/llvm-profdata/** - - llvm/test/tools/llvm-profgen/** - - llvm/unittests/ProfileData/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/** + - llvm/test/Transforms/PGOProfile/** + - compiler-rt/lib/profile/** + - compiler-rt/lib/memprof/** + - compiler-rt/test/profile/** + - compiler-rt/test/memprof/** + - llvm/tools/llvm-profdata/** + - llvm/tools/llvm-profgen/** + - llvm/test/tools/llvm-profdata/** + - llvm/test/tools/llvm-profgen/** + - llvm/unittests/ProfileData/* openacc: - - flang/**/OpenACC/** - - flang/include/flang/Lower/OpenACC.h - - flang/docs/OpenACC.md - - flang/lib/Parser/openacc-parsers.cpp - - flang/lib/Lower/OpenACC.cpp - - llvm/**/Frontend/OpenACC/** - - 
llvm/unittests/Frontend/OpenACCTest.cpp - - mlir/test/Target/LLVMIR/openacc-llvm.mlir - - mlir/**/*OpenACC/** + - changed-files: + - any-glob-to-any-file: + - flang/**/OpenACC/** + - flang/include/flang/Lower/OpenACC.h + - flang/docs/OpenACC.md + - flang/lib/Parser/openacc-parsers.cpp + - flang/lib/Lower/OpenACC.cpp + - llvm/**/Frontend/OpenACC/** + - llvm/unittests/Frontend/OpenACCTest.cpp + - mlir/test/Target/LLVMIR/openacc-llvm.mlir + - mlir/**/*OpenACC/** flang:runtime: - - flang/runtime/** + - changed-files: + - any-glob-to-any-file: + - flang/runtime/** flang:parser: - - flang/**/Parser/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Parser/** flang:semantics: - - flang/**/Evaluate/** - - flang/**/Semantics/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Evaluate/** + - flang/**/Semantics/** flang:fir-hlfir: - - flang/**/Lower/** - - flang/**/Optimizer/** + - changed-files: + - any-glob-to-any-file: + - flang/**/Lower/** + - flang/**/Optimizer/** flang:codegen: - - flang/**/CodeGen/** + - changed-files: + - any-glob-to-any-file: + - flang/**/CodeGen/** llvm:codegen: - - llvm/lib/CodeGen/* - - llvm/lib/CodeGen/MIRParser/* - - llvm/lib/CodeGen/LiveDebugValues/* - - llvm/lib/CodeGen/AsmPrinter/* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/CodeGen/* + - llvm/lib/CodeGen/MIRParser/* + - llvm/lib/CodeGen/LiveDebugValues/* + - llvm/lib/CodeGen/AsmPrinter/* llvm:globalisel: - - llvm/**/GlobalISel/** - - llvm/utils/TableGen/GlobalISel* + - changed-files: + - any-glob-to-any-file: + - llvm/**/GlobalISel/** + - llvm/utils/TableGen/GlobalISel* function-specialization: - - llvm/include/llvm/Transforms/Utils/SCCPSolver.h - - llvm/lib/Transforms/Utils/SCCPSolver.cpp - - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h - - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp - - llvm/test/Transforms/FunctionSpecialization/* + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/Transforms/Utils/SCCPSolver.h + - 
llvm/lib/Transforms/Utils/SCCPSolver.cpp + - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h + - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp + - llvm/test/Transforms/FunctionSpecialization/* libc: - - libc/** - - utils/bazel/llvm-project-overlay/libc/** + - changed-files: + - any-glob-to-any-file: + - libc/** + - utils/bazel/llvm-project-overlay/libc/** clang-format: - - clang/**/Format/** - - clang/tools/clang-format/** + - changed-files: + - any-glob-to-any-file: + - clang/**/Format/** + - clang/tools/clang-format/** flang:openmp: - - flang/test/**/OpenMP/** - - flang/lib/Lower/OpenMP.cpp - - flang/lib/Semantics/resolve-directives.cpp - - flang/lib/Semantics/check-omp-structure.cpp - - flang/lib/Optimizer/Transforms/OMP* - - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir - - flang/test/Lower/OpenMP/** - - flang/test/Transforms/omp* - - mlir/**/*OpenMP* - - mlir/test/Target/LLVMIR/openmp* - - llvm/lib/Frontend/OpenMP/** - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/unittests/Frontend/OpenMP* + - changed-files: + - any-glob-to-any-file: + - flang/test/**/OpenMP/** + - flang/lib/Lower/OpenMP.cpp + - flang/lib/Semantics/resolve-directives.cpp + - flang/lib/Semantics/check-omp-structure.cpp + - flang/lib/Optimizer/Transforms/OMP* + - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir + - flang/test/Lower/OpenMP/** + - flang/test/Transforms/omp* + - mlir/**/*OpenMP* + - mlir/test/Target/LLVMIR/openmp* + - llvm/lib/Frontend/OpenMP/** + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/unittests/Frontend/OpenMP* llvm:ir: - - llvm/lib/IR/** - - llvm/include/llvm/IR/** - - llvm/docs/LangRef.rst - - llvm/unittests/IR/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/IR/** + - llvm/include/llvm/IR/** + - llvm/docs/LangRef.rst + - llvm/unittests/IR/** llvm:SandboxIR: - - llvm/lib/SandboxIR/** - - llvm/include/llvm/SandboxIR/** - - llvm/docs/SandboxIR.md - - llvm/unittests/SandboxIR/** + - changed-files: + - any-glob-to-any-file: + - 
llvm/lib/SandboxIR/** + - llvm/include/llvm/SandboxIR/** + - llvm/docs/SandboxIR.md + - llvm/unittests/SandboxIR/** llvm:analysis: - - llvm/lib/Analysis/** - - llvm/include/llvm/Analysis/** - - llvm/test/Analysis/** - - llvm/unittests/Analysis/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/** + - llvm/include/llvm/Analysis/** + - llvm/test/Analysis/** + - llvm/unittests/Analysis/** llvm:adt: - - llvm/**/ADT/* + - changed-files: + - any-glob-to-any-file: + - llvm/**/ADT/* llvm:support: - - llvm/**/Support/** + - changed-files: + - any-glob-to-any-file: + - llvm/**/Support/** # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories. llvm:mc: - - llvm/include/llvm/MC/** - - llvm/lib/MC/** - - llvm/tools/llvm-mc/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/MC/** + - llvm/lib/MC/** + - llvm/tools/llvm-mc/** llvm:transforms: - - llvm/lib/Transforms/** - - llvm/include/llvm/Transforms/** - - llvm/test/Transforms/** - - llvm/unittests/Transforms/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Transforms/** + - llvm/include/llvm/Transforms/** + - llvm/test/Transforms/** + - llvm/unittests/Transforms/** llvm:instcombine: - - llvm/lib/Analysis/InstructionSimplify.cpp - - llvm/lib/Transforms/InstCombine/** - - llvm/include/llvm/Transforms/InstCombine/ - - llvm/include/llvm/Analysis/InstructionSimplify.h - - llvm/test/Transforms/InstCombine/** - - llvm/test/Transforms/InstSimplify/** + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/InstructionSimplify.cpp + - llvm/lib/Transforms/InstCombine/** + - llvm/include/llvm/Transforms/InstCombine/ + - llvm/include/llvm/Analysis/InstructionSimplify.h + - llvm/test/Transforms/InstCombine/** + - llvm/test/Transforms/InstSimplify/** llvm:vectorcombine: - - llvm/lib/Transforms/Vectorize/VectorCombine.cpp - - llvm/test/Transforms/VectorCombine/** + - changed-files: + - any-glob-to-any-file: + - 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp + - llvm/test/Transforms/VectorCombine/** clangd: - - clang-tools-extra/clangd/** + - changed-files: + - any-glob-to-any-file: + - clang-tools-extra/clangd/** hlsl: - - clang/test/ParserHLSL/** - - clang/test/SemaHLSL/** - - clang/test/AST/HLSL/** - - clang/test/CodeGenHLSL/** - - clang/cmake/caches/HLSL.cmake - - clang/include/clang/Basic/HLSL*.h - - clang/include/clang/Sema/HLSL*.h - - clang/docs/HLSL/** - - clang/lib/Driver/ToolChains/HLSL* - - clang/lib/Parse/ParseHLSL.cpp - - clang/lib/Sema/HLSLExternalSemaSource.cpp - - clang/lib/Sema/SemaHLSL.cpp - - clang/lib/CodeGen/CGHLSLRuntime.* - - clang/lib/CodeGen/CGHLSLBuiltins.cpp - - llvm/include/llvm/Frontend/HLSL/** - - llvm/lib/Frontend/HLSL/** + - changed-files: + - any-glob-to-any-file: + - clang/test/ParserHLSL/** + - clang/test/SemaHLSL/** + - clang/test/AST/HLSL/** + - clang/test/CodeGenHLSL/** + - clang/cmake/caches/HLSL.cmake + - clang/include/clang/Basic/HLSL*.h + - clang/include/clang/Sema/HLSL*.h + - clang/docs/HLSL/** + - clang/lib/Driver/ToolChains/HLSL* + - clang/lib/Parse/ParseHLSL.cpp + - clang/lib/Sema/HLSLExternalSemaSource.cpp + - clang/lib/Sema/SemaHLSL.cpp + - clang/lib/CodeGen/CGHLSLRuntime.* + - clang/lib/CodeGen/CGHLSLBuiltins.cpp + - llvm/include/llvm/Frontend/HLSL/** + - llvm/lib/Frontend/HLSL/** llvm:SelectionDAG: - - llvm/include/llvm/CodeGen/SelectionDAG*.h - - llvm/include/llvm/CodeGen/SDNodeProperties.td - - llvm/include/llvm/Target/TargetSelectionDAG.td - - llvm/lib/CodeGen/SelectionDAG/** - - llvm/utils/TableGen/CodeGenDAG* - - llvm/utils/TableGen/DAGISel* - - llvm/include/llvm/CodeGen/DAGCombine.h - - llvm/include/llvm/CodeGen/ISDOpcodes.h + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/CodeGen/SelectionDAG*.h + - llvm/include/llvm/CodeGen/SDNodeProperties.td + - llvm/include/llvm/Target/TargetSelectionDAG.td + - llvm/lib/CodeGen/SelectionDAG/** + - llvm/utils/TableGen/CodeGenDAG* + - llvm/utils/TableGen/DAGISel* 
+ - llvm/include/llvm/CodeGen/DAGCombine.h + - llvm/include/llvm/CodeGen/ISDOpcodes.h backend:DirectX: - - '**/*DirectX*' - - '**/*DXIL*' - - '**/*dxil*' - - '**/*DirectX*/**' - - '**/*DXIL*/**' - - '**/*dxil*/**' - - '**/*DXContainer*' - - '**/*DXContainer*/**' - - clang/lib/Sema/SemaDirectX.cpp - - clang/include/clang/Sema/SemaDirectX.h - - clang/include/clang/Basic/BuiltinsDirectX.td - - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp - - clang/test/CodeGenDirectX/** - - clang/test/SemaDirectX/** + - changed-files: + - any-glob-to-any-file: + - '**/*DirectX*' + - '**/*DXIL*' + - '**/*dxil*' + - '**/*DirectX*/**' + - '**/*DXIL*/**' + - '**/*dxil*/**' + - '**/*DXContainer*' + - '**/*DXContainer*/**' + - clang/lib/Sema/SemaDirectX.cpp + - clang/include/clang/Sema/SemaDirectX.h + - clang/include/clang/Basic/BuiltinsDirectX.td + - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp + - clang/test/CodeGenDirectX/** + - clang/test/SemaDirectX/** backend:SPIR-V: - - clang/lib/Driver/ToolChains/SPIRV.* - - clang/lib/Sema/SemaSPIRV.cpp - - clang/include/clang/Sema/SemaSPIRV.h - - clang/include/clang/Basic/BuiltinsSPIRV.td - - clang/test/CodeGenSPIRV/** - - clang/test/SemaSPIRV/** - - llvm/lib/Target/SPIRV/** - - llvm/test/CodeGen/SPIRV/** - - llvm/test/Frontend/HLSL/** - - llvm/docs/SPIRVUsage.rst + - changed-files: + - any-glob-to-any-file: + - clang/lib/Driver/ToolChains/SPIRV.* + - clang/lib/Sema/SemaSPIRV.cpp + - clang/include/clang/Sema/SemaSPIRV.h + - clang/include/clang/Basic/BuiltinsSPIRV.td + - clang/test/CodeGenSPIRV/** + - clang/test/SemaSPIRV/** + - llvm/lib/Target/SPIRV/** + - llvm/test/CodeGen/SPIRV/** + - llvm/test/Frontend/HLSL/** + - llvm/docs/SPIRVUsage.rst mlgo: - - llvm/lib/Analysis/ML* - - llvm/include/llvm/Analysis/ML* - - llvm/lib/Analysis/*Runner.cpp - - llvm/include/llvm/Analysis/*Runner.h - - llvm/unittests/Analysis/ML* - - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp - - llvm/lib/Analysis/TrainingLogger.cpp - - 
llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h - - llvm/include/llvm/Analysis/Utils/TrainingLogger.h - - llvm/test/Analysis/FunctionPropertiesAnalysis/* - - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp - - llvm/test/Transforms/inline/ML/** - - llvm/lib/CodeGen/ML* - - llvm/unittests/CodeGen/ML* - - llvm/test/CodeGen/MLRegAlloc/** - - llvm/utils/mlgo-utils/** - - llvm/docs/MLGO.rst - - llvm/include/llvm/Analysis/IR2Vec.h - - llvm/lib/Analysis/IR2Vec.cpp - - llvm/lib/Analysis/models/** - - llvm/include/llvm/CodeGen/MIR2Vec.h - - llvm/lib/CodeGen/MIR2Vec.cpp - - llvm/test/Analysis/IR2Vec/** - - llvm/test/CodeGen/MIR2Vec/** - - llvm/unittests/Analysis/IR2VecTest.cpp - - llvm/unittests/CodeGen/MIR2VecTest.cpp - - llvm/tools/llvm-ir2vec/** - - llvm/docs/CommandGuide/llvm-ir2vec.rst + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Analysis/ML* + - llvm/include/llvm/Analysis/ML* + - llvm/lib/Analysis/*Runner.cpp + - llvm/include/llvm/Analysis/*Runner.h + - llvm/unittests/Analysis/ML* + - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp + - llvm/lib/Analysis/TrainingLogger.cpp + - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h + - llvm/include/llvm/Analysis/Utils/TrainingLogger.h + - llvm/test/Analysis/FunctionPropertiesAnalysis/* + - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp + - llvm/test/Transforms/inline/ML/** + - llvm/lib/CodeGen/ML* + - llvm/unittests/CodeGen/ML* + - llvm/test/CodeGen/MLRegAlloc/** + - llvm/utils/mlgo-utils/** + - llvm/docs/MLGO.rst + - llvm/include/llvm/Analysis/IR2Vec.h + - llvm/lib/Analysis/IR2Vec.cpp + - llvm/lib/Analysis/models/** + - llvm/include/llvm/CodeGen/MIR2Vec.h + - llvm/lib/CodeGen/MIR2Vec.cpp + - llvm/test/Analysis/IR2Vec/** + - llvm/test/CodeGen/MIR2Vec/** + - llvm/unittests/Analysis/IR2VecTest.cpp + - llvm/unittests/CodeGen/MIR2VecTest.cpp + - llvm/tools/llvm-ir2vec/** + - llvm/docs/CommandGuide/llvm-ir2vec.rst tools:llvm-exegesis: - - llvm/tools/llvm-exegesis/** - - 
llvm/test/tools/llvm-exegesis/** - - llvm/unittests/tools/llvm-exegesis/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-exegesis/** + - llvm/test/tools/llvm-exegesis/** + - llvm/unittests/tools/llvm-exegesis/** tools:llvm-reduce: - - llvm/tools/llvm-reduce/** + - changed-files: + - any-glob-to-any-file: + - llvm/tools/llvm-reduce/** platform:windows: - - lld/COFF/** - - clang/lib/Driver/MSVC.cpp - - clang/lib/Driver/MinGW.cpp - - llvm/lib/DebugInfo/CodeView/** - - llvm/lib/DebugInfo/PDB/** - - llvm/lib/WindowsDriver/** - - llvm/lib/Support/Windows/** - - llvm/lib/BinaryFormat/COFF.cpp + - changed-files: + - any-glob-to-any-file: + - lld/COFF/** + - clang/lib/Driver/MSVC.cpp + - clang/lib/Driver/MinGW.cpp + - llvm/lib/DebugInfo/CodeView/** + - llvm/lib/DebugInfo/PDB/** + - llvm/lib/WindowsDriver/** + - llvm/lib/Support/Windows/** + - llvm/lib/BinaryFormat/COFF.cpp llvm:regalloc: - - llvm/**/CodeGen/CalcSpillWeights* - - llvm/**/CodeGen/InlineSpiller* - - llvm/**/CodeGen/InterferenceCache* - - llvm/**/CodeGen/LiveInterval* - - llvm/**/CodeGen/LiveRange* - - llvm/**/CodeGen/LiveReg* - - llvm/**/CodeGen/LiveVariables* - - llvm/**/CodeGen/MachineCopyPropagation* - - llvm/**/CodeGen/PHIElimination* - - llvm/**/CodeGen/ProcessImplicitDefs.cpp - - llvm/**/CodeGen/Register* - - llvm/**/CodeGen/RegUsage* - - llvm/**/CodeGen/RenameIndependentSubregs.cpp - - llvm/**/CodeGen/SlotIndexes.h - - llvm/**/CodeGen/SpillPlacement* - - llvm/**/CodeGen/SplitKit* - - llvm/**/CodeGen/VirtRegMap.h - - llvm/include/PBQP/** - - llvm/include/PBQPRAConstraint.h - - llvm/include/llvm/CodeGen/Spiller.h - - llvm/**/*RegAlloc + - changed-files: + - any-glob-to-any-file: + - llvm/**/CodeGen/CalcSpillWeights* + - llvm/**/CodeGen/InlineSpiller* + - llvm/**/CodeGen/InterferenceCache* + - llvm/**/CodeGen/LiveInterval* + - llvm/**/CodeGen/LiveRange* + - llvm/**/CodeGen/LiveReg* + - llvm/**/CodeGen/LiveVariables* + - llvm/**/CodeGen/MachineCopyPropagation* + - 
llvm/**/CodeGen/PHIElimination* + - llvm/**/CodeGen/ProcessImplicitDefs.cpp + - llvm/**/CodeGen/Register* + - llvm/**/CodeGen/RegUsage* + - llvm/**/CodeGen/RenameIndependentSubregs.cpp + - llvm/**/CodeGen/SlotIndexes.h + - llvm/**/CodeGen/SpillPlacement* + - llvm/**/CodeGen/SplitKit* + - llvm/**/CodeGen/VirtRegMap.h + - llvm/include/PBQP/** + - llvm/include/PBQPRAConstraint.h + - llvm/include/llvm/CodeGen/Spiller.h + - llvm/**/*RegAlloc lldb: - - lldb/** + - changed-files: + - any-glob-to-any-file: + - lldb/** lldb-dap: - - lldb/tools/lldb-dap/** + - changed-files: + - any-glob-to-any-file: + - lldb/tools/lldb-dap/** backend:AMDGPU: - - '**/*amdgpu*' - - '**/*AMDGPU*' - - '**/*amdgpu*/**' - - '**/*AMDGPU*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*amdgpu*' + - '**/*AMDGPU*' + - '**/*amdgpu*/**' + - '**/*AMDGPU*/**' backend:NVPTX: - - 'llvm/**/*nvvm*' - - 'llvm/**/*NVVM*' - - 'llvm/**/*nvptx*' - - 'llvm/**/*NVPTX*' - - 'llvm/**/*nvvm*/**' - - 'llvm/**/*NVVM*/**' - - 'llvm/**/*nvptx*/**' - - 'llvm/**/*NVPTX*/**' + - changed-files: + - any-glob-to-any-file: + - 'llvm/**/*nvvm*' + - 'llvm/**/*NVVM*' + - 'llvm/**/*nvptx*' + - 'llvm/**/*NVPTX*' + - 'llvm/**/*nvvm*/**' + - 'llvm/**/*NVVM*/**' + - 'llvm/**/*nvptx*/**' + - 'llvm/**/*NVPTX*/**' backend:MIPS: - - '**/*mips*' - - '**/*Mips*' - - '**/*mips*/**' - - '**/*Mips*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*mips*' + - '**/*Mips*' + - '**/*mips*/**' + - '**/*Mips*/**' backend:RISC-V: - - '**/*riscv*' - - '**/*RISCV*' - - '**/*riscv*/**' - - '**/*RISCV*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*riscv*' + - '**/*RISCV*' + - '**/*riscv*/**' + - '**/*RISCV*/**' backend:Xtensa: - - '**/*xtensa*' - - '**/*Xtensa*' - - '**/*xtensa*/**' - - '**/*Xtensa*/**' + - changed-files: + - any-glob-to-any-file: + - '**/*xtensa*' + - '**/*Xtensa*' + - '**/*xtensa*/**' + - '**/*Xtensa*/**' lld:coff: - - lld/**/COFF/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - 
lld/**/COFF/** + - lld/Common/** lld:elf: - - lld/**/ELF/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/ELF/** + - lld/Common/** lld:macho: - - lld/**/MachO/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/MachO/** + - lld/Common/** lld:wasm: - - lld/**/wasm/** - - lld/Common/** + - changed-files: + - any-glob-to-any-file: + - lld/**/wasm/** + - lld/Common/** backend:ARC: - - llvm/lib/Target/ARC/** - - clang/lib/Basic/Targets/ARC.h - - clang/lib/Basic/Targets/ARC.cpp - - clang/lib/CodeGen/Targets/ARC.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/ARC/** + - clang/lib/Basic/Targets/ARC.h + - clang/lib/Basic/Targets/ARC.cpp + - clang/lib/CodeGen/Targets/ARC.cpp backend:ARM: - - llvm/include/llvm/IR/IntrinsicsARM.td - - llvm/test/MC/ARM/** - - llvm/lib/Target/ARM/** - - llvm/test/CodeGen/ARM/** - - clang/lib/Basic/Targets/ARM* - - clang/lib/Driver/ToolChains/Arch/ARM.* - - clang/lib/CodeGen/Targets/ARM.cpp - - clang/include/clang/Basic/BuiltinsARM* - - llvm/test/MC/DisasemblerARM/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsARM.td + - llvm/test/MC/ARM/** + - llvm/lib/Target/ARM/** + - llvm/test/CodeGen/ARM/** + - clang/lib/Basic/Targets/ARM* + - clang/lib/Driver/ToolChains/Arch/ARM.* + - clang/lib/CodeGen/Targets/ARM.cpp + - clang/include/clang/Basic/BuiltinsARM* + - llvm/test/MC/DisasemblerARM/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:AArch64: - - llvm/include/llvm/IR/IntrinsicsAArch64.td - - llvm/test/MC/AArch64/** - - llvm/lib/Target/AArch64/** - - llvm/test/CodeGen/AArch64/** - - clang/lib/Basic/Targets/AArch64* - - clang/lib/Driver/ToolChains/Arch/AArch64.* - - clang/lib/CodeGen/Targets/AArch64.cpp - - clang/include/clang/Basic/BuiltinsAArch64* - - llvm/test/MC/Disassembler/AArch64/** - - clang/include/clang/Sema/SemaARM.h - - 
clang/lib/Sema/SemaARM.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsAArch64.td + - llvm/test/MC/AArch64/** + - llvm/lib/Target/AArch64/** + - llvm/test/CodeGen/AArch64/** + - clang/lib/Basic/Targets/AArch64* + - clang/lib/Driver/ToolChains/Arch/AArch64.* + - clang/lib/CodeGen/Targets/AArch64.cpp + - clang/include/clang/Basic/BuiltinsAArch64* + - llvm/test/MC/Disassembler/AArch64/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:CSKY: - - llvm/lib/Target/CSKY/** - - llvm/include/llvm/TargetParser/CSKYTargetParser.def - - llvm/include/llvm/TargetParser/CSKYTargetParser.h - - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def - - llvm/lib/TargetParser/CSKYTargetParser.cpp - - llvm/lib/Support/CSKYAttributes.cpp - - llvm/lib/Support/CSKYAttributeParser.cpp - - clang/lib/Basic/Targets/CSKY.h - - clang/lib/Basic/Targets/CSKY.cpp - - clang/lib/CodeGen/Targets/CSKY.cpp - - clang/lib/Driver/ToolChains/CSKY* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/CSKY/** + - llvm/include/llvm/TargetParser/CSKYTargetParser.def + - llvm/include/llvm/TargetParser/CSKYTargetParser.h + - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def + - llvm/lib/TargetParser/CSKYTargetParser.cpp + - llvm/lib/Support/CSKYAttributes.cpp + - llvm/lib/Support/CSKYAttributeParser.cpp + - clang/lib/Basic/Targets/CSKY.h + - clang/lib/Basic/Targets/CSKY.cpp + - clang/lib/CodeGen/Targets/CSKY.cpp + - clang/lib/Driver/ToolChains/CSKY* backend:Hexagon: - - clang/include/clang/Basic/BuiltinsHexagon*.def - - clang/include/clang/Sema/SemaHexagon.h - - clang/lib/Basic/Targets/Hexagon.* - - clang/lib/CodeGen/Targets/Hexagon.cpp - - clang/lib/Driver/ToolChains/Hexagon.* - - clang/lib/Sema/SemaHexagon.cpp - - lld/ELF/Arch/Hexagon.cpp - - lldb/source/Plugins/ABI/Hexagon/** - - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** - - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def - - llvm/include/llvm/IR/IntrinsicsHexagon* - - 
llvm/include/llvm/Support/Hexagon* - - llvm/lib/Support/Hexagon* - - llvm/lib/Target/Hexagon/** - - llvm/test/CodeGen/Hexagon/** - - llvm/test/CodeGen/*/Hexagon/** - - llvm/test/DebugInfo/*/Hexagon/** - - llvm/test/Transforms/*/Hexagon - - llvm/test/MC/Disassembler/Hexagon/** - - llvm/test/MC/Hexagon/** - - llvm/test/tools/llvm-objdump/ELF/Hexagon/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Basic/BuiltinsHexagon*.def + - clang/include/clang/Sema/SemaHexagon.h + - clang/lib/Basic/Targets/Hexagon.* + - clang/lib/CodeGen/Targets/Hexagon.cpp + - clang/lib/Driver/ToolChains/Hexagon.* + - clang/lib/Sema/SemaHexagon.cpp + - lld/ELF/Arch/Hexagon.cpp + - lldb/source/Plugins/ABI/Hexagon/** + - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** + - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def + - llvm/include/llvm/IR/IntrinsicsHexagon* + - llvm/include/llvm/Support/Hexagon* + - llvm/lib/Support/Hexagon* + - llvm/lib/Target/Hexagon/** + - llvm/test/CodeGen/Hexagon/** + - llvm/test/CodeGen/*/Hexagon/** + - llvm/test/DebugInfo/*/Hexagon/** + - llvm/test/Transforms/*/Hexagon + - llvm/test/MC/Disassembler/Hexagon/** + - llvm/test/MC/Hexagon/** + - llvm/test/tools/llvm-objdump/ELF/Hexagon/** backend:Lanai: - - llvm/lib/Target/Lanai/** - - clang/lib/Basic/Targets/Lanai.h - - clang/lib/Basic/Targets/Lanai.cpp - - clang/lib/CodeGen/Targets/Lanai.cpp - - clang/lib/Driver/ToolChains/Lanai* + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/Lanai/** + - clang/lib/Basic/Targets/Lanai.h + - clang/lib/Basic/Targets/Lanai.cpp + - clang/lib/CodeGen/Targets/Lanai.cpp + - clang/lib/Driver/ToolChains/Lanai* backend:loongarch: - - llvm/include/llvm/IR/IntrinsicsLoongArch.td - - llvm/test/MC/LoongArch/** - - llvm/lib/Target/LoongArch/** - - llvm/test/CodeGen/LoongArch/** - - clang/lib/Basic/Targets/LoongArch* - - clang/lib/Driver/ToolChains/Arch/LoongArch.* - - clang/lib/CodeGen/Targets/LoongArch.cpp - - clang/include/clang/Basic/BuiltinsLoongArch* 
- - clang/include/clang/Sema/SemaLoongArch.h - - clang/lib/Sema/SemaLoongArch.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsLoongArch.td + - llvm/test/MC/LoongArch/** + - llvm/lib/Target/LoongArch/** + - llvm/test/CodeGen/LoongArch/** + - clang/lib/Basic/Targets/LoongArch* + - clang/lib/Driver/ToolChains/Arch/LoongArch.* + - clang/lib/CodeGen/Targets/LoongArch.cpp + - clang/include/clang/Basic/BuiltinsLoongArch* + - clang/include/clang/Sema/SemaLoongArch.h + - clang/lib/Sema/SemaLoongArch.cpp backend:MSP430: - - llvm/include/llvm/IR/IntrinsicsMSP430.td - - llvm/test/MC/MSP430/** - - llvm/lib/Target/MSP430/** - - llvm/test/CodeGen/MSP430/** - - clang/lib/Basic/Targets/MSP430* - - clang/lib/Driver/ToolChains/Arch/MSP430.* - - clang/lib/CodeGen/Targets/MSP430.cpp - - clang/include/clang/Basic/BuiltinsMSP430* - - llvm/test/MC/Disassembler/MSP430/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsMSP430.td + - llvm/test/MC/MSP430/** + - llvm/lib/Target/MSP430/** + - llvm/test/CodeGen/MSP430/** + - clang/lib/Basic/Targets/MSP430* + - clang/lib/Driver/ToolChains/Arch/MSP430.* + - clang/lib/CodeGen/Targets/MSP430.cpp + - clang/include/clang/Basic/BuiltinsMSP430* + - llvm/test/MC/Disassembler/MSP430/** backend:Sparc: - - llvm/include/llvm/IR/IntrinsicsSparc.td - - llvm/test/MC/Sparc/** - - llvm/lib/Target/Sparc/** - - llvm/test/CodeGen/Sparc/** - - clang/lib/Basic/Targets/Sparc* - - clang/lib/Driver/ToolChains/Arch/Sparc.* - - clang/lib/CodeGen/Targets/Sparc.cpp - - clang/include/clang/Basic/BuiltinsSparc* - - llvm/test/MC/Disassembler/Sparc/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsSparc.td + - llvm/test/MC/Sparc/** + - llvm/lib/Target/Sparc/** + - llvm/test/CodeGen/Sparc/** + - clang/lib/Basic/Targets/Sparc* + - clang/lib/Driver/ToolChains/Arch/Sparc.* + - clang/lib/CodeGen/Targets/Sparc.cpp + - clang/include/clang/Basic/BuiltinsSparc* + - 
llvm/test/MC/Disassembler/Sparc/** backend:WebAssembly: - - llvm/lib/Target/WebAssembly/** - - llvm/test/CodeGen/WebAssembly/** - - clang/lib/Basic/Targets/WebAssembly* - - clang/include/clang/Basic/BuiltinsWebAssembly.def - - clang/include/clang/Basic/WebAssemblyReferenceTypes.def - - clang/lib/CodeGen/Targets/WebAssembly* - - llvm/include/llvm/IR/IntinsicsWebAssembly.td - - llvm/include/llvm/Object/Wasm* - - llvm/lib/CodeGen/AsmPrinter/Wasm* - - llvm/lib/CodeGen/Wasm* - - llvm/lib/MC/MCParser/Wasm* - - llvm/lib/MC/Wasm* - - llvm/lib/ObjCopy/wasm/** - - llvm/lib/Object/Wasm* - - clang/lib/Driver/Toolchains/WebAssembly* - - clang/lib/Headers/wasm_simd128.h - - clang/test/CodeGen/WebAssembly/** - - clang/test/SemaCXX/*wasm* - - clang/test/Sema/*wasm* - - llvm/include/llvm/BinaryFormat/Wasm.h - - llvm/unittests/Target/WebAssembly/** - - llvm/test/DebugInfo/WebAssembly/** - - llvm/test/MC/WebAssembly/** - - clang/include/clang/Sema/SemaWasm.h - - clang/lib/Sema/SemaLoongWasm.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/lib/Target/WebAssembly/** + - llvm/test/CodeGen/WebAssembly/** + - clang/lib/Basic/Targets/WebAssembly* + - clang/include/clang/Basic/BuiltinsWebAssembly.def + - clang/include/clang/Basic/WebAssemblyReferenceTypes.def + - clang/lib/CodeGen/Targets/WebAssembly* + - llvm/include/llvm/IR/IntinsicsWebAssembly.td + - llvm/include/llvm/Object/Wasm* + - llvm/lib/CodeGen/AsmPrinter/Wasm* + - llvm/lib/CodeGen/Wasm* + - llvm/lib/MC/MCParser/Wasm* + - llvm/lib/MC/Wasm* + - llvm/lib/ObjCopy/wasm/** + - llvm/lib/Object/Wasm* + - clang/lib/Driver/Toolchains/WebAssembly* + - clang/lib/Headers/wasm_simd128.h + - clang/test/CodeGen/WebAssembly/** + - clang/test/SemaCXX/*wasm* + - clang/test/Sema/*wasm* + - llvm/include/llvm/BinaryFormat/Wasm.h + - llvm/unittests/Target/WebAssembly/** + - llvm/test/DebugInfo/WebAssembly/** + - llvm/test/MC/WebAssembly/** + - clang/include/clang/Sema/SemaWasm.h + - clang/lib/Sema/SemaLoongWasm.cpp backend:X86: - - 
llvm/include/llvm/IR/IntrinsicsX86.td - - llvm/lib/Target/X86/** - - llvm/test/CodeGen/X86/** - - llvm/test/MC/X86/** - - llvm/test/MC/Disassembler/X86/** - - llvm/test/Analysis/CostModel/X86/** - - llvm/test/tools/llvm-mca/X86/** - - clang/lib/Basic/Targets/X86/** - - clang/lib/Driver/ToolChains/Arch/X86.* - - clang/lib/CodeGen/Targets/X86.* - - clang/lib/Headers/** - - clang/test/CodeGen/X86/** - - clang/include/clang/Basic/BuiltinsX86* - - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h - - llvm/include/llvm/TargetParser/X86* - - llvm/lib/TargetParser/X86* - - llvm/utils/TableGen/X86* - - clang/include/clang/Sema/SemaX86.h - - clang/lib/Sema/SemaX86.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/IR/IntrinsicsX86.td + - llvm/lib/Target/X86/** + - llvm/test/CodeGen/X86/** + - llvm/test/MC/X86/** + - llvm/test/MC/Disassembler/X86/** + - llvm/test/Analysis/CostModel/X86/** + - llvm/test/tools/llvm-mca/X86/** + - clang/lib/Basic/Targets/X86/** + - clang/lib/Driver/ToolChains/Arch/X86.* + - clang/lib/CodeGen/Targets/X86.* + - clang/lib/Headers/** + - clang/test/CodeGen/X86/** + - clang/include/clang/Basic/BuiltinsX86* + - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h + - llvm/include/llvm/TargetParser/X86* + - llvm/lib/TargetParser/X86* + - llvm/utils/TableGen/X86* + - clang/include/clang/Sema/SemaX86.h + - clang/lib/Sema/SemaX86.cpp backend:PowerPC: - - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* - - llvm/include/llvm/BinaryFormat/XCOFF.h - - llvm/include/llvm/IR/IntrinsicsPowerPC.td - - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp - - llvm/lib/Target/PowerPC/** - - llvm/test/Analysis/**/PowerPC/** - - llvm/test/CodeGen/PowerPC/** - - llvm/test/CodeGen/MIR/PowerPC/** - - llvm/test/DebugInfo/XCOFF/** - - llvm/test/DebugInfo/PowerPC/** - - llvm/test/LTO/PowerPC/** - - llvm/test/MC/Disassembler/PowerPC/** - - llvm/test/MC/PowerPC/** - - llvm/test/MC/XCOFF/** - - llvm/test/Transforms/**/PowerPC/** - - 
clang/include/clang/Basic/BuiltinsPPC.* - - clang/lib/Basic/Targets/PPC.* - - clang/lib/CodeGen/Targets/PPC.cpp - - clang/lib/Driver/ToolChains/PPC* - - clang/lib/Driver/ToolChains/AIX* - - clang/lib/Driver/ToolChains/Arch/PPC.* - - clang/test/CodeGen/PowerPC/** - - clang/include/clang/Sema/SemaPPC.h - - clang/lib/Sema/SemaPPC.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* + - llvm/include/llvm/BinaryFormat/XCOFF.h + - llvm/include/llvm/IR/IntrinsicsPowerPC.td + - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp + - llvm/lib/Target/PowerPC/** + - llvm/test/Analysis/**/PowerPC/** + - llvm/test/CodeGen/PowerPC/** + - llvm/test/CodeGen/MIR/PowerPC/** + - llvm/test/DebugInfo/XCOFF/** + - llvm/test/DebugInfo/PowerPC/** + - llvm/test/LTO/PowerPC/** + - llvm/test/MC/Disassembler/PowerPC/** + - llvm/test/MC/PowerPC/** + - llvm/test/MC/XCOFF/** + - llvm/test/Transforms/**/PowerPC/** + - clang/include/clang/Basic/BuiltinsPPC.* + - clang/lib/Basic/Targets/PPC.* + - clang/lib/CodeGen/Targets/PPC.cpp + - clang/lib/Driver/ToolChains/PPC* + - clang/lib/Driver/ToolChains/AIX* + - clang/lib/Driver/ToolChains/Arch/PPC.* + - clang/test/CodeGen/PowerPC/** + - clang/include/clang/Sema/SemaPPC.h + - clang/lib/Sema/SemaPPC.cpp backend:SystemZ: - - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* - - llvm/include/llvm/BinaryFormat/GOFF.h - - llvm/include/llvm/IR/IntrinsicsSystemZ.td - - llvm/lib/Target/SystemZ/** - - llvm/test/Analysis/**/SystemZ/** - - llvm/test/CodeGen/SystemZ/** - - llvm/test/DebugInfo/SystemZ/** - - llvm/test/ExecutionEngine/**/SystemZ/** - - llvm/test/MC/Disassembler/SystemZ/** - - llvm/test/MC/GOFF/** - - llvm/test/MC/SystemZ/** - - llvm/test/Transforms/**/SystemZ/** - - clang/include/clang/Basic/BuiltinsSystemZ.* - - clang/lib/Basic/Targets/SystemZ.* - - clang/lib/CodeGen/Targets/SystemZ.cpp - - clang/lib/Driver/ToolChains/ZOS* - - clang/lib/Driver/ToolChains/Arch/SystemZ.* - - clang/test/CodeGen/SystemZ/** - - 
clang/include/clang/Sema/SemaSystemZ.h - - clang/lib/Sema/SemaSystemZ.cpp + - changed-files: + - any-glob-to-any-file: + - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* + - llvm/include/llvm/BinaryFormat/GOFF.h + - llvm/include/llvm/IR/IntrinsicsSystemZ.td + - llvm/lib/Target/SystemZ/** + - llvm/test/Analysis/**/SystemZ/** + - llvm/test/CodeGen/SystemZ/** + - llvm/test/DebugInfo/SystemZ/** + - llvm/test/ExecutionEngine/**/SystemZ/** + - llvm/test/MC/Disassembler/SystemZ/** + - llvm/test/MC/GOFF/** + - llvm/test/MC/SystemZ/** + - llvm/test/Transforms/**/SystemZ/** + - clang/include/clang/Basic/BuiltinsSystemZ.* + - clang/lib/Basic/Targets/SystemZ.* + - clang/lib/CodeGen/Targets/SystemZ.cpp + - clang/lib/Driver/ToolChains/ZOS* + - clang/lib/Driver/ToolChains/Arch/SystemZ.* + - clang/test/CodeGen/SystemZ/** + - clang/include/clang/Sema/SemaSystemZ.h + - clang/lib/Sema/SemaSystemZ.cpp third-party:unittests: - - third-party/unittests/** + - changed-files: + - any-glob-to-any-file: + - third-party/unittests/** third-party:benchmark: - - third-party/benchmark/** + - changed-files: + - any-glob-to-any-file: + - third-party/benchmark/** llvm:binary-utilities: - - llvm/docs/CommandGuide/llvm-* - - llvm/include/llvm/BinaryFormat/** - - llvm/include/llvm/DebugInfo/Symbolize/** - - llvm/include/llvm/ObjCopy/** - - llvm/include/llvm/Object/** - - llvm/lib/BinaryFormat/** - - llvm/lib/DebugInfo/Symbolize/** - - llvm/lib/ObjCopy/** - - llvm/lib/Object/** - - llvm/test/Object/** - - llvm/test/tools/llvm-ar/** - - llvm/test/tools/llvm-cxxfilt/** - - llvm/test/tools/llvm-nm/** - - llvm/test/tools/llvm-objcopy/** - - llvm/test/tools/llvm-objdump/** - - llvm/test/tools/llvm-readobj/** - - llvm/test/tools/llvm-size/** - - llvm/test/tools/llvm-strings/** - - llvm/test/tools/llvm-symbolizer/** - - llvm/tools/llvm-ar/** - - llvm/tools/llvm-cxxfilt/** - - llvm/tools/llvm-nm/** - - llvm/tools/llvm-objcopy/** - - llvm/tools/llvm-objdump/** - - llvm/tools/llvm-readobj/** - - 
llvm/tools/llvm-size/** - - llvm/tools/llvm-strings/** - - llvm/tools/llvm-symbolizer/** + - changed-files: + - any-glob-to-any-file: + - llvm/docs/CommandGuide/llvm-* + - llvm/include/llvm/BinaryFormat/** + - llvm/include/llvm/DebugInfo/Symbolize/** + - llvm/include/llvm/ObjCopy/** + - llvm/include/llvm/Object/** + - llvm/lib/BinaryFormat/** + - llvm/lib/DebugInfo/Symbolize/** + - llvm/lib/ObjCopy/** + - llvm/lib/Object/** + - llvm/test/Object/** + - llvm/test/tools/llvm-ar/** + - llvm/test/tools/llvm-cxxfilt/** + - llvm/test/tools/llvm-nm/** + - llvm/test/tools/llvm-objcopy/** + - llvm/test/tools/llvm-objdump/** + - llvm/test/tools/llvm-readobj/** + - llvm/test/tools/llvm-size/** + - llvm/test/tools/llvm-strings/** + - llvm/test/tools/llvm-symbolizer/** + - llvm/tools/llvm-ar/** + - llvm/tools/llvm-cxxfilt/** + - llvm/tools/llvm-nm/** + - llvm/tools/llvm-objcopy/** + - llvm/tools/llvm-objdump/** + - llvm/tools/llvm-readobj/** + - llvm/tools/llvm-size/** + - llvm/tools/llvm-strings/** + - llvm/tools/llvm-symbolizer/** clang:openmp: - - clang/include/clang/Basic/OpenMP* - - clang/include/clang/AST/OpenMPClause.h - - clang/include/clang/AST/DeclOpenMP.h - - clang/include/clang/AST/ExprOpenMP.h - - clang/include/clang/AST/StmtOpenMP.h - - clang/lib/AST/DeclOpenMP.cpp - - clang/lib/AST/OpenMPClause.cpp - - clang/lib/AST/StmtOpenMP.cpp - - clang/lib/Headers/openmp_wrappers/** - - clang/lib/Parse/ParseOpenMP.cpp - - clang/lib/Basic/OpenMPKinds.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h - - clang/lib/CodeGen/CgStmtOpenMP.cpp - - clang/lib/CodeGen/CGOpenMP* - - clang/lib/Sema/SemaOpenMP.cpp - - clang/test/OpenMP/** - - clang/test/AST/ast-dump-openmp-* - - llvm/lib/Frontend/OpenMP/** - - llvm/lib/Transforms/IPO/OpenMPOpt.cpp - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h - - llvm/unittests/Frontend/OpenMP* - - llvm/test/Transforms/OpenMP/** + - changed-files: + - 
any-glob-to-any-file: + - clang/include/clang/Basic/OpenMP* + - clang/include/clang/AST/OpenMPClause.h + - clang/include/clang/AST/DeclOpenMP.h + - clang/include/clang/AST/ExprOpenMP.h + - clang/include/clang/AST/StmtOpenMP.h + - clang/lib/AST/DeclOpenMP.cpp + - clang/lib/AST/OpenMPClause.cpp + - clang/lib/AST/StmtOpenMP.cpp + - clang/lib/Headers/openmp_wrappers/** + - clang/lib/Parse/ParseOpenMP.cpp + - clang/lib/Basic/OpenMPKinds.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h + - clang/lib/CodeGen/CgStmtOpenMP.cpp + - clang/lib/CodeGen/CGOpenMP* + - clang/lib/Sema/SemaOpenMP.cpp + - clang/test/OpenMP/** + - clang/test/AST/ast-dump-openmp-* + - llvm/lib/Frontend/OpenMP/** + - llvm/lib/Transforms/IPO/OpenMPOpt.cpp + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h + - llvm/unittests/Frontend/OpenMP* + - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** - - clang/unittests/Analysis/LifetimeSafety* - - clang/test/Sema/*lifetime-safety* - - clang/test/Sema/*lifetime-analysis* - - clang/test/Analysis/LifetimeSafety/** + - changed-files: + - any-glob-to-any-file: + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** clang:as-a-library: - - clang/tools/libclang/** - - clang/bindings/** - - clang/include/clang-c/** - - clang/test/LibClang/** - - clang/unittest/libclang/** + - changed-files: + - any-glob-to-any-file: + - clang/tools/libclang/** + - clang/bindings/** + - clang/include/clang-c/** + - clang/test/LibClang/** + - clang/unittest/libclang/** openmp:libomp: - - any: ['openmp/**', '!openmp/libomptarget/**'] + - changed-files: + - any-glob-to-any-file: + - 
'openmp/**' openmp:libomptarget: - - any: ['openmp/**', '!openmp/runtime/**'] + - changed-files: + - all-globs-to-all-files: + - openmp/** + - '!openmp/runtime/**' bazel: - - utils/bazel/** + - changed-files: + - any-glob-to-any-file: + - utils/bazel/** offload: - - offload/** + - changed-files: + - any-glob-to-any-file: + - offload/** tablegen: - - llvm/include/TableGen/** - - llvm/lib/TableGen/** - - llvm/utils/TableGen/** + - changed-files: + - any-glob-to-any-file: + - llvm/include/TableGen/** + - llvm/lib/TableGen/** + - llvm/utils/TableGen/** infrastructure: - - .ci/** + - changed-files: + - any-glob-to-any-file: + - .ci/** diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index e1f2e754c1a3d..0d97e436d39c4 100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -67,9 +67,7 @@ jobs: github.event.pull_request.draft == false && github.event.pull_request.commits < 10 steps: - - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0 + - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 with: configuration-path: .github/new-prs-labeler.yml - # workaround for https://github.com/actions/labeler/issues/112 - sync-labels: '' repo-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }} From d772663a9f003a08ee76414397963c58e80b27d7 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 18 Nov 2025 17:52:59 +0000 Subject: [PATCH 18/57] Revert "[Github] Update PR labeller to v6.0.1 (#167246)" This reverts commit bd8c94177537ba30c6a160afa6dd1b8b8fc1e813. This still broke things and evidently needs more testing on a fork before relanding.
https://github.com/llvm/llvm-project/actions/runs/19475911086 --- .github/new-prs-labeler.yml | 1942 ++++++++++++++------------------- .github/workflows/new-prs.yml | 4 +- 2 files changed, 815 insertions(+), 1131 deletions(-) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index bb0eef5842b0f..efdc42d349195 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,1449 +1,1131 @@ BOLT: - - changed-files: - - any-glob-to-any-file: - - bolt/**/* + - bolt/**/* ClangIR: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/CIR/**/* - - clang/lib/CIR/**/* - - clang/tools/cir-*/**/* - - clang/test/CIR/**/* + - clang/include/clang/CIR/**/* + - clang/lib/CIR/**/* + - clang/tools/cir-*/**/* + - clang/test/CIR/**/* clang:bytecode: - - changed-files: - - any-glob-to-any-file: - - clang/docs/ConstantInterpreter.rst - - clang/lib/AST/ByteCode/**/* - - clang/test/AST/ByteCode/**/* - - clang/unittests/AST/ByteCode/**/* + - clang/docs/ConstantInterpreter.rst + - clang/lib/AST/ByteCode/**/* + - clang/test/AST/ByteCode/**/* + - clang/unittests/AST/ByteCode/**/* clang:dataflow: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Analysis/FlowSensitive/**/* - - clang/lib/Analysis/FlowSensitive/**/* - - clang/unittests/Analysis/FlowSensitive/**/* - - clang/docs/DataFlowAnalysisIntro.md - - clang/docs/DataFlowAnalysisIntroImages/**/* + - clang/include/clang/Analysis/FlowSensitive/**/* + - clang/lib/Analysis/FlowSensitive/**/* + - clang/unittests/Analysis/FlowSensitive/**/* + - clang/docs/DataFlowAnalysisIntro.md + - clang/docs/DataFlowAnalysisIntroImages/**/* clang:frontend: - - changed-files: - - any-glob-to-any-file: - - clang/lib/AST/**/* - - clang/include/clang/AST/**/* - - clang/lib/Basic/**/* - - clang/include/clang/Basic/**/* - - clang/lib/Interpreter/**/* - - clang/include/clang/Interpreter/**/* - - clang/lib/Lex/**/* - - clang/include/clang/Lex/**/* - - clang/lib/Parse/**/* - - 
clang/include/clang/Parse/**/* - - clang/lib/Sema/**/* - - clang/include/clang/Sema/**/* + - clang/lib/AST/**/* + - clang/include/clang/AST/**/* + - clang/lib/Basic/**/* + - clang/include/clang/Basic/**/* + - clang/lib/Interpreter/**/* + - clang/include/clang/Interpreter/**/* + - clang/lib/Lex/**/* + - clang/include/clang/Lex/**/* + - clang/lib/Parse/**/* + - clang/include/clang/Parse/**/* + - clang/lib/Sema/**/* + - clang/include/clang/Sema/**/* clang:headers: - - changed-files: - - any-glob-to-any-file: - - clang/lib/Headers/**/* + - clang/lib/Headers/**/* compiler-rt: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/**/* + - compiler-rt/**/* flang: - - changed-files: - - any-glob-to-any-file: - - flang/**/* + - flang/**/* flang:frontend: - - changed-files: - - any-glob-to-any-file: - - flang/Parser/**/* - - flang/Evaluate/**/* - - flang/Semantics/**/* + - flang/Parser/**/* + - flang/Evaluate/**/* + - flang/Semantics/**/* libclc: - - changed-files: - - any-glob-to-any-file: - - libclc/** + - libclc/** HLSL: - - changed-files: - - any-glob-to-any-file: - - clang/*HLSL*/**/* - - clang/**/*HLSL* - - llvm/**/Frontend/HLSL/**/* + - clang/*HLSL*/**/* + - clang/**/*HLSL* + - llvm/**/Frontend/HLSL/**/* lld: - - changed-files: - - any-glob-to-any-file: - - lld/**/* + - lld/**/* llvm-lit: - - changed-files: - - any-glob-to-any-file: - - llvm/utils/lit/**/* + - llvm/utils/lit/**/* PGO: - - changed-files: - - any-glob-to-any-file: - - llvm/**/ProfileData/**/* - - llvm/**/SampleProfile* - - llvm/**/CodeGen/MIRSampleProfile* - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/**/* - - llvm/test/Transforms/PGOProfile/**/* - - 
llvm/test/Transforms/SampleProfile/**/* - - llvm/**/llvm-profdata/**/* - - llvm/**/llvm-profgen/**/* + - llvm/**/ProfileData/**/* + - llvm/**/SampleProfile* + - llvm/**/CodeGen/MIRSampleProfile* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/**/* + - llvm/test/Transforms/PGOProfile/**/* + - llvm/test/Transforms/SampleProfile/**/* + - llvm/**/llvm-profdata/**/* + - llvm/**/llvm-profgen/**/* vectorizers: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Vectorize/**/* - - llvm/include/llvm/Transforms/Vectorize/**/* + - llvm/lib/Transforms/Vectorize/**/* + - llvm/include/llvm/Transforms/Vectorize/**/* # IMPORTED FROM CODEOWNERS LTO: - - changed-files: - - any-glob-to-any-file: - - llvm/*/LTO/** - - llvm/*/Linker/** - - llvm/*/ThinLTO/** - - llvm/lib/Transforms/*/FunctionImport* - - llvm/tools/gold/** + - llvm/*/LTO/** + - llvm/*/Linker/** + - llvm/*/ThinLTO/** + - llvm/lib/Transforms/*/FunctionImport* + - llvm/tools/gold/** clang:driver: - - changed-files: - - any-glob-to-any-file: - - clang/*/Driver/** + - clang/*/Driver/** compiler-rt:asan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/asan/** - - compiler-rt/include/sanitizer/asan_interface.h - - compiler-rt/test/asan/** - - compiler-rt/lib/asan_abi/** - - compiler-rt/test/asan_abi/** + - compiler-rt/lib/asan/** + - compiler-rt/include/sanitizer/asan_interface.h + - compiler-rt/test/asan/** + - compiler-rt/lib/asan_abi/** + - compiler-rt/test/asan_abi/** compiler-rt:builtins: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/builtins/** - - compiler-rt/test/builtins/** + - compiler-rt/lib/builtins/** + - 
compiler-rt/test/builtins/** compiler-rt:cfi: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/cfi/** - - compiler-rt/test/cfi/** + - compiler-rt/lib/cfi/** + - compiler-rt/test/cfi/** compiler-rt:fuzzer: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** compiler-rt:hwasan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/hwasan/** - - compiler-rt/include/sanitizer/hwasan_interface.h - - compiler-rt/test/hwasan/** + - compiler-rt/lib/hwasan/** + - compiler-rt/include/sanitizer/hwasan_interface.h + - compiler-rt/test/hwasan/** compiler-rt:lsan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/lsan/** - - compiler-rt/include/sanitizer/lsan_interface.h - - compiler-rt/test/lsan/** + - compiler-rt/lib/lsan/** + - compiler-rt/include/sanitizer/lsan_interface.h + - compiler-rt/test/lsan/** compiler-rt:msan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/msan/** - - compiler-rt/include/sanitizer/msan_interface.h - - compiler-rt/test/msan/** + - compiler-rt/lib/msan/** + - compiler-rt/include/sanitizer/msan_interface.h + - compiler-rt/test/msan/** compiler-rt:sanitizer: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Instrumentation/*Sanitizer* - - compiler-rt/lib/interception/** - - compiler-rt/lib/*san*/** - - compiler-rt/include/sanitizer/** - - compiler-rt/test/*san*/** - - compiler-rt/lib/fuzzer/** - - compiler-rt/include/fuzzer/** - - compiler-rt/test/fuzzer/** - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - llvm/lib/Transforms/Instrumentation/*Sanitizer* + - compiler-rt/lib/interception/** + - compiler-rt/lib/*san*/** + - compiler-rt/include/sanitizer/** + - compiler-rt/test/*san*/** + - compiler-rt/lib/fuzzer/** + - compiler-rt/include/fuzzer/** + - compiler-rt/test/fuzzer/** + - 
compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:scudo: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/scudo/** - - compiler-rt/test/scudo/** + - compiler-rt/lib/scudo/** + - compiler-rt/test/scudo/** compiler-rt:tsan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/tsan/** - - compiler-rt/include/sanitizer/tsan_interface.h - - compiler-rt/include/sanitizer/tsan_interface_atomic.h - - compiler-rt/test/tsan/** + - compiler-rt/lib/tsan/** + - compiler-rt/include/sanitizer/tsan_interface.h + - compiler-rt/include/sanitizer/tsan_interface_atomic.h + - compiler-rt/test/tsan/** compiler-rt:ubsan: - - changed-files: - - any-glob-to-any-file: - - compiler-rt/lib/ubsan/** - - compiler-rt/include/sanitizer/ubsan_interface.h - - compiler-rt/test/ubsan/** - - compiler-rt/lib/ubsan_minimal/** - - compiler-rt/test/ubsan_minimal/** + - compiler-rt/lib/ubsan/** + - compiler-rt/include/sanitizer/ubsan_interface.h + - compiler-rt/test/ubsan/** + - compiler-rt/lib/ubsan_minimal/** + - compiler-rt/test/ubsan_minimal/** xray: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-xray/** - - compiler-rt/*/xray/** - - clang/include/clang/Basic/XRay* - - clang/lib/Basic/XRay* - - compiler-rt/*/xray/** - - llvm/include/llvm/XRay/** - - llvm/lib/XRay/** - - llvm/tools/llvm-xray/** - - llvm/unittests/XRay/** - - compiler-rt/*/xray/** + - llvm/tools/llvm-xray/** + - compiler-rt/*/xray/** + - clang/include/clang/Basic/XRay* + - clang/lib/Basic/XRay* + - compiler-rt/*/xray/** + - llvm/include/llvm/XRay/** + - llvm/lib/XRay/** + - llvm/tools/llvm-xray/** + - llvm/unittests/XRay/** + - compiler-rt/*/xray/** clang:codegen: - - changed-files: - - any-glob-to-any-file: - - clang/lib/CodeGen/** - - clang/include/clang/CodeGen/** + - clang/lib/CodeGen/** + - clang/include/clang/CodeGen/** mlir: - - changed-files: - - any-glob-to-any-file: - - mlir/** + - mlir/** mlir:core: - - changed-files: - - any-glob-to-any-file: - - 
mlir/include/mlir/Support/** - - mlir/lib/Support/** - - mlir/include/mlir/Parser/** - - mlir/lib/Parser/** - - mlir/include/mlir/IR/** - - mlir/lib/IR/** - - mlir/include/mlir/Bytecode/** - - mlir/lib/Bytecode/** - - mlir/include/mlir/AsmParser/** - - mlir/lib/AsmParser/** - - mlir/include/mlir/Pass/** - - mlir/lib/Pass/** - - mlir/include/mlir/Tools/** - - mlir/lib/Tools/** - - mlir/include/mlir/Reducer/** - - mlir/lib/Reducer/** - - mlir/include/mlir/Transforms/** - - mlir/lib/Transforms/** - - mlir/include/mlir/Debug/** - - mlir/lib/Debug/** - - mlir/tools/** + - mlir/include/mlir/Support/** + - mlir/lib/Support/** + - mlir/include/mlir/Parser/** + - mlir/lib/Parser/** + - mlir/include/mlir/IR/** + - mlir/lib/IR/** + - mlir/include/mlir/Bytecode/** + - mlir/lib/Bytecode/** + - mlir/include/mlir/AsmParser/** + - mlir/lib/AsmParser/** + - mlir/include/mlir/Pass/** + - mlir/lib/Pass/** + - mlir/include/mlir/Tools/** + - mlir/lib/Tools/** + - mlir/include/mlir/Reducer/** + - mlir/lib/Reducer/** + - mlir/include/mlir/Transforms/** + - mlir/lib/Transforms/** + - mlir/include/mlir/Debug/** + - mlir/lib/Debug/** + - mlir/tools/** mlir:ods: - - changed-files: - - any-glob-to-any-file: - - mlir/TableGen/** - - mlir/tblgen/** - - mlir/include/mlir/IR/*.td + - mlir/TableGen/** + - mlir/tblgen/** + - mlir/include/mlir/IR/*.td mlir:bindings: - - changed-files: - - any-glob-to-any-file: - - mlir/Bindings/** + - mlir/Bindings/** mlir:gpu: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*GPU*/** + - mlir/**/*GPU*/** mlir:amdgpu: - - changed-files: - - any-glob-to-any-file: - - mlir/**/AMDGPU/** + - mlir/**/AMDGPU/** mlir:amx: - - changed-files: - - any-glob-to-any-file: - - mlir/**/AMX/** + - mlir/**/AMX/** mlir:affine: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Affine/** + - mlir/**/Affine/** mlir:arith: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Arith/** + - mlir/**/Arith/** mlir:neon: - - changed-files: - - any-glob-to-any-file: - - 
mlir/**/ArmNeon/** + - mlir/**/ArmNeon/** mlir:sme: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ArmSME/** + - mlir/**/ArmSME/** mlir:sve: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ArmSVE/** + - mlir/**/ArmSVE/** mlir:async: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Async/** - - mlir/**/Async/** + - mlir/**/Async/** + - mlir/**/Async/** mlir:bufferization: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Bufferization/** + - mlir/**/Bufferization/** mlir:complex: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Complex/** + - mlir/**/Complex/** mlir:cf: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ControlFlow/** + - mlir/**/ControlFlow/** mlir:dlti: - - changed-files: - - any-glob-to-any-file: - - mlir/**/DLTI/** + - mlir/**/DLTI/** mlir:emitc: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*EmitC*/** - - mlir/lib/Target/Cpp/** + - mlir/**/*EmitC*/** + - mlir/lib/Target/Cpp/** mlir:func: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Func/** + - mlir/**/Func/** mlir:irdl: - - changed-files: - - any-glob-to-any-file: - - mlir/**/IRDL/** + - mlir/**/IRDL/** mlir:index: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Index/** + - mlir/**/Index/** mlir:llvm: - - changed-files: - - any-glob-to-any-file: - - mlir/**/LLVM* - - mlir/**/LLVM*/** + - mlir/**/LLVM* + - mlir/**/LLVM*/** mlir:linalg: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*linalg/** - - mlir/**/*Linalg/** + - mlir/**/*linalg/** + - mlir/**/*Linalg/** mlir:mlprogram: - - changed-files: - - any-glob-to-any-file: - - mlir/**/MLProgram/** + - mlir/**/MLProgram/** mlir:math: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Math/** + - mlir/**/Math/** mlir:memref: - - changed-files: - - any-glob-to-any-file: - - mlir/**/MemRef/** + - mlir/**/MemRef/** mlir:nvgpu: - - changed-files: - - any-glob-to-any-file: - - mlir/**/NVGPU/** + - mlir/**/NVGPU/** mlir:openacc: - - changed-files: - - 
any-glob-to-any-file: - - mlir/**/*OpenACC* - - mlir/**/*OpenACC*/** + - mlir/**/*OpenACC* + - mlir/**/*OpenACC*/** mlir:openmp: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*OpenMP* - - mlir/**/*OpenMP*/** + - mlir/**/*OpenMP* + - mlir/**/*OpenMP*/** mlir:pdl: - - changed-files: - - any-glob-to-any-file: - - mlir/**/PDL/** + - mlir/**/PDL/** mlir:quant: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Quant/** + - mlir/**/Quant/** mlir:scf: - - changed-files: - - any-glob-to-any-file: - - mlir/**/SCF/** + - mlir/**/SCF/** mlir:spirv: - - changed-files: - - any-glob-to-any-file: - - mlir/**/SPIRV/** - - mlir/**/SPIRVTo*/** - - mlir/**/*ToSPIRV/** - - mlir/tools/mlir-spirv-cpu-runner/** - - mlir/tools/mlir-vulkan-runner/** - - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp + - mlir/**/SPIRV/** + - mlir/**/SPIRVTo*/** + - mlir/**/*ToSPIRV/** + - mlir/tools/mlir-spirv-cpu-runner/** + - mlir/tools/mlir-vulkan-runner/** + - mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp mlir:shape: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Shape/** + - mlir/**/Shape/** mlir:sparse: - - changed-files: - - any-glob-to-any-file: - - mlir/**/SparseTensor/** + - mlir/**/SparseTensor/** mlir:tensor: - - changed-files: - - any-glob-to-any-file: - - mlir/**/Tensor/** + - mlir/**/Tensor/** mlir:tosa: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*Tosa*/** + - mlir/**/*Tosa*/** mlir:ub: - - changed-files: - - any-glob-to-any-file: - - mlir/**/UB/** + - mlir/**/UB/** mlir:vector: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*Vector/** + - mlir/**/*Vector/** mlir:execution-engine: - - changed-files: - - any-glob-to-any-file: - - mlir/**/ExecutionEngine/** + - mlir/**/ExecutionEngine/** mlir:presburger: - - changed-files: - - any-glob-to-any-file: - - mlir/**/*Presburger*/** + - mlir/**/*Presburger*/** mlir:python: - - changed-files: - - any-glob-to-any-file: - - mlir/python/**/* + - mlir/python/**/* mlir:vectorops: - - changed-files: - - 
any-glob-to-any-file: - - mlir/**/Vector/**/* + - mlir/**/Vector/**/* coroutines: - - changed-files: - - any-glob-to-any-file: - - clang/docs/DebuggingCoroutines.rst - - clang/lib/Sema/SemaCoroutine.cpp - - clang/lib/CodeGen/CGCoroutine.cpp - - clang/test/CodeGenCoroutines/** - - llvm/docs/Coroutines.rst - - llvm/include/llvm/Transforms/Coroutines/** - - llvm/lib/Transforms/Coroutines/** - - llvm/test/Transforms/Coroutines/* + - clang/docs/DebuggingCoroutines.rst + - clang/lib/Sema/SemaCoroutine.cpp + - clang/lib/CodeGen/CGCoroutine.cpp + - clang/test/CodeGenCoroutines/** + - llvm/docs/Coroutines.rst + - llvm/include/llvm/Transforms/Coroutines/** + - llvm/lib/Transforms/Coroutines/** + - llvm/test/Transforms/Coroutines/* clang:modules: - - changed-files: - - any-glob-to-any-file: - - clang/docs/StandardCPlusPlusModules.rst - - clang/include/clang/AST/AbstractBasicReader.h - - clang/include/clang/AST/AbstractBasicWriter.h - - clang/include/clang/AST/AbstractTypeReader.h - - clang/include/clang/AST/AbstractTypeWriter.h - - clang/include/clang/AST/PropertiesBase.td - - clang/include/clang/AST/ODRHash.h - - clang/include/clang/AST/TypeProperties.td - - clang/include/clang/Basic/Module.h - - clang/include/clang/Frontend/PrecompiledPreamble.h - - clang/include/clang/Lex/ModuleLoader.h - - clang/include/clang/Lex/ModuleMap.h - - clang/include/clang/Serialization/** - - clang/lib/AST/ODRHash.cpp - - clang/lib/AST/StmtProfile.cpp - - clang/lib/Basic/Module.cpp - - clang/lib/Frontend/ModuleDependencyCollector.cpp - - clang/lib/Frontend/PrecompiledPreamble.cpp - - clang/lib/Lex/ModuleMap.cpp - - clang/lib/Sema/SemaModule.cpp - - clang/lib/Serialization/** - - clang/test/CXX/module/** - - clang/test/Modules/** - - clang/unittests/Serialization/* + - clang/docs/StandardCPlusPlusModules.rst + - clang/include/clang/AST/AbstractBasicReader.h + - clang/include/clang/AST/AbstractBasicWriter.h + - clang/include/clang/AST/AbstractTypeReader.h + - 
clang/include/clang/AST/AbstractTypeWriter.h + - clang/include/clang/AST/PropertiesBase.td + - clang/include/clang/AST/ODRHash.h + - clang/include/clang/AST/TypeProperties.td + - clang/include/clang/Basic/Module.h + - clang/include/clang/Frontend/PrecompiledPreamble.h + - clang/include/clang/Lex/ModuleLoader.h + - clang/include/clang/Lex/ModuleMap.h + - clang/include/clang/Serialization/** + - clang/lib/AST/ODRHash.cpp + - clang/lib/AST/StmtProfile.cpp + - clang/lib/Basic/Module.cpp + - clang/lib/Frontend/ModuleDependencyCollector.cpp + - clang/lib/Frontend/PrecompiledPreamble.cpp + - clang/lib/Lex/ModuleMap.cpp + - clang/lib/Sema/SemaModule.cpp + - clang/lib/Serialization/** + - clang/test/CXX/module/** + - clang/test/Modules/** + - clang/unittests/Serialization/* clang-tidy: - - changed-files: - - any-glob-to-any-file: - - clang-tools-extra/clang-tidy/** - - clang-tools-extra/docs/clang-tidy/** - - clang-tools-extra/test/clang-tidy/** + - clang-tools-extra/clang-tidy/** + - clang-tools-extra/docs/clang-tidy/** + - clang-tools-extra/test/clang-tidy/** clang-tools-extra: - - changed-files: - - any-glob-to-any-file: - - clang-tools-extra/** + - clang-tools-extra/** tools:llvm-mca: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-mca/** - - llvm/include/llvm/MCA/** - - llvm/lib/MCA/** + - llvm/tools/llvm-mca/** + - llvm/include/llvm/MCA/** + - llvm/lib/MCA/** clang: - - changed-files: - - all-globs-to-all-file: - - clang/** - - '!clang/**/Format/**' - - '!clang/tools/clang-format/**' + - any: + - clang/** + - '!clang/**/Format/**' + - '!clang/tools/clang-format/**' testing-tools: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/FileCheck/** - - llvm/lib/FileCheck/** - - llvm/test/FileCheck/** - - llvm/unittests/FileCheck/** - - llvm/utils/lit/** - - llvm/utils/split-file/** - - llvm/utils/not/** - - llvm/utils/count/** - - llvm/utils/FileCheck/** - - llvm/docs/CommandGuide/FileCheck.rst - - llvm/docs/CommandGuide/lit.rst - - 
llvm/docs/TestingGuide.rst - - llvm/test/Other/FileCheck-space.txt - - llvm/utils/UpdateTestChecks/** - - llvm/utils/update*_test_checks.py + - llvm/include/llvm/FileCheck/** + - llvm/lib/FileCheck/** + - llvm/test/FileCheck/** + - llvm/unittests/FileCheck/** + - llvm/utils/lit/** + - llvm/utils/split-file/** + - llvm/utils/not/** + - llvm/utils/count/** + - llvm/utils/FileCheck/** + - llvm/docs/CommandGuide/FileCheck.rst + - llvm/docs/CommandGuide/lit.rst + - llvm/docs/TestingGuide.rst + - llvm/test/Other/FileCheck-space.txt + - llvm/utils/UpdateTestChecks/** + - llvm/utils/update*_test_checks.py debuginfo: - - changed-files: - - any-glob-to-any-file: - - clang/lib/CodeGen/CGDebugInfo.* - - llvm/include/llvm/BinaryFormat/Dwarf.* - - llvm/include/llvm/CodeGen/*Debug*.* - - llvm/include/llvm/DebugInfo/** - - llvm/include/llvm/Debuginfod/** - - llvm/include/llvm/Frontend/Debug/** - - llvm/include/llvm/IR/Debug*.* - - llvm/include/llvm/Object/*Debug*.* - - llvm/include/llvm/ObjectYAML/*Debug*.* - - llvm/include/llvm/Transforms/Utils/*Debug*.* - - llvm/include/llvm-c/DebugInfo.h - - llvm/lib/BinaryFormat/Dwarf.cpp - - llvm/lib/CodeGen/AsmPrinter/*Debug*.* - - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* - - llvm/lib/CodeGen/AsmPrinter/DIE*.* - - llvm/lib/CodeGen/LiveDebugValues/** - - llvm/lib/CodeGen/*Debug*.* - - llvm/lib/CodeGen/DwarfEHPrepare.cpp - - llvm/lib/DebugInfo/** - - llvm/lib/Debuginfod/** - - llvm/lib/DWARFLinkerParallel/** - - llvm/lib/IR/Debug*.cpp - - llvm/lib/MC/MCDwarf.cpp - - llvm/lib/Transforms/Utils/*Debug*.* - - llvm/test/DebugInfo/** - - llvm/test/tools/dsymutil/** - - llvm/test/tools/llvm-debuginfo-analyzer/** - - llvm/test/tools/llvm-debuginfod/** - - llvm/test/tools/llvm-debuginfod-find/** - - llvm/test/tools/llvm-dwarfdump/** - - llvm/test/tools/llvm-dwarfutil/** - - llvm/test/tools/llvm-dwp/** - - llvm/test/tools/llvm-gsymutil/** - - llvm/test/tools/llvm-pdbuti/** - - llvm/tools/dsymutil/** - - llvm/tools/llvm-debuginfo-analyzer/** - - 
llvm/tools/llvm-debuginfod/** - - llvm/tools/llvm-debuginfod-find/** - - llvm/tools/llvm-dwarfdump/** - - llvm/tools/llvm-dwarfutil/** - - llvm/tools/llvm-dwp/** - - llvm/tools/llvm-gsymutil/** - - llvm/tools/llvm-pdbutil/** + - clang/lib/CodeGen/CGDebugInfo.* + - llvm/include/llvm/BinaryFormat/Dwarf.* + - llvm/include/llvm/CodeGen/*Debug*.* + - llvm/include/llvm/DebugInfo/** + - llvm/include/llvm/Debuginfod/** + - llvm/include/llvm/Frontend/Debug/** + - llvm/include/llvm/IR/Debug*.* + - llvm/include/llvm/Object/*Debug*.* + - llvm/include/llvm/ObjectYAML/*Debug*.* + - llvm/include/llvm/Transforms/Utils/*Debug*.* + - llvm/include/llvm-c/DebugInfo.h + - llvm/lib/BinaryFormat/Dwarf.cpp + - llvm/lib/CodeGen/AsmPrinter/*Debug*.* + - llvm/lib/CodeGen/AsmPrinter/Dwarf*.* + - llvm/lib/CodeGen/AsmPrinter/DIE*.* + - llvm/lib/CodeGen/LiveDebugValues/** + - llvm/lib/CodeGen/*Debug*.* + - llvm/lib/CodeGen/DwarfEHPrepare.cpp + - llvm/lib/DebugInfo/** + - llvm/lib/Debuginfod/** + - llvm/lib/DWARFLinkerParallel/** + - llvm/lib/IR/Debug*.cpp + - llvm/lib/MC/MCDwarf.cpp + - llvm/lib/Transforms/Utils/*Debug*.* + - llvm/test/DebugInfo/** + - llvm/test/tools/dsymutil/** + - llvm/test/tools/llvm-debuginfo-analyzer/** + - llvm/test/tools/llvm-debuginfod/** + - llvm/test/tools/llvm-debuginfod-find/** + - llvm/test/tools/llvm-dwarfdump/** + - llvm/test/tools/llvm-dwarfutil/** + - llvm/test/tools/llvm-dwp/** + - llvm/test/tools/llvm-gsymutil/** + - llvm/test/tools/llvm-pdbuti/** + - llvm/tools/dsymutil/** + - llvm/tools/llvm-debuginfo-analyzer/** + - llvm/tools/llvm-debuginfod/** + - llvm/tools/llvm-debuginfod-find/** + - llvm/tools/llvm-dwarfdump/** + - llvm/tools/llvm-dwarfutil/** + - llvm/tools/llvm-dwp/** + - llvm/tools/llvm-gsymutil/** + - llvm/tools/llvm-pdbutil/** github:workflow: - - changed-files: - - any-glob-to-any-file: - - .github/workflows/** + - .github/workflows/** cmake: - - changed-files: - - any-glob-to-any-file: - - cmake/** - - llvm/cmake/** - - runtimes/** + - cmake/** 
+ - llvm/cmake/** + - runtimes/** flang:driver: - - changed-files: - - any-glob-to-any-file: - - flang/tools/flang-driver/** - - flang/unittests/Frontend/** - - flang/lib/FrontendTool/** - - flang/lib/Frontend/** - - flang/include/flang/Frontend/** - - flang/include/flang/FrontendTool/** - - flang/test/Driver/** + - flang/tools/flang-driver/** + - flang/unittests/Frontend/** + - flang/lib/FrontendTool/** + - flang/lib/Frontend/** + - flang/include/flang/Frontend/** + - flang/include/flang/FrontendTool/** + - flang/test/Driver/** backend:m68k: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/M68k/** - - clang/lib/Basic/Targets/M68k.* - - clang/lib/CodeGen/Targets/M68k.cpp - - llvm/test/CodeGen/M68k/** - - llvm/test/MC/Disassembler/M68k/** - - llvm/test/MC/M68k/** + - llvm/lib/Target/M68k/** + - clang/lib/Basic/Targets/M68k.* + - clang/lib/CodeGen/Targets/M68k.cpp + - llvm/test/CodeGen/M68k/** + - llvm/test/MC/Disassembler/M68k/** + - llvm/test/MC/M68k/** libc++: - - changed-files: - - any-glob-to-any-file: - - libcxx/** - - .github/workflows/libcxx-* + - libcxx/** + - .github/workflows/libcxx-* libc++abi: - - changed-files: - - any-glob-to-any-file: - - libcxxabi/** + - libcxxabi/** libunwind: - - changed-files: - - any-glob-to-any-file: - - libunwind/** + - libunwind/** objectyaml: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/ObjectYAML/** - - llvm/lib/ObjectYAML/** - - llvm/test/tools/obj2yaml/** - - llvm/test/tools/yaml2obj/** - - llvm/tools/obj2yaml/** - - llvm/tools/yaml2obj/** + - llvm/include/llvm/ObjectYAML/** + - llvm/lib/ObjectYAML/** + - llvm/test/tools/obj2yaml/** + - llvm/test/tools/yaml2obj/** + - llvm/tools/obj2yaml/** + - llvm/tools/yaml2obj/** clang:analysis: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Analysis/** - - clang/lib/Analysis/** + - clang/include/clang/Analysis/** + - clang/lib/Analysis/** clang:static analyzer: - - changed-files: - - any-glob-to-any-file: - - 
clang/include/clang/StaticAnalyzer/** - - clang/lib/StaticAnalyzer/** - - clang/tools/scan-build/** - - clang/utils/analyzer/** - - clang/docs/analyzer/** - - clang/test/Analysis/** + - clang/include/clang/StaticAnalyzer/** + - clang/lib/StaticAnalyzer/** + - clang/tools/scan-build/** + - clang/utils/analyzer/** + - clang/docs/analyzer/** + - clang/test/Analysis/** pgo: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/Instrumentation/CGProfile.cpp - - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp - - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp - - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp - - llvm/lib/Transforms/Instrumentation/PGO* - - llvm/lib/Transforms/Instrumentation/ValueProfile* - - llvm/test/Instrumentation/InstrProfiling/** - - llvm/test/Transforms/PGOProfile/** - - compiler-rt/lib/profile/** - - compiler-rt/lib/memprof/** - - compiler-rt/test/profile/** - - compiler-rt/test/memprof/** - - llvm/tools/llvm-profdata/** - - llvm/tools/llvm-profgen/** - - llvm/test/tools/llvm-profdata/** - - llvm/test/tools/llvm-profgen/** - - llvm/unittests/ProfileData/* + - llvm/lib/Transforms/Instrumentation/CGProfile.cpp + - llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp + - llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp + - llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp + - llvm/lib/Transforms/Instrumentation/PGO* + - llvm/lib/Transforms/Instrumentation/ValueProfile* + - llvm/test/Instrumentation/InstrProfiling/** + - llvm/test/Transforms/PGOProfile/** + - compiler-rt/lib/profile/** + - compiler-rt/lib/memprof/** + - compiler-rt/test/profile/** + - compiler-rt/test/memprof/** + - llvm/tools/llvm-profdata/** + - llvm/tools/llvm-profgen/** + - llvm/test/tools/llvm-profdata/** + - llvm/test/tools/llvm-profgen/** + - llvm/unittests/ProfileData/* openacc: - - changed-files: - - any-glob-to-any-file: - - flang/**/OpenACC/** - - flang/include/flang/Lower/OpenACC.h - - 
flang/docs/OpenACC.md - - flang/lib/Parser/openacc-parsers.cpp - - flang/lib/Lower/OpenACC.cpp - - llvm/**/Frontend/OpenACC/** - - llvm/unittests/Frontend/OpenACCTest.cpp - - mlir/test/Target/LLVMIR/openacc-llvm.mlir - - mlir/**/*OpenACC/** + - flang/**/OpenACC/** + - flang/include/flang/Lower/OpenACC.h + - flang/docs/OpenACC.md + - flang/lib/Parser/openacc-parsers.cpp + - flang/lib/Lower/OpenACC.cpp + - llvm/**/Frontend/OpenACC/** + - llvm/unittests/Frontend/OpenACCTest.cpp + - mlir/test/Target/LLVMIR/openacc-llvm.mlir + - mlir/**/*OpenACC/** flang:runtime: - - changed-files: - - any-glob-to-any-file: - - flang/runtime/** + - flang/runtime/** flang:parser: - - changed-files: - - any-glob-to-any-file: - - flang/**/Parser/** + - flang/**/Parser/** flang:semantics: - - changed-files: - - any-glob-to-any-file: - - flang/**/Evaluate/** - - flang/**/Semantics/** + - flang/**/Evaluate/** + - flang/**/Semantics/** flang:fir-hlfir: - - changed-files: - - any-glob-to-any-file: - - flang/**/Lower/** - - flang/**/Optimizer/** + - flang/**/Lower/** + - flang/**/Optimizer/** flang:codegen: - - changed-files: - - any-glob-to-any-file: - - flang/**/CodeGen/** + - flang/**/CodeGen/** llvm:codegen: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/CodeGen/* - - llvm/lib/CodeGen/MIRParser/* - - llvm/lib/CodeGen/LiveDebugValues/* - - llvm/lib/CodeGen/AsmPrinter/* + - llvm/lib/CodeGen/* + - llvm/lib/CodeGen/MIRParser/* + - llvm/lib/CodeGen/LiveDebugValues/* + - llvm/lib/CodeGen/AsmPrinter/* llvm:globalisel: - - changed-files: - - any-glob-to-any-file: - - llvm/**/GlobalISel/** - - llvm/utils/TableGen/GlobalISel* + - llvm/**/GlobalISel/** + - llvm/utils/TableGen/GlobalISel* function-specialization: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/Transforms/Utils/SCCPSolver.h - - llvm/lib/Transforms/Utils/SCCPSolver.cpp - - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h - - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp - - 
llvm/test/Transforms/FunctionSpecialization/* + - llvm/include/llvm/Transforms/Utils/SCCPSolver.h + - llvm/lib/Transforms/Utils/SCCPSolver.cpp + - llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h + - llvm/lib/Transforms/IPO/FunctionSpecialization.cpp + - llvm/test/Transforms/FunctionSpecialization/* libc: - - changed-files: - - any-glob-to-any-file: - - libc/** - - utils/bazel/llvm-project-overlay/libc/** + - libc/** + - utils/bazel/llvm-project-overlay/libc/** clang-format: - - changed-files: - - any-glob-to-any-file: - - clang/**/Format/** - - clang/tools/clang-format/** + - clang/**/Format/** + - clang/tools/clang-format/** flang:openmp: - - changed-files: - - any-glob-to-any-file: - - flang/test/**/OpenMP/** - - flang/lib/Lower/OpenMP.cpp - - flang/lib/Semantics/resolve-directives.cpp - - flang/lib/Semantics/check-omp-structure.cpp - - flang/lib/Optimizer/Transforms/OMP* - - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir - - flang/test/Lower/OpenMP/** - - flang/test/Transforms/omp* - - mlir/**/*OpenMP* - - mlir/test/Target/LLVMIR/openmp* - - llvm/lib/Frontend/OpenMP/** - - llvm/include/llvm/Frontend/OpenMP/** - - llvm/unittests/Frontend/OpenMP* + - flang/test/**/OpenMP/** + - flang/lib/Lower/OpenMP.cpp + - flang/lib/Semantics/resolve-directives.cpp + - flang/lib/Semantics/check-omp-structure.cpp + - flang/lib/Optimizer/Transforms/OMP* + - flang/test/Fir/convert-to-llvm-openmp-and-fir.fir + - flang/test/Lower/OpenMP/** + - flang/test/Transforms/omp* + - mlir/**/*OpenMP* + - mlir/test/Target/LLVMIR/openmp* + - llvm/lib/Frontend/OpenMP/** + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/unittests/Frontend/OpenMP* llvm:ir: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/IR/** - - llvm/include/llvm/IR/** - - llvm/docs/LangRef.rst - - llvm/unittests/IR/** + - llvm/lib/IR/** + - llvm/include/llvm/IR/** + - llvm/docs/LangRef.rst + - llvm/unittests/IR/** llvm:SandboxIR: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/SandboxIR/** - - 
llvm/include/llvm/SandboxIR/** - - llvm/docs/SandboxIR.md - - llvm/unittests/SandboxIR/** + - llvm/lib/SandboxIR/** + - llvm/include/llvm/SandboxIR/** + - llvm/docs/SandboxIR.md + - llvm/unittests/SandboxIR/** llvm:analysis: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Analysis/** - - llvm/include/llvm/Analysis/** - - llvm/test/Analysis/** - - llvm/unittests/Analysis/** + - llvm/lib/Analysis/** + - llvm/include/llvm/Analysis/** + - llvm/test/Analysis/** + - llvm/unittests/Analysis/** llvm:adt: - - changed-files: - - any-glob-to-any-file: - - llvm/**/ADT/* + - llvm/**/ADT/* llvm:support: - - changed-files: - - any-glob-to-any-file: - - llvm/**/Support/** + - llvm/**/Support/** # Skip llvm/test/MC and llvm/unittests/MC, which includes target-specific directories. llvm:mc: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/MC/** - - llvm/lib/MC/** - - llvm/tools/llvm-mc/** + - llvm/include/llvm/MC/** + - llvm/lib/MC/** + - llvm/tools/llvm-mc/** llvm:transforms: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Transforms/** - - llvm/include/llvm/Transforms/** - - llvm/test/Transforms/** - - llvm/unittests/Transforms/** + - llvm/lib/Transforms/** + - llvm/include/llvm/Transforms/** + - llvm/test/Transforms/** + - llvm/unittests/Transforms/** llvm:instcombine: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Analysis/InstructionSimplify.cpp - - llvm/lib/Transforms/InstCombine/** - - llvm/include/llvm/Transforms/InstCombine/ - - llvm/include/llvm/Analysis/InstructionSimplify.h - - llvm/test/Transforms/InstCombine/** - - llvm/test/Transforms/InstSimplify/** + - llvm/lib/Analysis/InstructionSimplify.cpp + - llvm/lib/Transforms/InstCombine/** + - llvm/include/llvm/Transforms/InstCombine/ + - llvm/include/llvm/Analysis/InstructionSimplify.h + - llvm/test/Transforms/InstCombine/** + - llvm/test/Transforms/InstSimplify/** llvm:vectorcombine: - - changed-files: - - any-glob-to-any-file: - - 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp - - llvm/test/Transforms/VectorCombine/** + - llvm/lib/Transforms/Vectorize/VectorCombine.cpp + - llvm/test/Transforms/VectorCombine/** clangd: - - changed-files: - - any-glob-to-any-file: - - clang-tools-extra/clangd/** + - clang-tools-extra/clangd/** hlsl: - - changed-files: - - any-glob-to-any-file: - - clang/test/ParserHLSL/** - - clang/test/SemaHLSL/** - - clang/test/AST/HLSL/** - - clang/test/CodeGenHLSL/** - - clang/cmake/caches/HLSL.cmake - - clang/include/clang/Basic/HLSL*.h - - clang/include/clang/Sema/HLSL*.h - - clang/docs/HLSL/** - - clang/lib/Driver/ToolChains/HLSL* - - clang/lib/Parse/ParseHLSL.cpp - - clang/lib/Sema/HLSLExternalSemaSource.cpp - - clang/lib/Sema/SemaHLSL.cpp - - clang/lib/CodeGen/CGHLSLRuntime.* - - clang/lib/CodeGen/CGHLSLBuiltins.cpp - - llvm/include/llvm/Frontend/HLSL/** - - llvm/lib/Frontend/HLSL/** + - clang/test/ParserHLSL/** + - clang/test/SemaHLSL/** + - clang/test/AST/HLSL/** + - clang/test/CodeGenHLSL/** + - clang/cmake/caches/HLSL.cmake + - clang/include/clang/Basic/HLSL*.h + - clang/include/clang/Sema/HLSL*.h + - clang/docs/HLSL/** + - clang/lib/Driver/ToolChains/HLSL* + - clang/lib/Parse/ParseHLSL.cpp + - clang/lib/Sema/HLSLExternalSemaSource.cpp + - clang/lib/Sema/SemaHLSL.cpp + - clang/lib/CodeGen/CGHLSLRuntime.* + - clang/lib/CodeGen/CGHLSLBuiltins.cpp + - llvm/include/llvm/Frontend/HLSL/** + - llvm/lib/Frontend/HLSL/** llvm:SelectionDAG: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/CodeGen/SelectionDAG*.h - - llvm/include/llvm/CodeGen/SDNodeProperties.td - - llvm/include/llvm/Target/TargetSelectionDAG.td - - llvm/lib/CodeGen/SelectionDAG/** - - llvm/utils/TableGen/CodeGenDAG* - - llvm/utils/TableGen/DAGISel* - - llvm/include/llvm/CodeGen/DAGCombine.h - - llvm/include/llvm/CodeGen/ISDOpcodes.h + - llvm/include/llvm/CodeGen/SelectionDAG*.h + - llvm/include/llvm/CodeGen/SDNodeProperties.td + - llvm/include/llvm/Target/TargetSelectionDAG.td + - 
llvm/lib/CodeGen/SelectionDAG/** + - llvm/utils/TableGen/CodeGenDAG* + - llvm/utils/TableGen/DAGISel* + - llvm/include/llvm/CodeGen/DAGCombine.h + - llvm/include/llvm/CodeGen/ISDOpcodes.h backend:DirectX: - - changed-files: - - any-glob-to-any-file: - - '**/*DirectX*' - - '**/*DXIL*' - - '**/*dxil*' - - '**/*DirectX*/**' - - '**/*DXIL*/**' - - '**/*dxil*/**' - - '**/*DXContainer*' - - '**/*DXContainer*/**' - - clang/lib/Sema/SemaDirectX.cpp - - clang/include/clang/Sema/SemaDirectX.h - - clang/include/clang/Basic/BuiltinsDirectX.td - - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp - - clang/test/CodeGenDirectX/** - - clang/test/SemaDirectX/** + - '**/*DirectX*' + - '**/*DXIL*' + - '**/*dxil*' + - '**/*DirectX*/**' + - '**/*DXIL*/**' + - '**/*dxil*/**' + - '**/*DXContainer*' + - '**/*DXContainer*/**' + - clang/lib/Sema/SemaDirectX.cpp + - clang/include/clang/Sema/SemaDirectX.h + - clang/include/clang/Basic/BuiltinsDirectX.td + - clang/lib/CodeGen/TargetBuiltins/DirectX.cpp + - clang/test/CodeGenDirectX/** + - clang/test/SemaDirectX/** backend:SPIR-V: - - changed-files: - - any-glob-to-any-file: - - clang/lib/Driver/ToolChains/SPIRV.* - - clang/lib/Sema/SemaSPIRV.cpp - - clang/include/clang/Sema/SemaSPIRV.h - - clang/include/clang/Basic/BuiltinsSPIRV.td - - clang/test/CodeGenSPIRV/** - - clang/test/SemaSPIRV/** - - llvm/lib/Target/SPIRV/** - - llvm/test/CodeGen/SPIRV/** - - llvm/test/Frontend/HLSL/** - - llvm/docs/SPIRVUsage.rst + - clang/lib/Driver/ToolChains/SPIRV.* + - clang/lib/Sema/SemaSPIRV.cpp + - clang/include/clang/Sema/SemaSPIRV.h + - clang/include/clang/Basic/BuiltinsSPIRV.td + - clang/test/CodeGenSPIRV/** + - clang/test/SemaSPIRV/** + - llvm/lib/Target/SPIRV/** + - llvm/test/CodeGen/SPIRV/** + - llvm/test/Frontend/HLSL/** + - llvm/docs/SPIRVUsage.rst mlgo: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Analysis/ML* - - llvm/include/llvm/Analysis/ML* - - llvm/lib/Analysis/*Runner.cpp - - llvm/include/llvm/Analysis/*Runner.h - - 
llvm/unittests/Analysis/ML* - - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp - - llvm/lib/Analysis/TrainingLogger.cpp - - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h - - llvm/include/llvm/Analysis/Utils/TrainingLogger.h - - llvm/test/Analysis/FunctionPropertiesAnalysis/* - - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp - - llvm/test/Transforms/inline/ML/** - - llvm/lib/CodeGen/ML* - - llvm/unittests/CodeGen/ML* - - llvm/test/CodeGen/MLRegAlloc/** - - llvm/utils/mlgo-utils/** - - llvm/docs/MLGO.rst - - llvm/include/llvm/Analysis/IR2Vec.h - - llvm/lib/Analysis/IR2Vec.cpp - - llvm/lib/Analysis/models/** - - llvm/include/llvm/CodeGen/MIR2Vec.h - - llvm/lib/CodeGen/MIR2Vec.cpp - - llvm/test/Analysis/IR2Vec/** - - llvm/test/CodeGen/MIR2Vec/** - - llvm/unittests/Analysis/IR2VecTest.cpp - - llvm/unittests/CodeGen/MIR2VecTest.cpp - - llvm/tools/llvm-ir2vec/** - - llvm/docs/CommandGuide/llvm-ir2vec.rst + - llvm/lib/Analysis/ML* + - llvm/include/llvm/Analysis/ML* + - llvm/lib/Analysis/*Runner.cpp + - llvm/include/llvm/Analysis/*Runner.h + - llvm/unittests/Analysis/ML* + - llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp + - llvm/lib/Analysis/TrainingLogger.cpp + - llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h + - llvm/include/llvm/Analysis/Utils/TrainingLogger.h + - llvm/test/Analysis/FunctionPropertiesAnalysis/* + - llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp + - llvm/test/Transforms/inline/ML/** + - llvm/lib/CodeGen/ML* + - llvm/unittests/CodeGen/ML* + - llvm/test/CodeGen/MLRegAlloc/** + - llvm/utils/mlgo-utils/** + - llvm/docs/MLGO.rst + - llvm/include/llvm/Analysis/IR2Vec.h + - llvm/lib/Analysis/IR2Vec.cpp + - llvm/lib/Analysis/models/** + - llvm/include/llvm/CodeGen/MIR2Vec.h + - llvm/lib/CodeGen/MIR2Vec.cpp + - llvm/test/Analysis/IR2Vec/** + - llvm/test/CodeGen/MIR2Vec/** + - llvm/unittests/Analysis/IR2VecTest.cpp + - llvm/unittests/CodeGen/MIR2VecTest.cpp + - llvm/tools/llvm-ir2vec/** + - 
llvm/docs/CommandGuide/llvm-ir2vec.rst tools:llvm-exegesis: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-exegesis/** - - llvm/test/tools/llvm-exegesis/** - - llvm/unittests/tools/llvm-exegesis/** + - llvm/tools/llvm-exegesis/** + - llvm/test/tools/llvm-exegesis/** + - llvm/unittests/tools/llvm-exegesis/** tools:llvm-reduce: - - changed-files: - - any-glob-to-any-file: - - llvm/tools/llvm-reduce/** + - llvm/tools/llvm-reduce/** platform:windows: - - changed-files: - - any-glob-to-any-file: - - lld/COFF/** - - clang/lib/Driver/MSVC.cpp - - clang/lib/Driver/MinGW.cpp - - llvm/lib/DebugInfo/CodeView/** - - llvm/lib/DebugInfo/PDB/** - - llvm/lib/WindowsDriver/** - - llvm/lib/Support/Windows/** - - llvm/lib/BinaryFormat/COFF.cpp + - lld/COFF/** + - clang/lib/Driver/MSVC.cpp + - clang/lib/Driver/MinGW.cpp + - llvm/lib/DebugInfo/CodeView/** + - llvm/lib/DebugInfo/PDB/** + - llvm/lib/WindowsDriver/** + - llvm/lib/Support/Windows/** + - llvm/lib/BinaryFormat/COFF.cpp llvm:regalloc: - - changed-files: - - any-glob-to-any-file: - - llvm/**/CodeGen/CalcSpillWeights* - - llvm/**/CodeGen/InlineSpiller* - - llvm/**/CodeGen/InterferenceCache* - - llvm/**/CodeGen/LiveInterval* - - llvm/**/CodeGen/LiveRange* - - llvm/**/CodeGen/LiveReg* - - llvm/**/CodeGen/LiveVariables* - - llvm/**/CodeGen/MachineCopyPropagation* - - llvm/**/CodeGen/PHIElimination* - - llvm/**/CodeGen/ProcessImplicitDefs.cpp - - llvm/**/CodeGen/Register* - - llvm/**/CodeGen/RegUsage* - - llvm/**/CodeGen/RenameIndependentSubregs.cpp - - llvm/**/CodeGen/SlotIndexes.h - - llvm/**/CodeGen/SpillPlacement* - - llvm/**/CodeGen/SplitKit* - - llvm/**/CodeGen/VirtRegMap.h - - llvm/include/PBQP/** - - llvm/include/PBQPRAConstraint.h - - llvm/include/llvm/CodeGen/Spiller.h - - llvm/**/*RegAlloc + - llvm/**/CodeGen/CalcSpillWeights* + - llvm/**/CodeGen/InlineSpiller* + - llvm/**/CodeGen/InterferenceCache* + - llvm/**/CodeGen/LiveInterval* + - llvm/**/CodeGen/LiveRange* + - llvm/**/CodeGen/LiveReg* + - 
llvm/**/CodeGen/LiveVariables* + - llvm/**/CodeGen/MachineCopyPropagation* + - llvm/**/CodeGen/PHIElimination* + - llvm/**/CodeGen/ProcessImplicitDefs.cpp + - llvm/**/CodeGen/Register* + - llvm/**/CodeGen/RegUsage* + - llvm/**/CodeGen/RenameIndependentSubregs.cpp + - llvm/**/CodeGen/SlotIndexes.h + - llvm/**/CodeGen/SpillPlacement* + - llvm/**/CodeGen/SplitKit* + - llvm/**/CodeGen/VirtRegMap.h + - llvm/include/PBQP/** + - llvm/include/PBQPRAConstraint.h + - llvm/include/llvm/CodeGen/Spiller.h + - llvm/**/*RegAlloc lldb: - - changed-files: - - any-glob-to-any-file: - - lldb/** + - lldb/** lldb-dap: - - changed-files: - - any-glob-to-any-file: - - lldb/tools/lldb-dap/** + - lldb/tools/lldb-dap/** backend:AMDGPU: - - changed-files: - - any-glob-to-any-file: - - '**/*amdgpu*' - - '**/*AMDGPU*' - - '**/*amdgpu*/**' - - '**/*AMDGPU*/**' + - '**/*amdgpu*' + - '**/*AMDGPU*' + - '**/*amdgpu*/**' + - '**/*AMDGPU*/**' backend:NVPTX: - - changed-files: - - any-glob-to-any-file: - - 'llvm/**/*nvvm*' - - 'llvm/**/*NVVM*' - - 'llvm/**/*nvptx*' - - 'llvm/**/*NVPTX*' - - 'llvm/**/*nvvm*/**' - - 'llvm/**/*NVVM*/**' - - 'llvm/**/*nvptx*/**' - - 'llvm/**/*NVPTX*/**' + - 'llvm/**/*nvvm*' + - 'llvm/**/*NVVM*' + - 'llvm/**/*nvptx*' + - 'llvm/**/*NVPTX*' + - 'llvm/**/*nvvm*/**' + - 'llvm/**/*NVVM*/**' + - 'llvm/**/*nvptx*/**' + - 'llvm/**/*NVPTX*/**' backend:MIPS: - - changed-files: - - any-glob-to-any-file: - - '**/*mips*' - - '**/*Mips*' - - '**/*mips*/**' - - '**/*Mips*/**' + - '**/*mips*' + - '**/*Mips*' + - '**/*mips*/**' + - '**/*Mips*/**' backend:RISC-V: - - changed-files: - - any-glob-to-any-file: - - '**/*riscv*' - - '**/*RISCV*' - - '**/*riscv*/**' - - '**/*RISCV*/**' + - '**/*riscv*' + - '**/*RISCV*' + - '**/*riscv*/**' + - '**/*RISCV*/**' backend:Xtensa: - - changed-files: - - any-glob-to-any-file: - - '**/*xtensa*' - - '**/*Xtensa*' - - '**/*xtensa*/**' - - '**/*Xtensa*/**' + - '**/*xtensa*' + - '**/*Xtensa*' + - '**/*xtensa*/**' + - '**/*Xtensa*/**' lld:coff: - - 
changed-files: - - any-glob-to-any-file: - - lld/**/COFF/** - - lld/Common/** + - lld/**/COFF/** + - lld/Common/** lld:elf: - - changed-files: - - any-glob-to-any-file: - - lld/**/ELF/** - - lld/Common/** + - lld/**/ELF/** + - lld/Common/** lld:macho: - - changed-files: - - any-glob-to-any-file: - - lld/**/MachO/** - - lld/Common/** + - lld/**/MachO/** + - lld/Common/** lld:wasm: - - changed-files: - - any-glob-to-any-file: - - lld/**/wasm/** - - lld/Common/** + - lld/**/wasm/** + - lld/Common/** backend:ARC: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/ARC/** - - clang/lib/Basic/Targets/ARC.h - - clang/lib/Basic/Targets/ARC.cpp - - clang/lib/CodeGen/Targets/ARC.cpp + - llvm/lib/Target/ARC/** + - clang/lib/Basic/Targets/ARC.h + - clang/lib/Basic/Targets/ARC.cpp + - clang/lib/CodeGen/Targets/ARC.cpp backend:ARM: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsARM.td - - llvm/test/MC/ARM/** - - llvm/lib/Target/ARM/** - - llvm/test/CodeGen/ARM/** - - clang/lib/Basic/Targets/ARM* - - clang/lib/Driver/ToolChains/Arch/ARM.* - - clang/lib/CodeGen/Targets/ARM.cpp - - clang/include/clang/Basic/BuiltinsARM* - - llvm/test/MC/DisasemblerARM/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - llvm/include/llvm/IR/IntrinsicsARM.td + - llvm/test/MC/ARM/** + - llvm/lib/Target/ARM/** + - llvm/test/CodeGen/ARM/** + - clang/lib/Basic/Targets/ARM* + - clang/lib/Driver/ToolChains/Arch/ARM.* + - clang/lib/CodeGen/Targets/ARM.cpp + - clang/include/clang/Basic/BuiltinsARM* + - llvm/test/MC/DisasemblerARM/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:AArch64: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsAArch64.td - - llvm/test/MC/AArch64/** - - llvm/lib/Target/AArch64/** - - llvm/test/CodeGen/AArch64/** - - clang/lib/Basic/Targets/AArch64* - - clang/lib/Driver/ToolChains/Arch/AArch64.* - - clang/lib/CodeGen/Targets/AArch64.cpp - - 
clang/include/clang/Basic/BuiltinsAArch64* - - llvm/test/MC/Disassembler/AArch64/** - - clang/include/clang/Sema/SemaARM.h - - clang/lib/Sema/SemaARM.cpp + - llvm/include/llvm/IR/IntrinsicsAArch64.td + - llvm/test/MC/AArch64/** + - llvm/lib/Target/AArch64/** + - llvm/test/CodeGen/AArch64/** + - clang/lib/Basic/Targets/AArch64* + - clang/lib/Driver/ToolChains/Arch/AArch64.* + - clang/lib/CodeGen/Targets/AArch64.cpp + - clang/include/clang/Basic/BuiltinsAArch64* + - llvm/test/MC/Disassembler/AArch64/** + - clang/include/clang/Sema/SemaARM.h + - clang/lib/Sema/SemaARM.cpp backend:CSKY: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/CSKY/** - - llvm/include/llvm/TargetParser/CSKYTargetParser.def - - llvm/include/llvm/TargetParser/CSKYTargetParser.h - - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def - - llvm/lib/TargetParser/CSKYTargetParser.cpp - - llvm/lib/Support/CSKYAttributes.cpp - - llvm/lib/Support/CSKYAttributeParser.cpp - - clang/lib/Basic/Targets/CSKY.h - - clang/lib/Basic/Targets/CSKY.cpp - - clang/lib/CodeGen/Targets/CSKY.cpp - - clang/lib/Driver/ToolChains/CSKY* + - llvm/lib/Target/CSKY/** + - llvm/include/llvm/TargetParser/CSKYTargetParser.def + - llvm/include/llvm/TargetParser/CSKYTargetParser.h + - llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def + - llvm/lib/TargetParser/CSKYTargetParser.cpp + - llvm/lib/Support/CSKYAttributes.cpp + - llvm/lib/Support/CSKYAttributeParser.cpp + - clang/lib/Basic/Targets/CSKY.h + - clang/lib/Basic/Targets/CSKY.cpp + - clang/lib/CodeGen/Targets/CSKY.cpp + - clang/lib/Driver/ToolChains/CSKY* backend:Hexagon: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Basic/BuiltinsHexagon*.def - - clang/include/clang/Sema/SemaHexagon.h - - clang/lib/Basic/Targets/Hexagon.* - - clang/lib/CodeGen/Targets/Hexagon.cpp - - clang/lib/Driver/ToolChains/Hexagon.* - - clang/lib/Sema/SemaHexagon.cpp - - lld/ELF/Arch/Hexagon.cpp - - lldb/source/Plugins/ABI/Hexagon/** - - 
lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** - - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def - - llvm/include/llvm/IR/IntrinsicsHexagon* - - llvm/include/llvm/Support/Hexagon* - - llvm/lib/Support/Hexagon* - - llvm/lib/Target/Hexagon/** - - llvm/test/CodeGen/Hexagon/** - - llvm/test/CodeGen/*/Hexagon/** - - llvm/test/DebugInfo/*/Hexagon/** - - llvm/test/Transforms/*/Hexagon - - llvm/test/MC/Disassembler/Hexagon/** - - llvm/test/MC/Hexagon/** - - llvm/test/tools/llvm-objdump/ELF/Hexagon/** + - clang/include/clang/Basic/BuiltinsHexagon*.def + - clang/include/clang/Sema/SemaHexagon.h + - clang/lib/Basic/Targets/Hexagon.* + - clang/lib/CodeGen/Targets/Hexagon.cpp + - clang/lib/Driver/ToolChains/Hexagon.* + - clang/lib/Sema/SemaHexagon.cpp + - lld/ELF/Arch/Hexagon.cpp + - lldb/source/Plugins/ABI/Hexagon/** + - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/** + - llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def + - llvm/include/llvm/IR/IntrinsicsHexagon* + - llvm/include/llvm/Support/Hexagon* + - llvm/lib/Support/Hexagon* + - llvm/lib/Target/Hexagon/** + - llvm/test/CodeGen/Hexagon/** + - llvm/test/CodeGen/*/Hexagon/** + - llvm/test/DebugInfo/*/Hexagon/** + - llvm/test/Transforms/*/Hexagon + - llvm/test/MC/Disassembler/Hexagon/** + - llvm/test/MC/Hexagon/** + - llvm/test/tools/llvm-objdump/ELF/Hexagon/** backend:Lanai: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/Lanai/** - - clang/lib/Basic/Targets/Lanai.h - - clang/lib/Basic/Targets/Lanai.cpp - - clang/lib/CodeGen/Targets/Lanai.cpp - - clang/lib/Driver/ToolChains/Lanai* + - llvm/lib/Target/Lanai/** + - clang/lib/Basic/Targets/Lanai.h + - clang/lib/Basic/Targets/Lanai.cpp + - clang/lib/CodeGen/Targets/Lanai.cpp + - clang/lib/Driver/ToolChains/Lanai* backend:loongarch: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsLoongArch.td - - llvm/test/MC/LoongArch/** - - llvm/lib/Target/LoongArch/** - - llvm/test/CodeGen/LoongArch/** - - 
clang/lib/Basic/Targets/LoongArch* - - clang/lib/Driver/ToolChains/Arch/LoongArch.* - - clang/lib/CodeGen/Targets/LoongArch.cpp - - clang/include/clang/Basic/BuiltinsLoongArch* - - clang/include/clang/Sema/SemaLoongArch.h - - clang/lib/Sema/SemaLoongArch.cpp + - llvm/include/llvm/IR/IntrinsicsLoongArch.td + - llvm/test/MC/LoongArch/** + - llvm/lib/Target/LoongArch/** + - llvm/test/CodeGen/LoongArch/** + - clang/lib/Basic/Targets/LoongArch* + - clang/lib/Driver/ToolChains/Arch/LoongArch.* + - clang/lib/CodeGen/Targets/LoongArch.cpp + - clang/include/clang/Basic/BuiltinsLoongArch* + - clang/include/clang/Sema/SemaLoongArch.h + - clang/lib/Sema/SemaLoongArch.cpp backend:MSP430: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsMSP430.td - - llvm/test/MC/MSP430/** - - llvm/lib/Target/MSP430/** - - llvm/test/CodeGen/MSP430/** - - clang/lib/Basic/Targets/MSP430* - - clang/lib/Driver/ToolChains/Arch/MSP430.* - - clang/lib/CodeGen/Targets/MSP430.cpp - - clang/include/clang/Basic/BuiltinsMSP430* - - llvm/test/MC/Disassembler/MSP430/** + - llvm/include/llvm/IR/IntrinsicsMSP430.td + - llvm/test/MC/MSP430/** + - llvm/lib/Target/MSP430/** + - llvm/test/CodeGen/MSP430/** + - clang/lib/Basic/Targets/MSP430* + - clang/lib/Driver/ToolChains/Arch/MSP430.* + - clang/lib/CodeGen/Targets/MSP430.cpp + - clang/include/clang/Basic/BuiltinsMSP430* + - llvm/test/MC/Disassembler/MSP430/** backend:Sparc: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsSparc.td - - llvm/test/MC/Sparc/** - - llvm/lib/Target/Sparc/** - - llvm/test/CodeGen/Sparc/** - - clang/lib/Basic/Targets/Sparc* - - clang/lib/Driver/ToolChains/Arch/Sparc.* - - clang/lib/CodeGen/Targets/Sparc.cpp - - clang/include/clang/Basic/BuiltinsSparc* - - llvm/test/MC/Disassembler/Sparc/** + - llvm/include/llvm/IR/IntrinsicsSparc.td + - llvm/test/MC/Sparc/** + - llvm/lib/Target/Sparc/** + - llvm/test/CodeGen/Sparc/** + - clang/lib/Basic/Targets/Sparc* + - 
clang/lib/Driver/ToolChains/Arch/Sparc.* + - clang/lib/CodeGen/Targets/Sparc.cpp + - clang/include/clang/Basic/BuiltinsSparc* + - llvm/test/MC/Disassembler/Sparc/** backend:WebAssembly: - - changed-files: - - any-glob-to-any-file: - - llvm/lib/Target/WebAssembly/** - - llvm/test/CodeGen/WebAssembly/** - - clang/lib/Basic/Targets/WebAssembly* - - clang/include/clang/Basic/BuiltinsWebAssembly.def - - clang/include/clang/Basic/WebAssemblyReferenceTypes.def - - clang/lib/CodeGen/Targets/WebAssembly* - - llvm/include/llvm/IR/IntinsicsWebAssembly.td - - llvm/include/llvm/Object/Wasm* - - llvm/lib/CodeGen/AsmPrinter/Wasm* - - llvm/lib/CodeGen/Wasm* - - llvm/lib/MC/MCParser/Wasm* - - llvm/lib/MC/Wasm* - - llvm/lib/ObjCopy/wasm/** - - llvm/lib/Object/Wasm* - - clang/lib/Driver/Toolchains/WebAssembly* - - clang/lib/Headers/wasm_simd128.h - - clang/test/CodeGen/WebAssembly/** - - clang/test/SemaCXX/*wasm* - - clang/test/Sema/*wasm* - - llvm/include/llvm/BinaryFormat/Wasm.h - - llvm/unittests/Target/WebAssembly/** - - llvm/test/DebugInfo/WebAssembly/** - - llvm/test/MC/WebAssembly/** - - clang/include/clang/Sema/SemaWasm.h - - clang/lib/Sema/SemaLoongWasm.cpp + - llvm/lib/Target/WebAssembly/** + - llvm/test/CodeGen/WebAssembly/** + - clang/lib/Basic/Targets/WebAssembly* + - clang/include/clang/Basic/BuiltinsWebAssembly.def + - clang/include/clang/Basic/WebAssemblyReferenceTypes.def + - clang/lib/CodeGen/Targets/WebAssembly* + - llvm/include/llvm/IR/IntinsicsWebAssembly.td + - llvm/include/llvm/Object/Wasm* + - llvm/lib/CodeGen/AsmPrinter/Wasm* + - llvm/lib/CodeGen/Wasm* + - llvm/lib/MC/MCParser/Wasm* + - llvm/lib/MC/Wasm* + - llvm/lib/ObjCopy/wasm/** + - llvm/lib/Object/Wasm* + - clang/lib/Driver/Toolchains/WebAssembly* + - clang/lib/Headers/wasm_simd128.h + - clang/test/CodeGen/WebAssembly/** + - clang/test/SemaCXX/*wasm* + - clang/test/Sema/*wasm* + - llvm/include/llvm/BinaryFormat/Wasm.h + - llvm/unittests/Target/WebAssembly/** + - llvm/test/DebugInfo/WebAssembly/** + - 
llvm/test/MC/WebAssembly/** + - clang/include/clang/Sema/SemaWasm.h + - clang/lib/Sema/SemaLoongWasm.cpp backend:X86: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/IR/IntrinsicsX86.td - - llvm/lib/Target/X86/** - - llvm/test/CodeGen/X86/** - - llvm/test/MC/X86/** - - llvm/test/MC/Disassembler/X86/** - - llvm/test/Analysis/CostModel/X86/** - - llvm/test/tools/llvm-mca/X86/** - - clang/lib/Basic/Targets/X86/** - - clang/lib/Driver/ToolChains/Arch/X86.* - - clang/lib/CodeGen/Targets/X86.* - - clang/lib/Headers/** - - clang/test/CodeGen/X86/** - - clang/include/clang/Basic/BuiltinsX86* - - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h - - llvm/include/llvm/TargetParser/X86* - - llvm/lib/TargetParser/X86* - - llvm/utils/TableGen/X86* - - clang/include/clang/Sema/SemaX86.h - - clang/lib/Sema/SemaX86.cpp + - llvm/include/llvm/IR/IntrinsicsX86.td + - llvm/lib/Target/X86/** + - llvm/test/CodeGen/X86/** + - llvm/test/MC/X86/** + - llvm/test/MC/Disassembler/X86/** + - llvm/test/Analysis/CostModel/X86/** + - llvm/test/tools/llvm-mca/X86/** + - clang/lib/Basic/Targets/X86/** + - clang/lib/Driver/ToolChains/Arch/X86.* + - clang/lib/CodeGen/Targets/X86.* + - clang/lib/Headers/** + - clang/test/CodeGen/X86/** + - clang/include/clang/Basic/BuiltinsX86* + - llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h + - llvm/include/llvm/TargetParser/X86* + - llvm/lib/TargetParser/X86* + - llvm/utils/TableGen/X86* + - clang/include/clang/Sema/SemaX86.h + - clang/lib/Sema/SemaX86.cpp backend:PowerPC: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* - - llvm/include/llvm/BinaryFormat/XCOFF.h - - llvm/include/llvm/IR/IntrinsicsPowerPC.td - - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp - - llvm/lib/Target/PowerPC/** - - llvm/test/Analysis/**/PowerPC/** - - llvm/test/CodeGen/PowerPC/** - - llvm/test/CodeGen/MIR/PowerPC/** - - llvm/test/DebugInfo/XCOFF/** - - llvm/test/DebugInfo/PowerPC/** - - 
llvm/test/LTO/PowerPC/** - - llvm/test/MC/Disassembler/PowerPC/** - - llvm/test/MC/PowerPC/** - - llvm/test/MC/XCOFF/** - - llvm/test/Transforms/**/PowerPC/** - - clang/include/clang/Basic/BuiltinsPPC.* - - clang/lib/Basic/Targets/PPC.* - - clang/lib/CodeGen/Targets/PPC.cpp - - clang/lib/Driver/ToolChains/PPC* - - clang/lib/Driver/ToolChains/AIX* - - clang/lib/Driver/ToolChains/Arch/PPC.* - - clang/test/CodeGen/PowerPC/** - - clang/include/clang/Sema/SemaPPC.h - - clang/lib/Sema/SemaPPC.cpp + - llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC* + - llvm/include/llvm/BinaryFormat/XCOFF.h + - llvm/include/llvm/IR/IntrinsicsPowerPC.td + - llvm/lib/CodeGen/AsmPrinter/AIXException.cpp + - llvm/lib/Target/PowerPC/** + - llvm/test/Analysis/**/PowerPC/** + - llvm/test/CodeGen/PowerPC/** + - llvm/test/CodeGen/MIR/PowerPC/** + - llvm/test/DebugInfo/XCOFF/** + - llvm/test/DebugInfo/PowerPC/** + - llvm/test/LTO/PowerPC/** + - llvm/test/MC/Disassembler/PowerPC/** + - llvm/test/MC/PowerPC/** + - llvm/test/MC/XCOFF/** + - llvm/test/Transforms/**/PowerPC/** + - clang/include/clang/Basic/BuiltinsPPC.* + - clang/lib/Basic/Targets/PPC.* + - clang/lib/CodeGen/Targets/PPC.cpp + - clang/lib/Driver/ToolChains/PPC* + - clang/lib/Driver/ToolChains/AIX* + - clang/lib/Driver/ToolChains/Arch/PPC.* + - clang/test/CodeGen/PowerPC/** + - clang/include/clang/Sema/SemaPPC.h + - clang/lib/Sema/SemaPPC.cpp backend:SystemZ: - - changed-files: - - any-glob-to-any-file: - - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* - - llvm/include/llvm/BinaryFormat/GOFF.h - - llvm/include/llvm/IR/IntrinsicsSystemZ.td - - llvm/lib/Target/SystemZ/** - - llvm/test/Analysis/**/SystemZ/** - - llvm/test/CodeGen/SystemZ/** - - llvm/test/DebugInfo/SystemZ/** - - llvm/test/ExecutionEngine/**/SystemZ/** - - llvm/test/MC/Disassembler/SystemZ/** - - llvm/test/MC/GOFF/** - - llvm/test/MC/SystemZ/** - - llvm/test/Transforms/**/SystemZ/** - - clang/include/clang/Basic/BuiltinsSystemZ.* - - clang/lib/Basic/Targets/SystemZ.* - 
- clang/lib/CodeGen/Targets/SystemZ.cpp - - clang/lib/Driver/ToolChains/ZOS* - - clang/lib/Driver/ToolChains/Arch/SystemZ.* - - clang/test/CodeGen/SystemZ/** - - clang/include/clang/Sema/SemaSystemZ.h - - clang/lib/Sema/SemaSystemZ.cpp + - llvm/include/llvm/BinaryFormat/ELFRelocs/SystemZ* + - llvm/include/llvm/BinaryFormat/GOFF.h + - llvm/include/llvm/IR/IntrinsicsSystemZ.td + - llvm/lib/Target/SystemZ/** + - llvm/test/Analysis/**/SystemZ/** + - llvm/test/CodeGen/SystemZ/** + - llvm/test/DebugInfo/SystemZ/** + - llvm/test/ExecutionEngine/**/SystemZ/** + - llvm/test/MC/Disassembler/SystemZ/** + - llvm/test/MC/GOFF/** + - llvm/test/MC/SystemZ/** + - llvm/test/Transforms/**/SystemZ/** + - clang/include/clang/Basic/BuiltinsSystemZ.* + - clang/lib/Basic/Targets/SystemZ.* + - clang/lib/CodeGen/Targets/SystemZ.cpp + - clang/lib/Driver/ToolChains/ZOS* + - clang/lib/Driver/ToolChains/Arch/SystemZ.* + - clang/test/CodeGen/SystemZ/** + - clang/include/clang/Sema/SemaSystemZ.h + - clang/lib/Sema/SemaSystemZ.cpp third-party:unittests: - - changed-files: - - any-glob-to-any-file: - - third-party/unittests/** + - third-party/unittests/** third-party:benchmark: - - changed-files: - - any-glob-to-any-file: - - third-party/benchmark/** + - third-party/benchmark/** llvm:binary-utilities: - - changed-files: - - any-glob-to-any-file: - - llvm/docs/CommandGuide/llvm-* - - llvm/include/llvm/BinaryFormat/** - - llvm/include/llvm/DebugInfo/Symbolize/** - - llvm/include/llvm/ObjCopy/** - - llvm/include/llvm/Object/** - - llvm/lib/BinaryFormat/** - - llvm/lib/DebugInfo/Symbolize/** - - llvm/lib/ObjCopy/** - - llvm/lib/Object/** - - llvm/test/Object/** - - llvm/test/tools/llvm-ar/** - - llvm/test/tools/llvm-cxxfilt/** - - llvm/test/tools/llvm-nm/** - - llvm/test/tools/llvm-objcopy/** - - llvm/test/tools/llvm-objdump/** - - llvm/test/tools/llvm-readobj/** - - llvm/test/tools/llvm-size/** - - llvm/test/tools/llvm-strings/** - - llvm/test/tools/llvm-symbolizer/** - - llvm/tools/llvm-ar/** - - 
llvm/tools/llvm-cxxfilt/** - - llvm/tools/llvm-nm/** - - llvm/tools/llvm-objcopy/** - - llvm/tools/llvm-objdump/** - - llvm/tools/llvm-readobj/** - - llvm/tools/llvm-size/** - - llvm/tools/llvm-strings/** - - llvm/tools/llvm-symbolizer/** + - llvm/docs/CommandGuide/llvm-* + - llvm/include/llvm/BinaryFormat/** + - llvm/include/llvm/DebugInfo/Symbolize/** + - llvm/include/llvm/ObjCopy/** + - llvm/include/llvm/Object/** + - llvm/lib/BinaryFormat/** + - llvm/lib/DebugInfo/Symbolize/** + - llvm/lib/ObjCopy/** + - llvm/lib/Object/** + - llvm/test/Object/** + - llvm/test/tools/llvm-ar/** + - llvm/test/tools/llvm-cxxfilt/** + - llvm/test/tools/llvm-nm/** + - llvm/test/tools/llvm-objcopy/** + - llvm/test/tools/llvm-objdump/** + - llvm/test/tools/llvm-readobj/** + - llvm/test/tools/llvm-size/** + - llvm/test/tools/llvm-strings/** + - llvm/test/tools/llvm-symbolizer/** + - llvm/tools/llvm-ar/** + - llvm/tools/llvm-cxxfilt/** + - llvm/tools/llvm-nm/** + - llvm/tools/llvm-objcopy/** + - llvm/tools/llvm-objdump/** + - llvm/tools/llvm-readobj/** + - llvm/tools/llvm-size/** + - llvm/tools/llvm-strings/** + - llvm/tools/llvm-symbolizer/** clang:openmp: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Basic/OpenMP* - - clang/include/clang/AST/OpenMPClause.h - - clang/include/clang/AST/DeclOpenMP.h - - clang/include/clang/AST/ExprOpenMP.h - - clang/include/clang/AST/StmtOpenMP.h - - clang/lib/AST/DeclOpenMP.cpp - - clang/lib/AST/OpenMPClause.cpp - - clang/lib/AST/StmtOpenMP.cpp - - clang/lib/Headers/openmp_wrappers/** - - clang/lib/Parse/ParseOpenMP.cpp - - clang/lib/Basic/OpenMPKinds.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp - - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h - - clang/lib/CodeGen/CgStmtOpenMP.cpp - - clang/lib/CodeGen/CGOpenMP* - - clang/lib/Sema/SemaOpenMP.cpp - - clang/test/OpenMP/** - - clang/test/AST/ast-dump-openmp-* - - llvm/lib/Frontend/OpenMP/** - - llvm/lib/Transforms/IPO/OpenMPOpt.cpp - - llvm/include/llvm/Frontend/OpenMP/** - - 
llvm/include/llvm/Transforms/IPO/OpenMPOpt.h - - llvm/unittests/Frontend/OpenMP* - - llvm/test/Transforms/OpenMP/** + - clang/include/clang/Basic/OpenMP* + - clang/include/clang/AST/OpenMPClause.h + - clang/include/clang/AST/DeclOpenMP.h + - clang/include/clang/AST/ExprOpenMP.h + - clang/include/clang/AST/StmtOpenMP.h + - clang/lib/AST/DeclOpenMP.cpp + - clang/lib/AST/OpenMPClause.cpp + - clang/lib/AST/StmtOpenMP.cpp + - clang/lib/Headers/openmp_wrappers/** + - clang/lib/Parse/ParseOpenMP.cpp + - clang/lib/Basic/OpenMPKinds.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp + - clang/lib/Driver/ToolChains/AMDGPUOpenMP.h + - clang/lib/CodeGen/CgStmtOpenMP.cpp + - clang/lib/CodeGen/CGOpenMP* + - clang/lib/Sema/SemaOpenMP.cpp + - clang/test/OpenMP/** + - clang/test/AST/ast-dump-openmp-* + - llvm/lib/Frontend/OpenMP/** + - llvm/lib/Transforms/IPO/OpenMPOpt.cpp + - llvm/include/llvm/Frontend/OpenMP/** + - llvm/include/llvm/Transforms/IPO/OpenMPOpt.h + - llvm/unittests/Frontend/OpenMP* + - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - changed-files: - - any-glob-to-any-file: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** - - clang/unittests/Analysis/LifetimeSafety* - - clang/test/Sema/*lifetime-safety* - - clang/test/Sema/*lifetime-analysis* - - clang/test/Analysis/LifetimeSafety/** + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** clang:as-a-library: - - changed-files: - - any-glob-to-any-file: - - clang/tools/libclang/** - - clang/bindings/** - - clang/include/clang-c/** - - clang/test/LibClang/** - - clang/unittest/libclang/** + - clang/tools/libclang/** + - clang/bindings/** + - clang/include/clang-c/** + - clang/test/LibClang/** + - clang/unittest/libclang/** openmp:libomp: - - 
changed-files: - - any-glob-to-any-file: - - 'openmp/**' + - any: ['openmp/**', '!openmp/libomptarget/**'] openmp:libomptarget: - - changed-files: - - all-globs-to-all-file: - - openmp/** - - '!openmp/runtime/**'' + - any: ['openmp/**', '!openmp/runtime/**'] bazel: - - changed-files: - - any-glob-to-any-file: - - utils/bazel/** + - utils/bazel/** offload: - - changed-files: - - any-glob-to-any-file: - - offload/** + - offload/** tablegen: - - changed-files: - - any-glob-to-any-file: - - llvm/include/TableGen/** - - llvm/lib/TableGen/** - - llvm/utils/TableGen/** + - llvm/include/TableGen/** + - llvm/lib/TableGen/** + - llvm/utils/TableGen/** infrastructure: - - changed-files: - - any-glob-to-any-file: - - .ci/** + - .ci/** diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index 0d97e436d39c4..e1f2e754c1a3d 100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -67,7 +67,9 @@ jobs: github.event.pull_request.draft == false && github.event.pull_request.commits < 10 steps: - - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 + - uses: actions/labeler@ac9175f8a1f3625fd0d4fb234536d26811351594 # v4.3.0 with: configuration-path: .github/new-prs-labeler.yml + # workaround for https://github.com/actions/labeler/issues/112 + sync-labels: '' repo-token: ${{ secrets.ISSUE_SUBSCRIBER_TOKEN }} From 5af03989cc01e7bf9a45240d86411e9eee5b0e8b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 18 Nov 2025 17:53:45 +0000 Subject: [PATCH 19/57] [X86] Add test examples of build vectors of reversed scalar loads that could be converted to vector loads plus shuffles (#168571) This is turning up in some legalisation code when shuffling vectors bitcast from illegal loads. Ideally we'd handle more complex shuffles, but reverse is a start. 
--- .../X86/merge-consecutive-loads-128.ll | 520 ++++++++++++++++++ .../X86/merge-consecutive-loads-256.ll | 352 ++++++++++++ .../X86/merge-consecutive-loads-512.ll | 324 +++++++++++ 3 files changed, 1196 insertions(+) diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll index 595f8491b405c..26f076d450c15 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -353,6 +353,69 @@ define <4 x float> @merge_4f32_f32_019u(ptr %ptr) nounwind uwtable noinline ssp ret <4 x float> %res3 } +define <4 x float> @merge_v4f32_f32_3210(ptr %ptr) nounwind uwtable noinline ssp { +; SSE2-LABEL: merge_v4f32_f32_3210: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSE41-LABEL: merge_v4f32_f32_3210: +; SSE41: # %bb.0: +; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; SSE41-NEXT: retq +; +; AVX-LABEL: merge_v4f32_f32_3210: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; AVX-NEXT: retq +; +; X86-SSE1-LABEL: merge_v4f32_f32_3210: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movss 
{{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X86-SSE1-NEXT: retl +; +; X86-SSE41-LABEL: merge_v4f32_f32_3210: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; X86-SSE41-NEXT: retl + %ptr0 = getelementptr inbounds float, ptr %ptr, i64 3 + %ptr1 = getelementptr inbounds float, ptr %ptr, i64 2 + %ptr2 = getelementptr inbounds float, ptr %ptr, i64 1 + %ptr3 = getelementptr inbounds float, ptr %ptr, i64 0 + %val0 = load float, ptr %ptr0, align 4 + %val1 = load float, ptr %ptr1, align 4 + %val2 = load float, ptr %ptr2, align 4 + %val3 = load float, ptr %ptr3, align 4 + %res0 = insertelement <4 x float> poison, float %val0, i64 0 + %res1 = insertelement <4 x float> %res0, float %val1, i64 1 + %res2 = insertelement <4 x float> %res1, float %val2, i64 2 + %res3 = insertelement <4 x float> %res2, float %val3, i64 3 + ret <4 x float> %res3 +} + define <4 x i32> @merge_4i32_i32_23u5(ptr %ptr) nounwind uwtable noinline ssp { ; SSE-LABEL: merge_4i32_i32_23u5: ; SSE: # %bb.0: @@ -724,6 +787,81 @@ define <4 x i32> @merge_4i32_i32_45zz_inc5(ptr %ptr) nounwind uwtable noinline s ret <4 x i32> %res1 } +define <4 x i32> @merge_v4i32_i32_3210(ptr %ptr) nounwind uwtable noinline ssp { +; SSE2-LABEL: merge_v4i32_i32_3210: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss 
{{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSE41-LABEL: merge_v4i32_i32_3210: +; SSE41: # %bb.0: +; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-NEXT: pinsrd $1, 8(%rdi), %xmm0 +; SSE41-NEXT: pinsrd $2, 4(%rdi), %xmm0 +; SSE41-NEXT: pinsrd $3, (%rdi), %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: merge_v4i32_i32_3210: +; AVX: # %bb.0: +; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vpinsrd $1, 8(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $2, 4(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $3, (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X86-SSE1-LABEL: merge_v4i32_i32_3210: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %edi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: .cfi_offset %esi, -12 +; X86-SSE1-NEXT: .cfi_offset %edi, -8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE1-NEXT: movl 12(%ecx), %edx +; X86-SSE1-NEXT: movl 8(%ecx), %esi +; X86-SSE1-NEXT: movl (%ecx), %edi +; X86-SSE1-NEXT: movl 4(%ecx), %ecx +; X86-SSE1-NEXT: movl %edi, 12(%eax) +; X86-SSE1-NEXT: movl %ecx, 8(%eax) +; X86-SSE1-NEXT: movl %esi, 4(%eax) +; X86-SSE1-NEXT: movl %edx, (%eax) +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: popl %edi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl $4 +; +; X86-SSE41-LABEL: merge_v4i32_i32_3210: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE41-NEXT: pinsrd $1, 8(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrd $2, 4(%eax), 
%xmm0 +; X86-SSE41-NEXT: pinsrd $3, (%eax), %xmm0 +; X86-SSE41-NEXT: retl + %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 3 + %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 2 + %ptr2 = getelementptr inbounds i32, ptr %ptr, i64 1 + %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 0 + %val0 = load i32, ptr %ptr0, align 4 + %val1 = load i32, ptr %ptr1, align 4 + %val2 = load i32, ptr %ptr2, align 4 + %val3 = load i32, ptr %ptr3, align 4 + %res0 = insertelement <4 x i32> poison, i32 %val0, i64 0 + %res1 = insertelement <4 x i32> %res0, i32 %val1, i64 1 + %res2 = insertelement <4 x i32> %res1, i32 %val2, i64 2 + %res3 = insertelement <4 x i32> %res2, i32 %val3, i64 3 + ret <4 x i32> %res3 +} + define <8 x i16> @merge_8i16_i16_23u567u9(ptr %ptr) nounwind uwtable noinline ssp { ; SSE-LABEL: merge_8i16_i16_23u567u9: ; SSE: # %bb.0: @@ -862,6 +1000,150 @@ define <8 x i16> @merge_8i16_i16_45u7zzzz(ptr %ptr) nounwind uwtable noinline ss ret <8 x i16> %res7 } +define <8 x i16> @merge_8i16_i16_76543210(ptr %ptr) nounwind uwtable noinline ssp { +; SSE2-LABEL: merge_8i16_i16_76543210: +; SSE2: # %bb.0: +; SSE2-NEXT: movzwl (%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzwl 2(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movzwl 4(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzwl 6(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movzwl 8(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzwl 10(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movzwl 12(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: movzwl 14(%rdi), %eax +; 
SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: retq +; +; SSE41-LABEL: merge_8i16_i16_76543210: +; SSE41: # %bb.0: +; SSE41-NEXT: movzwl 14(%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: pinsrw $1, 12(%rdi), %xmm0 +; SSE41-NEXT: pinsrw $2, 10(%rdi), %xmm0 +; SSE41-NEXT: pinsrw $3, 8(%rdi), %xmm0 +; SSE41-NEXT: pinsrw $4, 6(%rdi), %xmm0 +; SSE41-NEXT: pinsrw $5, 4(%rdi), %xmm0 +; SSE41-NEXT: pinsrw $6, 2(%rdi), %xmm0 +; SSE41-NEXT: pinsrw $7, (%rdi), %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: merge_8i16_i16_76543210: +; AVX: # %bb.0: +; AVX-NEXT: movzwl 14(%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpinsrw $1, 12(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrw $2, 10(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrw $3, 8(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrw $4, 6(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrw $5, 4(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrw $6, 2(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X86-SSE1-LABEL: merge_8i16_i16_76543210: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: pushl %edi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 20 +; X86-SSE1-NEXT: pushl %eax +; X86-SSE1-NEXT: .cfi_def_cfa_offset 24 +; X86-SSE1-NEXT: .cfi_offset %esi, -20 +; X86-SSE1-NEXT: .cfi_offset %edi, -16 +; X86-SSE1-NEXT: .cfi_offset %ebx, -12 +; X86-SSE1-NEXT: .cfi_offset %ebp, -8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movzwl 14(%eax), %ecx +; X86-SSE1-NEXT: movw %cx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill +; X86-SSE1-NEXT: movzwl 12(%eax), %ecx +; X86-SSE1-NEXT: movw %cx, (%esp) 
# 2-byte Spill +; X86-SSE1-NEXT: movzwl 10(%eax), %esi +; X86-SSE1-NEXT: movzwl 8(%eax), %edi +; X86-SSE1-NEXT: movzwl 6(%eax), %ebx +; X86-SSE1-NEXT: movzwl 4(%eax), %ebp +; X86-SSE1-NEXT: movzwl (%eax), %ecx +; X86-SSE1-NEXT: movzwl 2(%eax), %edx +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movw %cx, 14(%eax) +; X86-SSE1-NEXT: movw %dx, 12(%eax) +; X86-SSE1-NEXT: movw %bp, 10(%eax) +; X86-SSE1-NEXT: movw %bx, 8(%eax) +; X86-SSE1-NEXT: movw %di, 6(%eax) +; X86-SSE1-NEXT: movw %si, 4(%eax) +; X86-SSE1-NEXT: movzwl (%esp), %ecx # 2-byte Folded Reload +; X86-SSE1-NEXT: movw %cx, 2(%eax) +; X86-SSE1-NEXT: movzwl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 2-byte Folded Reload +; X86-SSE1-NEXT: movw %cx, (%eax) +; X86-SSE1-NEXT: addl $4, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 20 +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE1-NEXT: popl %edi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: popl %ebp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl $4 +; +; X86-SSE41-LABEL: merge_8i16_i16_76543210: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movzwl 14(%eax), %ecx +; X86-SSE41-NEXT: movd %ecx, %xmm0 +; X86-SSE41-NEXT: pinsrw $1, 12(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrw $2, 10(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrw $3, 8(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrw $4, 6(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrw $5, 4(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrw $6, 2(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrw $7, (%eax), %xmm0 +; X86-SSE41-NEXT: retl + %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 7 + %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 6 + %ptr2 = getelementptr inbounds i16, ptr %ptr, i64 5 + %ptr3 = getelementptr inbounds i16, ptr %ptr, i64 4 + %ptr4 = getelementptr inbounds i16, ptr %ptr, i64 3 + %ptr5 = getelementptr inbounds i16, ptr %ptr, i64 2 + %ptr6 = getelementptr inbounds i16, ptr 
%ptr, i64 1 + %ptr7 = getelementptr inbounds i16, ptr %ptr, i64 0 + %val0 = load i16, ptr %ptr0 + %val1 = load i16, ptr %ptr1 + %val2 = load i16, ptr %ptr2 + %val3 = load i16, ptr %ptr3 + %val4 = load i16, ptr %ptr4 + %val5 = load i16, ptr %ptr5 + %val6 = load i16, ptr %ptr6 + %val7 = load i16, ptr %ptr7 + %res0 = insertelement <8 x i16> poison, i16 %val0, i64 0 + %res1 = insertelement <8 x i16> %res0, i16 %val1, i64 1 + %res2 = insertelement <8 x i16> %res1, i16 %val2, i64 2 + %res3 = insertelement <8 x i16> %res2, i16 %val3, i64 3 + %res4 = insertelement <8 x i16> %res3, i16 %val4, i64 4 + %res5 = insertelement <8 x i16> %res4, i16 %val5, i64 5 + %res6 = insertelement <8 x i16> %res5, i16 %val6, i64 6 + %res7 = insertelement <8 x i16> %res6, i16 %val7, i64 7 + ret <8 x i16> %res7 +} + define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(ptr %ptr) nounwind uwtable noinline ssp { ; SSE-LABEL: merge_16i8_i8_01u3456789ABCDuF: ; SSE: # %bb.0: @@ -1056,6 +1338,244 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(ptr %ptr) nounwind uwtable noin ret <16 x i8> %resF } +define <16 x i8> @merge_16i8_i8_FEDCBA9876543210(ptr %ptr) nounwind uwtable noinline ssp { +; SSE2-LABEL: merge_16i8_i8_FEDCBA9876543210: +; SSE2: # %bb.0: +; SSE2-NEXT: movzbl (%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzbl 1(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movzbl 2(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzbl 3(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: movzbl 4(%rdi), %eax +; SSE2-NEXT: movd %eax, 
%xmm0 +; SSE2-NEXT: movzbl 5(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE2-NEXT: movzbl 6(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzbl 7(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: movzbl 8(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzbl 9(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: movzbl 10(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzbl 11(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE2-NEXT: movzbl 12(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movzbl 13(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: movzbl 14(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm4 +; SSE2-NEXT: movzbl 15(%rdi), %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSE41-LABEL: merge_16i8_i8_FEDCBA9876543210: +; SSE41: # %bb.0: +; SSE41-NEXT: movzbl 15(%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: pinsrb $1, 14(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $2, 13(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $3, 12(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $4, 11(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $5, 10(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $6, 9(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $7, 8(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $8, 7(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $9, 6(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $10, 5(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $11, 4(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $12, 3(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $13, 2(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $14, 1(%rdi), %xmm0 +; SSE41-NEXT: pinsrb $15, (%rdi), %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: merge_16i8_i8_FEDCBA9876543210: +; AVX: # %bb.0: +; AVX-NEXT: movzbl 15(%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpinsrb $1, 14(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $2, 13(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $3, 12(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $4, 11(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $5, 10(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $6, 9(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $7, 8(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $8, 7(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $9, 6(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $10, 5(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $11, 4(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $12, 3(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $13, 2(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $14, 1(%rdi), %xmm0, %xmm0 +; AVX-NEXT: vpinsrb $15, (%rdi), 
%xmm0, %xmm0 +; AVX-NEXT: retq +; +; X86-SSE1-LABEL: merge_16i8_i8_FEDCBA9876543210: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: pushl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: subl $12, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 24 +; X86-SSE1-NEXT: .cfi_offset %esi, -12 +; X86-SSE1-NEXT: .cfi_offset %ebx, -8 +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE1-NEXT: movzbl 15(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 14(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 13(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 12(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 11(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 10(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 9(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 8(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 7(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movzbl 6(%esi), %ecx +; X86-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SSE1-NEXT: movb 5(%esi), %bh +; X86-SSE1-NEXT: movb 4(%esi), %bl +; X86-SSE1-NEXT: movb 3(%esi), %dh +; X86-SSE1-NEXT: movb 2(%esi), %ch +; X86-SSE1-NEXT: movb (%esi), %cl +; X86-SSE1-NEXT: movb 1(%esi), %dl +; X86-SSE1-NEXT: movb %cl, 15(%eax) +; X86-SSE1-NEXT: movb %dl, 14(%eax) +; X86-SSE1-NEXT: movb %ch, 13(%eax) +; X86-SSE1-NEXT: movb %dh, 12(%eax) +; X86-SSE1-NEXT: movb %bl, 11(%eax) +; X86-SSE1-NEXT: movb %bh, 
10(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 9(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 8(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 7(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 6(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 5(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 4(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 3(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 2(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, 1(%eax) +; X86-SSE1-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SSE1-NEXT: movb %cl, (%eax) +; X86-SSE1-NEXT: addl $12, %esp +; X86-SSE1-NEXT: .cfi_def_cfa_offset 12 +; X86-SSE1-NEXT: popl %esi +; X86-SSE1-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE1-NEXT: popl %ebx +; X86-SSE1-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE1-NEXT: retl $4 +; +; X86-SSE41-LABEL: merge_16i8_i8_FEDCBA9876543210: +; X86-SSE41: # %bb.0: +; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE41-NEXT: movzbl 15(%eax), %ecx +; X86-SSE41-NEXT: movd %ecx, %xmm0 +; X86-SSE41-NEXT: pinsrb $1, 14(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $2, 13(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $3, 12(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $4, 11(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $5, 10(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $6, 9(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $7, 8(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $8, 7(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $9, 6(%eax), 
%xmm0 +; X86-SSE41-NEXT: pinsrb $10, 5(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $11, 4(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $12, 3(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $13, 2(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $14, 1(%eax), %xmm0 +; X86-SSE41-NEXT: pinsrb $15, (%eax), %xmm0 +; X86-SSE41-NEXT: retl + %ptr0 = getelementptr inbounds i8, ptr %ptr, i64 15 + %ptr1 = getelementptr inbounds i8, ptr %ptr, i64 14 + %ptr2 = getelementptr inbounds i8, ptr %ptr, i64 13 + %ptr3 = getelementptr inbounds i8, ptr %ptr, i64 12 + %ptr4 = getelementptr inbounds i8, ptr %ptr, i64 11 + %ptr5 = getelementptr inbounds i8, ptr %ptr, i64 10 + %ptr6 = getelementptr inbounds i8, ptr %ptr, i64 9 + %ptr7 = getelementptr inbounds i8, ptr %ptr, i64 8 + %ptr8 = getelementptr inbounds i8, ptr %ptr, i64 7 + %ptr9 = getelementptr inbounds i8, ptr %ptr, i64 6 + %ptrA = getelementptr inbounds i8, ptr %ptr, i64 5 + %ptrB = getelementptr inbounds i8, ptr %ptr, i64 4 + %ptrC = getelementptr inbounds i8, ptr %ptr, i64 3 + %ptrD = getelementptr inbounds i8, ptr %ptr, i64 2 + %ptrE = getelementptr inbounds i8, ptr %ptr, i64 1 + %ptrF = getelementptr inbounds i8, ptr %ptr, i64 0 + %val0 = load i8, ptr %ptr0 + %val1 = load i8, ptr %ptr1 + %val2 = load i8, ptr %ptr2 + %val3 = load i8, ptr %ptr3 + %val4 = load i8, ptr %ptr4 + %val5 = load i8, ptr %ptr5 + %val6 = load i8, ptr %ptr6 + %val7 = load i8, ptr %ptr7 + %val8 = load i8, ptr %ptr8 + %val9 = load i8, ptr %ptr9 + %valA = load i8, ptr %ptrA + %valB = load i8, ptr %ptrB + %valC = load i8, ptr %ptrC + %valD = load i8, ptr %ptrD + %valE = load i8, ptr %ptrE + %valF = load i8, ptr %ptrF + %res0 = insertelement <16 x i8> poison, i8 %val0, i8 0 + %res1 = insertelement <16 x i8> %res0, i8 %val1, i64 1 + %res2 = insertelement <16 x i8> %res1, i8 %val2, i64 2 + %res3 = insertelement <16 x i8> %res2, i8 %val3, i64 3 + %res4 = insertelement <16 x i8> %res3, i8 %val4, i64 4 + %res5 = insertelement <16 x i8> %res4, i8 %val5, i64 5 + %res6 = insertelement <16 x i8> 
%res5, i8 %val6, i64 6 + %res7 = insertelement <16 x i8> %res6, i8 %val7, i64 7 + %res8 = insertelement <16 x i8> %res7, i8 %val8, i64 8 + %res9 = insertelement <16 x i8> %res8, i8 %val9, i64 9 + %resA = insertelement <16 x i8> %res9, i8 %valA, i64 10 + %resB = insertelement <16 x i8> %resA, i8 %valB, i64 11 + %resC = insertelement <16 x i8> %resB, i8 %valC, i64 12 + %resD = insertelement <16 x i8> %resC, i8 %valD, i64 13 + %resE = insertelement <16 x i8> %resD, i8 %valE, i64 14 + %resF = insertelement <16 x i8> %resE, i8 %valF, i64 15 + ret <16 x i8> %resF +} + define void @merge_4i32_i32_combine(ptr %dst, ptr %src) { ; SSE-LABEL: merge_4i32_i32_combine: ; SSE: # %bb.0: diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll index 33e8d62c00a4c..e5e99e17053a0 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll @@ -126,6 +126,40 @@ define <4 x double> @merge_4f64_f64_45zz(ptr %ptr) nounwind uwtable noinline ssp ret <4 x double> %res1 } +define <4 x double> @merge_v4f64_f64_3210(ptr %ptr) nounwind uwtable noinline ssp { +; AVX-LABEL: merge_v4f64_f64_3210: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; AVX-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; +; X86-AVX-LABEL: merge_v4f64_f64_3210: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X86-AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X86-AVX-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; X86-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX-NEXT: retl + %ptr0 = getelementptr inbounds double, ptr %ptr, i64 3 + %ptr1 = getelementptr inbounds 
double, ptr %ptr, i64 2 + %ptr2 = getelementptr inbounds double, ptr %ptr, i64 1 + %ptr3 = getelementptr inbounds double, ptr %ptr, i64 0 + %val0 = load double, ptr %ptr0, align 4 + %val1 = load double, ptr %ptr1, align 4 + %val2 = load double, ptr %ptr2, align 4 + %val3 = load double, ptr %ptr3, align 4 + %res0 = insertelement <4 x double> poison, double %val0, i64 0 + %res1 = insertelement <4 x double> %res0, double %val1, i64 1 + %res2 = insertelement <4 x double> %res1, double %val2, i64 2 + %res3 = insertelement <4 x double> %res2, double %val3, i64 3 + ret <4 x double> %res3 +} + define <4 x double> @merge_4f64_f64_34z6(ptr %ptr) nounwind uwtable noinline ssp { ; AVX-LABEL: merge_4f64_f64_34z6: ; AVX: # %bb.0: @@ -234,6 +268,46 @@ define <4 x i64> @merge_4i64_i64_23zz(ptr %ptr) nounwind uwtable noinline ssp { ret <4 x i64> %res1 } +define <4 x i64> @merge_v4i64_i64_3210(ptr %ptr) nounwind uwtable noinline ssp { +; AVX-LABEL: merge_v4i64_i64_3210: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; +; X86-AVX-LABEL: merge_v4i64_i64_3210: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpinsrd $1, 12(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $3, 4(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpinsrd $1, 28(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrd $2, 16(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrd $3, 20(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX-NEXT: retl + %ptr0 = getelementptr inbounds 
i64, ptr %ptr, i64 3 + %ptr1 = getelementptr inbounds i64, ptr %ptr, i64 2 + %ptr2 = getelementptr inbounds i64, ptr %ptr, i64 1 + %ptr3 = getelementptr inbounds i64, ptr %ptr, i64 0 + %val0 = load i64, ptr %ptr0, align 4 + %val1 = load i64, ptr %ptr1, align 4 + %val2 = load i64, ptr %ptr2, align 4 + %val3 = load i64, ptr %ptr3, align 4 + %res0 = insertelement <4 x i64> poison, i64 %val0, i64 0 + %res1 = insertelement <4 x i64> %res0, i64 %val1, i64 1 + %res2 = insertelement <4 x i64> %res1, i64 %val2, i64 2 + %res3 = insertelement <4 x i64> %res2, i64 %val3, i64 3 + ret <4 x i64> %res3 +} + define <8 x float> @merge_8f32_2f32_23z5(ptr %ptr) nounwind uwtable noinline ssp { ; AVX-LABEL: merge_8f32_2f32_23z5: ; AVX: # %bb.0: @@ -335,6 +409,60 @@ define <8 x float> @merge_8f32_f32_1u3u5zu8(ptr %ptr) nounwind uwtable noinline ret <8 x float> %res7 } +define <8 x float> @merge_8f32_f32_76543210(ptr %ptr) nounwind uwtable noinline ssp { +; AVX-LABEL: merge_8f32_f32_76543210: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: retq +; +; X86-AVX-LABEL: merge_8f32_f32_76543210: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; 
X86-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; X86-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; X86-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; X86-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX-NEXT: retl + %ptr0 = getelementptr inbounds float, ptr %ptr, i64 7 + %ptr1 = getelementptr inbounds float, ptr %ptr, i64 6 + %ptr2 = getelementptr inbounds float, ptr %ptr, i64 5 + %ptr3 = getelementptr inbounds float, ptr %ptr, i64 4 + %ptr4 = getelementptr inbounds float, ptr %ptr, i64 3 + %ptr5 = getelementptr inbounds float, ptr %ptr, i64 2 + %ptr6 = getelementptr inbounds float, ptr %ptr, i64 1 + %ptr7 = getelementptr inbounds float, ptr %ptr, i64 0 + %val0 = load float, ptr %ptr0 + %val1 = load float, ptr %ptr1 + %val2 = load float, ptr %ptr2 + %val3 = load float, ptr %ptr3 + %val4 = load float, ptr %ptr4 + %val5 = load float, ptr %ptr5 + %val6 = load float, ptr %ptr6 + %val7 = load float, ptr %ptr7 + %res0 = insertelement <8 x float> poison, float %val0, i64 0 + %res1 = insertelement <8 x float> %res0, float %val1, i64 1 + %res2 = insertelement <8 x float> %res1, float %val2, i64 2 + %res3 = insertelement <8 x float> %res2, float %val3, i64 3 + %res4 = insertelement <8 x float> %res3, float %val4, i64 4 + %res5 = insertelement <8 x float> %res4, float %val5, i64 5 + %res6 = insertelement <8 x float> %res5, float %val6, i64 6 + %res7 = insertelement <8 x float> %res6, float %val7, i64 7 + ret <8 x float> %res7 +} + define <8 x i32> @merge_8i32_4i32_z3(ptr %ptr) nounwind uwtable noinline ssp { ; AVX-LABEL: merge_8i32_4i32_z3: ; AVX: # %bb.0: @@ -414,6 +542,86 @@ define <8 x i32> @merge_8i32_i32_1u3u5zu8(ptr %ptr) nounwind uwtable noinline ss ret <8 x i32> %res7 } +define <8 x i32> @merge_8i32_i32_76543210(ptr %ptr) nounwind uwtable noinline ssp { +; AVX1-LABEL: merge_8i32_i32_76543210: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vpinsrd $1, 8(%rdi), %xmm0, 
%xmm0 +; AVX1-NEXT: vpinsrd $2, 4(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrd $3, (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vpinsrd $1, 24(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrd $2, 20(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrd $3, 16(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: merge_8i32_i32_76543210: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX2-NEXT: vpinsrd $1, 8(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrd $2, 4(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrd $3, (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX2-NEXT: vpinsrd $1, 24(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrd $2, 20(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrd $3, 16(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: merge_8i32_i32_76543210: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrd $1, 8(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrd $2, 4(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrd $3, (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX512F-NEXT: vpinsrd $1, 24(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrd $2, 20(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrd $3, 16(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; X86-AVX-LABEL: merge_8i32_i32_76543210: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpinsrd $1, 8(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $2, 4(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $3, (%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpinsrd $1, 24(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrd $2, 20(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrd 
$3, 16(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX-NEXT: retl + %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 7 + %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 6 + %ptr2 = getelementptr inbounds i32, ptr %ptr, i64 5 + %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 4 + %ptr4 = getelementptr inbounds i32, ptr %ptr, i64 3 + %ptr5 = getelementptr inbounds i32, ptr %ptr, i64 2 + %ptr6 = getelementptr inbounds i32, ptr %ptr, i64 1 + %ptr7 = getelementptr inbounds i32, ptr %ptr, i64 0 + %val0 = load i32, ptr %ptr0 + %val1 = load i32, ptr %ptr1 + %val2 = load i32, ptr %ptr2 + %val3 = load i32, ptr %ptr3 + %val4 = load i32, ptr %ptr4 + %val5 = load i32, ptr %ptr5 + %val6 = load i32, ptr %ptr6 + %val7 = load i32, ptr %ptr7 + %res0 = insertelement <8 x i32> poison, i32 %val0, i64 0 + %res1 = insertelement <8 x i32> %res0, i32 %val1, i64 1 + %res2 = insertelement <8 x i32> %res1, i32 %val2, i64 2 + %res3 = insertelement <8 x i32> %res2, i32 %val3, i64 3 + %res4 = insertelement <8 x i32> %res3, i32 %val4, i64 4 + %res5 = insertelement <8 x i32> %res4, i32 %val5, i64 5 + %res6 = insertelement <8 x i32> %res5, i32 %val6, i64 6 + %res7 = insertelement <8 x i32> %res6, i32 %val7, i64 7 + ret <8 x i32> %res7 +} + define <16 x i16> @merge_16i16_i16_89zzzuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp { ; AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz: ; AVX: # %bb.0: @@ -522,6 +730,150 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF(ptr %ptr) nounwind uwtable n ret <16 x i16> %resF } +define <16 x i16> @merge_16i16_i16_FEDCBA9876543210(ptr %ptr) nounwind uwtable noinline ssp { +; AVX1-LABEL: merge_16i16_i16_FEDCBA9876543210: +; AVX1: # %bb.0: +; AVX1-NEXT: movzwl 14(%rdi), %eax +; AVX1-NEXT: vmovd %eax, %xmm0 +; AVX1-NEXT: vpinsrw $1, 12(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrw $2, 10(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrw $3, 8(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrw $4, 6(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrw 
$5, 4(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrw $6, 2(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: movzwl 30(%rdi), %eax +; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vpinsrw $1, 28(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrw $2, 26(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrw $3, 24(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrw $4, 22(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrw $5, 20(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrw $6, 18(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrw $7, 16(%rdi), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: merge_16i16_i16_FEDCBA9876543210: +; AVX2: # %bb.0: +; AVX2-NEXT: movzwl 14(%rdi), %eax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: vpinsrw $1, 12(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrw $2, 10(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrw $3, 8(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrw $4, 6(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrw $5, 4(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrw $6, 2(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: movzwl 30(%rdi), %eax +; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vpinsrw $1, 28(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrw $2, 26(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrw $3, 24(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrw $4, 22(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrw $5, 20(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrw $6, 18(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vpinsrw $7, 16(%rdi), %xmm1, %xmm1 +; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: merge_16i16_i16_FEDCBA9876543210: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movzwl 14(%rdi), %eax +; AVX512F-NEXT: vmovd %eax, %xmm0 +; AVX512F-NEXT: vpinsrw $1, 12(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $2, 10(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $3, 8(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $4, 6(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $5, 4(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $6, 
2(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: movzwl 30(%rdi), %eax +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: vpinsrw $1, 28(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrw $2, 26(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrw $3, 24(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrw $4, 22(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrw $5, 20(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrw $6, 18(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vpinsrw $7, 16(%rdi), %xmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; X86-AVX-LABEL: merge_16i16_i16_FEDCBA9876543210: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: movzwl 14(%eax), %ecx +; X86-AVX-NEXT: vmovd %ecx, %xmm0 +; X86-AVX-NEXT: vpinsrw $1, 12(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrw $2, 10(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrw $3, 8(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrw $4, 6(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrw $5, 4(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrw $6, 2(%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrw $7, (%eax), %xmm0, %xmm0 +; X86-AVX-NEXT: movzwl 30(%eax), %ecx +; X86-AVX-NEXT: vmovd %ecx, %xmm1 +; X86-AVX-NEXT: vpinsrw $1, 28(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrw $2, 26(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrw $3, 24(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrw $4, 22(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrw $5, 20(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrw $6, 18(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vpinsrw $7, 16(%eax), %xmm1, %xmm1 +; X86-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX-NEXT: retl + %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 15 + %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 14 + %ptr2 = getelementptr inbounds i16, ptr %ptr, i64 13 + %ptr3 = getelementptr inbounds i16, ptr %ptr, i64 12 + %ptr4 = getelementptr inbounds i16, ptr %ptr, i64 11 + %ptr5 = getelementptr inbounds i16, ptr %ptr, i64 10 + %ptr6 = 
getelementptr inbounds i16, ptr %ptr, i64 9 + %ptr7 = getelementptr inbounds i16, ptr %ptr, i64 8 + %ptr8 = getelementptr inbounds i16, ptr %ptr, i64 7 + %ptr9 = getelementptr inbounds i16, ptr %ptr, i64 6 + %ptrA = getelementptr inbounds i16, ptr %ptr, i64 5 + %ptrB = getelementptr inbounds i16, ptr %ptr, i64 4 + %ptrC = getelementptr inbounds i16, ptr %ptr, i64 3 + %ptrD = getelementptr inbounds i16, ptr %ptr, i64 2 + %ptrE = getelementptr inbounds i16, ptr %ptr, i64 1 + %ptrF = getelementptr inbounds i16, ptr %ptr, i64 0 + %val0 = load i16, ptr %ptr0 + %val1 = load i16, ptr %ptr1 + %val2 = load i16, ptr %ptr2 + %val3 = load i16, ptr %ptr3 + %val4 = load i16, ptr %ptr4 + %val5 = load i16, ptr %ptr5 + %val6 = load i16, ptr %ptr6 + %val7 = load i16, ptr %ptr7 + %val8 = load i16, ptr %ptr8 + %val9 = load i16, ptr %ptr9 + %valA = load i16, ptr %ptrA + %valB = load i16, ptr %ptrB + %valC = load i16, ptr %ptrC + %valD = load i16, ptr %ptrD + %valE = load i16, ptr %ptrE + %valF = load i16, ptr %ptrF + %res0 = insertelement <16 x i16> poison, i16 %val0, i64 0 + %res1 = insertelement <16 x i16> %res0, i16 %val1, i64 1 + %res2 = insertelement <16 x i16> %res1, i16 %val2, i64 2 + %res3 = insertelement <16 x i16> %res2, i16 %val3, i64 3 + %res4 = insertelement <16 x i16> %res3, i16 %val4, i64 4 + %res5 = insertelement <16 x i16> %res4, i16 %val5, i64 5 + %res6 = insertelement <16 x i16> %res5, i16 %val6, i64 6 + %res7 = insertelement <16 x i16> %res6, i16 %val7, i64 7 + %res8 = insertelement <16 x i16> %res7, i16 %val8, i64 8 + %res9 = insertelement <16 x i16> %res8, i16 %val9, i64 9 + %resA = insertelement <16 x i16> %res9, i16 %valA, i64 10 + %resB = insertelement <16 x i16> %resA, i16 %valB, i64 11 + %resC = insertelement <16 x i16> %resB, i16 %valC, i64 12 + %resD = insertelement <16 x i16> %resC, i16 %valD, i64 13 + %resE = insertelement <16 x i16> %resD, i16 %valE, i64 14 + %resF = insertelement <16 x i16> %resE, i16 %valF, i64 15 + ret <16 x i16> %resF +} + define <32 
x i8> @merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(ptr %ptr) nounwind uwtable noinline ssp { ; AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu: ; AVX: # %bb.0: diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll index 790bed4188efe..fabca0ea5007e 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -148,6 +148,64 @@ define <8 x double> @merge_8f64_f64_1u3u5zu8(ptr %ptr) nounwind uwtable noinline ret <8 x double> %res7 } +define <8 x double> @merge_8f64_f64_76543210(ptr %ptr) nounwind uwtable noinline ssp { +; ALL-LABEL: merge_8f64_f64_76543210: +; ALL: # %bb.0: +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; ALL-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: vmovhps {{.*#+}} xmm1 = xmm2[0,1],mem[0,1] +; ALL-NEXT: vmovhps {{.*#+}} xmm2 = xmm3[0,1],mem[0,1] +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; ALL-NEXT: retq +; +; X86-AVX512F-LABEL: merge_8f64_f64_76543210: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; X86-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1] +; X86-AVX512F-NEXT: 
vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; X86-AVX512F-NEXT: retl + %ptr0 = getelementptr inbounds double, ptr %ptr, i64 7 + %ptr1 = getelementptr inbounds double, ptr %ptr, i64 6 + %ptr2 = getelementptr inbounds double, ptr %ptr, i64 5 + %ptr3 = getelementptr inbounds double, ptr %ptr, i64 4 + %ptr4 = getelementptr inbounds double, ptr %ptr, i64 3 + %ptr5 = getelementptr inbounds double, ptr %ptr, i64 2 + %ptr6 = getelementptr inbounds double, ptr %ptr, i64 1 + %ptr7 = getelementptr inbounds double, ptr %ptr, i64 0 + %val0 = load double, ptr %ptr0 + %val1 = load double, ptr %ptr1 + %val2 = load double, ptr %ptr2 + %val3 = load double, ptr %ptr3 + %val4 = load double, ptr %ptr4 + %val5 = load double, ptr %ptr5 + %val6 = load double, ptr %ptr6 + %val7 = load double, ptr %ptr7 + %res0 = insertelement <8 x double> poison, double %val0, i64 0 + %res1 = insertelement <8 x double> %res0, double %val1, i64 1 + %res2 = insertelement <8 x double> %res1, double %val2, i64 2 + %res3 = insertelement <8 x double> %res2, double %val3, i64 3 + %res4 = insertelement <8 x double> %res3, double %val4, i64 4 + %res5 = insertelement <8 x double> %res4, double %val5, i64 5 + %res6 = insertelement <8 x double> %res5, double %val6, i64 6 + %res7 = insertelement <8 x double> %res6, double %val7, i64 7 + ret <8 x double> %res7 +} + define <8 x i64> @merge_8i64_4i64_z3(ptr %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8i64_4i64_z3: ; ALL: # %bb.0: @@ -227,6 +285,76 @@ define <8 x i64> @merge_8i64_i64_1u3u5zu8(ptr %ptr) nounwind uwtable noinline ss ret <8 x i64> %res7 } +define <8 x i64> @merge_8i64_i64_76543210(ptr %ptr) nounwind uwtable noinline ssp { +; ALL-LABEL: merge_8i64_i64_76543210: +; ALL: # %bb.0: +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd 
{{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; ALL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; ALL-NEXT: retq +; +; X86-AVX512F-LABEL: merge_8i64_i64_76543210: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 12(%eax), %xmm0, %xmm0 +; X86-AVX512F-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0 +; X86-AVX512F-NEXT: vpinsrd $3, 4(%eax), %xmm0, %xmm0 +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 28(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $2, 16(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $3, 20(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 44(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $2, 32(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $3, 36(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 60(%eax), %xmm2, %xmm2 +; X86-AVX512F-NEXT: vpinsrd $2, 48(%eax), %xmm2, %xmm2 +; X86-AVX512F-NEXT: vpinsrd $3, 52(%eax), %xmm2, %xmm2 +; X86-AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; X86-AVX512F-NEXT: retl + %ptr0 = getelementptr inbounds i64, ptr %ptr, i64 7 + %ptr1 = getelementptr inbounds i64, ptr %ptr, i64 6 + %ptr2 = getelementptr inbounds i64, ptr %ptr, i64 5 + %ptr3 = getelementptr inbounds i64, 
ptr %ptr, i64 4 + %ptr4 = getelementptr inbounds i64, ptr %ptr, i64 3 + %ptr5 = getelementptr inbounds i64, ptr %ptr, i64 2 + %ptr6 = getelementptr inbounds i64, ptr %ptr, i64 1 + %ptr7 = getelementptr inbounds i64, ptr %ptr, i64 0 + %val0 = load i64, ptr %ptr0 + %val1 = load i64, ptr %ptr1 + %val2 = load i64, ptr %ptr2 + %val3 = load i64, ptr %ptr3 + %val4 = load i64, ptr %ptr4 + %val5 = load i64, ptr %ptr5 + %val6 = load i64, ptr %ptr6 + %val7 = load i64, ptr %ptr7 + %res0 = insertelement <8 x i64> poison, i64 %val0, i64 0 + %res1 = insertelement <8 x i64> %res0, i64 %val1, i64 1 + %res2 = insertelement <8 x i64> %res1, i64 %val2, i64 2 + %res3 = insertelement <8 x i64> %res2, i64 %val3, i64 3 + %res4 = insertelement <8 x i64> %res3, i64 %val4, i64 4 + %res5 = insertelement <8 x i64> %res4, i64 %val5, i64 5 + %res6 = insertelement <8 x i64> %res5, i64 %val6, i64 6 + %res7 = insertelement <8 x i64> %res6, i64 %val7, i64 7 + ret <8 x i64> %res7 +} + define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz: ; ALL: # %bb.0: @@ -335,6 +463,104 @@ define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(ptr %ptr) nounwind uwtable ret <16 x float> %resF } +define <16 x float> @merge_16f32_f32_FEDCBA9876543210(ptr %ptr) nounwind uwtable noinline ssp { +; ALL-LABEL: merge_16f32_f32_FEDCBA9876543210: +; ALL: # %bb.0: +; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; ALL-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; ALL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = 
xmm1[0,1,2],mem[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],mem[0],xmm2[2,3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; ALL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],mem[0],xmm3[2,3] +; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] +; ALL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0] +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; ALL-NEXT: retq +; +; X86-AVX512F-LABEL: merge_16f32_f32_FEDCBA9876543210: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; X86-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3] +; X86-AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0] +; X86-AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; X86-AVX512F-NEXT: retl + %ptr0 
= getelementptr inbounds float, ptr %ptr, i64 15 + %ptr1 = getelementptr inbounds float, ptr %ptr, i64 14 + %ptr2 = getelementptr inbounds float, ptr %ptr, i64 13 + %ptr3 = getelementptr inbounds float, ptr %ptr, i64 12 + %ptr4 = getelementptr inbounds float, ptr %ptr, i64 11 + %ptr5 = getelementptr inbounds float, ptr %ptr, i64 10 + %ptr6 = getelementptr inbounds float, ptr %ptr, i64 9 + %ptr7 = getelementptr inbounds float, ptr %ptr, i64 8 + %ptr8 = getelementptr inbounds float, ptr %ptr, i64 7 + %ptr9 = getelementptr inbounds float, ptr %ptr, i64 6 + %ptrA = getelementptr inbounds float, ptr %ptr, i64 5 + %ptrB = getelementptr inbounds float, ptr %ptr, i64 4 + %ptrC = getelementptr inbounds float, ptr %ptr, i64 3 + %ptrD = getelementptr inbounds float, ptr %ptr, i64 2 + %ptrE = getelementptr inbounds float, ptr %ptr, i64 1 + %ptrF = getelementptr inbounds float, ptr %ptr, i64 0 + %val0 = load float, ptr %ptr0 + %val1 = load float, ptr %ptr1 + %val2 = load float, ptr %ptr2 + %val3 = load float, ptr %ptr3 + %val4 = load float, ptr %ptr4 + %val5 = load float, ptr %ptr5 + %val6 = load float, ptr %ptr6 + %val7 = load float, ptr %ptr7 + %val8 = load float, ptr %ptr8 + %val9 = load float, ptr %ptr9 + %valA = load float, ptr %ptrA + %valB = load float, ptr %ptrB + %valC = load float, ptr %ptrC + %valD = load float, ptr %ptrD + %valE = load float, ptr %ptrE + %valF = load float, ptr %ptrF + %res0 = insertelement <16 x float> poison, float %val0, i64 0 + %res1 = insertelement <16 x float> %res0, float %val1, i64 1 + %res2 = insertelement <16 x float> %res1, float %val2, i64 2 + %res3 = insertelement <16 x float> %res2, float %val3, i64 3 + %res4 = insertelement <16 x float> %res3, float %val4, i64 4 + %res5 = insertelement <16 x float> %res4, float %val5, i64 5 + %res6 = insertelement <16 x float> %res5, float %val6, i64 6 + %res7 = insertelement <16 x float> %res6, float %val7, i64 7 + %res8 = insertelement <16 x float> %res7, float %val8, i64 8 + %res9 = insertelement 
<16 x float> %res8, float %val9, i64 9 + %resA = insertelement <16 x float> %res9, float %valA, i64 10 + %resB = insertelement <16 x float> %resA, float %valB, i64 11 + %resC = insertelement <16 x float> %resB, float %valC, i64 12 + %resD = insertelement <16 x float> %resC, float %valD, i64 13 + %resE = insertelement <16 x float> %resD, float %valE, i64 14 + %resF = insertelement <16 x float> %resE, float %valF, i64 15 + ret <16 x float> %resF +} + define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz: ; ALL: # %bb.0: @@ -443,6 +669,104 @@ define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(ptr %ptr) nounwind uwtable n ret <16 x i32> %resF } +define <16 x i32> @merge_16i32_i32_FEDCBA9876543210(ptr %ptr) nounwind uwtable noinline ssp { +; ALL-LABEL: merge_16i32_i32_FEDCBA9876543210: +; ALL: # %bb.0: +; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ALL-NEXT: vpinsrd $1, 8(%rdi), %xmm0, %xmm0 +; ALL-NEXT: vpinsrd $2, 4(%rdi), %xmm0, %xmm0 +; ALL-NEXT: vpinsrd $3, (%rdi), %xmm0, %xmm0 +; ALL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vpinsrd $1, 24(%rdi), %xmm1, %xmm1 +; ALL-NEXT: vpinsrd $2, 20(%rdi), %xmm1, %xmm1 +; ALL-NEXT: vpinsrd $3, 16(%rdi), %xmm1, %xmm1 +; ALL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ALL-NEXT: vpinsrd $1, 40(%rdi), %xmm1, %xmm1 +; ALL-NEXT: vpinsrd $2, 36(%rdi), %xmm1, %xmm1 +; ALL-NEXT: vpinsrd $3, 32(%rdi), %xmm1, %xmm1 +; ALL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; ALL-NEXT: vpinsrd $1, 56(%rdi), %xmm2, %xmm2 +; ALL-NEXT: vpinsrd $2, 52(%rdi), %xmm2, %xmm2 +; ALL-NEXT: vpinsrd $3, 48(%rdi), %xmm2, %xmm2 +; ALL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; ALL-NEXT: retq +; +; X86-AVX512F-LABEL: merge_16i32_i32_FEDCBA9876543210: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax +; 
X86-AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 8(%eax), %xmm0, %xmm0 +; X86-AVX512F-NEXT: vpinsrd $2, 4(%eax), %xmm0, %xmm0 +; X86-AVX512F-NEXT: vpinsrd $3, (%eax), %xmm0, %xmm0 +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 24(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $2, 20(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $3, 16(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 40(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $2, 36(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vpinsrd $3, 32(%eax), %xmm1, %xmm1 +; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-AVX512F-NEXT: vpinsrd $1, 56(%eax), %xmm2, %xmm2 +; X86-AVX512F-NEXT: vpinsrd $2, 52(%eax), %xmm2, %xmm2 +; X86-AVX512F-NEXT: vpinsrd $3, 48(%eax), %xmm2, %xmm2 +; X86-AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; X86-AVX512F-NEXT: retl + %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 15 + %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 14 + %ptr2 = getelementptr inbounds i32, ptr %ptr, i64 13 + %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 12 + %ptr4 = getelementptr inbounds i32, ptr %ptr, i64 11 + %ptr5 = getelementptr inbounds i32, ptr %ptr, i64 10 + %ptr6 = getelementptr inbounds i32, ptr %ptr, i64 9 + %ptr7 = getelementptr inbounds i32, ptr %ptr, i64 8 + %ptr8 = getelementptr inbounds i32, ptr %ptr, i64 7 + %ptr9 = getelementptr inbounds i32, ptr %ptr, i64 6 + %ptrA = getelementptr inbounds i32, ptr %ptr, i64 5 + %ptrB = getelementptr inbounds i32, ptr %ptr, i64 4 + %ptrC = getelementptr inbounds i32, ptr %ptr, i64 3 + %ptrD = getelementptr inbounds i32, ptr %ptr, i64 2 + %ptrE = getelementptr inbounds i32, ptr %ptr, i64 1 + %ptrF = getelementptr inbounds i32, ptr %ptr, i64 0 + 
%val0 = load i32, ptr %ptr0 + %val1 = load i32, ptr %ptr1 + %val2 = load i32, ptr %ptr2 + %val3 = load i32, ptr %ptr3 + %val4 = load i32, ptr %ptr4 + %val5 = load i32, ptr %ptr5 + %val6 = load i32, ptr %ptr6 + %val7 = load i32, ptr %ptr7 + %val8 = load i32, ptr %ptr8 + %val9 = load i32, ptr %ptr9 + %valA = load i32, ptr %ptrA + %valB = load i32, ptr %ptrB + %valC = load i32, ptr %ptrC + %valD = load i32, ptr %ptrD + %valE = load i32, ptr %ptrE + %valF = load i32, ptr %ptrF + %res0 = insertelement <16 x i32> poison, i32 %val0, i64 0 + %res1 = insertelement <16 x i32> %res0, i32 %val1, i64 1 + %res2 = insertelement <16 x i32> %res1, i32 %val2, i64 2 + %res3 = insertelement <16 x i32> %res2, i32 %val3, i64 3 + %res4 = insertelement <16 x i32> %res3, i32 %val4, i64 4 + %res5 = insertelement <16 x i32> %res4, i32 %val5, i64 5 + %res6 = insertelement <16 x i32> %res5, i32 %val6, i64 6 + %res7 = insertelement <16 x i32> %res6, i32 %val7, i64 7 + %res8 = insertelement <16 x i32> %res7, i32 %val8, i64 8 + %res9 = insertelement <16 x i32> %res8, i32 %val9, i64 9 + %resA = insertelement <16 x i32> %res9, i32 %valA, i64 10 + %resB = insertelement <16 x i32> %resA, i32 %valB, i64 11 + %resC = insertelement <16 x i32> %resB, i32 %valC, i64 12 + %resD = insertelement <16 x i32> %resC, i32 %valD, i64 13 + %resE = insertelement <16 x i32> %resD, i32 %valE, i64 14 + %resF = insertelement <16 x i32> %resE, i32 %valF, i64 15 + ret <16 x i32> %resF +} + define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(ptr %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz: ; ALL: # %bb.0: From 5407e62611abfbb359f595d89d9f29adf647be02 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 18 Nov 2025 19:15:37 +0100 Subject: [PATCH 20/57] Revert "[MLIR][NVVM] Add tcgen05.mma MLIR Ops" (#168583) Reverts llvm/llvm-project#164356 The bots are broken. 
--- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 545 --------------- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 612 ----------------- .../nvvm/tcgen05-mma-block-scale-shared.mlir | 229 ------- .../nvvm/tcgen05-mma-block-scale-tensor.mlir | 229 ------- .../LLVMIR/nvvm/tcgen05-mma-invalid.mlir | 119 ---- .../LLVMIR/nvvm/tcgen05-mma-shared.mlir | 442 ------------ .../tcgen05-mma-sp-block-scale-shared.mlir | 229 ------- .../tcgen05-mma-sp-block-scale-tensor.mlir | 229 ------- .../LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir | 442 ------------ .../LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir | 634 ------------------ .../LLVMIR/nvvm/tcgen05-mma-tensor.mlir | 633 ----------------- .../LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir | 133 ---- .../LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir | 133 ---- .../LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir | 133 ---- .../LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir | 133 ---- 15 files changed, 4875 deletions(-) delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir delete mode 100644 mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir diff --git 
a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 524b9f820f290..8d5bc7333d47f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -4598,551 +4598,6 @@ def NVVM_ClusterLaunchControlQueryCancelOp }]; } -//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma Ops -//===----------------------------------------------------------------------===// - -def Tcgen05MMAKindF16 : I32EnumAttrCase<"F16", 0, "f16">; -def Tcgen05MMAKindTF32 : I32EnumAttrCase<"TF32", 1, "tf32">; -def Tcgen05MMAKindF8F6F4 : I32EnumAttrCase<"F8F6F4", 2, "f8f6f4">; -def Tcgen05MMAKindINT8 : I32EnumAttrCase<"I8", 3, "i8">; - -def Tcgen05MMAKind : I32EnumAttr< - "Tcgen05MMAKind", - "tcgen05 MMA Supported Types", - [Tcgen05MMAKindF8F6F4, Tcgen05MMAKindINT8, Tcgen05MMAKindF16, - Tcgen05MMAKindTF32]> { - let cppNamespace = "::mlir::NVVM"; - let genSpecializedAttr = 0; -} - -def Tcgen05MMAKindAttr : EnumAttr { - let description = [{ - The Tcgen05MMAKind attribute describes the allowed set of types for matrix A and B in the tcgen05.mma.{sp} Op. 
The following are supported types for each kind: - - ``` - +-------------+--------------------------------------------+ - | Matrix Kind | supported types for A / B | - +-------------+--------------------------------------------+ - | f16 | f16, bf16 | - | tf32 | tf32 | - | f8f6f4 | e4m3, e5m2, e2m3, e3m2, e2m1 | - | i8 | unsigned 8b, signed 8b | - +-------------+--------------------------------------------+ - ``` - }]; - let assemblyFormat = "`<` $value `>`"; -} - -def Tcgen05MMACollectorOpDiscard : I32EnumAttrCase<"DISCARD", 0, "discard">; -def Tcgen05MMACollectorOpLastUse : I32EnumAttrCase<"LASTUSE", 1, "lastuse">; -def Tcgen05MMACollectorOpFill : I32EnumAttrCase<"FILL", 2, "fill">; -def Tcgen05MMACollectorOpUse : I32EnumAttrCase<"USE", 3, "use">; - -def Tcgen05MMACollectorOp : I32EnumAttr< - "Tcgen05MMACollectorOp", - "tcgen05.mma Collector Buffer Operation", - [Tcgen05MMACollectorOpDiscard, - Tcgen05MMACollectorOpLastUse, - Tcgen05MMACollectorOpFill, - Tcgen05MMACollectorOpUse]> { - let cppNamespace = "::mlir::NVVM"; - let genSpecializedAttr = 0; -} - -def Tcgen05MMACollectorOpAttr : EnumAttr { - let description = [{ - Tcgen05MMACollectorOp attribute specifies the collector buffer operations. - The following are the supported operations: - * discard : Release buffer after use (default) - * lastuse : Mark buffer for last use - * fill : Fill buffer - * use : Use buffer without modification - }]; - let assemblyFormat = "`<` $value `>`"; -} - -def NVVM_Tcgen05MMAOp : NVVM_Op<"tcgen05.mma", - [AttrSizedOperandSegments, - NVVMRequiresSMa<[100, 110]>]> { - let summary = "Performs MMA operation on 5th-gen tensor cores"; - - let description = [{ - The `tcgen05.mma` operation is an asynchronous tensor core instruction that - performs matrix multiplication, accumulation in a single fused operation. It - targets 5th-generation tensor cores, providing developers with fine-grained - control over execution and scheduling. 
- - ``` - D = A * B + (D * 2^ -scaleInputD) // if `scaleInputD` is provided - D = A * B // if `enableInputD` is false - D = A * B + D // otherwise - ``` - - where: - - A is an `M x K` matrix in tensor memory or described using shared memory descriptor - - B is a `K x N` matrix described using shared memory descriptor - - D is an `M x N` accumulator matrix in tensor memory - - The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op - - - idesc is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor) - - Optional Operands: - - `scaleInputD` is an Immediate value operand used for scaling D matrix by 2 ^ (-scaleInputD). The valid range is [0, 15] - - - `disableOutputLane` is a vector mask for selective output - * vector<4 x i32> when ctaGroup is CTA_1 - * vector<8 x i32> when ctaGroup is CTA_2 - - Required Attributes: - - `kind` is a Tcgen05MMAKind attribute - - - `ctaGroup` specifies CTA group configuration - * cta_1: MMA will be performed on the current thread's CTA - * cta_2: MMA will be performed on the current thread and it's peer CTA - - Default Attributes: - - collectorOp is a Tcgen05MMACollectorOp attribute with matrix A as the collector buffer - - - `aShift` shifts the rows of the A matrix down by one row and can only be - applied if A is in tensor memory - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma) - }]; - - let arguments = (ins - Tcgen05MMAKindAttr:$kind, - CTAGroupKindAttr:$ctaGroup, - DefaultValuedAttr:$collectorOp, - UnitAttr:$aShift, - LLVM_PointerTensor:$matrixD, - AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA, - I64:$matrixB, - I32:$idesc, - I1:$enableInputD, - Optional:$scaleInputD, - Optional>:$disableOutputLane - ); - - let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD (`scale` `=` $scaleInputD^)? 
- (`mask` `=` $disableOutputLane^)? attr-dict `:` `(` type(operands) `)` - }]; - - let hasVerifier = true; - - let extraClassDeclaration = [{ - static mlir::NVVM::IDArgPair getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder); - }]; - - let llvmBuilder = [{ - auto [ID, args] = NVVM::Tcgen05MMAOp::getIntrinsicIDAndArgs( - *op, moduleTranslation, builder); - createIntrinsicCall(builder, ID, args); - }]; -} - -def NVVM_Tcgen05MMASparseOp : NVVM_Op<"tcgen05.mma.sp", - [AttrSizedOperandSegments, - NVVMRequiresSMa<[100, 110]>]> { - let summary = "Performs MMA operation with sparse A matrix on 5th-gen tensor cores"; - - let description = [{ - The `tcgen05.mma.sp` operation is an asynchronous tensor core instruction - that performs matrix multiplication, accumulation with sparse `A` matrix in - a single fused operation. It targets 5th-generation tensor cores, providing - developers with fine-grained control over execution and scheduling. - - ``` - D = A * B + (D * 2^ -scaleInputD) // if `scaleInputD` is provided - D = A * B // if `enableInputD` is false - D = A * B + D // otherwise - ``` - - where: - - A is an `M x (K / 2)` matrix in tensor memory or described using shared memory descriptor - - B is a `K x N` matrix described using shared memory descriptor - - D is an `M x N` accumulator matrix in tensor memory - - sparseMetadata located in tensor memory specifies the mapping of the `K / 2` - non-zero elements to the K elements before performing the MMA operation - - Other attributes and operands are similar to that of tcgen05.mma Op - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-sp) - }]; - - let arguments = (ins - Tcgen05MMAKindAttr:$kind, - CTAGroupKindAttr:$ctaGroup, - DefaultValuedAttr:$collectorOp, - UnitAttr:$aShift, - LLVM_PointerTensor:$matrixD, - AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA, - I64:$matrixB, - I32:$idesc, - 
I1:$enableInputD, - LLVM_PointerTensor:$sparseMetadata, - Optional:$scaleInputD, - Optional>:$disableOutputLane - ); - - let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`scale` `=` $scaleInputD^)? (`mask` `=` $disableOutputLane^)? attr-dict `:` `(` type(operands) `)` - }]; - - let hasVerifier = true; - - let extraClassDeclaration = [{ - static mlir::NVVM::IDArgPair getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder); - }]; - - let llvmBuilder = [{ - auto [ID, args] = NVVM::Tcgen05MMASparseOp::getIntrinsicIDAndArgs( - *op, moduleTranslation, builder); - createIntrinsicCall(builder, ID, args); - }]; -} - -def Tcgen05MMAKindMXF8F6F4 : I32EnumAttrCase<"MXF8F6F4", 0, "mxf8f6f4">; -def Tcgen05MMAKindMXF4 : I32EnumAttrCase<"MXF4", 1, "mxf4">; -def Tcgen05MMAKindMXF4NVF4 : I32EnumAttrCase<"MXF4NVF4", 2, "mxf4nvf4">; - -def Tcgen05MMABlockScaleKind : I32EnumAttr< - "Tcgen05MMABlockScaleKind", - "tcgen05.mma.block_scale supported types", - [Tcgen05MMAKindMXF8F6F4, Tcgen05MMAKindMXF4, Tcgen05MMAKindMXF4NVF4]> { - let cppNamespace = "::mlir::NVVM"; - let genSpecializedAttr = 0; -} - -def Tcgen05MMABlockScaleKindAttr : EnumAttr { - let description = [{ - The Tcgen05MMABlockScaleKind attribute describes the allowed set of types for matrix A and B in the tcgen05.mma.{sp}.block_scale Op. 
The following are supported types for each kind: - - ``` - +--------------+-------------------------------------------+ - | Matrix Kind | supported types for A / B | - +--------------+-------------------------------------------+ - | mxf8f6f4 | e4m3, e5m3, e2m3, e3m2, e2m1 | - | mxf4 | e2m1 | - | mxf4nvf4 | e2m1 | - +--------------+-------------------------------------------+ - ``` - }]; - let assemblyFormat = "`<` $value `>`"; -} - -def Tcgen05MMABlockScaleDefault : I32EnumAttrCase<"DEFAULT", 0, "default">; -def Tcgen05MMABlockScaleBlock16 : I32EnumAttrCase<"BLOCK16", 1, "block16">; -def Tcgen05MMABlockScaleBlock32 : I32EnumAttrCase<"BLOCK32", 2, "block32">; - -def Tcgen05MMABlockScale - : I32EnumAttr<"Tcgen05MMABlockScale", - "tcgen05.mma block scale attribute", - [Tcgen05MMABlockScaleDefault, Tcgen05MMABlockScaleBlock16, - Tcgen05MMABlockScaleBlock32]> { - let cppNamespace = "::mlir::NVVM"; - let genSpecializedAttr = 0; -} - -def Tcgen05MMABlockScaleAttr : EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - -def NVVM_Tcgen05MMABlockScaleOp : NVVM_Op<"tcgen05.mma.block_scale", - [NVVMRequiresSMa<[100, 110]>]> { - let summary = "Performs block scaled MMA operation on 5th-gen tensor cores"; - - let description = [{ - The `tcgen05.mma.block_scale` operation is an asynchronous tensor core instruction - that performs matrix multiplication, accumulation with block scaling in a - single fused operation. It targets 5th-generation tensor cores, providing - developers with fine-grained control over execution and scheduling. 
- - ``` - D = (A * scale_a) * (B * scale_b)` // if `enableInputD` is false - D = (A * scale_a) * (B * scale_b) + D` - ``` - - where: - - A is an M x (K / 2) matrix in tensor memory or described using shared memory descriptor - - B is a K x N matrix described using shared memory descriptor - - D is an M x N accumulator matrix in tensor memory - - `scale_a` and `scale_b` are matrices in tensor memory used to scale `A` and `B` respectively - - The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op - - - `idesc` is a 32 bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor) - - Required Attributes: - - `kind` is a Tcgen05MMABlockScaleKind attribute - - - `ctaGroup` specifies CTA group configuration - * cta_1: MMA will be performed on the current thread's CTA - * cta_2: MMA will be performed on the current thread and it's peer CTA - - Default Attributes: - - collectorOp is a Tcgen05MMACollectorOp attribute with matrix A as the collector buffer - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma) - }]; - - let arguments = (ins - Tcgen05MMABlockScaleKindAttr:$kind, - CTAGroupKindAttr:$ctaGroup, - DefaultValuedAttr:$blockScale, - DefaultValuedAttr:$collectorOp, - LLVM_PointerTensor:$matrixD, - AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA, - I64:$matrixB, - I32:$idesc, I1:$enableInputD, - LLVM_PointerTensor:$scaleA, - LLVM_PointerTensor:$scaleB - ); - - let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $scaleA `,` $scaleB - attr-dict `:` `(` type(operands) `)` - }]; - - let hasVerifier = true; - - let extraClassDeclaration = [{ - static mlir::NVVM::IDArgPair - getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder); - }]; - - let llvmBuilder = [{ - auto [ID, args] = 
NVVM::Tcgen05MMABlockScaleOp::getIntrinsicIDAndArgs( - *op, moduleTranslation, builder); - createIntrinsicCall(builder, ID, args); - }]; -} - -def NVVM_Tcgen05MMASparseBlockScaleOp : NVVM_Op<"tcgen05.mma.sp.block_scale", - [NVVMRequiresSMa<[100, 110]>]> { - let summary = "Performs block scaled MMA operation with sparse A matrix on 5th-gen tensor cores"; - - let description = [{ - The `tcgen05.mma.sp.block_scale` operation is an asynchronous tensor core - instruction that performs matrix multiplication, accumulation with block - scaling, and sparse `A` matrix in a single fused operation. It targets - 5th-generation tensor cores, providing developers with fine-grained control - over execution, and scheduling. - - ``` - D = (A * scale_a) * (B * scale_b) // if `enableInputD` is specified - D = (A * scale_a) * (B * scale_b) + D // otherwise - ``` - - where: - - A is an M x (K / 2) matrix in tensor memory or described using shared memory descriptor - - B is a K x N matrix described using shared memory descriptor - - D is an M x N accumulator matrix in tensor memory - - `scale_a` and `scale_b` are matrices in tensor memory used to scale `A` and `B` respectively - - Other attributes and operands are similar to that of tcgen05.mma.block_scale Op - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-sp) - }]; - - let arguments = (ins - Tcgen05MMABlockScaleKindAttr:$kind, - CTAGroupKindAttr:$ctaGroup, - DefaultValuedAttr:$blockScale, - DefaultValuedAttr:$collectorOp, - LLVM_PointerTensor:$matrixD, - AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA, - I64:$matrixB, - I32:$idesc, - I1:$enableInputD, - LLVM_PointerTensor:$sparseMetadata, - LLVM_PointerTensor:$scaleA, - LLVM_PointerTensor:$scaleB - ); - - let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata `,` $scaleA `,` $scaleB - attr-dict `:` `(` type(operands) `)` - }]; - - let hasVerifier = true; - - 
let extraClassDeclaration = [{ - static mlir::NVVM::IDArgPair - getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder); - }]; - - let llvmBuilder = [{ - auto [ID, args] = NVVM::Tcgen05MMASparseBlockScaleOp::getIntrinsicIDAndArgs( - *op, moduleTranslation, builder); - createIntrinsicCall(builder, ID, args); - }]; -} - -def Tcgen05MMACollectorBBuffer0 : I32EnumAttrCase<"B0", 0, "b0">; -def Tcgen05MMACollectorBBuffer1 : I32EnumAttrCase<"B1", 1, "b1">; -def Tcgen05MMACollectorBBuffer2 : I32EnumAttrCase<"B2", 2, "b2">; -def Tcgen05MMACollectorBBuffer3 : I32EnumAttrCase<"B3", 3, "b3">; - -def Tcgen05MMACollectorBBuffer : I32EnumAttr< - "Tcgen05MMACollectorBBuffer", - "tcgen05 MMA Collector Buffer B Attribute", - [Tcgen05MMACollectorBBuffer0, Tcgen05MMACollectorBBuffer1, Tcgen05MMACollectorBBuffer2, - Tcgen05MMACollectorBBuffer3]> { - let cppNamespace = "::mlir::NVVM"; - let genSpecializedAttr = 0; -} - -def Tcgen05MMACollectorBBufferAttr : EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - -def NVVM_Tcgen05MMAWsOp : NVVM_Op<"tcgen05.mma.ws", - [NVVMRequiresSMa<[100, 110]>]> { - let summary = "Performs weight stationary convolution MMA operation on 5th-gen tensor cores"; - - let description = [{ - The `tcgen05.mma.ws` operation is an asynchronous tensor core instruction - that performs weight stationary convolution matrix multiplication, accumulation - in a single fused operation. It targets 5th-generation tensor cores, providing - developers with fine-grained control over execution, and scheduling. 
- - ``` - D = A * B` // if `enableInputD` is false - D = A * B + D` // otherwise - ``` - - where: - - A is an `M x K` matrix in tensor memory or described using shared memory descriptor - - B is a `K x N` matrix described using shared memory descriptor - - D is an `M x N` accumulator matrix in tensor memory - - The `shared memory descriptor` can be generated using `tcgen05.mma_smem_desc` Op - - - idesc is a 32-bit value representing the [Instruction Descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instruction-descriptor) - - Optional Operands: - - zeroColMask is a 64 bit value representing the [Zero-column mask descriptor](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-zero-column-mask-descriptor) - - Required Attributes: - - `kind` is a Tcgen05MMAKind attribute - - Default Valued Attributes: - - collectorBBuffer specifies collector buffer for matrix B: b0 (default), b1, b2, b3 - - - collectorOp is a Tcgen05MMACollectorOp attribute with matrix B as the collector buffer - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-ws) - }]; - - let arguments = (ins - Tcgen05MMAKindAttr:$kind, - DefaultValuedAttr:$collectorBBuffer, - DefaultValuedAttr:$collectorOp, - LLVM_PointerTensor:$matrixD, - AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA, - I64:$matrixB, - I32:$idesc, - I1:$enableInputD, - Optional:$zeroColMask - ); - - let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD (`,` $zeroColMask^)? 
- attr-dict `:` `(` type(operands) `)` - }]; - - let extraClassDeclaration = [{ - static mlir::NVVM::IDArgPair getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder); - }]; - - let llvmBuilder = [{ - auto [ID, args] = - NVVM::Tcgen05MMAWsOp::getIntrinsicIDAndArgs(*op, moduleTranslation, builder); - createIntrinsicCall(builder, ID, args); - }]; -} - -def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp", - [NVVMRequiresSMa<[100, 110]>]> { - let summary = "Performs weight stationary convolution MMA with sparse A matrix on 5th-gen tensor cores"; - - let description = [{ - The `tcgen05.mma.ws.sp` operation is an asynchronous tensor core instruction - that performs weight stationary convolution matrix multiplication, accumulation - with sparse `A` matrix in a single fused operation. It targets 5th-generation - tensor cores, providing developers with fine-grained control over execution, - and scheduling. - - ``` - D = A * B` // if `enableInputD` is false - D = A * B + D` // otherwise - ``` - - where: - - A is an M x (K / 2) matrix in memory or descriptor format - - B is a K x N matrix - - D is an M x N accumulator matrix - - sparseMetadata located in tensor memory specifies the mapping of the `K / 2` - non-zero elements to the K elements before performing the MMA operation - - Other attributes and operands are similar to that of tcgen05.mma.ws Op - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-mma-instructions-mma-ws-sp) - }]; - - let arguments = (ins - Tcgen05MMAKindAttr:$kind, - DefaultValuedAttr:$collectorBBuffer, - DefaultValuedAttr:$collectorOp, - LLVM_PointerTensor:$matrixD, - AnyTypeOf<[LLVM_PointerTensor, I64]>:$matrixA, - I64:$matrixB, - I32:$idesc, - I1:$enableInputD, - LLVM_PointerTensor:$sparseMetadata, - Optional:$zeroColMask - ); - - let assemblyFormat = [{ - $matrixD `,` $matrixA `,` $matrixB `,` $idesc `,` $enableInputD `,` $sparseMetadata (`,` 
$zeroColMask^)? attr-dict `:` `(` type(operands) `)` - }]; - - let extraClassDeclaration = [{ - static mlir::NVVM::IDArgPair - getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder); - }]; - - let llvmBuilder = [{ - auto [ID, args] = NVVM::Tcgen05MMAWsSparseOp::getIntrinsicIDAndArgs( - *op, moduleTranslation, builder); - createIntrinsicCall(builder, ID, args); - }]; -} - //===----------------------------------------------------------------------===// // NVVM target attribute. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 369305b40c689..7ac427dbe3941 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -31,7 +31,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/NVVMIntrinsicUtils.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/NVPTXAddrSpace.h" @@ -61,18 +60,6 @@ static bool isPtrInSharedCTASpace(mlir::Value ptr) { return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared); } -// Helper method to convert CtaGroupKind in NVVM Dialect to CtaGroupKind in LLVM -static constexpr llvm::nvvm::CTAGroupKind -getNVVMCtaGroupKind(NVVM::CTAGroupKind ctaGroup) { - switch (ctaGroup) { - case NVVM::CTAGroupKind::CTA_1: - return llvm::nvvm::CTAGroupKind::CG_1; - case NVVM::CTAGroupKind::CTA_2: - return llvm::nvvm::CTAGroupKind::CG_2; - } - llvm_unreachable("unsupported cta_group value"); -} - //===----------------------------------------------------------------------===// // Verifier methods //===----------------------------------------------------------------------===// @@ -3104,605 +3091,6 @@ NVVM::IDArgPair ClusterLaunchControlQueryCancelOp::getIntrinsicIDAndArgs( return {intrinsicID, args}; } 
-//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma functions -//===----------------------------------------------------------------------===// - -mlir::NVVM::IDArgPair -Tcgen05MMAOp::getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, - llvm::IRBuilderBase &builder) { - - auto thisOp = cast(op); - llvm::SmallVector args; - - args.push_back(mt.lookupValue(thisOp.getMatrixD())); - - llvm::Value *A = mt.lookupValue(thisOp.getMatrixA()); - const bool isATensor = isa(A->getType()); - args.push_back(A); - - args.push_back(mt.lookupValue(thisOp.getMatrixB())); - args.push_back(mt.lookupValue(thisOp.getIdesc())); - args.push_back(mt.lookupValue(thisOp.getEnableInputD())); - - using EnableAShiftArray = std::array; - using CtaGroupArray = std::array; - using IsATensorArray = std::array; - using HasScaleInputDArray = std::array; - using HasDisableOutputLaneArray = std::array; - - // [hasDisableOutputLane][hasScaleInputD][isATensor][CtaGroup][EnableAShift] - static constexpr HasDisableOutputLaneArray tcgen05MMAIDs = { - { // without diable output lane - {{// without scale input D - {{ - // shared - {{// cg1 - {llvm::Intrinsic::nvvm_tcgen05_mma_shared, notIntrinsic}, - // cg2 - {llvm::Intrinsic::nvvm_tcgen05_mma_shared, notIntrinsic}}}, - {{// tensor - { - // cg1 - llvm::Intrinsic::nvvm_tcgen05_mma_tensor, - llvm::Intrinsic::nvvm_tcgen05_mma_tensor_ashift, - }, - { - // cg2 - llvm::Intrinsic::nvvm_tcgen05_mma_tensor, - llvm::Intrinsic::nvvm_tcgen05_mma_tensor_ashift, - }}}, - }}, - // with scale input D - {{ // shared - {{// cg1 - {llvm::Intrinsic::nvvm_tcgen05_mma_shared_scale_d, notIntrinsic}, - // cg2 - {llvm::Intrinsic::nvvm_tcgen05_mma_shared_scale_d, notIntrinsic}}}, - {{// tensor - { - // cg1 - llvm::Intrinsic::nvvm_tcgen05_mma_tensor_scale_d, - llvm::Intrinsic::nvvm_tcgen05_mma_tensor_scale_d_ashift, - }, - { - // cg2 - llvm::Intrinsic::nvvm_tcgen05_mma_tensor_scale_d, - 
llvm::Intrinsic::nvvm_tcgen05_mma_tensor_scale_d_ashift, - }}}}}}}, - // with disable output lane - {{ // without scale input D - {{ // shared - {{// cg1 - {llvm::Intrinsic::nvvm_tcgen05_mma_shared_disable_output_lane_cg1, - notIntrinsic}, - // cg2 - {llvm::Intrinsic::nvvm_tcgen05_mma_shared_disable_output_lane_cg2, - notIntrinsic}}}, - {{// cg1 - { - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_disable_output_lane_cg1, - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_disable_output_lane_cg1_ashift, - }, - // cg2 - { - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_disable_output_lane_cg2, - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_disable_output_lane_cg2_ashift, - }}}}}, - // with scale input D - {{ // shared - {{// cg1 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_scale_d_disable_output_lane_cg1, - notIntrinsic}, - // cg2 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_scale_d_disable_output_lane_cg2, - notIntrinsic}}}, - // tensor - {{// cg1 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_scale_d_disable_output_lane_cg1, - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_scale_d_disable_output_lane_cg1_ashift}, - // cg2 - { - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_scale_d_disable_output_lane_cg2, - llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_scale_d_disable_output_lane_cg2_ashift, - }}}}}}}}}; - - llvm::Value *ScaleInputD = mt.lookupValue(thisOp.getScaleInputD()); - bool hasScaleInputD = ScaleInputD != nullptr; - - llvm::Value *DisableOutputLane = - mt.lookupValue(thisOp.getDisableOutputLane()); - bool hasDisableOutputLane = DisableOutputLane != nullptr; - - const unsigned ctaGroup = - static_cast(getNVVMCtaGroupKind(thisOp.getCtaGroup())); - - llvm::Intrinsic::ID ID = - tcgen05MMAIDs[hasDisableOutputLane][hasScaleInputD][isATensor] - [ctaGroup - 1][thisOp.getAShift()]; - - assert(ID != notIntrinsic && "Invalid intrinsic for Tcgen05MMAOp."); - - if (hasScaleInputD) - args.push_back(ScaleInputD); - - if (hasDisableOutputLane) - args.push_back(DisableOutputLane); - - 
args.push_back(builder.getInt32(static_cast(thisOp.getKind()))); - - if (!hasDisableOutputLane) - args.push_back(builder.getInt32(ctaGroup)); - - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorOp()))); - - return {ID, args}; -} - -static LogicalResult -verifyTcgen05MMAOp(bool isATensor, mlir::Value disableOutputLane, - NVVM::CTAGroupKind ctaGroup, bool hasAShift, - NVVM::Tcgen05MMACollectorOp collectorOp, Location loc) { - - if (disableOutputLane) { - mlir::VectorType disableOutputLaneType = - cast(disableOutputLane.getType()); - if ((ctaGroup == NVVM::CTAGroupKind::CTA_1 && - disableOutputLaneType.getNumElements() != 4) || - (ctaGroup == NVVM::CTAGroupKind::CTA_2 && - disableOutputLaneType.getNumElements() != 8)) - return emitError(loc) << "Disable Output Lane of length " - << disableOutputLaneType.getNumElements() - << " is incompatible with CtaGroupAttr"; - } - - if (hasAShift && !isATensor) - return emitError( - loc, "A-shift can be applied only when matrix A is in tensor memory"); - - if (hasAShift == true && (collectorOp == Tcgen05MMACollectorOp::FILL || - collectorOp == Tcgen05MMACollectorOp::USE)) - return emitError( - loc, "Cannot use collector buffer operation fill or use with ashift"); - - return success(); -} - -LogicalResult Tcgen05MMAOp::verify() { - return verifyTcgen05MMAOp(isa(getMatrixA().getType()), - getDisableOutputLane(), getCtaGroup(), getAShift(), - getCollectorOp(), getLoc()); -} - -//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma.sp functions -//===----------------------------------------------------------------------===// - -mlir::NVVM::IDArgPair Tcgen05MMASparseOp::getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { - - auto thisOp = cast(op); - llvm::SmallVector args; - - args.push_back(mt.lookupValue(thisOp.getMatrixD())); - - llvm::Value *A = mt.lookupValue(thisOp.getMatrixA()); - bool isATensor = isa(A->getType()); 
- args.push_back(A); - - args.push_back(mt.lookupValue(thisOp.getMatrixB())); - args.push_back(mt.lookupValue(thisOp.getIdesc())); - args.push_back(mt.lookupValue(thisOp.getEnableInputD())); - args.push_back(mt.lookupValue(thisOp.getSparseMetadata())); - - using EnableAShiftArray = std::array; - using CtaGroupArray = std::array; - using IsATensorArray = std::array; - using HasScaleInputDArray = std::array; - using HasDisableOutputLaneArray = std::array; - - // [hasDisableOutputLane][hasScaleInputD][isATensor][CtaGroup][EnableAShift] - static constexpr HasDisableOutputLaneArray tcgen05MMASparseIDs = { - { // without diable output lane - {{// without scale input D - {{ - // shared - {{// cg1 - {llvm::Intrinsic::nvvm_tcgen05_mma_sp_shared, notIntrinsic}, - // cg2 - {llvm::Intrinsic::nvvm_tcgen05_mma_sp_shared, notIntrinsic}}}, - {{// tensor - { - // cg1 - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor, - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor_ashift, - }, - { - // cg2 - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor, - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor_ashift, - }}}, - }}, - // with scale input D - {{ // shared - {{// cg1 - {llvm::Intrinsic::nvvm_tcgen05_mma_sp_shared_scale_d, - notIntrinsic}, - // cg2 - {llvm::Intrinsic::nvvm_tcgen05_mma_sp_shared_scale_d, - notIntrinsic}}}, - {{// tensor - { - // cg1 - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor_scale_d, - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor_scale_d_ashift, - }, - { - // cg2 - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor_scale_d, - llvm::Intrinsic::nvvm_tcgen05_mma_sp_tensor_scale_d_ashift, - }}}}}}}, - // with disable output lane - {{ // without scale input D - {{ // shared - {{// cg1 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_disable_output_lane_cg1, - notIntrinsic}, - // cg2 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_disable_output_lane_cg2, - notIntrinsic}}}, - {{// cg1 - { - llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_disable_output_lane_cg1, - llvm::Intrinsic:: - 
nvvm_tcgen05_mma_sp_tensor_disable_output_lane_cg1_ashift, - }, - // cg2 - { - llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_disable_output_lane_cg2, - llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_disable_output_lane_cg2_ashift, - }}}}}, - // with scale input D - {{ // shared - {{// cg1 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_scale_d_disable_output_lane_cg1, - notIntrinsic}, - // cg2 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_scale_d_disable_output_lane_cg2, - notIntrinsic}}}, - // tensor - {{// cg1 - {llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_scale_d_disable_output_lane_cg1, - llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_scale_d_disable_output_lane_cg1_ashift}, - // cg2 - { - llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_scale_d_disable_output_lane_cg2, - llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_scale_d_disable_output_lane_cg2_ashift, - }}}}}}}}}; - - llvm::Value *ScaleInputD = mt.lookupValue(thisOp.getScaleInputD()); - bool hasScaleInputD = ScaleInputD != nullptr; - - llvm::Value *DisableOutputLane = - mt.lookupValue(thisOp.getDisableOutputLane()); - bool hasDisableOutputLane = DisableOutputLane != nullptr; - - unsigned ctaGroup = - static_cast(getNVVMCtaGroupKind(thisOp.getCtaGroup())); - - llvm::Intrinsic::ID ID = - tcgen05MMASparseIDs[hasDisableOutputLane][hasScaleInputD][isATensor] - [ctaGroup - 1][thisOp.getAShift()]; - - assert(ID != notIntrinsic && "Invalid intrinsic for Tcgen05MMASparseOp."); - - if (hasScaleInputD) - args.push_back(ScaleInputD); - - if (hasDisableOutputLane) - args.push_back(DisableOutputLane); - - args.push_back(builder.getInt32(static_cast(thisOp.getKind()))); - - if (!hasDisableOutputLane) - args.push_back(builder.getInt32(ctaGroup)); - - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorOp()))); - - return {ID, args}; -} - -LogicalResult Tcgen05MMASparseOp::verify() { - return verifyTcgen05MMAOp(isa(getMatrixA().getType()), - getDisableOutputLane(), getCtaGroup(), getAShift(), - 
getCollectorOp(), getLoc()); -} - -//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma.block_scale functions -//===----------------------------------------------------------------------===// - -mlir::NVVM::IDArgPair Tcgen05MMABlockScaleOp::getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { - - auto thisOp = cast(op); - llvm::SmallVector args; - - args.push_back(mt.lookupValue(thisOp.getMatrixD())); - - llvm::Value *A = mt.lookupValue(thisOp.getMatrixA()); - bool isATensor = isa(A->getType()); - args.push_back(A); - - args.push_back(mt.lookupValue(thisOp.getMatrixB())); - args.push_back(mt.lookupValue(thisOp.getIdesc())); - args.push_back(mt.lookupValue(thisOp.getEnableInputD())); - args.push_back(mt.lookupValue(thisOp.getScaleA())); - args.push_back(mt.lookupValue(thisOp.getScaleB())); - args.push_back(builder.getInt32( - static_cast(getNVVMCtaGroupKind(thisOp.getCtaGroup())))); - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorOp()))); - - auto kind = thisOp.getKind(); - auto blockScale = thisOp.getBlockScale(); - llvm::Intrinsic::ID ID = [&]() { - if (kind == NVVM::Tcgen05MMABlockScaleKind::MXF8F6F4) { - if (blockScale == NVVM::Tcgen05MMABlockScale::DEFAULT) { - return isATensor ? llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_mxf8f6f4_block_scale - : llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_mxf8f6f4_block_scale; - } else if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK32) { - return isATensor - ? llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_mxf8f6f4_block_scale_block32 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_mxf8f6f4_block_scale_block32; - } - } else if (kind == NVVM::Tcgen05MMABlockScaleKind::MXF4) { - if (blockScale == NVVM::Tcgen05MMABlockScale::DEFAULT) { - return isATensor - ? 
llvm::Intrinsic::nvvm_tcgen05_mma_tensor_mxf4_block_scale - : llvm::Intrinsic::nvvm_tcgen05_mma_shared_mxf4_block_scale; - } else if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK32) { - return isATensor ? llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_mxf4_block_scale_block32 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_mxf4_block_scale_block32; - } - } else if (kind == NVVM::Tcgen05MMABlockScaleKind::MXF4NVF4) { - if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK32) { - return isATensor - ? llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_mxf4nvf4_block_scale_block32 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_mxf4nvf4_block_scale_block32; - - } else if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK16) { - return isATensor - ? llvm::Intrinsic:: - nvvm_tcgen05_mma_tensor_mxf4nvf4_block_scale_block16 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_shared_mxf4nvf4_block_scale_block16; - } - } - llvm_unreachable("Invalid tcgen05.mma.block_scale attributes"); - }(); - - return {ID, args}; -} - -static LogicalResult -verifyTcgen05MMABlockScaleOp(NVVM::Tcgen05MMACollectorOp collectorOp, - NVVM::Tcgen05MMABlockScaleKind kind, - NVVM::Tcgen05MMABlockScale blockScale, - Location loc) { - - if (blockScale == NVVM::Tcgen05MMABlockScale::DEFAULT && - kind == Tcgen05MMABlockScaleKind::MXF4NVF4) - return emitError(loc, "mxf4nvf4 requires block scale attribute"); - - if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK16 && - kind != Tcgen05MMABlockScaleKind::MXF4NVF4) - return emitError(loc, - llvm::formatv("{} kind does not support block16 attribute", - stringifyEnum(kind))); - - return success(); -} - -LogicalResult Tcgen05MMABlockScaleOp::verify() { - return verifyTcgen05MMABlockScaleOp(getCollectorOp(), getKind(), - getBlockScale(), getLoc()); -} - -//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma.sp.block_scale functions -//===----------------------------------------------------------------------===// - 
-mlir::NVVM::IDArgPair Tcgen05MMASparseBlockScaleOp::getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { - - auto thisOp = cast(op); - llvm::SmallVector args; - - args.push_back(mt.lookupValue(thisOp.getMatrixD())); - - llvm::Value *A = mt.lookupValue(thisOp.getMatrixA()); - bool isATensor = isa(A->getType()); - args.push_back(A); - - args.push_back(mt.lookupValue(thisOp.getMatrixB())); - args.push_back(mt.lookupValue(thisOp.getIdesc())); - args.push_back(mt.lookupValue(thisOp.getEnableInputD())); - args.push_back(mt.lookupValue(thisOp.getSparseMetadata())); - args.push_back(mt.lookupValue(thisOp.getScaleA())); - args.push_back(mt.lookupValue(thisOp.getScaleB())); - args.push_back(builder.getInt32( - static_cast(getNVVMCtaGroupKind(thisOp.getCtaGroup())))); - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorOp()))); - - auto kind = thisOp.getKind(); - auto blockScale = thisOp.getBlockScale(); - llvm::Intrinsic::ID ID = [&]() { - if (kind == NVVM::Tcgen05MMABlockScaleKind::MXF8F6F4) { - if (blockScale == NVVM::Tcgen05MMABlockScale::DEFAULT) { - return isATensor ? llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_mxf8f6f4_block_scale - : llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_mxf8f6f4_block_scale; - } else if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK32) { - return isATensor - ? llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_mxf8f6f4_block_scale_block32 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_mxf8f6f4_block_scale_block32; - } - } else if (kind == NVVM::Tcgen05MMABlockScaleKind::MXF4) { - if (blockScale == NVVM::Tcgen05MMABlockScale::DEFAULT) { - return isATensor ? llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_mxf4_block_scale - : llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_mxf4_block_scale; - } else if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK32) { - return isATensor - ? 
llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_mxf4_block_scale_block32 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_mxf4_block_scale_block32; - } - } else if (kind == NVVM::Tcgen05MMABlockScaleKind::MXF4NVF4) { - if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK32) { - return isATensor - ? llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_mxf4nvf4_block_scale_block32 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_mxf4nvf4_block_scale_block32; - - } else if (blockScale == NVVM::Tcgen05MMABlockScale::BLOCK16) { - return isATensor - ? llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_tensor_mxf4nvf4_block_scale_block16 - : llvm::Intrinsic:: - nvvm_tcgen05_mma_sp_shared_mxf4nvf4_block_scale_block16; - } - } - llvm_unreachable("Invalid tcgen05.mma.sp.block_scale attributes"); - }(); - - return {ID, args}; -} - -LogicalResult Tcgen05MMASparseBlockScaleOp::verify() { - return verifyTcgen05MMABlockScaleOp(getCollectorOp(), getKind(), - getBlockScale(), getLoc()); -} - -//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma.ws functions -//===----------------------------------------------------------------------===// - -mlir::NVVM::IDArgPair Tcgen05MMAWsOp::getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { - - auto thisOp = cast(op); - llvm::SmallVector args; - - args.push_back(mt.lookupValue(thisOp.getMatrixD())); - - llvm::Value *A = mt.lookupValue(thisOp.getMatrixA()); - bool isATensor = isa(A->getType()); - args.push_back(A); - - args.push_back(mt.lookupValue(thisOp.getMatrixB())); - args.push_back(mt.lookupValue(thisOp.getIdesc())); - args.push_back(mt.lookupValue(thisOp.getEnableInputD())); - - mlir::Value ZeroColMask = thisOp.getZeroColMask(); - llvm::Intrinsic::ID ID = notIntrinsic; - if (ZeroColMask) { - args.push_back(mt.lookupValue(ZeroColMask)); - ID = isATensor ? 
llvm::Intrinsic::nvvm_tcgen05_mma_ws_tensor_zero_col_mask - : llvm::Intrinsic::nvvm_tcgen05_mma_ws_shared_zero_col_mask; - } else - ID = isATensor ? llvm::Intrinsic::nvvm_tcgen05_mma_ws_tensor - : llvm::Intrinsic::nvvm_tcgen05_mma_ws_shared; - - args.push_back(builder.getInt32(static_cast(thisOp.getKind()))); - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorBBuffer()))); - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorOp()))); - - return {ID, args}; -} - -//===----------------------------------------------------------------------===// -// NVVM tcgen05.mma.ws.sp functions -//===----------------------------------------------------------------------===// - -mlir::NVVM::IDArgPair Tcgen05MMAWsSparseOp::getIntrinsicIDAndArgs( - Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { - - auto thisOp = cast(op); - llvm::SmallVector args; - - args.push_back(mt.lookupValue(thisOp.getMatrixD())); - - llvm::Value *A = mt.lookupValue(thisOp.getMatrixA()); - bool isATensor = isa(A->getType()); - args.push_back(A); - - args.push_back(mt.lookupValue(thisOp.getMatrixB())); - args.push_back(mt.lookupValue(thisOp.getIdesc())); - args.push_back(mt.lookupValue(thisOp.getEnableInputD())); - args.push_back(mt.lookupValue(thisOp.getSparseMetadata())); - - mlir::Value ZeroColMask = thisOp.getZeroColMask(); - llvm::Intrinsic::ID ID = notIntrinsic; - if (ZeroColMask) { - args.push_back(mt.lookupValue(ZeroColMask)); - ID = isATensor - ? llvm::Intrinsic::nvvm_tcgen05_mma_ws_sp_tensor_zero_col_mask - : llvm::Intrinsic::nvvm_tcgen05_mma_ws_sp_shared_zero_col_mask; - } else - ID = isATensor ? 
llvm::Intrinsic::nvvm_tcgen05_mma_ws_sp_tensor - : llvm::Intrinsic::nvvm_tcgen05_mma_ws_sp_shared; - - args.push_back(builder.getInt32(static_cast(thisOp.getKind()))); - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorBBuffer()))); - args.push_back( - builder.getInt32(static_cast(thisOp.getCollectorOp()))); - - return {ID, args}; -} - //===----------------------------------------------------------------------===// // NVVMDialect initialization, type parsing, and registration. //===----------------------------------------------------------------------===// diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir deleted file mode 100644 index db4574bfaf78f..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-shared.mlir +++ /dev/null @@ -1,229 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, 
%a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) 
{{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 
{{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, 
i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, 
%enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) 
{{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, 
%scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, 
ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, 
i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, 
i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir deleted file mode 100644 index a15c3fb73de9c..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-block-scale-tensor.mlir +++ /dev/null @@ -1,229 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = 
#nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, 
%a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 
0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, 
%idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = 
#nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: 
call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) 
{{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup 
= #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = 
#nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, 
i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir deleted file mode 100644 index f46b35a910fd9..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-invalid.mlir +++ /dev/null @@ -1,119 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir -verify-diagnostics -split-input-file %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>) { - // expected-error @below {{Disable Output Lane of length 8 is 
incompatible with CtaGroupAttr}} - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>) { - // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_shared_ashift -llvm.func @nvvm_tcgen05_mma_shared_ashift(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - // expected-error @below {{A-shift can be applied only when matrix A is in tensor memory}} - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, i64, i64, i32, i1) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_ashift -llvm.func @nvvm_tcgen05_mma_ashift(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - // expected-error @below {{Cannot use collector buffer operation fill or use with ashift}} - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - llvm.return -} - -// ----- - -// CHECK-LABEL: 
@nvvm_tcgen05_mma_mxf4nvf4_block_scale_default -llvm.func @nvvm_tcgen05_mma_mxf4nvf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>) { - // expected-error @below {{mxf4nvf4 requires block scale attribute}} - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_mxf4_block_scale_default -llvm.func @nvvm_tcgen05_mma_mxf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>) { - // expected-error @below {{mxf4 kind does not support block16 attribute}} - nvvm.tcgen05.mma.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, ashift, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { - // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : 
(!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLanev4: vector<4 x i32>, %disableOutputLanev8: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { - // expected-error @below {{Disable Output Lane of length 8 is incompatible with CtaGroupAttr}} - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLanev8 - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_sp_mma_shared_ashift -llvm.func @nvvm_tcgen05_sp_mma_shared_ashift(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - // expected-error @below {{A-shift can be applied only when matrix A is in tensor memory}} - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_ashift -llvm.func @nvvm_tcgen05_mma_sp_ashift(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - // expected-error @below {{Cannot use collector buffer operation fill or use with ashift}} - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - llvm.return -} - -// ----- - -// CHECK-LABEL: 
@nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_default -llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - // expected-error @below {{mxf4nvf4 requires block scale attribute}} - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - llvm.return -} - -// ----- - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4_block_scale_default -llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_default(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scalea: !llvm.ptr<6>, %scaleb: !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - // expected-error @below {{mxf4 kind does not support block16 attribute}} - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scalea, %scaleb - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, ashift, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir deleted file mode 100644 index 286df36730e77..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-shared.mlir +++ /dev/null @@ -1,442 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_cta_1 -llvm.func @nvvm_tcgen05_mma_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: 
i1) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - 
{kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - 
llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_cta_2 -llvm.func @nvvm_tcgen05_mma_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr 
addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 
{{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 3) - nvvm.tcgen05.mma 
%d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - llvm.return -} - - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_cta_1 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, 
collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_cta_2 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, 
%enable_input_d: i1) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, 
ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane : vector<4 x i32>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 
{{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, 
%enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d 
mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 
{{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, 
%enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, vector<8 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<4 x i32>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, 
vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, 
i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 
{{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64, vector<8 x i32>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir deleted file mode 100644 index 5c7eabee71b4e..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-shared.mlir +++ /dev/null @@ -1,229 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, 
%scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - 
nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, 
%a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, 
blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, 
%scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, 
%enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, 
!llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 
{{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, 
!llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call 
void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: 
@nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = 
#nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir deleted file mode 100644 index 3200411aee213..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-block-scale-tensor.mlir +++ /dev/null @@ -1,229 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, 
%idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_mxf8f6f4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, 
%scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : 
(!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: 
call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf8f6f4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = 
#nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : 
(!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, 
!llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_mxf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = 
#nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) 
{{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, 
%b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, 
%a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_mxf4nvf4_block_scale_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %scale_a: !llvm.ptr<6>, %scale_b : !llvm.ptr<6>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block16(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, blockScale = #nvvm.tcgen05_mma_block_scale} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - // CHECK: call 
void @llvm.nvvm.tcgen05.mma.sp.tensor.mxf4nvf4.block_scale.block32(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp.block_scale %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %scale_a, %scale_b - {kind = #nvvm.tcgen05_mma_block_scale_kind, ctaGroup = #nvvm.cta_group, blockScale = #nvvm.tcgen05_mma_block_scale, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, !llvm.ptr<6>, !llvm.ptr<6>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir deleted file mode 100644 index 96044cf669d63..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-shared.mlir +++ /dev/null @@ -1,442 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, 
i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, 
%enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call 
void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 3) - nvvm.tcgen05.mma.sp 
%d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = 
#nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = 
#nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - llvm.return -} - - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) - 
nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr 
addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 
0, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, 
%disableOutputLane : vector<4 x i32>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, 
vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 
2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 
{{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, 
%enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp 
%d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 
{{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // 
CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<4 x i32>, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale 
= %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: 
@nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, 
%b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.shared.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir deleted file mode 100644 index 709beb0508bb8..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-sp-tensor.mlir +++ /dev/null @@ -1,634 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) - 
nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, 
!llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, 
i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, 
i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} 
: (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr 
addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - llvm.return -} - - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, 
%adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = 
#nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, 
collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr 
addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, 
!llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane : vector<4 x i32>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) 
{{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, 
!llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, 
%spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr 
addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, 
!llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp 
%d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: 
@nvvm_tcgen05_mma_sp_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr 
addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = 
#nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> 
{{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x 
i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, 
%spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, vector<8 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<4 x i32>, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - 
nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, 
vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 
x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup 
= #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_sp_scale_d_imm_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>, %spmetadata: !llvm.ptr<6>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, 
%b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // 
CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) 
{{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.sp.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata scale = %scale_d_imm mask = %disableOutputLane - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64, vector<8 x i32>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir deleted file mode 100644 index 798e311778beb..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-tensor.mlir +++ /dev/null @@ -1,633 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_cta_1 -llvm.func @nvvm_tcgen05_mma_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* 
collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, 
%idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr 
addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 1, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 1, /* collector=use */ i32 3) - 
nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 1, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 1, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_cta_2 -llvm.func @nvvm_tcgen05_mma_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=discard */ 
i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=discard */ i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=lastuse */ i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : 
(!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = 
#nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=fill */ i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f16 */ i32 0, /* cta_group= */ i32 2, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=tf32 */ i32 1, /* cta_group= */ i32 2, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=f8f6f4 */ i32 2, /* cta_group= */ i32 2, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, /* kind=i8 */ i32 3, /* cta_group= */ i32 2, /* collector=use */ i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_cta_1 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, 
%b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 
1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, 
i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_cta_2 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : 
(!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, 
%a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, 
i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 0, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, i32 1, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane : vector<4 x i32>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = 
#nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr 
addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 
{{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 
x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 2, i32 3) - 
nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <4 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 
{{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 0) - 
nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d 
mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = 
%disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 2, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = 
#nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, <8 x i32> {{%[0-9]+}}, i32 3, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, vector<8 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_1 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_1(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<4 x i32>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call 
void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, 
!llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = 
%scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg1(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <4 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<4 x i32>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_2 -llvm.func @nvvm_tcgen05_mma_scale_d_imm_disable_output_lane_cta_2(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %adesc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %disableOutputLane: vector<8 x i32>) { - - %scale_d_imm = llvm.mlir.constant(0:i64) : i64 - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind 
= #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = 
%scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 0) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2.ashift(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 1) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop, aShift} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 
0, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 2) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 0, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.tensor.scale_d.disable_output_lane.cg2(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 0, <8 x i32> {{%[0-9]+}}, i32 1, i32 3) - nvvm.tcgen05.mma %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d scale = %scale_d_imm mask = %disableOutputLane - {kind = #nvvm.tcgen05_mma_kind, ctaGroup = #nvvm.cta_group, collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64, vector<8 x i32>) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir 
b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir deleted file mode 100644 index 5f1aeb05888bd..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-shared.mlir +++ /dev/null @@ -1,133 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws -llvm.func @nvvm_tcgen05_mma_ws(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - 
collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : 
(!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws_zero_col_mask -llvm.func @nvvm_tcgen05_mma_ws_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %zero_col_mask: i64) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 
{{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = 
#nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, i64) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir deleted file mode 100644 index e390e350090ad..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-shared.mlir +++ /dev/null @@ -1,133 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws_sp -llvm.func @nvvm_tcgen05_mma_ws_sp(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 
{{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call 
void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws_sp_zero_col_mask -llvm.func 
@nvvm_tcgen05_mma_ws_sp_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_desc: i64, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>, %zero_col_mask: i64) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) 
{{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.shared.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_desc, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, i64, i64, i32, i1, !llvm.ptr<6>, i64) - - llvm.return -} diff --git 
a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir deleted file mode 100644 index f7ce5484803e9..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-sp-tensor.mlir +++ /dev/null @@ -1,133 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws_sp -llvm.func @nvvm_tcgen05_mma_ws_sp(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp 
%d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void 
@llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>) - - llvm.return -} 
- -// CHECK-LABEL: @nvvm_tcgen05_mma_ws_sp_zero_col_mask -llvm.func @nvvm_tcgen05_mma_ws_sp_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %spmetadata: !llvm.ptr<6>, %zero_col_mask: i64) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = 
#nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - 
collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.sp.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 
{{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws.sp %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %spmetadata, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, !llvm.ptr<6>, i64) - - llvm.return -} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir deleted file mode 100644 index cecbb3fbd90af..0000000000000 --- a/mlir/test/Target/LLVMIR/nvvm/tcgen05-mma-ws-tensor.mlir +++ /dev/null @@ -1,133 +0,0 @@ -// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws -llvm.func @nvvm_tcgen05_mma_ws(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %b_desc: i64, %idesc: i32, %enable_input_d: i1) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, 
i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws 
%d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1) - - llvm.return -} - -// CHECK-LABEL: @nvvm_tcgen05_mma_ws_zero_col_mask -llvm.func @nvvm_tcgen05_mma_ws_zero_col_mask(%d_tmem : !llvm.ptr<6>, %a_tmem: !llvm.ptr<6>, %b_desc: i64, %idesc: i32, %enable_input_d: i1, %zero_col_mask: i64) { - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 0, i32 0) - 
nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 0, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 
1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 0) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 0, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 1, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = 
#nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 2, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - // CHECK: call void @llvm.nvvm.tcgen05.mma.ws.tensor.zero_col_mask(ptr addrspace(6) {{%[0-9]+}}, ptr addrspace(6) {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 {{%[0-9]+}}, i1 {{%[0-9]+}}, i64 {{%[0-9]+}}, i32 3, i32 1, i32 1) - nvvm.tcgen05.mma.ws %d_tmem, %a_tmem, %b_desc, %idesc, %enable_input_d, %zero_col_mask - {kind = #nvvm.tcgen05_mma_kind, - collectorBBuffer = #nvvm.tcgen05_mma_collectorb, - collectorOp = #nvvm.tcgen05_mma_collectorop} : (!llvm.ptr<6>, !llvm.ptr<6>, i64, i32, i1, i64) - - llvm.return -} From 8bdd82ce1dd9c7da647b270b3a58eb36641e8e34 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 18 Nov 2025 10:38:39 -0800 Subject: [PATCH 21/57] [CI] Skip Running Premerge Advisor on AArch64 (#168404) They were still running because the conditional was not correct. This patch fixes that so they do not interefere with the results of the job. --- .ci/premerge_advisor_explain.py | 2 +- .ci/premerge_advisor_upload.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/premerge_advisor_explain.py b/.ci/premerge_advisor_explain.py index 269f75cace266..69568895e9030 100644 --- a/.ci/premerge_advisor_explain.py +++ b/.ci/premerge_advisor_explain.py @@ -148,7 +148,7 @@ def main( # Skip looking for results on AArch64 for now because the premerge advisor # service is not available on AWS currently. 
- if platform.machine() == "arm64": + if platform.machine() == "arm64" or platform.machine() == "aarch64": sys.exit(0) main( diff --git a/.ci/premerge_advisor_upload.py b/.ci/premerge_advisor_upload.py index 9e14743c7cc07..86032a42e179d 100644 --- a/.ci/premerge_advisor_upload.py +++ b/.ci/premerge_advisor_upload.py @@ -59,7 +59,7 @@ def main(commit_sha, workflow_run_number, build_log_files): # Skip uploading results on AArch64 for now because the premerge advisor # service is not available on AWS currently. - if platform.machine() == "arm64": + if platform.machine() == "arm64" or platform.machine() == "aarch64": sys.exit(0) main(args.commit_sha, args.workflow_run_number, args.build_log_files) From 40ed57c5054615d172f266dddb7b1ef5abf9b402 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 18 Nov 2025 10:39:01 -0800 Subject: [PATCH 22/57] [CI] Prefer Bash Tests over Empty String Comparisons (#168575) These are more idiomatic in bash. --- .ci/monolithic-linux.sh | 6 +++--- .ci/monolithic-windows.sh | 4 ++-- .ci/utils.sh | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index ca619aa7e98a1..e6a59a2ae1306 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -64,13 +64,13 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ start-group "ninja" -if [[ "${targets}" != "" ]]; then +if [[ -n "${targets}" ]]; then # Targets are not escaped as they are passed as separate arguments. ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log cp ${BUILD_DIR}/.ninja_log ninja.ninja_log fi -if [[ "${runtime_targets}" != "" ]]; then +if [[ -n "${runtime_targets}" ]]; then start-group "ninja Runtimes" ninja -C "${BUILD_DIR}" ${runtime_targets} |& tee ninja_runtimes.log @@ -79,7 +79,7 @@ fi # Compiling runtimes with just-built Clang and running their tests # as an additional testing for Clang. 
-if [[ "${runtime_targets_needs_reconfig}" != "" ]]; then +if [[ -n "${runtime_targets_needs_reconfig}" ]]; then start-group "CMake Runtimes C++26" cmake \ diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index 99e7758ce8d79..36941644c6a6c 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -51,13 +51,13 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ start-group "ninja" -if [[ "${targets}" != "" ]]; then +if [[ -n "${targets}" ]]; then # Targets are not escaped as they are passed as separate arguments. ninja -C "${BUILD_DIR}" -k 0 ${targets} |& tee ninja.log cp ${BUILD_DIR}/.ninja_log ninja.ninja_log fi -if [[ "${runtimes_targets}" != "" ]]; then +if [[ -n "${runtimes_targets}" ]]; then start-group "ninja runtimes" ninja -C "${BUILD_DIR}" -k 0 ${runtimes_targets} |& tee ninja_runtimes.log diff --git a/.ci/utils.sh b/.ci/utils.sh index c364f9395d67b..713a07ba5d898 100644 --- a/.ci/utils.sh +++ b/.ci/utils.sh @@ -33,7 +33,7 @@ function at-exit { # If building fails there will be no results files. 
shopt -s nullglob - if [[ "$GITHUB_ACTIONS" != "" ]]; then + if [[ -n "$GITHUB_ACTIONS" ]]; then python "${MONOREPO_ROOT}"/.ci/generate_test_report_github.py \ $retcode "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log \ >> $GITHUB_STEP_SUMMARY @@ -44,7 +44,7 @@ function at-exit { fi if [[ "$retcode" != "0" ]]; then - if [[ "$GITHUB_ACTIONS" != "" ]]; then + if [[ -n "$GITHUB_ACTIONS" ]]; then python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \ $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \ "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log @@ -59,10 +59,10 @@ trap at-exit EXIT function start-group { groupname=$1 - if [[ "$GITHUB_ACTIONS" != "" ]]; then + if [[ -n "$GITHUB_ACTIONS" ]]; then echo "::endgroup" echo "::group::$groupname" - elif [[ "$POSTCOMMIT_CI" != "" ]]; then + elif [[ -n "$POSTCOMMIT_CI" ]]; then echo "@@@$STEP@@@" else echo "Starting $groupname" @@ -73,6 +73,6 @@ export PIP_BREAK_SYSTEM_PACKAGES=1 pip install -q -r "${MONOREPO_ROOT}"/.ci/all_requirements.txt # The ARM64 builders run on AWS and don't have access to the GCS cache. -if [[ "$GITHUB_ACTIONS" != "" ]] && [[ "$RUNNER_ARCH" != "ARM64" ]]; then +if [[ -n "$GITHUB_ACTIONS" ]] && [[ "$RUNNER_ARCH" != "ARM64" ]]; then python .ci/cache_lit_timing_files.py download fi From 0ae2bccde4593b456bb7a13264a885e7dda0e80a Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Tue, 18 Nov 2025 21:41:52 +0300 Subject: [PATCH 23/57] [ARM] TableGen-erate node descriptions (#168212) This allows SDNodes to be validated against their expected type profiles and reduces the number of changes required to add a new node. Some nodes fail validation, those are enumerated in `ARMSelectionDAGInfo::verifyTargetNode()`. Some of the bugs are easy to fix, but probably they should be fixed separately, this patch is already big. Part of #119709. 
Pull Request: https://github.com/llvm/llvm-project/pull/168212 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 220 +----------- llvm/lib/Target/ARM/ARMISelLowering.h | 315 ------------------ llvm/lib/Target/ARM/ARMInstrInfo.td | 142 +++++++- llvm/lib/Target/ARM/ARMInstrMVE.td | 55 +++ llvm/lib/Target/ARM/ARMInstrNEON.td | 17 + llvm/lib/Target/ARM/ARMInstrThumb.td | 1 + llvm/lib/Target/ARM/ARMInstrThumb2.td | 5 + llvm/lib/Target/ARM/ARMInstrVFP.td | 16 + llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 83 ++++- llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 66 +++- llvm/lib/Target/ARM/CMakeLists.txt | 1 + .../Target/ARM/ARMSelectionDAGTest.cpp | 2 +- 12 files changed, 382 insertions(+), 541 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index f28640ce7b107..cd8d7a0bee5e3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1556,220 +1556,6 @@ ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, return std::make_pair(RRC, Cost); } -const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { -#define MAKE_CASE(V) \ - case V: \ - return #V; - switch ((ARMISD::NodeType)Opcode) { - case ARMISD::FIRST_NUMBER: - break; - MAKE_CASE(ARMISD::Wrapper) - MAKE_CASE(ARMISD::WrapperPIC) - MAKE_CASE(ARMISD::WrapperJT) - MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) - MAKE_CASE(ARMISD::CALL) - MAKE_CASE(ARMISD::CALL_PRED) - MAKE_CASE(ARMISD::CALL_NOLINK) - MAKE_CASE(ARMISD::tSECALL) - MAKE_CASE(ARMISD::t2CALL_BTI) - MAKE_CASE(ARMISD::BRCOND) - MAKE_CASE(ARMISD::BR_JT) - MAKE_CASE(ARMISD::BR2_JT) - MAKE_CASE(ARMISD::RET_GLUE) - MAKE_CASE(ARMISD::SERET_GLUE) - MAKE_CASE(ARMISD::INTRET_GLUE) - MAKE_CASE(ARMISD::PIC_ADD) - MAKE_CASE(ARMISD::CMP) - MAKE_CASE(ARMISD::CMN) - MAKE_CASE(ARMISD::CMPZ) - MAKE_CASE(ARMISD::CMPFP) - MAKE_CASE(ARMISD::CMPFPE) - MAKE_CASE(ARMISD::CMPFPw0) - MAKE_CASE(ARMISD::CMPFPEw0) - MAKE_CASE(ARMISD::BCC_i64) - 
MAKE_CASE(ARMISD::FMSTAT) - MAKE_CASE(ARMISD::CMOV) - MAKE_CASE(ARMISD::SSAT) - MAKE_CASE(ARMISD::USAT) - MAKE_CASE(ARMISD::ASRL) - MAKE_CASE(ARMISD::LSRL) - MAKE_CASE(ARMISD::LSLL) - MAKE_CASE(ARMISD::LSLS) - MAKE_CASE(ARMISD::LSRS1) - MAKE_CASE(ARMISD::ASRS1) - MAKE_CASE(ARMISD::RRX) - MAKE_CASE(ARMISD::ADDC) - MAKE_CASE(ARMISD::ADDE) - MAKE_CASE(ARMISD::SUBC) - MAKE_CASE(ARMISD::SUBE) - MAKE_CASE(ARMISD::VMOVRRD) - MAKE_CASE(ARMISD::VMOVDRR) - MAKE_CASE(ARMISD::VMOVhr) - MAKE_CASE(ARMISD::VMOVrh) - MAKE_CASE(ARMISD::VMOVSR) - MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) - MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) - MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) - MAKE_CASE(ARMISD::TC_RETURN) - MAKE_CASE(ARMISD::THREAD_POINTER) - MAKE_CASE(ARMISD::DYN_ALLOC) - MAKE_CASE(ARMISD::MEMBARRIER_MCR) - MAKE_CASE(ARMISD::PRELOAD) - MAKE_CASE(ARMISD::LDRD) - MAKE_CASE(ARMISD::STRD) - MAKE_CASE(ARMISD::WIN__CHKSTK) - MAKE_CASE(ARMISD::WIN__DBZCHK) - MAKE_CASE(ARMISD::PREDICATE_CAST) - MAKE_CASE(ARMISD::VECTOR_REG_CAST) - MAKE_CASE(ARMISD::MVESEXT) - MAKE_CASE(ARMISD::MVEZEXT) - MAKE_CASE(ARMISD::MVETRUNC) - MAKE_CASE(ARMISD::VCMP) - MAKE_CASE(ARMISD::VCMPZ) - MAKE_CASE(ARMISD::VTST) - MAKE_CASE(ARMISD::VSHLs) - MAKE_CASE(ARMISD::VSHLu) - MAKE_CASE(ARMISD::VSHLIMM) - MAKE_CASE(ARMISD::VSHRsIMM) - MAKE_CASE(ARMISD::VSHRuIMM) - MAKE_CASE(ARMISD::VRSHRsIMM) - MAKE_CASE(ARMISD::VRSHRuIMM) - MAKE_CASE(ARMISD::VRSHRNIMM) - MAKE_CASE(ARMISD::VQSHLsIMM) - MAKE_CASE(ARMISD::VQSHLuIMM) - MAKE_CASE(ARMISD::VQSHLsuIMM) - MAKE_CASE(ARMISD::VQSHRNsIMM) - MAKE_CASE(ARMISD::VQSHRNuIMM) - MAKE_CASE(ARMISD::VQSHRNsuIMM) - MAKE_CASE(ARMISD::VQRSHRNsIMM) - MAKE_CASE(ARMISD::VQRSHRNuIMM) - MAKE_CASE(ARMISD::VQRSHRNsuIMM) - MAKE_CASE(ARMISD::VSLIIMM) - MAKE_CASE(ARMISD::VSRIIMM) - MAKE_CASE(ARMISD::VGETLANEu) - MAKE_CASE(ARMISD::VGETLANEs) - MAKE_CASE(ARMISD::VMOVIMM) - MAKE_CASE(ARMISD::VMVNIMM) - MAKE_CASE(ARMISD::VMOVFPIMM) - MAKE_CASE(ARMISD::VDUP) - MAKE_CASE(ARMISD::VDUPLANE) - MAKE_CASE(ARMISD::VEXT) - 
MAKE_CASE(ARMISD::VREV64) - MAKE_CASE(ARMISD::VREV32) - MAKE_CASE(ARMISD::VREV16) - MAKE_CASE(ARMISD::VZIP) - MAKE_CASE(ARMISD::VUZP) - MAKE_CASE(ARMISD::VTRN) - MAKE_CASE(ARMISD::VTBL1) - MAKE_CASE(ARMISD::VTBL2) - MAKE_CASE(ARMISD::VMOVN) - MAKE_CASE(ARMISD::VQMOVNs) - MAKE_CASE(ARMISD::VQMOVNu) - MAKE_CASE(ARMISD::VCVTN) - MAKE_CASE(ARMISD::VCVTL) - MAKE_CASE(ARMISD::VIDUP) - MAKE_CASE(ARMISD::VMULLs) - MAKE_CASE(ARMISD::VMULLu) - MAKE_CASE(ARMISD::VQDMULH) - MAKE_CASE(ARMISD::VADDVs) - MAKE_CASE(ARMISD::VADDVu) - MAKE_CASE(ARMISD::VADDVps) - MAKE_CASE(ARMISD::VADDVpu) - MAKE_CASE(ARMISD::VADDLVs) - MAKE_CASE(ARMISD::VADDLVu) - MAKE_CASE(ARMISD::VADDLVAs) - MAKE_CASE(ARMISD::VADDLVAu) - MAKE_CASE(ARMISD::VADDLVps) - MAKE_CASE(ARMISD::VADDLVpu) - MAKE_CASE(ARMISD::VADDLVAps) - MAKE_CASE(ARMISD::VADDLVApu) - MAKE_CASE(ARMISD::VMLAVs) - MAKE_CASE(ARMISD::VMLAVu) - MAKE_CASE(ARMISD::VMLAVps) - MAKE_CASE(ARMISD::VMLAVpu) - MAKE_CASE(ARMISD::VMLALVs) - MAKE_CASE(ARMISD::VMLALVu) - MAKE_CASE(ARMISD::VMLALVps) - MAKE_CASE(ARMISD::VMLALVpu) - MAKE_CASE(ARMISD::VMLALVAs) - MAKE_CASE(ARMISD::VMLALVAu) - MAKE_CASE(ARMISD::VMLALVAps) - MAKE_CASE(ARMISD::VMLALVApu) - MAKE_CASE(ARMISD::VMINVu) - MAKE_CASE(ARMISD::VMINVs) - MAKE_CASE(ARMISD::VMAXVu) - MAKE_CASE(ARMISD::VMAXVs) - MAKE_CASE(ARMISD::UMAAL) - MAKE_CASE(ARMISD::UMLAL) - MAKE_CASE(ARMISD::SMLAL) - MAKE_CASE(ARMISD::SMLALBB) - MAKE_CASE(ARMISD::SMLALBT) - MAKE_CASE(ARMISD::SMLALTB) - MAKE_CASE(ARMISD::SMLALTT) - MAKE_CASE(ARMISD::SMULWB) - MAKE_CASE(ARMISD::SMULWT) - MAKE_CASE(ARMISD::SMLALD) - MAKE_CASE(ARMISD::SMLALDX) - MAKE_CASE(ARMISD::SMLSLD) - MAKE_CASE(ARMISD::SMLSLDX) - MAKE_CASE(ARMISD::SMMLAR) - MAKE_CASE(ARMISD::SMMLSR) - MAKE_CASE(ARMISD::QADD16b) - MAKE_CASE(ARMISD::QSUB16b) - MAKE_CASE(ARMISD::QADD8b) - MAKE_CASE(ARMISD::QSUB8b) - MAKE_CASE(ARMISD::UQADD16b) - MAKE_CASE(ARMISD::UQSUB16b) - MAKE_CASE(ARMISD::UQADD8b) - MAKE_CASE(ARMISD::UQSUB8b) - MAKE_CASE(ARMISD::BUILD_VECTOR) - MAKE_CASE(ARMISD::BFI) 
- MAKE_CASE(ARMISD::VORRIMM) - MAKE_CASE(ARMISD::VBICIMM) - MAKE_CASE(ARMISD::VBSP) - MAKE_CASE(ARMISD::MEMCPY) - MAKE_CASE(ARMISD::VLD1DUP) - MAKE_CASE(ARMISD::VLD2DUP) - MAKE_CASE(ARMISD::VLD3DUP) - MAKE_CASE(ARMISD::VLD4DUP) - MAKE_CASE(ARMISD::VLD1_UPD) - MAKE_CASE(ARMISD::VLD2_UPD) - MAKE_CASE(ARMISD::VLD3_UPD) - MAKE_CASE(ARMISD::VLD4_UPD) - MAKE_CASE(ARMISD::VLD1x2_UPD) - MAKE_CASE(ARMISD::VLD1x3_UPD) - MAKE_CASE(ARMISD::VLD1x4_UPD) - MAKE_CASE(ARMISD::VLD2LN_UPD) - MAKE_CASE(ARMISD::VLD3LN_UPD) - MAKE_CASE(ARMISD::VLD4LN_UPD) - MAKE_CASE(ARMISD::VLD1DUP_UPD) - MAKE_CASE(ARMISD::VLD2DUP_UPD) - MAKE_CASE(ARMISD::VLD3DUP_UPD) - MAKE_CASE(ARMISD::VLD4DUP_UPD) - MAKE_CASE(ARMISD::VST1_UPD) - MAKE_CASE(ARMISD::VST2_UPD) - MAKE_CASE(ARMISD::VST3_UPD) - MAKE_CASE(ARMISD::VST4_UPD) - MAKE_CASE(ARMISD::VST1x2_UPD) - MAKE_CASE(ARMISD::VST1x3_UPD) - MAKE_CASE(ARMISD::VST1x4_UPD) - MAKE_CASE(ARMISD::VST2LN_UPD) - MAKE_CASE(ARMISD::VST3LN_UPD) - MAKE_CASE(ARMISD::VST4LN_UPD) - MAKE_CASE(ARMISD::WLS) - MAKE_CASE(ARMISD::WLSSETUP) - MAKE_CASE(ARMISD::LE) - MAKE_CASE(ARMISD::LOOP_DEC) - MAKE_CASE(ARMISD::CSINV) - MAKE_CASE(ARMISD::CSNEG) - MAKE_CASE(ARMISD::CSINC) - MAKE_CASE(ARMISD::MEMCPYLOOP) - MAKE_CASE(ARMISD::MEMSETLOOP) -#undef MAKE_CASE - } - return nullptr; -} - EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) @@ -3344,8 +3130,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return LowerInterruptReturn(RetOps, dl, DAG); } - ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : - ARMISD::RET_GLUE; + unsigned RetNode = + AFI->isCmseNSEntryFunction() ? 
ARMISD::SERET_GLUE : ARMISD::RET_GLUE; return DAG.getNode(RetNode, dl, MVT::Other, RetOps); } @@ -4861,7 +4647,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } - ARMISD::NodeType CompareType; + unsigned CompareType; switch (CondCode) { default: CompareType = ARMISD::CMP; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index bc2fec3c1bdb5..8191eb40a712a 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -51,319 +51,6 @@ class TargetMachine; class TargetRegisterInfo; class VectorType; - namespace ARMISD { - - // ARM Specific DAG Nodes - enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - Wrapper, // Wrapper - A wrapper node for TargetConstantPool, - // TargetExternalSymbol, and TargetGlobalAddress. - WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in - // PIC mode. - WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable - - // Add pseudo op to model memcpy for struct byval. - COPY_STRUCT_BYVAL, - - CALL, // Function call. - CALL_PRED, // Function call that's predicable. - CALL_NOLINK, // Function call with branch not branch-and-link. - tSECALL, // CMSE non-secure function call. - t2CALL_BTI, // Thumb function call followed by BTI instruction. - BRCOND, // Conditional branch. - BR_JT, // Jumptable branch. - BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). - RET_GLUE, // Return with a flag operand. - SERET_GLUE, // CMSE Entry function return with a flag operand. - INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand. - - PIC_ADD, // Add with a PC operand and a PIC label. - - ASRL, // MVE long arithmetic shift right. - LSRL, // MVE long shift right. - LSLL, // MVE long shift left. - - CMP, // ARM compare instructions. - CMN, // ARM CMN instructions. 
- CMPZ, // ARM compare that sets only Z flag. - CMPFP, // ARM VFP compare instruction, sets FPSCR. - CMPFPE, // ARM VFP signalling compare instruction, sets FPSCR. - CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. - CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets - // FPSCR. - FMSTAT, // ARM fmstat instruction. - - CMOV, // ARM conditional move instructions. - - SSAT, // Signed saturation - USAT, // Unsigned saturation - - BCC_i64, - - LSLS, // Flag-setting shift left. - LSRS1, // Flag-setting logical shift right by one bit. - ASRS1, // Flag-setting arithmetic shift right by one bit. - RRX, // Shift right one bit with carry in. - - ADDC, // Add with carry - ADDE, // Add using carry - SUBC, // Sub with carry - SUBE, // Sub using carry - - VMOVRRD, // double to two gprs. - VMOVDRR, // Two gprs to double. - VMOVSR, // move gpr to single, used for f32 literal constructed in a gpr - - EH_SJLJ_SETJMP, // SjLj exception handling setjmp. - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. - EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. - - TC_RETURN, // Tail call return pseudo. - - THREAD_POINTER, - - DYN_ALLOC, // Dynamic allocation on the stack. - - MEMBARRIER_MCR, // Memory barrier (MCR) - - PRELOAD, // Preload - - WIN__CHKSTK, // Windows' __chkstk call to do stack probing. - WIN__DBZCHK, // Windows' divide by zero check - - WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart - WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup. - LOOP_DEC, // Really a part of LE, performs the sub - LE, // Low-overhead loops, Loop End - - PREDICATE_CAST, // Predicate cast for MVE i1 types - VECTOR_REG_CAST, // Reinterpret the current contents of a vector register - - MVESEXT, // Legalization aids for extending a vector into two/four vectors. - MVEZEXT, // or truncating two/four vectors into one. 
Eventually becomes - MVETRUNC, // stack store/load sequence, if not optimized to anything else. - - VCMP, // Vector compare. - VCMPZ, // Vector compare to zero. - VTST, // Vector test bits. - - // Vector shift by vector - VSHLs, // ...left/right by signed - VSHLu, // ...left/right by unsigned - - // Vector shift by immediate: - VSHLIMM, // ...left - VSHRsIMM, // ...right (signed) - VSHRuIMM, // ...right (unsigned) - - // Vector rounding shift by immediate: - VRSHRsIMM, // ...right (signed) - VRSHRuIMM, // ...right (unsigned) - VRSHRNIMM, // ...right narrow - - // Vector saturating shift by immediate: - VQSHLsIMM, // ...left (signed) - VQSHLuIMM, // ...left (unsigned) - VQSHLsuIMM, // ...left (signed to unsigned) - VQSHRNsIMM, // ...right narrow (signed) - VQSHRNuIMM, // ...right narrow (unsigned) - VQSHRNsuIMM, // ...right narrow (signed to unsigned) - - // Vector saturating rounding shift by immediate: - VQRSHRNsIMM, // ...right narrow (signed) - VQRSHRNuIMM, // ...right narrow (unsigned) - VQRSHRNsuIMM, // ...right narrow (signed to unsigned) - - // Vector shift and insert: - VSLIIMM, // ...left - VSRIIMM, // ...right - - // Vector get lane (VMOV scalar to ARM core register) - // (These are used for 8- and 16-bit element types only.) 
- VGETLANEu, // zero-extend vector extract element - VGETLANEs, // sign-extend vector extract element - - // Vector move immediate and move negated immediate: - VMOVIMM, - VMVNIMM, - - // Vector move f32 immediate: - VMOVFPIMM, - - // Move H <-> R, clearing top 16 bits - VMOVrh, - VMOVhr, - - // Vector duplicate: - VDUP, - VDUPLANE, - - // Vector shuffles: - VEXT, // extract - VREV64, // reverse elements within 64-bit doublewords - VREV32, // reverse elements within 32-bit words - VREV16, // reverse elements within 16-bit halfwords - VZIP, // zip (interleave) - VUZP, // unzip (deinterleave) - VTRN, // transpose - VTBL1, // 1-register shuffle with mask - VTBL2, // 2-register shuffle with mask - VMOVN, // MVE vmovn - - // MVE Saturating truncates - VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s) - VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u) - - // MVE float <> half converts - VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top - // lanes - VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes - - // MVE VIDUP instruction, taking a start value and increment. 
- VIDUP, - - // Vector multiply long: - VMULLs, // ...signed - VMULLu, // ...unsigned - - VQDMULH, // MVE vqdmulh instruction - - // MVE reductions - VADDVs, // sign- or zero-extend the elements of a vector to i32, - VADDVu, // add them all together, and return an i32 of their sum - VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask - VADDVpu, - VADDLVs, // sign- or zero-extend elements to i64 and sum, returning - VADDLVu, // the low and high 32-bit halves of the sum - VADDLVAs, // Same as VADDLV[su] but also add an input accumulator - VADDLVAu, // provided as low and high halves - VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask - VADDLVpu, - VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask - VADDLVApu, - VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply - VMLAVu, // them and add the results together, returning an i32 of the sum - VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask - VMLAVpu, - VMLALVs, // Same as VMLAV but with i64, returning the low and - VMLALVu, // high 32-bit halves of the sum - VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask - VMLALVpu, - VMLALVAs, // Same as VMLALV but also add an input accumulator - VMLALVAu, // provided as low and high halves - VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask - VMLALVApu, - VMINVu, // Find minimum unsigned value of a vector and register - VMINVs, // Find minimum signed value of a vector and register - VMAXVu, // Find maximum unsigned value of a vector and register - VMAXVs, // Find maximum signed value of a vector and register - - SMULWB, // Signed multiply word by half word, bottom - SMULWT, // Signed multiply word by half word, top - UMLAL, // 64bit Unsigned Accumulate Multiply - SMLAL, // 64bit Signed Accumulate Multiply - UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply - SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16 - SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 - 
SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 - SMLALTT, // 64-bit signed accumulate multiply top, top 16 - SMLALD, // Signed multiply accumulate long dual - SMLALDX, // Signed multiply accumulate long dual exchange - SMLSLD, // Signed multiply subtract long dual - SMLSLDX, // Signed multiply subtract long dual exchange - SMMLAR, // Signed multiply long, round and add - SMMLSR, // Signed multiply long, subtract and round - - // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b - // stands for. - QADD8b, - QSUB8b, - QADD16b, - QSUB16b, - UQADD8b, - UQSUB8b, - UQADD16b, - UQSUB16b, - - // Operands of the standard BUILD_VECTOR node are not legalized, which - // is fine if BUILD_VECTORs are always lowered to shuffles or other - // operations, but for ARM some BUILD_VECTORs are legal as-is and their - // operands need to be legalized. Define an ARM-specific version of - // BUILD_VECTOR for this purpose. - BUILD_VECTOR, - - // Bit-field insert - BFI, - - // Vector OR with immediate - VORRIMM, - // Vector AND with NOT of immediate - VBICIMM, - - // Pseudo vector bitwise select - VBSP, - - // Pseudo-instruction representing a memory copy using ldm/stm - // instructions. - MEMCPY, - - // Pseudo-instruction representing a memory copy using a tail predicated - // loop - MEMCPYLOOP, - // Pseudo-instruction representing a memset using a tail predicated - // loop - MEMSETLOOP, - - // V8.1MMainline condition select - CSINV, // Conditional select invert. - CSNEG, // Conditional select negate. - CSINC, // Conditional select increment. 
- - // Vector load N-element structure to all lanes: - FIRST_MEMORY_OPCODE, - VLD1DUP = FIRST_MEMORY_OPCODE, - VLD2DUP, - VLD3DUP, - VLD4DUP, - - // NEON loads with post-increment base updates: - VLD1_UPD, - VLD2_UPD, - VLD3_UPD, - VLD4_UPD, - VLD2LN_UPD, - VLD3LN_UPD, - VLD4LN_UPD, - VLD1DUP_UPD, - VLD2DUP_UPD, - VLD3DUP_UPD, - VLD4DUP_UPD, - VLD1x2_UPD, - VLD1x3_UPD, - VLD1x4_UPD, - - // NEON stores with post-increment base updates: - VST1_UPD, - VST2_UPD, - VST3_UPD, - VST4_UPD, - VST2LN_UPD, - VST3LN_UPD, - VST4LN_UPD, - VST1x2_UPD, - VST1x3_UPD, - VST1x4_UPD, - - // Load/Store of dual registers - LDRD, - STRD, - LAST_MEMORY_OPCODE = STRD, - }; - - } // end namespace ARMISD - namespace ARM { /// Possible values of current rounding mode, which is specified in bits /// 23:22 of FPSCR. @@ -427,8 +114,6 @@ class VectorType; void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const override; - const char *getTargetNodeName(unsigned Opcode) const override; - bool isSelectSupported(SelectSupportKind Kind) const override { // ARM does not support scalar condition selects on vectors. 
return (Kind != ScalarCondVectorVal); diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f7176a65d8163..ddc89415cfb20 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -40,7 +40,7 @@ def SDT_ARMCMov : SDTypeProfile<1, 4, [ SDTCisVT<4, FlagsVT>, // in flags ]>; -def SDT_ARMBrcond : SDTypeProfile<0, 2, [ +def SDT_ARMBrcond : SDTypeProfile<0, 3, [ SDTCisVT<0, OtherVT>, // target basic block SDTCisVT<1, CondCodeVT>, // condition code SDTCisVT<2, FlagsVT>, // in flags @@ -133,9 +133,16 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<4>]>; +// Signed multiply accumulate long dual def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; + +// Signed multiply accumulate long dual exchange def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; + +// Signed multiply subtract long dual def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; + +// Signed multiply subtract long dual exchange def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; def SDT_ARMCSel : SDTypeProfile<1, 4, [ @@ -146,8 +153,13 @@ def SDT_ARMCSel : SDTypeProfile<1, 4, [ SDTCisVT<3, FlagsVT> // in flags ]>; +// Conditional select invert. def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel>; + +// Conditional select negate. def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>; + +// Conditional select increment. def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>; def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, @@ -155,110 +167,197 @@ def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +// Signed multiply long, round and add def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>; + +// Signed multiply long, subtract and round def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>; -// Node definitions. + +// Wrapper - A wrapper node for TargetConstantPool, +// TargetExternalSymbol, and TargetGlobalAddress. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; + +// WrapperPIC - A wrapper node for TargetGlobalAddress in +// PIC mode. def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; + +// WrapperJT - A wrapper node for TargetJumpTable def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +// Add pseudo op to model memcpy for struct byval. def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , SDT_ARMStructByVal, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +// Function call. def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// Function call that's predicable. def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// Function call with branch not branch-and-link. def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Return with a flag operand. def ARMretglue : SDNode<"ARMISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// CMSE Entry function return with a flag operand. def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// Interrupt return with an LR-offset and a flag operand. def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// ARM conditional move instructions. def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>; +// Signed saturation def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +// Unsigned saturation def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; +// Conditional branch. 
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>; +// Jumptable branch. def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>; + +// Jumptable branch (2 level - jumptable entry is a jump). def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, [SDNPHasChain]>; def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>; +// ARM compare instructions. def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp>; +// ARM CMN instructions. def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp>; +// ARM compare that sets only Z flag. def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>; +// Add with a PC operand and a PIC label. def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; +// MVE long arithmetic shift right. def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>; + +// MVE long shift right. def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>; + +// MVE long shift left. def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>; +// Flag-setting logical shift right by one bit. def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>; + +// Flag-setting arithmetic shift right by one bit. def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>; + +// Shift right one bit with carry in. def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>; +// Add with carry def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, [SDNPCommutative]>; + +// Sub with carry def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; + +// Flag-setting shift left. def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>; + +// Add using carry def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; + +// Sub using carry def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; + +// SjLj exception handling setjmp. 
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain, SDNPSideEffect]>; + +// SjLj exception handling longjmp. def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain, SDNPSideEffect]>; + +// SjLj exception handling setup_dispatch. def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH", SDT_ARMEH_SJLJ_SetupDispatch, [SDNPHasChain, SDNPSideEffect]>; +// Memory barrier (MCR) def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain, SDNPSideEffect]>; + +// Preload def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; +// Tail call return pseudo. def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// Bit-field insert def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; +// Pseudo-instruction representing a memory copy using ldm/stm instructions. def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +// Signed multiply word by half word, bottom def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>; + +// Signed multiply word by half word, top def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>; + +// 64bit Unsigned Accumulate Multiply +def ARMumlal : SDNode<"ARMISD::UMLAL", SDT_LongMac>; + +// 64bit Signed Accumulate Multiply +def ARMsmlal : SDNode<"ARMISD::SMLAL", SDT_LongMac>; + +// 64-bit Unsigned Accumulate Accumulate Multiply +def ARMumaal : SDNode<"ARMISD::UMAAL", SDT_LongMac>; + +// 64-bit signed accumulate multiply bottom, bottom 16 def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>; + +// 64-bit signed accumulate multiply bottom, top 16 def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; + +// 64-bit signed accumulate multiply top, bottom 16 def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; + +// 64-bit signed accumulate multiply 
top, top 16 def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; +// Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b +// stands for. def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>; def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; @@ -270,13 +369,15 @@ def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>; def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>; def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; - def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; + +// Load/Store of dual registers +def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; // Vector operations shared between NEON and MVE +// Vector duplicate def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -287,40 +388,65 @@ def ARMvduplane : SDNode<"ARMISD::VDUPLANE", def SDTARMVIDUP : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + +// MVE VIDUP instruction, taking a start value and increment. 
def ARMvidup : SDNode<"ARMISD::VIDUP", SDTARMVIDUP>; def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; + +// reverse elements within 64-bit doublewords def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; + +// reverse elements within 32-bit words def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; + +// reverse elements within 16-bit halfwords def ARMvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; + +// Vector get lane (VMOV scalar to ARM core register) +// (These are used for 8- and 16-bit element types only.) def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; + +// Vector move immediate and move negated immediate def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; + +// Vector move f32 immediate def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; + +// Vector OR with immediate def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; + +// Vector AND with NOT of immediate def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,]>; + +// Vector shift by immediate def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>; def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>; def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>; + +// Vector shift by vector def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>; def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 
SDTCisSameAs<1, 2>]>; + +// Vector multiply long def ARMvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def ARMvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; @@ -328,9 +454,13 @@ def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisInt<3>]>; def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>; +// Vector compare. def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>; + +// Vector compare to zero. def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>; +// Reinterpret the current contents of a vector register // 'VECTOR_REG_CAST' is an operation that reinterprets the contents of a // vector register as a different vector type, without changing the contents of // the register. It differs from 'bitconvert' in that bitconvert reinterprets @@ -5894,13 +6024,17 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), // The main point of having separate instruction are extra unmodelled effects // (compared to ordinary calls) like stack pointer change. +// Windows' __chkstk call to do stack probing. 
def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, [SDNPHasChain, SDNPSideEffect]>; + let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; +// Windows' divide by zero check def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; + let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, [(win__dbzchk tGPR:$divisor)]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index e24413465799f..98591fa3f5bd7 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -683,8 +683,13 @@ class MVE_VADDV, SDTCisVec<1>, SDTCisVec<2> ]>; + +// sign- or zero-extend the elements of a vector to i32, +// add them all together, and return an i32 of their sum def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>; def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>; + +// Same as VADDV[su] but with a v4i1 predicate mask def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>; def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>; @@ -806,9 +811,19 @@ multiclass MVE_VADDLV_A { defvar InstN = !cast(NAME # "no_acc"); defvar letter = VTI.SuffixLetter; + + // sign- or zero-extend elements to i64 and sum, returning + // the low and high 32-bit halves of the sum defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>; + + // Same as VADDLV[su] but also add an input accumulator + // provided as low and high halves defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>; + + // Same as VADDLV[su] but with a v4i1 predicate mask defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>; + + // Same as VADDLVp[su] but with a v4i1 predicate mask defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>; let Predicates = 
[HasMVEInt] in { @@ -943,9 +958,17 @@ multiclass MVE_VMINMAXV_ty { def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> ]>; + +// Find minimum unsigned value of a vector and register def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; + +// Find minimum signed value of a vector and register def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; + +// Find maximum unsigned value of a vector and register def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; + +// Find maximum signed value of a vector and register def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; @@ -1146,16 +1169,31 @@ def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>, SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6> ]>; + +// sign- or zero-extend the elements of two vectors to i32, multiply +// them and add the results together, returning an i32 of the sum def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>; def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>; + +// Same as VMLAV but with i64, returning the low and +// high 32-bit halves of the sum def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>; def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>; + +// Same as VMLALV but also add an input accumulator +// provided as low and high halves def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>; def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>; + +// Same as VMLAV[su] with a v4i1 predicate mask def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>; def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>; + +// Same as VMLALV[su] with a v4i1 predicate mask def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>; def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>; + +// Same as VMLALVA[su] with a v4i1 predicate mask 
def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>; def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>; @@ -1997,6 +2035,7 @@ class MVE_VQxDMULH_Base size, bit rounding, let validForTailPredication = 1; } +// MVE vqdmulh instruction def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>; multiclass MVE_VQxDMULH_m; } +// Predicate cast for MVE i1 types // Occasionally we need to cast between a i32 and a boolean vector, for // example when moving between rGPR and VPR.P0 as part of predicate vector // shuffles. We also sometimes need to cast between different predicate @@ -4810,6 +4850,7 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>; defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; +// MVE vmovn def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; multiclass MVE_VMOVN_p; def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisVT<3, i32>]>; + +// Vector (V) Saturating (Q) Move and Narrow (N), signed (s) def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>; + +// Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u) def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>; let Predicates = [HasMVEInt] in { @@ -4938,7 +4983,11 @@ class MVE_VCVT_ff, SDTCisVec<1>, SDTCisVT<2, i32>]>; + +// MVE vcvt f32 -> f16, truncating into either the bottom or top lanes def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>; + +// MVE vcvt f16 -> f32, extending from either the bottom or top lanes def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>; multiclass MVE_VCVT_f2h_m { @@ -6865,6 +6914,9 @@ class MVE_WLSTP size> def SDT_MVEMEMCPYLOOPNODE : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +// Pseudo-instruction representing a memory copy using a tail predicated +// loop def MVE_MEMCPYLOOPNODE : SDNode<"ARMISD::MEMCPYLOOP", SDT_MVEMEMCPYLOOPNODE, 
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; @@ -6877,6 +6929,9 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CPSR] in { def SDT_MVEMEMSETLOOPNODE : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, v16i8>, SDTCisVT<2, i32>]>; + +// Pseudo-instruction representing a memset using a tail predicated +// loop def MVE_MEMSETLOOPNODE : SDNode<"ARMISD::MEMSETLOOP", SDT_MVEMEMSETLOOPNODE, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 37f0103363b9a..90e74a5f54f7b 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -475,6 +475,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), //===----------------------------------------------------------------------===// def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; + +// Vector test bits. def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. 
The "SHX" version is for long and @@ -487,10 +489,12 @@ def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>; +// Vector rounding shift by immediate def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>; def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>; def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>; +// Vector saturating shift by immediate def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>; def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>; def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>; @@ -498,13 +502,16 @@ def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>; def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>; def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>; +// Vector saturating rounding shift by immediate def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>; def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>; def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; +// Vector shift and insert def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; +// Pseudo vector bitwise select def NEONvbsp : SDNode<"ARMISD::VBSP", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -518,15 +525,25 @@ def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; + +// zip (interleave) def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; + +// unzip (deinterleave) def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; + +// transpose def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, SDTCisVT<2, v8i8>]>; def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, 
SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; + +// 1-register shuffle with mask def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; + +// 2-register shuffle with mask def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 0c5ea3e0fa8d5..55b0d9e1c01fc 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -14,6 +14,7 @@ // Thumb specific DAG Nodes. // +// CMSE non-secure function call. def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 911d7ebfba141..317959c0342f7 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5581,16 +5581,20 @@ class t2LOL let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } +// Setup for the iteration count of a WLS. See t2WhileLoopSetup. def arm_wlssetup : SDNode<"ARMISD::WLSSETUP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>; +// Low-overhead loops, While Loop Start branch. See t2WhileLoopStart def arm_wls : SDNode<"ARMISD::WLS", SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, [SDNPHasChain]>; +// Really a part of LE, performs the sub def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; +// Low-overhead loops, Loop End def arm_le : SDNode<"ARMISD::LE", SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, [SDNPHasChain]>; @@ -5890,6 +5894,7 @@ def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> { let hasSideEffects = 1; } +// Thumb function call followed by BTI instruction. 
def ARMt2CallBTI : SDNode<"ARMISD::t2CALL_BTI", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index e2cc97b7b4634..65c61c259d465 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -28,11 +28,20 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; +// ARM VFP compare instruction, sets FPSCR. def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>; + +// ARM VFP compare against zero instruction, sets FPSCR. def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>; + +// ARM VFP signalling compare instruction, sets FPSCR. def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>; + +// ARM VFP signalling compare against zero instruction, sets +// FPSCR. def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>; +// ARM fmstat instruction. def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTypeProfile<1, 1, [ SDTCisVT<0, FlagsVT>, // out flags @@ -40,12 +49,19 @@ def arm_fmstat : SDNode<"ARMISD::FMSTAT", ]> >; +// Two gprs to double. def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; + +// double to two gprs. 
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; + +// move gpr to single, used for f32 literal constructed in a gpr def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>; def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>] >; def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>] >; + +// Move H <-> R, clearing top 16 bits def arm_vmovhr : SDNode<"ARMISD::VMOVhr", SDT_VMOVhr>; def arm_vmovrh : SDNode<"ARMISD::VMOVrh", SDT_VMOVrh>; diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index bf7c962f02efc..501dce96bb2d6 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -10,9 +10,14 @@ // //===----------------------------------------------------------------------===// +#include "ARMSelectionDAGInfo.h" #include "ARMTargetTransformInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/CommandLine.h" + +#define GET_SDNODE_DESC +#include "ARMGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" @@ -30,9 +35,83 @@ static cl::opt EnableMemtransferTPLoop( "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop."))); +ARMSelectionDAGInfo::ARMSelectionDAGInfo() + : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {} + +const char *ARMSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define MAKE_CASE(V) \ + case V: \ + return #V; + + // These nodes don't have corresponding entries in *.td files yet. 
+ switch (static_cast<ARMISD::NodeType>(Opcode)) { + MAKE_CASE(ARMISD::DYN_ALLOC) + MAKE_CASE(ARMISD::MVESEXT) + MAKE_CASE(ARMISD::MVEZEXT) + MAKE_CASE(ARMISD::MVETRUNC) + MAKE_CASE(ARMISD::BUILD_VECTOR) + MAKE_CASE(ARMISD::VLD1DUP) + MAKE_CASE(ARMISD::VLD2DUP) + MAKE_CASE(ARMISD::VLD3DUP) + MAKE_CASE(ARMISD::VLD4DUP) + MAKE_CASE(ARMISD::VLD1_UPD) + MAKE_CASE(ARMISD::VLD2_UPD) + MAKE_CASE(ARMISD::VLD3_UPD) + MAKE_CASE(ARMISD::VLD4_UPD) + MAKE_CASE(ARMISD::VLD1x2_UPD) + MAKE_CASE(ARMISD::VLD1x3_UPD) + MAKE_CASE(ARMISD::VLD1x4_UPD) + MAKE_CASE(ARMISD::VLD2LN_UPD) + MAKE_CASE(ARMISD::VLD3LN_UPD) + MAKE_CASE(ARMISD::VLD4LN_UPD) + MAKE_CASE(ARMISD::VLD1DUP_UPD) + MAKE_CASE(ARMISD::VLD2DUP_UPD) + MAKE_CASE(ARMISD::VLD3DUP_UPD) + MAKE_CASE(ARMISD::VLD4DUP_UPD) + MAKE_CASE(ARMISD::VST1_UPD) + MAKE_CASE(ARMISD::VST3_UPD) + MAKE_CASE(ARMISD::VST1x2_UPD) + MAKE_CASE(ARMISD::VST1x3_UPD) + MAKE_CASE(ARMISD::VST1x4_UPD) + MAKE_CASE(ARMISD::VST2LN_UPD) + MAKE_CASE(ARMISD::VST3LN_UPD) + MAKE_CASE(ARMISD::VST4LN_UPD) + } +#undef MAKE_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= ARMISD::FIRST_MEMORY_OPCODE && - Opcode <= ARMISD::LAST_MEMORY_OPCODE; + // These nodes don't have corresponding entries in *.td files yet. 
+ if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE && + Opcode <= ARMISD::LAST_MEMORY_OPCODE) + return true; + + return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode); +} + +void ARMSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case ARMISD::WIN__DBZCHK: + // invalid number of results; expected 2, got 1 + case ARMISD::WIN__CHKSTK: + // invalid number of results; expected 1, got 2 + case ARMISD::COPY_STRUCT_BYVAL: + // invalid number of operands; expected 6, got 5 + case ARMISD::MEMCPY: + // invalid number of operands; expected 5, got 4 + case ARMISD::VMOVRRD: + // operand #0 must have type f64, but has type v1i64/v4f16/v8i8 + case ARMISD::VMOVIMM: + // operand #0 must have type i32, but has type i16 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } // Emit, if possible, a specialized version of the given Libcall. Typically this diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index d68150e66567c..38d2a6555c1be 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -17,7 +17,62 @@ #include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "ARMGenSDNodeInfo.inc" + namespace llvm { +namespace ARMISD { + +enum NodeType : unsigned { + DYN_ALLOC = GENERATED_OPCODE_END, // Dynamic allocation on the stack. + + MVESEXT, // Legalization aids for extending a vector into two/four vectors. + MVEZEXT, // or truncating two/four vectors into one. Eventually becomes + MVETRUNC, // stack store/load sequence, if not optimized to anything else. + + // Operands of the standard BUILD_VECTOR node are not legalized, which + // is fine if BUILD_VECTORs are always lowered to shuffles or other + // operations, but for ARM some BUILD_VECTORs are legal as-is and their + // operands need to be legalized. 
Define an ARM-specific version of + // BUILD_VECTOR for this purpose. + BUILD_VECTOR, + + // Vector load N-element structure to all lanes: + FIRST_MEMORY_OPCODE, + VLD1DUP = FIRST_MEMORY_OPCODE, + VLD2DUP, + VLD3DUP, + VLD4DUP, + + // NEON loads with post-increment base updates: + VLD1_UPD, + VLD2_UPD, + VLD3_UPD, + VLD4_UPD, + VLD2LN_UPD, + VLD3LN_UPD, + VLD4LN_UPD, + VLD1DUP_UPD, + VLD2DUP_UPD, + VLD3DUP_UPD, + VLD4DUP_UPD, + VLD1x2_UPD, + VLD1x3_UPD, + VLD1x4_UPD, + + // NEON stores with post-increment base updates: + VST1_UPD, + VST3_UPD, + VST2LN_UPD, + VST3LN_UPD, + VST4LN_UPD, + VST1x2_UPD, + VST1x3_UPD, + VST1x4_UPD, + LAST_MEMORY_OPCODE = VST1x4_UPD, +}; + +} // namespace ARMISD namespace ARM_AM { static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) { @@ -35,10 +90,17 @@ namespace ARM_AM { } } // end namespace ARM_AM -class ARMSelectionDAGInfo : public SelectionDAGTargetInfo { +class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + ARMSelectionDAGInfo(); + + const char *getTargetNodeName(unsigned Opcode) const override; + bool isTargetMemoryOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, @@ -66,6 +128,6 @@ class ARMSelectionDAGInfo : public SelectionDAGTargetInfo { RTLIB::Libcall LC) const; }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt index fa778cad4af8e..eb3ad01a54fb2 100644 --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM ARMGenSDNodeInfo.inc 
-gen-sd-node-info) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables) diff --git a/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp b/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp index ca9afded0c0c4..c763da95fa455 100644 --- a/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp +++ b/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp @@ -5,7 +5,7 @@ // //===----------------------------------------------------------------------===// -#include "ARMISelLowering.h" +#include "ARMSelectionDAGInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" From 523bd2df6d1542e92ed70d7c6baec74dbe181699 Mon Sep 17 00:00:00 2001 From: Hongyu Chen Date: Wed, 19 Nov 2025 02:49:13 +0800 Subject: [PATCH 24/57] [GISel][RISCV] Compute CTPOP of small odd-sized integer correctly (#168559) Fixes the assertion in #168523 This patch lifts the small, odd-sized integer to 8 bits, ensuring that the following lowering code behaves correctly. --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 + .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 5 +- .../test/CodeGen/RISCV/GlobalISel/bitmanip.ll | 140 ++++++++++++++++++ .../legalizer/legalize-ctpop-rv64.mir | 112 ++++++++++++++ 4 files changed, 260 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ba28e4dda3313..e9e05be2fcbd4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7698,6 +7698,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { unsigned Size = Ty.getSizeInBits(); MachineIRBuilder &B = MIRBuilder; + // Bail out on irregular type lengths. + if (Size > 128 || Size % 8 != 0) + return UnableToLegalize; + // Count set bits in blocks of 2 bits. 
Default approach would be // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 } // We use following formula instead: diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index b1794b78a3e2a..1fba16d3d51c2 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -238,7 +238,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, sXLen, sXLen) .scalarSameSizeAs(1, 0); } else { - CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); + CTPOPActions.widenScalarToNextPow2(0, /*Min*/ 8) + .clampScalar(0, s8, sXLen) + .scalarSameSizeAs(1, 0) + .lower(); } getActionDefinitionsBuilder(G_CONSTANT) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll index 68bc1e5db6095..cb12ef20b0994 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll @@ -205,3 +205,143 @@ define i24 @bitreverse_i24(i24 %x) { %rev = call i24 @llvm.bitreverse.i24(i24 %x) ret i24 %rev } + +define i2 @test_ctpop_i2(i2 %a) { +; RV32-LABEL: test_ctpop_i2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: andi a0, a0, 3 +; RV32-NEXT: srli a1, a0, 1 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: zext.b a1, a0 +; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: andi a1, a1, 51 +; RV32-NEXT: andi a0, a0, 51 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: andi a0, a0, 15 +; RV32-NEXT: li a1, 1 +; RV32-NEXT: call __mulsi3 +; RV32-NEXT: zext.b a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_ctpop_i2: +; RV64: # 
%bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: andi a0, a0, 3 +; RV64-NEXT: srli a1, a0, 1 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: zext.b a1, a0 +; RV64-NEXT: srli a1, a1, 2 +; RV64-NEXT: andi a1, a1, 51 +; RV64-NEXT: andi a0, a0, 51 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: andi a0, a0, 15 +; RV64-NEXT: li a1, 1 +; RV64-NEXT: call __muldi3 +; RV64-NEXT: zext.b a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %1 = call i2 @llvm.ctpop.i2(i2 %a) + ret i2 %1 +} + +define i11 @test_ctpop_i11(i11 %a) { +; RV32-LABEL: test_ctpop_i11: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: andi a0, a0, 2047 +; RV32-NEXT: lui a1, 5 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: srli a3, a0, 1 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: lui a3, 3 +; RV32-NEXT: addi s0, a2, -1 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: and a1, a0, s0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: srli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: li a1, 257 +; RV32-NEXT: call __mulsi3 +; RV32-NEXT: and a0, a0, s0 +; RV32-NEXT: srli a0, a0, 8 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: 
.cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_ctpop_i11: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: andi a0, a0, 2047 +; RV64-NEXT: lui a1, 5 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: srli a3, a0, 1 +; RV64-NEXT: addi a1, a1, 1365 +; RV64-NEXT: and a1, a3, a1 +; RV64-NEXT: lui a3, 3 +; RV64-NEXT: addi s0, a2, -1 +; RV64-NEXT: addi a2, a3, 819 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: and a1, a0, s0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: srli a1, a1, 2 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: lui a2, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: srli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: addi a1, a2, -241 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: li a1, 257 +; RV64-NEXT: call __muldi3 +; RV64-NEXT: and a0, a0, s0 +; RV64-NEXT: srli a0, a0, 8 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret + %1 = call i11 @llvm.ctpop.i11(i11 %a) + ret i11 %1 +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index c61c46df0a434..aae48cf7df6d0 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -216,3 +216,115 @@ body: | PseudoRET implicit $x10 ... 
+--- +name: ctpop_i2 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: ctpop_i2 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64) + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 85 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]] + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64) + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64) + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 51 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]] + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]] + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]] + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64) + ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: ctpop_i2 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; 
RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s2) = G_TRUNC %1(s64) + %2:_(s2) = G_CTPOP %0(s2) + %3:_(s64) = G_ANYEXT %2(s2) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... +--- +name: ctpop_i11 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: ctpop_i11 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C1]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C]](s64) + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 21845 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C2]] + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[AND]](s64) + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY1]], [[AND1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C4]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C3]](s64) + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 13107 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C5]] + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C5]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[AND4]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C6]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]] + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3855 + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C7]] + ; RV64I-NEXT: 
[[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 257 + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND5]], [[C8]] + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[MUL]], [[C4]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND6]], [[C9]](s64) + ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: ctpop_i11 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2047 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]] + ; RV64ZBB-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[AND]](s64) + ; RV64ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[CTPOP]](s64) + ; RV64ZBB-NEXT: $x10 = COPY [[COPY1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s11) = G_TRUNC %1(s64) + %2:_(s11) = G_CTPOP %0(s11) + %3:_(s64) = G_ANYEXT %2(s11) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 + +... From 46565f32d8d70f5eb9aa3aa4fba15fbd19912ccb Mon Sep 17 00:00:00 2001 From: Dan Liew Date: Tue, 18 Nov 2025 10:53:32 -0800 Subject: [PATCH 25/57] [LLDB] Add log channel for InstrumentationRuntime plugins (#168508) This patch adds `LLDBLog::InstrumentationRuntime` as a log channel to provide an appropriate channel for instrumentation runtime plugins as previously one did not exist. A small use of the channel is added to illustrate its use. The logging added is not intended to be comprehensive. This is primarily motivated by an `-fbounds-safety` instrumentation plugin (https://github.com/swiftlang/llvm-project/pull/11835). 
rdar://164920875 --- lldb/include/lldb/Utility/LLDBLog.h | 3 ++- .../InstrumentationRuntime/Utility/ReportRetriever.cpp | 5 ++++- lldb/source/Utility/LLDBLog.cpp | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lldb/include/lldb/Utility/LLDBLog.h b/lldb/include/lldb/Utility/LLDBLog.h index 18e4a3ca73507..ac360bfdf8cee 100644 --- a/lldb/include/lldb/Utility/LLDBLog.h +++ b/lldb/include/lldb/Utility/LLDBLog.h @@ -50,7 +50,8 @@ enum class LLDBLog : Log::MaskType { OnDemand = Log::ChannelFlag<31>, Source = Log::ChannelFlag<32>, Disassembler = Log::ChannelFlag<33>, - LLVM_MARK_AS_BITMASK_ENUM(Disassembler), + InstrumentationRuntime = Log::ChannelFlag<34>, + LLVM_MARK_AS_BITMASK_ENUM(InstrumentationRuntime), }; LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); diff --git a/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp b/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp index 38c334bfb78ac..3642cb18c7a97 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/Utility/ReportRetriever.cpp @@ -207,8 +207,11 @@ bool ReportRetriever::NotifyBreakpointHit(ProcessSP process_sp, return false; StructuredData::ObjectSP report = RetrieveReportData(process_sp); - if (!report || report->GetType() != lldb::eStructuredDataTypeDictionary) + if (!report || report->GetType() != lldb::eStructuredDataTypeDictionary) { + LLDB_LOGF(GetLog(LLDBLog::InstrumentationRuntime), + "ReportRetriever::RetrieveReportData() failed"); return false; + } std::string description = FormatDescription(report); diff --git a/lldb/source/Utility/LLDBLog.cpp b/lldb/source/Utility/LLDBLog.cpp index 613dae42064a8..a08764d84edd2 100644 --- a/lldb/source/Utility/LLDBLog.cpp +++ b/lldb/source/Utility/LLDBLog.cpp @@ -67,6 +67,9 @@ static constexpr Log::Category g_categories[] = { {{"disassembler"}, {"log disassembler related activities"}, LLDBLog::Disassembler}, + 
{{"instrumentation-runtime"}, + {"log instrumentation runtime plugin related activities"}, + LLDBLog::InstrumentationRuntime}, }; static Log::Channel g_log_channel(g_categories, From 3f614026f9a5af9409acac225516c868e927ac7a Mon Sep 17 00:00:00 2001 From: Qiongsi Wu Date: Tue, 18 Nov 2025 10:56:07 -0800 Subject: [PATCH 26/57] [clang][DependencyScanning] Add Test Coverage of `StableDirs` during By-Name Lookups (#168143) This PR adds some test coverage for `StableDirs` during by-name lookups. --- .../module-in-stable-dir-by-name.c | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 clang/test/ClangScanDeps/module-in-stable-dir-by-name.c diff --git a/clang/test/ClangScanDeps/module-in-stable-dir-by-name.c b/clang/test/ClangScanDeps/module-in-stable-dir-by-name.c new file mode 100644 index 0000000000000..742bc2e046ceb --- /dev/null +++ b/clang/test/ClangScanDeps/module-in-stable-dir-by-name.c @@ -0,0 +1,43 @@ +// UNSUPPORTED: system-windows +// RUN: rm -rf %t +// RUN: split-file %s %t + +// Verify the stable dir path. 
+//--- Sysroot/usr/include/SysA/module.modulemap +module SysA { + header "SysA.h" +} + +//--- Sysroot/usr/include/SysA/SysA.h +int SysVal = 42; + +//--- cdb.json.template +[{ + "file": "", + "directory": "DIR", + "command": "clang -fmodules -fmodules-cache-path=DIR/cache -isysroot DIR/Sysroot -IDIR/Sysroot/usr/include -x c" +}] + +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full -module-names=SysA > %t/result.json +// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "is-in-stable-directories": true, +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/Sysroot/usr/include/SysA/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/Sysroot/usr/include/SysA/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/Sysroot/usr/include/SysA/SysA.h" +// CHECK-NEXT: ], +// CHECK-NEXT: "link-libraries": [], +// CHECK-NEXT: "name": "SysA" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [] +// CHECK-NEXT: } From 8f67759585f7bd25cfebf2224680b131ffe5425b Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 18 Nov 2025 11:11:19 -0800 Subject: [PATCH 27/57] [NFC][TableGen] Remove `close` member from various CodeGenHelpers (#167904) Always rely on local scopes to enforce the lifetime of these helper objects and by extension where the "closing" of various C++ code constructs happens. 
--- llvm/include/llvm/TableGen/CodeGenHelpers.h | 30 +---- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 123 +++++++++--------- mlir/tools/mlir-tblgen/EnumsGen.cpp | 79 +++++------ 3 files changed, 106 insertions(+), 126 deletions(-) diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h index e357b2670be15..1b1b5e63a8fc4 100644 --- a/llvm/include/llvm/TableGen/CodeGenHelpers.h +++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h @@ -33,25 +33,17 @@ class IfDefEmitter { OS << "#undef " << Name << "\n"; OS << "\n"; } - ~IfDefEmitter() { close(); } - - // Explicit function to close the ifdef scopes. - void close() { - if (Closed) - return; - + ~IfDefEmitter() { OS << "\n"; if (LateUndef) OS << "#undef " << Name << "\n"; OS << "#endif // " << Name << "\n\n"; - Closed = true; } private: std::string Name; raw_ostream &OS; bool LateUndef; - bool Closed = false; }; // Simple RAII helper for emitting header include guard (ifndef-define-endif). @@ -62,20 +54,11 @@ class IncludeGuardEmitter { OS << "#ifndef " << Name << "\n" << "#define " << Name << "\n\n"; } - ~IncludeGuardEmitter() { close(); } - - // Explicit function to close the ifdef scopes. - void close() { - if (Closed) - return; - OS << "\n#endif // " << Name << "\n\n"; - Closed = true; - } + ~IncludeGuardEmitter() { OS << "\n#endif // " << Name << "\n\n"; } private: std::string Name; raw_ostream &OS; - bool Closed = false; }; // Simple RAII helper for emitting namespace scope. Name can be a single @@ -89,15 +72,9 @@ class NamespaceEmitter { OS << "namespace " << Name << " {\n\n"; } - ~NamespaceEmitter() { close(); } - - // Explicit function to close the namespace scopes. 
- void close() { - if (Closed) - return; + ~NamespaceEmitter() { if (!Name.empty()) OS << "\n} // namespace " << Name << "\n"; - Closed = true; } private: @@ -114,7 +91,6 @@ class NamespaceEmitter { } std::string Name; raw_ostream &OS; - bool Closed = false; }; } // end namespace llvm diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index 0bb743dc8a7f5..3a488ed952210 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -276,80 +276,81 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { OS << "#include \n"; // for std::pair OS << "\n"; NamespaceEmitter LlvmNS(OS, "llvm"); - NamespaceEmitter DirLangNS(OS, DirLang.getCppNamespace()); - - if (DirLang.hasEnableBitmaskEnumInNamespace()) - OS << "LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();\n\n"; - - // Emit Directive associations - std::vector Associations; - copy_if(DirLang.getAssociations(), std::back_inserter(Associations), - // Skip the "special" value - [](const Record *Def) { return Def->getName() != "AS_FromLeaves"; }); - generateEnumClass(Associations, OS, "Association", - /*Prefix=*/"", /*ExportEnums=*/false); + { + NamespaceEmitter DirLangNS(OS, DirLang.getCppNamespace()); + + if (DirLang.hasEnableBitmaskEnumInNamespace()) + OS << "LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();\n\n"; + + // Emit Directive associations + std::vector Associations; + copy_if( + DirLang.getAssociations(), std::back_inserter(Associations), + // Skip the "special" value + [](const Record *Def) { return Def->getName() != "AS_FromLeaves"; }); + generateEnumClass(Associations, OS, "Association", + /*Prefix=*/"", /*ExportEnums=*/false); - generateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"", - /*ExportEnums=*/false); + generateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"", + /*ExportEnums=*/false); - generateEnumBitmask(DirLang.getSourceLanguages(), OS, 
"SourceLanguage", - /*Prefix=*/"", /*ExportEnums=*/false); + generateEnumBitmask(DirLang.getSourceLanguages(), OS, "SourceLanguage", + /*Prefix=*/"", /*ExportEnums=*/false); - // Emit Directive enumeration - generateEnumClass(DirLang.getDirectives(), OS, "Directive", - DirLang.getDirectivePrefix(), - DirLang.hasMakeEnumAvailableInNamespace()); + // Emit Directive enumeration + generateEnumClass(DirLang.getDirectives(), OS, "Directive", + DirLang.getDirectivePrefix(), + DirLang.hasMakeEnumAvailableInNamespace()); - // Emit Clause enumeration - generateEnumClass(DirLang.getClauses(), OS, "Clause", - DirLang.getClausePrefix(), - DirLang.hasMakeEnumAvailableInNamespace()); + // Emit Clause enumeration + generateEnumClass(DirLang.getClauses(), OS, "Clause", + DirLang.getClausePrefix(), + DirLang.hasMakeEnumAvailableInNamespace()); - // Emit ClauseVals enumeration - std::string EnumHelperFuncs; - generateClauseEnumVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs); + // Emit ClauseVals enumeration + std::string EnumHelperFuncs; + generateClauseEnumVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs); - // Generic function signatures - OS << "// Enumeration helper functions\n"; + // Generic function signatures + OS << "// Enumeration helper functions\n"; - OS << "LLVM_ABI std::pair get" << Lang - << "DirectiveKindAndVersions(StringRef Str);\n"; + OS << "LLVM_ABI std::pair get" << Lang + << "DirectiveKindAndVersions(StringRef Str);\n"; - OS << "inline Directive get" << Lang << "DirectiveKind(StringRef Str) {\n"; - OS << " return get" << Lang << "DirectiveKindAndVersions(Str).first;\n"; - OS << "}\n"; - OS << "\n"; + OS << "inline Directive get" << Lang << "DirectiveKind(StringRef Str) {\n"; + OS << " return get" << Lang << "DirectiveKindAndVersions(Str).first;\n"; + OS << "}\n"; + OS << "\n"; - OS << "LLVM_ABI StringRef get" << Lang - << "DirectiveName(Directive D, unsigned Ver = 0);\n"; - OS << "\n"; + OS << "LLVM_ABI StringRef get" << Lang + << 
"DirectiveName(Directive D, unsigned Ver = 0);\n"; + OS << "\n"; - OS << "LLVM_ABI std::pair get" << Lang - << "ClauseKindAndVersions(StringRef Str);\n"; - OS << "\n"; + OS << "LLVM_ABI std::pair get" << Lang + << "ClauseKindAndVersions(StringRef Str);\n"; + OS << "\n"; - OS << "inline Clause get" << Lang << "ClauseKind(StringRef Str) {\n"; - OS << " return get" << Lang << "ClauseKindAndVersions(Str).first;\n"; - OS << "}\n"; - OS << "\n"; + OS << "inline Clause get" << Lang << "ClauseKind(StringRef Str) {\n"; + OS << " return get" << Lang << "ClauseKindAndVersions(Str).first;\n"; + OS << "}\n"; + OS << "\n"; - OS << "LLVM_ABI StringRef get" << Lang - << "ClauseName(Clause C, unsigned Ver = 0);\n"; - OS << "\n"; + OS << "LLVM_ABI StringRef get" << Lang + << "ClauseName(Clause C, unsigned Ver = 0);\n"; + OS << "\n"; - OS << "/// Return true if \\p C is a valid clause for \\p D in version \\p " - << "Version.\n"; - OS << "LLVM_ABI bool isAllowedClauseForDirective(Directive D, " - << "Clause C, unsigned Version);\n"; - OS << "\n"; - OS << "constexpr std::size_t getMaxLeafCount() { return " - << getMaxLeafCount(DirLang) << "; }\n"; - OS << "LLVM_ABI Association getDirectiveAssociation(Directive D);\n"; - OS << "LLVM_ABI Category getDirectiveCategory(Directive D);\n"; - OS << "LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D);\n"; - OS << EnumHelperFuncs; - - DirLangNS.close(); + OS << "/// Return true if \\p C is a valid clause for \\p D in version \\p " + << "Version.\n"; + OS << "LLVM_ABI bool isAllowedClauseForDirective(Directive D, " + << "Clause C, unsigned Version);\n"; + OS << "\n"; + OS << "constexpr std::size_t getMaxLeafCount() { return " + << getMaxLeafCount(DirLang) << "; }\n"; + OS << "LLVM_ABI Association getDirectiveAssociation(Directive D);\n"; + OS << "LLVM_ABI Category getDirectiveCategory(Directive D);\n"; + OS << "LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D);\n"; + OS << EnumHelperFuncs; + } // close DirLangNS // These 
specializations need to be in ::llvm. for (StringRef Enum : {"Association", "Category", "Directive", "Clause"}) { diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp index 11bf9ce732ce6..8c7f9f7b4bc49 100644 --- a/mlir/tools/mlir-tblgen/EnumsGen.cpp +++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp @@ -702,41 +702,45 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) { StringRef underlyingToSymFnName = enumInfo.getUnderlyingToSymbolFnName(); auto enumerants = enumInfo.getAllCases(); - llvm::NamespaceEmitter ns(os, cppNamespace); - - // Emit the enum class definition - emitEnumClass(enumDef, enumName, underlyingType, description, enumerants, os); - - // Emit conversion function declarations - if (llvm::all_of(enumerants, [](EnumCase enumerant) { - return enumerant.getValue() >= 0; - })) { - os << formatv( - "::std::optional<{0}> {1}({2});\n", enumName, underlyingToSymFnName, - underlyingType.empty() ? std::string("unsigned") : underlyingType); - } - os << formatv("{2} {1}({0});\n", enumName, symToStrFnName, symToStrFnRetType); - os << formatv("::std::optional<{0}> {1}(::llvm::StringRef);\n", enumName, - strToSymFnName); - - if (enumInfo.isBitEnum()) { - emitOperators(enumDef, os); - } else { - emitMaxValueFn(enumDef, os); - } + { + llvm::NamespaceEmitter ns(os, cppNamespace); + + // Emit the enum class definition + emitEnumClass(enumDef, enumName, underlyingType, description, enumerants, + os); + + // Emit conversion function declarations + if (llvm::all_of(enumerants, [](EnumCase enumerant) { + return enumerant.getValue() >= 0; + })) { + os << formatv( + "::std::optional<{0}> {1}({2});\n", enumName, underlyingToSymFnName, + underlyingType.empty() ? 
std::string("unsigned") : underlyingType); + } + os << formatv("{2} {1}({0});\n", enumName, symToStrFnName, + symToStrFnRetType); + os << formatv("::std::optional<{0}> {1}(::llvm::StringRef);\n", enumName, + strToSymFnName); + + if (enumInfo.isBitEnum()) { + emitOperators(enumDef, os); + } else { + emitMaxValueFn(enumDef, os); + } - // Generate a generic `stringifyEnum` function that forwards to the method - // specified by the user. - const char *const stringifyEnumStr = R"( + // Generate a generic `stringifyEnum` function that forwards to the method + // specified by the user. + const char *const stringifyEnumStr = R"( inline {0} stringifyEnum({1} enumValue) {{ return {2}(enumValue); } )"; - os << formatv(stringifyEnumStr, symToStrFnRetType, enumName, symToStrFnName); + os << formatv(stringifyEnumStr, symToStrFnRetType, enumName, + symToStrFnName); - // Generate a generic `symbolizeEnum` function that forwards to the method - // specified by the user. - const char *const symbolizeEnumStr = R"( + // Generate a generic `symbolizeEnum` function that forwards to the method + // specified by the user. 
+ const char *const symbolizeEnumStr = R"( template ::std::optional symbolizeEnum(::llvm::StringRef); @@ -745,9 +749,9 @@ inline ::std::optional<{0}> symbolizeEnum<{0}>(::llvm::StringRef str) { return {1}(str); } )"; - os << formatv(symbolizeEnumStr, enumName, strToSymFnName); + os << formatv(symbolizeEnumStr, enumName, strToSymFnName); - const char *const attrClassDecl = R"( + const char *const attrClassDecl = R"( class {1} : public ::mlir::{2} { public: using ValueType = {0}; @@ -757,13 +761,12 @@ class {1} : public ::mlir::{2} { {0} getValue() const; }; )"; - if (enumInfo.genSpecializedAttr()) { - StringRef attrClassName = enumInfo.getSpecializedAttrClassName(); - StringRef baseAttrClassName = "IntegerAttr"; - os << formatv(attrClassDecl, enumName, attrClassName, baseAttrClassName); - } - - ns.close(); + if (enumInfo.genSpecializedAttr()) { + StringRef attrClassName = enumInfo.getSpecializedAttrClassName(); + StringRef baseAttrClassName = "IntegerAttr"; + os << formatv(attrClassDecl, enumName, attrClassName, baseAttrClassName); + } + } // close `ns`. // Generate a generic parser and printer for the enum. std::string qualName = From 4ab24235cbebee68a9cba4a5caba3325542b64b9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 18 Nov 2025 11:13:15 -0800 Subject: [PATCH 28/57] [ConstantFolding] Generalize constant folding for vector_interleave2 to interleave3-8. 
(#168473) --- llvm/lib/Analysis/ConstantFolding.cpp | 27 ++++++++--- .../InstSimplify/ConstProp/vector-calls.ll | 48 +++++++++++++++++++ 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index da32542cf7870..a13df6c5bf552 100755 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1659,6 +1659,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::vector_extract: case Intrinsic::vector_insert: case Intrinsic::vector_interleave2: + case Intrinsic::vector_interleave3: + case Intrinsic::vector_interleave4: + case Intrinsic::vector_interleave5: + case Intrinsic::vector_interleave6: + case Intrinsic::vector_interleave7: + case Intrinsic::vector_interleave8: case Intrinsic::vector_deinterleave2: // Target intrinsics case Intrinsic::amdgcn_perm: @@ -4207,16 +4213,23 @@ static Constant *ConstantFoldFixedVectorCall( } return ConstantVector::get(Result); } - case Intrinsic::vector_interleave2: { + case Intrinsic::vector_interleave2: + case Intrinsic::vector_interleave3: + case Intrinsic::vector_interleave4: + case Intrinsic::vector_interleave5: + case Intrinsic::vector_interleave6: + case Intrinsic::vector_interleave7: + case Intrinsic::vector_interleave8: { unsigned NumElements = cast(Operands[0]->getType())->getNumElements(); + unsigned NumOperands = Operands.size(); for (unsigned I = 0; I < NumElements; ++I) { - Constant *Elt0 = Operands[0]->getAggregateElement(I); - Constant *Elt1 = Operands[1]->getAggregateElement(I); - if (!Elt0 || !Elt1) - return nullptr; - Result[2 * I] = Elt0; - Result[2 * I + 1] = Elt1; + for (unsigned J = 0; J < NumOperands; ++J) { + Constant *Elt = Operands[J]->getAggregateElement(I); + if (!Elt) + return nullptr; + Result[NumOperands * I + J] = Elt; + } } return ConstantVector::get(Result); } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll 
b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll index 14543f339db5d..848f0d17ff373 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-calls.ll @@ -51,6 +51,54 @@ define <8 x i32> @fold_vector_interleave2() { ret <8 x i32> %1 } +define <12 x i32> @fold_vector_interleave3() { +; CHECK-LABEL: define <12 x i32> @fold_vector_interleave3() { +; CHECK-NEXT: ret <12 x i32> +; + %1 = call <12 x i32> @llvm.vector.interleave3.v12i32(<4 x i32> , <4 x i32> , <4 x i32> ) + ret <12 x i32> %1 +} + +define <16 x i32> @fold_vector_interleave4() { +; CHECK-LABEL: define <16 x i32> @fold_vector_interleave4() { +; CHECK-NEXT: ret <16 x i32> +; + %1 = call <16 x i32> @llvm.vector.interleave4.v16i32(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) + ret <16 x i32> %1 +} + +define <20 x i32> @fold_vector_interleave5() { +; CHECK-LABEL: define <20 x i32> @fold_vector_interleave5() { +; CHECK-NEXT: ret <20 x i32> +; + %1 = call <20 x i32> @llvm.vector.interleave5.v20i32(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) + ret <20 x i32> %1 +} + +define <24 x i32> @fold_vector_interleave6() { +; CHECK-LABEL: define <24 x i32> @fold_vector_interleave6() { +; CHECK-NEXT: ret <24 x i32> +; + %1 = call <24 x i32> @llvm.vector.interleave6.v24i32(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) + ret <24 x i32> %1 +} + +define <28 x i32> @fold_vector_interleave7() { +; CHECK-LABEL: define <28 x i32> @fold_vector_interleave7() { +; CHECK-NEXT: ret <28 x i32> +; + %1 = call <28 x i32> @llvm.vector.interleave7.v28i32(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) + ret <28 x i32> %1 +} + +define <32 x i32> @fold_vector_interleave8() { +; CHECK-LABEL: define <32 x i32> @fold_vector_interleave8() { +; CHECK-NEXT: ret <32 x i32> +; + %1 = call <32 x i32> @llvm.vector.interleave8.v32i32(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , 
<4 x i32> , <4 x i32> , <4 x i32> ) + ret <32 x i32> %1 +} + define {<4 x i32>, <4 x i32>} @fold_vector_deinterleave2() { ; CHECK-LABEL: define { <4 x i32>, <4 x i32> } @fold_vector_deinterleave2() { ; CHECK-NEXT: ret { <4 x i32>, <4 x i32> } { <4 x i32> , <4 x i32> } From 96e58b83a3aa681cbf5b8288c3012d5d5d20398c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 18 Nov 2025 11:13:54 -0800 Subject: [PATCH 29/57] [RISCV] Legalize misaligned unmasked vp.load/vp.store to vle8/vse8. (#167745) If vector-unaligned-mem support is not enabled, we should not generate loads/stores that are not aligned to their element size. We already do this for non-VP vector loads/stores. This code has been in our downstream for about a year and a half after finding the vectorizer generating misaligned loads/stores. I don't think that is unique to our downstream. Doing this for masked vp.load/store requires widening the mask as well which is harder to do. NOTE: Because we have to scale the VL, this will introduce additional vsetvli and the VL optimizer will not be effective at optimizing any arithmetic that is consumed by the store. 
--- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 103 +++++++++++++++++- llvm/lib/Target/RISCV/RISCVISelLowering.h | 3 + .../rvv/fixed-vectors-shuffle-exact-vlen.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 13 +++ llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 13 +++ 6 files changed, 141 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 16fdef06d6679..8827bff111c22 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -9913,8 +9913,6 @@ SDValue SelectionDAG::getLoadVP( MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges, bool IsExpanding) { - assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); // If we don't have a PtrInfo, infer the trivial frame index case to simplify @@ -9936,6 +9934,11 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + assert(Mask.getValueType().getVectorElementCount() == + VT.getVectorElementCount() && + "Vector width mismatch between mask and data"); + bool Indexed = AM != ISD::UNINDEXED; assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); @@ -10031,6 +10034,10 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, ISD::MemIndexedMode AM, bool IsTruncating, bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + assert(Mask.getValueType().getVectorElementCount() == + Val.getValueType().getVectorElementCount() && + "Vector width mismatch between mask and data"); + bool Indexed = AM != 
ISD::UNINDEXED; assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 921d12757d672..2d6bb06d689c3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6846,6 +6846,99 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, Store->getMemOperand()->getFlags()); } +// While RVV has alignment restrictions, we should always be able to load as a +// legal equivalently-sized byte-typed vector instead. This method is +// responsible for re-expressing a ISD::VP_LOAD via a correctly-aligned type. If +// the load is already correctly-aligned, it returns SDValue(). +SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op, + SelectionDAG &DAG) const { + auto *Load = cast(Op); + assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); + + if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + Load->getMemoryVT(), + *Load->getMemOperand())) + return SDValue(); + + SDValue Mask = Load->getMask(); + + // FIXME: Handled masked loads somehow. 
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) + return SDValue(); + + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + unsigned EltSizeBits = VT.getScalarSizeInBits(); + assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && + "Unexpected unaligned RVV load type"); + MVT NewVT = + MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); + assert(NewVT.isValid() && + "Expecting equally-sized RVV vector types to be legal"); + + SDValue VL = Load->getVectorLength(); + VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL, + DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType())); + + MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount()); + SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(), + DAG.getAllOnesConstant(DL, MaskVT), VL, + Load->getPointerInfo(), Load->getBaseAlign(), + Load->getMemOperand()->getFlags(), AAMDNodes()); + return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); +} + +// While RVV has alignment restrictions, we should always be able to store as a +// legal equivalently-sized byte-typed vector instead. This method is +// responsible for re-expressing a ISD::VP STORE via a correctly-aligned type. +// It returns SDValue() if the store is already correctly aligned. +SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op, + SelectionDAG &DAG) const { + auto *Store = cast(Op); + assert(Store && Store->getValue().getValueType().isVector() && + "Expected vector store"); + + if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + Store->getMemoryVT(), + *Store->getMemOperand())) + return SDValue(); + + SDValue Mask = Store->getMask(); + + // FIXME: Handled masked stores somehow. 
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) + return SDValue(); + + SDLoc DL(Op); + SDValue StoredVal = Store->getValue(); + MVT VT = StoredVal.getSimpleValueType(); + unsigned EltSizeBits = VT.getScalarSizeInBits(); + assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && + "Unexpected unaligned RVV store type"); + MVT NewVT = + MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); + assert(NewVT.isValid() && + "Expecting equally-sized RVV vector types to be legal"); + + SDValue VL = Store->getVectorLength(); + VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL, + DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType())); + + StoredVal = DAG.getBitcast(NewVT, StoredVal); + + LocationSize Size = LocationSize::precise(NewVT.getStoreSize()); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size, + Store->getBaseAlign()); + + MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount()); + return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(), + DAG.getUNDEF(Store->getBasePtr().getValueType()), + DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO, + ISD::UNINDEXED); +} + static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); @@ -8401,13 +8494,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerFixedLengthVectorStoreToRVV(Op, DAG); return Op; } - case ISD::MLOAD: case ISD::VP_LOAD: + if (SDValue V = expandUnalignedVPLoad(Op, DAG)) + return V; + [[fallthrough]]; + case ISD::MLOAD: return lowerMaskedLoad(Op, DAG); case ISD::VP_LOAD_FF: return lowerLoadFF(Op, DAG); - case ISD::MSTORE: case ISD::VP_STORE: + if (SDValue V = expandUnalignedVPStore(Op, DAG)) + return V; + [[fallthrough]]; + case ISD::MSTORE: return lowerMaskedStore(Op, DAG); case 
ISD::VECTOR_COMPRESS: return lowerVectorCompress(Op, DAG); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 5cc427c867cfd..616664306bcab 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -576,6 +576,9 @@ class RISCVTargetLowering : public TargetLowering { SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const; SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const; + SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll index c3fe6b335d3da..2b800c449953b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll @@ -180,8 +180,8 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) { ; CHECK-LABEL: shuffle1: ; CHECK: # %bb.0: ; CHECK-NEXT: addi a0, a0, 252 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: vmv.v.i v0, 1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index edfa4a7560949..3a26af0279d50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -612,6 +612,19 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, %lo } +define @unaligned_vpload_nxv1i64_allones_mask(* %ptr, i32 zeroext 
%evl) { +; CHECK-LABEL: unaligned_vpload_nxv1i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv1i64.p0(* align 1 %ptr, %b, i32 %evl) + ret %load +} + define @vpload_all_active_nxv8i8(ptr %ptr) { ; CHECK-LABEL: vpload_all_active_nxv8i8: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 3b406656a4dd6..982ec218e4688 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -511,6 +511,19 @@ define void @vpstore_nxv17f64( %val, ptr %ptr, %val, * %ptr, i32 zeroext %evl) { +; CHECK-LABEL: unaligned_vpstore_nxv1i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + call void @llvm.vp.store.nxv1i64.p0( %val, * align 1 %ptr, %b, i32 %evl) + ret void +} + define void @vpstore_all_active_nxv8i8( %val, ptr %ptr) { ; CHECK-LABEL: vpstore_all_active_nxv8i8: ; CHECK: # %bb.0: From 0dd3cb55e2bc93586d15920d5ccd0437c0c6f3ee Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Tue, 18 Nov 2025 11:18:19 -0800 Subject: [PATCH 30/57] Reland instr-ref-target-hooks-sp-clobber.mir (#168136) This test was failing on chromium builds with error: ``` /Volumes/Work/s/w/ir/x/w/llvm_build/bin/llc -o - /Volumes/Work/s/w/ir/x/w/llvm-llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir -run-pass=livedebugvalues | /Volumes/Work/s/w/ir/x/w/llvm_build/bin/FileCheck /Volumes/Work/s/w/ir/x/w/llvm-llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir # RUN: at line 8 + 
/Volumes/Work/s/w/ir/x/w/llvm_build/bin/llc -o - /Volumes/Work/s/w/ir/x/w/llvm-llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir -run-pass=livedebugvalues + /Volumes/Work/s/w/ir/x/w/llvm_build/bin/FileCheck /Volumes/Work/s/w/ir/x/w/llvm-llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir error: YAML:121:3: unknown key 'stackSizePPR' stackSizePPR: 0 ^~~~~~~~~~~~ FileCheck error: '' is empty. FileCheck command line: /Volumes/Work/s/w/ir/x/w/llvm_build/bin/FileCheck /Volumes/Work/s/w/ir/x/w/llvm-llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir ``` This is an attempt to reland the failing test --- .../instr-ref-target-hooks-sp-clobber.mir | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir diff --git a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir new file mode 100644 index 0000000000000..6faa53d290bae --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks-sp-clobber.mir @@ -0,0 +1,188 @@ +# Test to ensure that variable "__last" is properly recovered at the end of the livedebugvalues pass when Instruction Referencing-based LiveDebugValues is used. +# This testcase was obtained by looking at FileCheck.cpp and reducing it down via llvm-reduce. +# This test is the same as llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll, however, the output is taken just before the livedebugvalues pass, and then a clobber +# to the stack slot has been added after the first LDRXui in bb.2.if.then13, the livedebugvalues pass should still recover the value, as it was loaded into $x8 before the clobber. 
+ +# RUN: llc -o - %s -start-before=livedebugvalues -stop-after=livedebugvalues -mtriple=aarch64-apple-darwin | FileCheck %s + +# CHECK: ![[LOC:[0-9]+]] = !DILocalVariable(name: "__last", +# CHECK-LABEL: bb.2.if.then13 +# CHECK: STRXui $xzr, $sp, 1 +# CHECK-NEXT: DBG_VALUE_LIST ![[LOC]], !DIExpression(DW_OP_LLVM_arg, 0), $x8 + +--- | + ; ModuleID = '/Users/srastogi/Development/llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll' + source_filename = "/Users/srastogi/Development/llvm-project/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll" + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + + declare void @_ZdlPvm() + + define fastcc void @"_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb"(ptr %__first, ptr %__last, i1 %cmp, ptr %__first.addr.0, ptr %Label3.i.i.i241, ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, ptr %1) !dbg !4 { + br label %while.cond + + while.cond: ; preds = %if.end16, %2 + br i1 %cmp, label %if.then13, label %if.end16 + + if.then13: ; preds = %while.cond + %cmp.i = icmp eq ptr %__first, %__last + %or.cond.i = select i1 %cmp.i, i1 false, i1 false + #dbg_value(ptr %__last, !10, !DIExpression(), !16) + br i1 %or.cond.i, label %common.ret, label %for.body.i, !dbg !20 + + common.ret: ; preds = %for.body.i, %if.then13 + ret void + + for.body.i: ; preds = %if.then13 + %InputLine.i.i = getelementptr i8, ptr %__first.addr.0, i64 132 + br label %common.ret + + if.end16: ; preds = %while.cond + %__pivot.sroa.13.8.copyload.i = load i64, ptr null, align 8 + call void @_ZdlPvm() + store ptr %__pivot.sroa.9113.8.copyload.i, ptr %0, align 8 + store i64 %__pivot.sroa.13.8.copyload.i, ptr %1, align 8 + store i64 0, ptr %__first, align 8 + store i32 0, ptr %__first.addr.0, align 
8 + store i32 1, ptr %Label3.i.i.i241, align 4 + br label %while.cond + } + + !llvm.module.flags = !{!0} + !llvm.dbg.cu = !{!1} + + !0 = !{i32 2, !"Debug Info Version", i32 3} + !1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !2, producer: "clang version 22.0.0git (git@github.com:llvm/llvm-project.git 46a3b4d5dc6dd9449ec7c0c9065552368cdf41d6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, retainedTypes: !3, globals: !3, imports: !3, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk", sdk: "MacOSX15.3.sdk") + !2 = !DIFile(filename: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project/build-instr-ref-stage2", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") + !3 = !{} + !4 = distinct !DISubprogram(name: "__introsort", linkageName: "_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb", scope: !6, file: !5, line: 758, type: !8, scopeLine: 762, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) + !5 = !DIFile(filename: "/Library/Developer/CommandLineTools/SDKs/MacOSX15.3.sdk/usr/include/c++/v1/__algorithm/sort.h", directory: "") + !6 = !DINamespace(name: "__1", scope: !7, exportSymbols: true) + !7 = !DINamespace(name: "std", scope: null) + !8 = !DISubroutineType(cc: DW_CC_nocall, types: !9) + !9 = !{null} + !10 = !DILocalVariable(name: "__last", arg: 2, scope: !11, file: !5, line: 284, type: !13) + !11 = distinct !DISubprogram(name: 
"__insertion_sort", linkageName: "_ZNSt3__116__insertion_sortB8nn180100INS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_EEvT1_SJ_T0_", scope: !6, file: !5, line: 284, type: !12, scopeLine: 284, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !3, retainedNodes: !3, keyInstructions: true) + !12 = distinct !DISubroutineType(types: !9) + !13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) + !14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InputAnnotation", file: !15, line: 323, size: 768, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !3, identifier: "_ZTS15InputAnnotation") + !15 = !DIFile(filename: "llvm/utils/FileCheck/FileCheck.cpp", directory: "/Users/shubhamrastogi/Development/llvm-project-instr-ref/llvm-project", checksumkind: CSK_MD5, checksum: "fa5f53f1b5782eb8b92fadec416b8941") + !16 = !DILocation(line: 0, scope: !11, inlinedAt: !17) + !17 = distinct !DILocation(line: 800, column: 9, scope: !18) + !18 = distinct !DILexicalBlock(scope: !19, file: !5, line: 799, column: 23) + !19 = distinct !DILexicalBlock(scope: !4, file: !5, line: 770, column: 16) + !20 = !DILocation(line: 288, column: 15, scope: !21, inlinedAt: !17, atomGroup: 1, atomRank: 1) + !21 = distinct !DILexicalBlock(scope: !11, file: !5, line: 288, column: 7) +... 
+--- +name: '_ZNSt3__111__introsortINS_17_ClassicAlgPolicyERZL18DumpAnnotatedInputRN4llvm11raw_ostreamERKNS2_16FileCheckRequestE20DumpInputFilterValuejNS2_9StringRefERNS_6vectorI15InputAnnotationNS_9allocatorISB_EEEEjE3$_0PSB_Lb0EEEvT1_SJ_T0_NS_15iterator_traitsISJ_E15difference_typeEb' +alignment: 4 +tracksRegLiveness: true +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +debugInstrRef: true +tracksDebugUserValues: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$w2' } + - { reg: '$x3' } + - { reg: '$x4' } + - { reg: '$x5' } + - { reg: '$x6' } + - { reg: '$x7' } +frameInfo: + stackSize: 112 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 + isCalleeSavedInfoValid: true +stack: + - { id: 0, type: spill-slot, offset: -104, size: 8, alignment: 8 } + - { id: 1, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '$lr' } + - { id: 2, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '$fp' } + - { id: 3, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$x19' } + - { id: 4, type: spill-slot, offset: -32, size: 8, alignment: 8, callee-saved-register: '$x20' } + - { id: 5, type: spill-slot, offset: -40, size: 8, alignment: 8, callee-saved-register: '$x21' } + - { id: 6, type: spill-slot, offset: -48, size: 8, alignment: 8, callee-saved-register: '$x22' } + - { id: 7, type: spill-slot, offset: -56, size: 8, alignment: 8, callee-saved-register: '$x23' } + - { id: 8, type: spill-slot, offset: -64, size: 8, alignment: 8, callee-saved-register: '$x24' } + - { id: 9, type: spill-slot, offset: -72, size: 8, alignment: 8, callee-saved-register: '$x25' } + - { id: 10, type: spill-slot, offset: -80, size: 8, alignment: 8, callee-saved-register: '$x26' } + - { id: 11, type: spill-slot, offset: -88, size: 8, alignment: 8, callee-saved-register: '$x27' } + - { id: 12, type: spill-slot, offset: -96, size: 8, alignment: 8, callee-saved-register: 
'$x28' } +machineFunctionInfo: + hasRedZone: false + stackSizeZPR: 0 + stackSizePPR: 0 + hasStackFrame: true +body: | + bb.0 (%ir-block.2): + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + liveins: $w2, $x0, $x1, $x3, $x4, $x5, $x6, $x7, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20, $lr + + $sp = frame-setup SUBXri $sp, 112, 0 + frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.12), (store (s64) into %stack.11) + frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.10), (store (s64) into %stack.9) + frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.8), (store (s64) into %stack.7) + frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.6), (store (s64) into %stack.5) + frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.4), (store (s64) into %stack.3) + frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store (s64) into %stack.2), (store (s64) into %stack.1) + frame-setup CFI_INSTRUCTION def_cfa_offset 112 + frame-setup CFI_INSTRUCTION offset $w30, -8 + frame-setup CFI_INSTRUCTION offset $w29, -16 + frame-setup CFI_INSTRUCTION offset $w19, -24 + frame-setup CFI_INSTRUCTION offset $w20, -32 + frame-setup CFI_INSTRUCTION offset $w21, -40 + frame-setup CFI_INSTRUCTION offset $w22, -48 + frame-setup CFI_INSTRUCTION offset $w23, -56 + frame-setup CFI_INSTRUCTION offset $w24, -64 + frame-setup CFI_INSTRUCTION offset $w25, -72 + frame-setup CFI_INSTRUCTION offset $w26, -80 + frame-setup CFI_INSTRUCTION offset $w27, -88 + frame-setup CFI_INSTRUCTION offset $w28, -96 + DBG_PHI $x1, 1 + $x19 = ORRXrs $xzr, killed $x7, 0 + $x20 = ORRXrs $xzr, killed $x6, 0 + $x21 = ORRXrs $xzr, killed $x5, 0 + $x22 = ORRXrs $xzr, killed $x4, 0 + $x23 = ORRXrs $xzr, killed $x3, 0 + $w25 = ORRWrs $wzr, killed $w2, 0 + $x26 = ORRXrs $xzr, killed $x0, 0 + renamable $w27 = MOVZWi 1, 0 + STRXui killed $x1, $sp, 1 :: (store (s64) into %stack.0) + TBNZW 
renamable $w25, 0, %bb.2 + + bb.3.if.end16: + successors: %bb.2(0x04000000), %bb.3(0x7c000000) + liveins: $w25, $w27, $x19, $x20, $x21, $x22, $x23, $x26 + + $x28 = ORRXrs $xzr, $xzr, 0 + renamable $x24 = LDRXui killed renamable $x28, 0 :: (load (s64) from `ptr null`) + BL @_ZdlPvm, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + STRXui renamable $x21, renamable $x20, 0 :: (store (s64) into %ir.0) + STRXui killed renamable $x24, renamable $x19, 0 :: (store (s64) into %ir.1) + STRXui $xzr, renamable $x26, 0 :: (store (s64) into %ir.__first) + STRWui $wzr, renamable $x23, 0 :: (store (s32) into %ir.__first.addr.0, align 8) + STRWui renamable $w27, renamable $x22, 0 :: (store (s32) into %ir.Label3.i.i.i241) + TBZW renamable $w25, 0, %bb.3 + + bb.2.if.then13: + liveins: $x26 + + DBG_INSTR_REF !10, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(1, 0), debug-location !16 + renamable $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.0) + ; Clobber the stack slot that contains the value we care about, to ensure that LDV can still recover it from $x8 above + STRXui $xzr, $sp, 1 :: (store (s64) into %stack.0) + $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.2), (load (s64) from %stack.1) + $x20, $x19 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.4), (load (s64) from %stack.3) + $xzr = SUBSXrs killed renamable $x26, killed renamable $x8, 0, implicit-def $nzcv, debug-location !20 + $x22, $x21 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.6), (load (s64) from %stack.5) + $x24, $x23 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.8), (load (s64) from %stack.7) + $x26, $x25 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.10), (load (s64) from %stack.9) + $x28, $x27 = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.12), (load (s64) from %stack.11) + $sp = frame-destroy ADDXri $sp, 112, 0 + RET undef $lr +... 
From b630721d543091821fec1c631285573763370e83 Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Tue, 18 Nov 2025 11:20:19 -0800 Subject: [PATCH 31/57] [bazel] Fix #164904 (#168593) --- utils/bazel/llvm-project-overlay/lldb/BUILD.bazel | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel index 8ebb6ab5daa3c..b69dddba261a9 100644 --- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -988,6 +988,15 @@ gentbl_cc_library( deps = ["//llvm:OptParserTdFiles"], ) +gentbl_cc_library( + name = "lldb_platform_opts_gen", + strip_include_prefix = ".", + tbl_outs = {"PlatformOptions.inc": ["-gen-opt-parser-defs"]}, + tblgen = "//llvm:llvm-tblgen", + td_file = "tools/lldb-server/PlatformOptions.td", + deps = ["//llvm:OptParserTdFiles"], +) + cc_binary( name = "lldb-server", srcs = glob([ @@ -1006,6 +1015,7 @@ cc_binary( ":Interpreter", ":Utility", ":Version", + ":lldb_platform_opts_gen", ":lldb_server_opts_gen", "//lldb:Target", "//lldb:TargetHeaders", From e93763e7909b746136c88caf77572d937b8f2af8 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 18 Nov 2025 11:28:29 -0800 Subject: [PATCH 32/57] [dsymutil] Specify that -flat is for testing in the help output (#168590) Gently discourage users from relying on -flat by specifying in the help output that it's meant for testing. --- llvm/tools/dsymutil/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/dsymutil/Options.td b/llvm/tools/dsymutil/Options.td index e99bc12fa7fd8..571f90c1e46f5 100644 --- a/llvm/tools/dsymutil/Options.td +++ b/llvm/tools/dsymutil/Options.td @@ -94,7 +94,7 @@ def: Flag<["-"], "s">, Group; def flat: F<"flat">, - HelpText<"Produce a flat dSYM file (not a bundle).">, + HelpText<"Produce a flat dSYM file (not a bundle). 
Intended for testing and generally unsupported by tools that consume dSYMs.">, Group; def: Flag<["-"], "f">, Alias, From 2ad93b4775cf8524bc775e871f2224f30ef92947 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 18 Nov 2025 19:30:00 +0000 Subject: [PATCH 33/57] [X86] getRoundingModeX86 - add missing "clang-format on" toggle comment (#168588) This was preventing later code to be formatted --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 864e5dc67682c..1251a3ca8dbaa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5369,12 +5369,12 @@ bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) { int getRoundingModeX86(unsigned RM) { switch (static_cast<::llvm::RoundingMode>(RM)) { // clang-format off - case ::llvm::RoundingMode::NearestTiesToEven: return X86::rmToNearest; break; - case ::llvm::RoundingMode::TowardNegative: return X86::rmDownward; break; - case ::llvm::RoundingMode::TowardPositive: return X86::rmUpward; break; - case ::llvm::RoundingMode::TowardZero: return X86::rmTowardZero; break; - default: - return X86::rmInvalid; // Invalid rounding mode + case ::llvm::RoundingMode::NearestTiesToEven: return X86::rmToNearest; + case ::llvm::RoundingMode::TowardNegative: return X86::rmDownward; + case ::llvm::RoundingMode::TowardPositive: return X86::rmUpward; + case ::llvm::RoundingMode::TowardZero: return X86::rmTowardZero; + default: return X86::rmInvalid; + // clang-format on } } From ac6e48de40ec8be78d407072479cdbf7aa35535d Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 18 Nov 2025 11:32:23 -0800 Subject: [PATCH 34/57] Modify llvm-dwp to be able to emit string tables over 4GB without losing data (#167457) We can change llvm-dwp to emit DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. 
This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceed UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header and then each string offset in that table can now have a 64 bit offset. Fixed LLDB to be able to successfully load the 64 bit string tables in .dwp files. Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing with DWARF64 string table headers. --- llvm/include/llvm/DWP/DWP.h | 13 ++- llvm/include/llvm/DWP/DWPStringPool.h | 6 +- llvm/lib/DWP/DWP.cpp | 99 +++++++++++++++---- .../llvm-dwp/X86/dwarf64-str-offsets.test | 81 +++++++++++++++ llvm/tools/llvm-dwp/Opts.td | 15 +++ llvm/tools/llvm-dwp/llvm-dwp.cpp | 27 ++++- 6 files changed, 214 insertions(+), 27 deletions(-) create mode 100644 llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h index a759bae10d160..10fdae25d4eef 100644 --- a/llvm/include/llvm/DWP/DWP.h +++ b/llvm/include/llvm/DWP/DWP.h @@ -22,6 +22,12 @@ enum OnCuIndexOverflow { Continue, }; +enum Dwarf64StrOffsetsPromotion { + Disabled, ///< Don't do any conversion of .debug_str_offsets tables. + Enabled, ///< Convert any .debug_str_offsets tables to DWARF64 if needed. + Always, ///< Always emit .debug_str_offsets tables as DWARF64 for testing. 
+}; + struct UnitIndexEntry { DWARFUnitIndex::Entry::SectionContribution Contributions[8]; std::string Name; @@ -68,7 +74,10 @@ struct CompileUnitIdentifiers { }; LLVM_ABI Error write(MCStreamer &Out, ArrayRef Inputs, - OnCuIndexOverflow OverflowOptValue); + OnCuIndexOverflow OverflowOptValue, + Dwarf64StrOffsetsPromotion StrOffsetsOptValue); + +typedef std::vector> SectionLengths; LLVM_ABI Error handleSection( const StringMap> &KnownSections, @@ -82,7 +91,7 @@ LLVM_ABI Error handleSection( std::vector &CurTypesSection, std::vector &CurInfoSection, StringRef &AbbrevSection, StringRef &CurCUIndexSection, StringRef &CurTUIndexSection, - std::vector> &SectionLength); + SectionLengths &SectionLength); LLVM_ABI Expected parseInfoSectionUnitHeader(StringRef Info); diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h index 1354b46f156b6..d1486ff7872e1 100644 --- a/llvm/include/llvm/DWP/DWPStringPool.h +++ b/llvm/include/llvm/DWP/DWPStringPool.h @@ -32,13 +32,13 @@ class DWPStringPool { MCStreamer &Out; MCSection *Sec; - DenseMap Pool; - uint32_t Offset = 0; + DenseMap Pool; + uint64_t Offset = 0; public: DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {} - uint32_t getOffset(const char *Str, unsigned Length) { + uint64_t getOffset(const char *Str, unsigned Length) { assert(strlen(Str) + 1 == Length && "Ensure length hint is correct"); auto Pair = Pool.insert(std::make_pair(Str, Offset)); diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp index b565edbfe96db..a563a90a1fb4d 100644 --- a/llvm/lib/DWP/DWP.cpp +++ b/llvm/lib/DWP/DWP.cpp @@ -413,33 +413,52 @@ Expected parseInfoSectionUnitHeader(StringRef Info) { } static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data, - DenseMap &OffsetRemapping, - uint64_t &Offset, uint64_t &Size) { - + DenseMap &OffsetRemapping, + uint64_t &Offset, const uint64_t Size, + uint32_t OldOffsetSize, uint32_t NewOffsetSize) { + // Create a mask so we don't trigger a 
emitIntValue() assert below if the + // NewOffset is over 4GB. + const uint64_t NewOffsetMask = NewOffsetSize == 8 ? UINT64_MAX : UINT32_MAX; while (Offset < Size) { - auto OldOffset = Data.getU32(&Offset); - auto NewOffset = OffsetRemapping[OldOffset]; - Out.emitIntValue(NewOffset, 4); + const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize); + const uint64_t NewOffset = OffsetRemapping[OldOffset]; + // Truncate the string offset like the old llvm-dwp would have if we aren't + // promoting the .debug_str_offsets to DWARF64. + Out.emitIntValue(NewOffset & NewOffsetMask, NewOffsetSize); } } -void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, - MCSection *StrOffsetSection, - StringRef CurStrSection, - StringRef CurStrOffsetSection, uint16_t Version) { +void writeStringsAndOffsets( + MCStreamer &Out, DWPStringPool &Strings, MCSection *StrOffsetSection, + StringRef CurStrSection, StringRef CurStrOffsetSection, uint16_t Version, + SectionLengths &SectionLength, + const Dwarf64StrOffsetsPromotion StrOffsetsOptValue) { // Could possibly produce an error or warning if one of these was non-null but // the other was null. if (CurStrSection.empty() || CurStrOffsetSection.empty()) return; - DenseMap OffsetRemapping; + DenseMap OffsetRemapping; DataExtractor Data(CurStrSection, true, 0); uint64_t LocalOffset = 0; uint64_t PrevOffset = 0; + + // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can + // emit a DWARF64 .debug_str_offsets table for this compile unit. If the + // \a StrOffsetsOptValue argument is Dwarf64StrOffsetsPromotion::Always, then + // force the emission of DWARF64 .debug_str_offsets for testing. + uint32_t OldOffsetSize = 4; + uint32_t NewOffsetSize = + StrOffsetsOptValue == Dwarf64StrOffsetsPromotion::Always ? 
8 : 4; while (const char *S = Data.getCStr(&LocalOffset)) { - OffsetRemapping[PrevOffset] = - Strings.getOffset(S, LocalOffset - PrevOffset); + uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset); + OffsetRemapping[PrevOffset] = NewOffset; + // Only promote the .debug_str_offsets to DWARF64 if our setting allows it. + if (StrOffsetsOptValue != Dwarf64StrOffsetsPromotion::Disabled && + NewOffset > UINT32_MAX) { + NewOffsetSize = 8; + } PrevOffset = LocalOffset; } @@ -451,7 +470,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, uint64_t Size = CurStrOffsetSection.size(); if (Version > 4) { while (Offset < Size) { - uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); + const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version); assert(HeaderSize <= Size - Offset && "StrOffsetSection size is less than its header"); @@ -461,16 +480,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, if (HeaderSize == 8) { ContributionSize = Data.getU32(&HeaderLengthOffset); } else if (HeaderSize == 16) { + OldOffsetSize = 8; HeaderLengthOffset += 4; // skip the dwarf64 marker ContributionSize = Data.getU64(&HeaderLengthOffset); } ContributionEnd = ContributionSize + HeaderLengthOffset; - Out.emitBytes(Data.getBytes(&Offset, HeaderSize)); - writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd); + + StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize); + if (OldOffsetSize == 4 && NewOffsetSize == 8) { + // We had a DWARF32 .debug_str_offsets header, but we need to emit + // some string offsets that require 64 bit offsets on the .debug_str + // section. Emit the .debug_str_offsets header in DWARF64 format so we + // can emit string offsets that exceed UINT32_MAX without truncating + // the string offset. + + // 2 bytes for DWARF version, 2 bytes pad. 
+ const uint64_t VersionPadSize = 4; + const uint64_t NewLength = + (ContributionSize - VersionPadSize) * 2 + VersionPadSize; + // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64 + // value followed by the 8 byte updated length. + Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4); + Out.emitIntValue(NewLength, 8); + // Emit DWARF version as a 2 byte integer. + Out.emitIntValue(Version, 2); + // Emit 2 bytes of padding. + Out.emitIntValue(0, 2); + // Update the .debug_str_offsets section length contribution for + // this .dwo file. + for (auto &Pair : SectionLength) { + if (Pair.first == DW_SECT_STR_OFFSETS) { + Pair.second = NewLength + 12; + break; + } + } + } else { + // Just emit the same .debug_str_offsets header. + Out.emitBytes(HeaderBytes); + } + writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd, + OldOffsetSize, NewOffsetSize); } } else { - writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size); + assert(OldOffsetSize == NewOffsetSize); + writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize, + NewOffsetSize); } } @@ -562,7 +617,7 @@ Error handleSection( std::vector &CurTypesSection, std::vector &CurInfoSection, StringRef &AbbrevSection, StringRef &CurCUIndexSection, StringRef &CurTUIndexSection, - std::vector> &SectionLength) { + SectionLengths &SectionLength) { if (Section.isBSS()) return Error::success(); @@ -620,7 +675,8 @@ Error handleSection( } Error write(MCStreamer &Out, ArrayRef Inputs, - OnCuIndexOverflow OverflowOptValue) { + OnCuIndexOverflow OverflowOptValue, + Dwarf64StrOffsetsPromotion StrOffsetsOptValue) { const auto &MCOFI = *Out.getContext().getObjectFileInfo(); MCSection *const StrSection = MCOFI.getDwarfStrDWOSection(); MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection(); @@ -684,7 +740,7 @@ Error write(MCStreamer &Out, ArrayRef Inputs, // This maps each section contained in this file to its length. 
// This information is later on used to calculate the contributions, // i.e. offset and length, of each compile/type unit to a section. - std::vector> SectionLength; + SectionLengths SectionLength; for (const auto &Section : Obj.sections()) if (auto Err = handleSection( @@ -713,7 +769,8 @@ Error write(MCStreamer &Out, ArrayRef Inputs, } writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, - CurStrOffsetSection, Header.Version); + CurStrOffsetSection, Header.Version, SectionLength, + StrOffsetsOptValue); for (auto Pair : SectionLength) { auto Index = getContributionIndex(Pair.first, IndexVersion); diff --git a/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test new file mode 100644 index 0000000000000..26f7acae70aeb --- /dev/null +++ b/llvm/test/tools/llvm-dwp/X86/dwarf64-str-offsets.test @@ -0,0 +1,81 @@ +# This test tests that llvm-dwp can successfully promote .debug_str_offsets to +# DWARF64. We do this by using a hidden option to llvm-dwp which is +# "--force-dwarf64-str-offsets". This allows us to test if llvm-dwp can +# successfully promote a DWARF32 version of .debug_str_offsets to a DWARF64 +# version. This allows us to test the functionality without having to create a +# 4GB .dwo file. 
+ +# RUN: yaml2obj %s -o %t.dwo +# RUN: llvm-dwp %t.dwo -o %t.dwp +# RUN: llvm-dwp %t.dwo -o %t.default.dwp --dwarf64-str-offsets-promotion +# RUN: llvm-dwp %t.dwo -o %t.disabled.dwp --dwarf64-str-offsets-promotion=disabled +# RUN: llvm-dwp %t.dwo -o %t.enabled.dwp --dwarf64-str-offsets-promotion=enabled +# RUN: llvm-dwp %t.dwo -o %t.always.dwp --dwarf64-str-offsets-promotion=always +# RUN: not llvm-dwp %t.dwo -o %t.invalid.dwp --dwarf64-str-offsets-promotion=invalid 2>&1 | FileCheck --check-prefixes=ERROR %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.default.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.disabled.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.enabled.dwp | FileCheck --check-prefixes=DWARF32 %s +# RUN: llvm-dwarfdump --debug-str-offsets %t.always.dwp | FileCheck --check-prefixes=DWARF64 %s + +# DWARF32: .debug_str_offsets.dwo contents: +# DWARF32-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5 +# DWARF32-NEXT: 0x00000008: 00000000 "main" +# DWARF32-NEXT: 0x0000000c: 00000005 "int" +# DWARF32-NEXT: 0x00000010: 00000009 "argc" +# DWARF32-NEXT: 0x00000014: 0000000e "argv" +# DWARF32-NEXT: 0x00000018: 00000013 "char" +# DWARF32-NEXT: 0x0000001c: 00000018 "Apple clang version 17.0.0 (clang-1700.4.4.1)" +# DWARF32-NEXT: 0x00000020: 00000046 "simple.cpp" +# DWARF32-NEXT: 0x00000024: 00000051 "simple.dwo" + +# DWARF64: .debug_str_offsets.dwo contents: +# DWARF64-NEXT: 0x00000000: Contribution size = 68, Format = DWARF64, Version = 5 +# DWARF64-NEXT: 0x00000010: 0000000000000000 "main" +# DWARF64-NEXT: 0x00000018: 0000000000000005 "int" +# DWARF64-NEXT: 0x00000020: 0000000000000009 "argc" +# DWARF64-NEXT: 0x00000028: 000000000000000e "argv" +# DWARF64-NEXT: 0x00000030: 0000000000000013 "char" +# DWARF64-NEXT: 0x00000038: 0000000000000018 "Apple clang version 
17.0.0 (clang-1700.4.4.1)" +# DWARF64-NEXT: 0x00000040: 0000000000000046 "simple.cpp" +# DWARF64-NEXT: 0x00000048: 0000000000000051 "simple.dwo" + +# ERROR: invalid value for --dwarf64-str-offsets-promotion. Valid values are one of: "enabled", "disabled" or "always". + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SectionHeaderStringTable: .strtab +Sections: + - Name: .debug_str_offsets.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: '24000000050000000000000005000000090000000E00000013000000180000004600000051000000' + - Name: .debug_str.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE, SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 6D61696E00696E74006172676300617267760063686172004170706C6520636C616E672076657273696F6E2031372E302E302028636C616E672D313730302E342E342E31290073696D706C652E6370700073696D706C652E64776F00 + - Name: .debug_info.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 540000000500050800000000031DD228762F8E1C0105210006070200190000000156000001400000000302917802000140000000030291700300014400000000040105040549000000054E00000006530000000404060100 + - Name: .debug_abbrev.dwo + Type: SHT_PROGBITS + Flags: [ SHF_EXCLUDE ] + AddressAlign: 0x1 + Content: 01110125251305032576250000022E01111B1206401803253A0B3B0B49133F190000030500021803253A0B3B0B4913000004240003253E0B0B0B0000050F00491300000626004913000000 + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .debug_str_offsets.dwo + - Name: .debug_str.dwo + - Name: .debug_info.dwo + - Name: .debug_abbrev.dwo +... 
diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td index 46593bc40ebae..d4474ac073fd7 100644 --- a/llvm/tools/llvm-dwp/Opts.td +++ b/llvm/tools/llvm-dwp/Opts.td @@ -16,3 +16,18 @@ def continueOnCuIndexOverflow_EQ : Joined<["-", "--"], "continue-on-cu-index-ove "\t\ttruncated but valid DWP file, discarding any DWO files that would not fit within \n" "\t\tthe 32 bit/4GB limits of the format.">, Values<"continue,soft-stop">; + +def dwarf64StringOffsets : Flag<["-", "--"], "dwarf64-str-offsets-promotion">; +def dwarf64StringOffsets_EQ + : Joined<["-", "--"], "dwarf64-str-offsets-promotion=">, + HelpText<"default = enabled, This allows .debug_str tables to exceed the " + "4GB limit\n" + "and have any DWARF32 .debug_str_offsets tables converted to " + "DWARF64 only for tables\n" + "that require 64 bit string offsets. = disabled, This setting " + "doesn't convert DWARF32\n" + ".debug_str_offsets tables in .dwo files to DWARF64 in the .dwp " + "file. = always, This\n" + "forces all .debug_str_offsets tables to be emitted as DWARF64. 
" + "This is used for testing.">, + Values<"disabled,enabled,always">; diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp index 31bad2d68982b..2892450398bb6 100644 --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp @@ -125,6 +125,9 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { llvm::BumpPtrAllocator A; llvm::StringSaver Saver{A}; OnCuIndexOverflow OverflowOptValue = OnCuIndexOverflow::HardStop; + Dwarf64StrOffsetsPromotion Dwarf64StrOffsetsValue = + Dwarf64StrOffsetsPromotion::Disabled; + opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { llvm::errs() << Msg << '\n'; @@ -161,6 +164,27 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { } } + if (Arg *Arg = Args.getLastArg(OPT_dwarf64StringOffsets, + OPT_dwarf64StringOffsets_EQ)) { + if (Arg->getOption().matches(OPT_dwarf64StringOffsets)) { + Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled; + } else { + std::string OptValue = Arg->getValue(); + if (OptValue == "disabled") { + Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Disabled; + } else if (OptValue == "enabled") { + Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Enabled; + } else if (OptValue == "always") { + Dwarf64StrOffsetsValue = Dwarf64StrOffsetsPromotion::Always; + } else { + llvm::errs() + << "invalid value for --dwarf64-str-offsets-promotion. 
Valid " + "values are one of: \"enabled\", \"disabled\" or \"always\".\n"; + exit(1); + } + } + } + for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames)) ExecFilenames.emplace_back(A->getValue()); @@ -274,7 +298,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) { if (!MS) return error("no object streamer for target " + TripleName, Context); - if (auto Err = write(*MS, DWOFilenames, OverflowOptValue)) { + if (auto Err = + write(*MS, DWOFilenames, OverflowOptValue, Dwarf64StrOffsetsValue)) { logAllUnhandledErrors(std::move(Err), WithColor::error()); return 1; } From 58b8e6e4241ba71c8ffeef4578f1bebb9cec9db9 Mon Sep 17 00:00:00 2001 From: Laxman Sole Date: Tue, 18 Nov 2025 11:33:40 -0800 Subject: [PATCH 35/57] [DebugInfo][IR] Verifier checks for the extraData (#167971) LLVM IR verifier checks for `extraData` in debug info metadata. This is a follow-up PR based on discussions in #165023 --- llvm/lib/IR/Verifier.cpp | 25 +++++++++ .../Verifier/diderivedtype-extradata-tuple.ll | 55 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 llvm/test/Verifier/diderivedtype-extradata-tuple.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index fa18c3cd0f404..92e7b7530b038 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1158,6 +1158,7 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { static bool isType(const Metadata *MD) { return !MD || isa(MD); } static bool isScope(const Metadata *MD) { return !MD || isa(MD); } static bool isDINode(const Metadata *MD) { return !MD || isa(MD); } +static bool isMDTuple(const Metadata *MD) { return !MD || isa(MD); } void Verifier::visitDILocation(const DILocation &N) { CheckDI(N.getRawScope() && isa(N.getRawScope()), @@ -1320,6 +1321,30 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) { if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) { CheckDI(isType(N.getRawExtraData()), "invalid pointer to member 
type", &N, N.getRawExtraData()); + } else if (N.getTag() == dwarf::DW_TAG_template_alias) { + CheckDI(isMDTuple(N.getRawExtraData()), "invalid template parameters", &N, + N.getRawExtraData()); + } else if (N.getTag() == dwarf::DW_TAG_inheritance || + N.getTag() == dwarf::DW_TAG_member || + N.getTag() == dwarf::DW_TAG_variable) { + auto *ExtraData = N.getRawExtraData(); + auto IsValidExtraData = [&]() { + if (ExtraData == nullptr) + return true; + if (isa(ExtraData) || isa(ExtraData) || + isa(ExtraData)) + return true; + if (auto *Tuple = dyn_cast(ExtraData)) { + if (Tuple->getNumOperands() != 1) + return false; + return isa_and_nonnull(Tuple->getOperand(0).get()); + } + return false; + }; + CheckDI(IsValidExtraData(), + "extraData must be ConstantAsMetadata, MDString, DIObjCProperty, " + "or MDTuple with single ConstantAsMetadata operand", + &N, ExtraData); } if (N.getTag() == dwarf::DW_TAG_set_type) { diff --git a/llvm/test/Verifier/diderivedtype-extradata-tuple.ll b/llvm/test/Verifier/diderivedtype-extradata-tuple.ll new file mode 100644 index 0000000000000..9258d1db76aff --- /dev/null +++ b/llvm/test/Verifier/diderivedtype-extradata-tuple.ll @@ -0,0 +1,55 @@ +; RUN: not opt -S < %s 2>&1 | FileCheck %s + +;; Test that extraData with MDTuple is only allowed for specific DWARF tags: +;; DW_TAG_inheritance, DW_TAG_member, and DW_TAG_variable + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} + +!1 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + +; Keep all metadata nodes alive so verifier can check them +!named = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16} +!2 = !{i32 0} + +; Valid: DW_TAG_inheritance with tuple extraData should be accepted +!3 = !DIDerivedType(tag: DW_TAG_inheritance, baseType: !1, size: 32, extraData: !2) + +; Valid: DW_TAG_member with tuple extraData should be accepted +!4 = !DIDerivedType(tag: DW_TAG_member, name: "field", baseType: !1, size: 32, extraData: !2) + +; Valid: 
DW_TAG_variable (static member) with tuple extraData should be accepted +!5 = !DIDerivedType(tag: DW_TAG_variable, name: "var", baseType: !1, extraData: !2, flags: DIFlagStaticMember) + +; Invalid: Empty tuple should be rejected +!6 = !{} +; CHECK: extraData must be ConstantAsMetadata, MDString, DIObjCProperty, or MDTuple with single ConstantAsMetadata operand +; CHECK-NEXT: !{{[0-9]+}} = !DIDerivedType(tag: DW_TAG_member +!7 = !DIDerivedType(tag: DW_TAG_member, name: "field2", baseType: !1, extraData: !6) + +; Invalid: Tuple with multiple operands should be rejected +!8 = !{i32 0, i32 1} +; CHECK: extraData must be ConstantAsMetadata, MDString, DIObjCProperty, or MDTuple with single ConstantAsMetadata operand +; CHECK-NEXT: !{{[0-9]+}} = !DIDerivedType(tag: DW_TAG_member +!9 = !DIDerivedType(tag: DW_TAG_member, name: "field3", baseType: !1, extraData: !8) + +; Invalid: Tuple with non-ConstantAsMetadata operand should be rejected +!10 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!11 = !{!10} +; CHECK: extraData must be ConstantAsMetadata, MDString, DIObjCProperty, or MDTuple with single ConstantAsMetadata operand +; CHECK-NEXT: !{{[0-9]+}} = !DIDerivedType(tag: DW_TAG_member +!12 = !DIDerivedType(tag: DW_TAG_member, name: "field4", baseType: !1, extraData: !11) + +; Valid: DW_TAG_template_alias with proper template parameters tuple +; Template aliases are handled specially and accept any MDTuple for template parameters +!13 = !DITemplateTypeParameter(name: "T", type: !1) +!14 = !{!13} +!15 = !DIDerivedType(tag: DW_TAG_template_alias, name: "MyAlias", baseType: !1, extraData: !14) + +; Invalid: DW_TAG_template_alias with non-tuple extraData should fail +; CHECK: invalid template parameters +; CHECK-NEXT: !{{[0-9]+}} = !DIDerivedType(tag: DW_TAG_template_alias +!16 = !DIDerivedType(tag: DW_TAG_template_alias, name: "FailingAlias", baseType: !1, extraData: i32 42) + +; CHECK: warning: ignoring invalid debug info + From 
04a1fd5c5434d47cac7488d777d9a1b472cb71f8 Mon Sep 17 00:00:00 2001 From: Mikhail Gudim Date: Tue, 18 Nov 2025 14:36:43 -0500 Subject: [PATCH 36/57] [RISCV] Make XFAIL test UNSUPPORTED. (#168525) Currently the test cfi-multiple-location.mir is marked as XFAIL. This causes failures on some build bots because the test unexpectedly passes. Mark this test as UNSUPPORTED for now. Later I plan to merge an MR which fixes an issue in CFIInstrInserter and this test will be enabled. --- llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir index 7844589e3f93c..08544a95dedb7 100644 --- a/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir +++ b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir @@ -1,7 +1,7 @@ # RUN: llc %s -mtriple=riscv64 \ # RUN: -run-pass=cfi-instr-inserter \ # RUN: -riscv-enable-cfi-instr-inserter=true -# XFAIL: * +# UNSUPPORTED: target={{.*}} # Technically, it is possible that a callee-saved register is saved in multiple different locations. # CFIInstrInserter should handle this, but currently it does not. From 576e1affab35cff50a7b3beded51c752f1ea2940 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Tue, 18 Nov 2025 20:47:41 +0100 Subject: [PATCH 37/57] [NFC][AMDGPU] IGLP: Fixes for unsigned int handling (#135090) Fixes unsigned int underflows in `MFMASmallGemmSingleWaveOpt::applyIGLPStrategy`. 
--- llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index ddc675bbb8fb7..85addb13aef8d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -2183,7 +2183,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy( SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); // Interleave MFMA with DS_READ prefetch - for (unsigned I = 0; I < DSRCount - 4; ++I) { + for (unsigned I = 4; I < DSRCount; ++I) { SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( SchedGroupMask::DS_READ, 1, PipelineSyncID, DAG, TII); SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); @@ -2196,7 +2196,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy( // Phase 2a: Loop carried dependency with V_PERM // Schedule VPerm & DS_WRITE as closely as possible to the VMEM_READ they // depend on. Interleave MFMA to keep XDL unit busy throughout. - for (unsigned I = 0; I < DSWWithPermCount - DSWWithSharedVMEMCount; ++I) { + for (unsigned I = DSWWithSharedVMEMCount; I < DSWWithPermCount; ++I) { SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII); SG->addRule(std::make_shared(TII, SG->getSGID(), true)); @@ -2233,7 +2233,7 @@ bool MFMASmallGemmSingleWaveOpt::applyIGLPStrategy( // Phase 2b: Loop carried dependency without V_PERM // Schedule DS_WRITE as closely as possible to the VMEM_READ they depend on. // Interleave MFMA to keep XDL unit busy throughout. 
- for (unsigned I = 0; I < DSWCount - DSWWithPermCount; I++) { + for (unsigned I = DSWWithPermCount; I < DSWCount; I++) { SG = &SyncedSchedGroups[PipelineSyncID].emplace_back( SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII); SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]); From 124fa5ce5f211dff6dbdc5f433a445386ac2c26b Mon Sep 17 00:00:00 2001 From: Pawan Nirpal Date: Wed, 19 Nov 2025 01:22:43 +0530 Subject: [PATCH 38/57] [AArch64] - Improve costing for Identity shuffles for SVE targets. (#165375) Identity masks can be treated as free when scalable vectorization is possible making the check agnostic of the vectorization policy fixed/scalable, This allows for aggressive vector combines for identity shuffle masks. --- .../AArch64/AArch64TargetTransformInfo.cpp | 17 +++--- .../CostModel/AArch64/shuffle-other.ll | 12 ++++ .../AArch64/identity-shuffle-sve.ll | 61 +++++++++++++++++++ 3 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 7dd571c7fe60d..bf195ca210e9b 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -6000,6 +6000,15 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, SrcTy = DstTy; } + // Check for identity masks, which we can treat as free for both fixed and + // scalable vector paths. + if (!Mask.empty() && LT.second.isFixedLengthVector() && + (Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) && + all_of(enumerate(Mask), [](const auto &M) { + return M.value() < 0 || M.value() == (int)M.index(); + })) + return 0; + // Segmented shuffle matching. 
if (Kind == TTI::SK_PermuteSingleSrc && isa(SrcTy) && !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() && @@ -6047,14 +6056,6 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, all_of(Mask, [](int E) { return E < 8; })) return getPerfectShuffleCost(Mask); - // Check for identity masks, which we can treat as free. - if (!Mask.empty() && LT.second.isFixedLengthVector() && - (Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) && - all_of(enumerate(Mask), [](const auto &M) { - return M.value() < 0 || M.value() == (int)M.index(); - })) - return 0; - // Check for other shuffles that are not SK_ kinds but we have native // instructions for, for example ZIP and UZP. unsigned Unused; diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll index 4579acb9b3555..255877fcdca5e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -490,3 +490,15 @@ define void @vst4(ptr %p) { ret void } + +define void @identity_shuffle_costs() #0 { +bb: + ; CHECK-LABEL: 'identity_shuffle_costs' + ; CHECK: Cost Model: Found costs of 0 for: %shufflevector142 = shufflevector <16 x i8> zeroinitializer, <16 x i8> zeroinitializer, <16 x i32> + ; CHECK: Cost Model: Found costs of 0 for: %shufflevector84 = shufflevector <16 x i8> zeroinitializer, <16 x i8> poison, <32 x i32> + %shufflevector142 = shufflevector <16 x i8> zeroinitializer, <16 x i8> zeroinitializer, <16 x i32> + %shufflevector84 = shufflevector <16 x i8> zeroinitializer, <16 x i8> poison, <32 x i32> + ret void +} + +attributes #0 = { "target-features"="+sve,+neon" } diff --git a/llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll b/llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll new file mode 100644 index 0000000000000..f499ea9a20c6f --- /dev/null +++ 
b/llvm/test/Transforms/VectorCombine/AArch64/identity-shuffle-sve.ll @@ -0,0 +1,61 @@ +; NOTE: This test is expected to test the Identity shuffle costs as zero, regardless of scalable or fixed width shuffle vectors, As a result enabling aggressive vector-combine transforms. +; RUN: opt -passes=vector-combine -S %s | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define i32 @ham(ptr %call12) #0 { +; CHECK-LABEL: define i32 @ham( +; CHECK-SAME: ptr [[CALL12:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK: [[TMP2:%.*]] = mul nuw nsw <32 x i32> [[TMP1:%.*]], + +; CHECK: [[TMP5:%.*]] = mul nuw <32 x i32> [[TMP4:%.*]], + +; CHECK: [[TMP8:%.*]] = mul nuw <32 x i32> [[TMP7:%.*]], + +bb: + br label %bb48 + +bb48: ; preds = %bb48, %bb + %phi49 = phi i64 [ 0, %bb ], [ %add86, %bb48 ] + %mul50 = mul i64 %phi49, 3 + %getelementptr53 = getelementptr i8, ptr %call12, i64 %mul50 + %load54 = load <48 x i8>, ptr %getelementptr53, align 1 + %shufflevector = shufflevector <48 x i8> %load54, <48 x i8> poison, <16 x i32> + %zext57 = zext <16 x i8> %shufflevector to <16 x i32> + %mul58 = mul nuw nsw <16 x i32> %zext57, splat (i32 19595) + %zext59 = zext <16 x i8> %shufflevector to <16 x i32> + %mul60 = mul nuw nsw <16 x i32> %zext59, splat (i32 38470) + %zext61 = zext <16 x i8> %shufflevector to <16 x i32> + %mul62 = mul nuw nsw <16 x i32> %zext61, splat (i32 7471) + %add63 = add nuw nsw <16 x i32> %mul58, splat (i32 32768) + %add64 = add nuw nsw <16 x i32> %add63, %mul60 + %add65 = add nuw nsw <16 x i32> %add64, %mul62 + %lshr = lshr <16 x i32> %add65, splat (i32 16) + %trunc66 = trunc nuw <16 x i32> %lshr to <16 x i8> + %mul67 = mul nuw nsw <16 x i32> %zext57, splat (i32 32767) + %mul68 = mul nuw <16 x i32> %zext59, splat (i32 16762097) + %mul69 = mul nuw <16 x i32> %zext61, splat (i32 16759568) + %add70 = add nuw nsw <16 x i32> %mul67, splat (i32 32768) + %add71 = add nuw <16 x i32> %add70, %mul68 + %add72 = add <16 x i32> %add71, %mul69 + %lshr73 = lshr <16 x i32> %add72, splat 
(i32 16) + %trunc74 = trunc <16 x i32> %lshr73 to <16 x i8> + %mul75 = mul nuw nsw <16 x i32> %zext57, splat (i32 13282) + %mul76 = mul nuw <16 x i32> %zext59, splat (i32 16744449) + %mul77 = mul nuw nsw <16 x i32> %zext61, splat (i32 19485) + %add78 = add nuw nsw <16 x i32> %mul75, splat (i32 32768) + %add79 = add nuw <16 x i32> %add78, %mul76 + %add80 = add nuw <16 x i32> %add79, %mul77 + %lshr81 = lshr <16 x i32> %add80, splat (i32 16) + %trunc82 = trunc <16 x i32> %lshr81 to <16 x i8> + %shufflevector83 = shufflevector <16 x i8> %trunc66, <16 x i8> %trunc74, <32 x i32> + %shufflevector84 = shufflevector <16 x i8> %trunc82, <16 x i8> poison, <32 x i32> + store <32 x i8> %shufflevector83, ptr %getelementptr53, align 1 + %add86 = add nuw i64 %phi49, 16 + %icmp87 = icmp eq i64 %add86, %mul50 + br i1 %icmp87, label %bb205, label %bb48 + +bb205: ; preds = %bb48, %bb + ret i32 0 +} + +attributes #0 = { vscale_range(1,16) "target-features"="+sve,+neon"} From 4155cdc0f1bac39bad35ac390da4170c0482812f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Nov 2025 15:01:30 -0500 Subject: [PATCH 39/57] Mips: Remove manual libcall name search and table (#168595) This should really check if the libcall is known supported. For now mips doesn't configure its RuntimeLibcallsInfo correctly, and does not have any of the mips16 calls in it. For now there isn't a way to add them without triggering conflicting cases in tablegen, so keep parsing the raw name as it was before. 
--- llvm/lib/Target/Mips/Mips16ISelLowering.cpp | 99 +++++++-------------- 1 file changed, 32 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp index 7bd96b571bc68..51049c83dec52 100644 --- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -31,16 +31,6 @@ static cl::opt DontExpandCondPseudos16( cl::Hidden); namespace { -struct Mips16Libcall { - RTLIB::Libcall Libcall; - RTLIB::LibcallImpl Impl; - const char *Name; // FIXME: Remove this - - bool operator<(const Mips16Libcall &RHS) const { - return std::strcmp(Name, RHS.Name) < 0; - } -}; - struct Mips16IntrinsicHelperType{ const char* Name; const char* Helper; @@ -52,56 +42,27 @@ struct Mips16IntrinsicHelperType{ return std::strcmp(Name, RHS.Name) == 0; } }; -} +} // namespace // Libcalls for which no helper is generated. Sorted by name for binary search. -static const Mips16Libcall HardFloatLibCalls[] = { - {RTLIB::ADD_F64, RTLIB::impl___mips16_adddf3, "__mips16_adddf3"}, - {RTLIB::ADD_F32, RTLIB::impl___mips16_addsf3, "__mips16_addsf3"}, - {RTLIB::DIV_F64, RTLIB::impl___mips16_divdf3, "__mips16_divdf3"}, - {RTLIB::DIV_F32, RTLIB::impl___mips16_divsf3, "__mips16_divsf3"}, - {RTLIB::OEQ_F64, RTLIB::impl___mips16_eqdf2, "__mips16_eqdf2"}, - {RTLIB::OEQ_F32, RTLIB::impl___mips16_eqsf2, "__mips16_eqsf2"}, - {RTLIB::FPEXT_F32_F64, RTLIB::impl___mips16_extendsfdf2, - "__mips16_extendsfdf2"}, - {RTLIB::FPTOSINT_F64_I32, RTLIB::impl___mips16_fix_truncdfsi, - "__mips16_fix_truncdfsi"}, - {RTLIB::FPTOSINT_F32_I32, RTLIB::impl___mips16_fix_truncsfsi, - "__mips16_fix_truncsfsi"}, - {RTLIB::SINTTOFP_I32_F64, RTLIB::impl___mips16_floatsidf, - "__mips16_floatsidf"}, - {RTLIB::SINTTOFP_I32_F32, RTLIB::impl___mips16_floatsisf, - "__mips16_floatsisf"}, - {RTLIB::UINTTOFP_I32_F64, RTLIB::impl___mips16_floatunsidf, - "__mips16_floatunsidf"}, - {RTLIB::UINTTOFP_I32_F32, RTLIB::impl___mips16_floatunsisf, - 
"__mips16_floatunsisf"}, - {RTLIB::OGE_F64, RTLIB::impl___mips16_gedf2, "__mips16_gedf2"}, - {RTLIB::OGE_F32, RTLIB::impl___mips16_gesf2, "__mips16_gesf2"}, - {RTLIB::OGT_F64, RTLIB::impl___mips16_gtdf2, "__mips16_gtdf2"}, - {RTLIB::OGT_F32, RTLIB::impl___mips16_gtsf2, "__mips16_gtsf2"}, - {RTLIB::OLE_F64, RTLIB::impl___mips16_ledf2, "__mips16_ledf2"}, - {RTLIB::OLE_F32, RTLIB::impl___mips16_lesf2, "__mips16_lesf2"}, - {RTLIB::OLT_F64, RTLIB::impl___mips16_ltdf2, "__mips16_ltdf2"}, - {RTLIB::OLT_F32, RTLIB::impl___mips16_ltsf2, "__mips16_ltsf2"}, - {RTLIB::MUL_F64, RTLIB::impl___mips16_muldf3, "__mips16_muldf3"}, - {RTLIB::MUL_F32, RTLIB::impl___mips16_mulsf3, "__mips16_mulsf3"}, - {RTLIB::UNE_F64, RTLIB::impl___mips16_nedf2, "__mips16_nedf2"}, - {RTLIB::UNE_F32, RTLIB::impl___mips16_nesf2, "__mips16_nesf2"}, - {RTLIB::UNKNOWN_LIBCALL, RTLIB::impl___mips16_ret_dc, - "__mips16_ret_dc"}, // No associated libcall. - {RTLIB::UNKNOWN_LIBCALL, RTLIB::impl___mips16_ret_df, - "__mips16_ret_df"}, // No associated libcall. - {RTLIB::UNKNOWN_LIBCALL, RTLIB::impl___mips16_ret_sc, - "__mips16_ret_sc"}, // No associated libcall. - {RTLIB::UNKNOWN_LIBCALL, RTLIB::impl___mips16_ret_sf, - "__mips16_ret_sf"}, // No associated libcall. 
- {RTLIB::SUB_F64, RTLIB::impl___mips16_subdf3, "__mips16_subdf3"}, - {RTLIB::SUB_F32, RTLIB::impl___mips16_subsf3, "__mips16_subsf3"}, - {RTLIB::FPROUND_F64_F32, RTLIB::impl___mips16_truncdfsf2, - "__mips16_truncdfsf2"}, - {RTLIB::UO_F64, RTLIB::impl___mips16_unorddf2, "__mips16_unorddf2"}, - {RTLIB::UO_F32, RTLIB::impl___mips16_unordsf2, "__mips16_unordsf2"}}; +static const RTLIB::LibcallImpl HardFloatLibCalls[] = { + RTLIB::impl___mips16_adddf3, RTLIB::impl___mips16_addsf3, + RTLIB::impl___mips16_divdf3, RTLIB::impl___mips16_divsf3, + RTLIB::impl___mips16_eqdf2, RTLIB::impl___mips16_eqsf2, + RTLIB::impl___mips16_extendsfdf2, RTLIB::impl___mips16_fix_truncdfsi, + RTLIB::impl___mips16_fix_truncsfsi, RTLIB::impl___mips16_floatsidf, + RTLIB::impl___mips16_floatsisf, RTLIB::impl___mips16_floatunsidf, + RTLIB::impl___mips16_floatunsisf, RTLIB::impl___mips16_gedf2, + RTLIB::impl___mips16_gesf2, RTLIB::impl___mips16_gtdf2, + RTLIB::impl___mips16_gtsf2, RTLIB::impl___mips16_ledf2, + RTLIB::impl___mips16_lesf2, RTLIB::impl___mips16_ltdf2, + RTLIB::impl___mips16_ltsf2, RTLIB::impl___mips16_muldf3, + RTLIB::impl___mips16_mulsf3, RTLIB::impl___mips16_nedf2, + RTLIB::impl___mips16_nesf2, RTLIB::impl___mips16_ret_dc, + RTLIB::impl___mips16_ret_df, RTLIB::impl___mips16_ret_sc, + RTLIB::impl___mips16_ret_sf, RTLIB::impl___mips16_subdf3, + RTLIB::impl___mips16_subsf3, RTLIB::impl___mips16_truncdfsf2, + RTLIB::impl___mips16_unorddf2, RTLIB::impl___mips16_unordsf2}; static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = { {"__fixunsdfsi", "__mips16_call_stub_2" }, @@ -261,8 +222,9 @@ void Mips16TargetLowering::setMips16HardFloatLibCalls() { for (unsigned I = 0; I != std::size(HardFloatLibCalls); ++I) { assert((I == 0 || HardFloatLibCalls[I - 1] < HardFloatLibCalls[I]) && "Array not sorted!"); - if (HardFloatLibCalls[I].Libcall != RTLIB::UNKNOWN_LIBCALL) - setLibcallImpl(HardFloatLibCalls[I].Libcall, HardFloatLibCalls[I].Impl); + RTLIB::Libcall LC = + 
RTLIB::RuntimeLibcallsInfo::getLibcallFromImpl(HardFloatLibCalls[I]); + setLibcallImpl(LC, HardFloatLibCalls[I]); } } @@ -417,6 +379,14 @@ const char* Mips16TargetLowering:: return result; } +static bool isMips16HardFloatLibcall(StringRef Name) { + // FIXME: Use getSupportedLibcallImpl instead of blindly parsing the name. + iota_range ParsedLibcalls = + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name); + return !ParsedLibcalls.empty() && + binary_search(HardFloatLibCalls, *ParsedLibcalls.begin()); +} + void Mips16TargetLowering:: getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, @@ -437,10 +407,7 @@ getOpndList(SmallVectorImpl &Ops, // bool LookupHelper = true; if (ExternalSymbolSDNode *S = dyn_cast(CLI.Callee)) { - Mips16Libcall Find = {RTLIB::UNKNOWN_LIBCALL, RTLIB::Unsupported, - S->getSymbol()}; - - if (llvm::binary_search(HardFloatLibCalls, Find)) + if (isMips16HardFloatLibcall(S->getSymbol())) LookupHelper = false; else { const char *Symbol = S->getSymbol(); @@ -478,10 +445,8 @@ getOpndList(SmallVectorImpl &Ops, } } else if (GlobalAddressSDNode *G = dyn_cast(CLI.Callee)) { - Mips16Libcall Find = {RTLIB::UNKNOWN_LIBCALL, RTLIB::Unsupported, - G->getGlobal()->getName().data()}; - if (llvm::binary_search(HardFloatLibCalls, Find)) + if (isMips16HardFloatLibcall(G->getGlobal()->getName())) LookupHelper = false; } if (LookupHelper) From 8aca6c39e2b4ccf4d739c6450ca012d920de8e45 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 18 Nov 2025 21:05:43 +0100 Subject: [PATCH 40/57] [AllocToken] Test compatibility with -fsanitize=kcfi,memtag (#168600) Test that -fsanitize=alloc-token is compatible with kcfi and memtag, as these should also be possible to combine. NFC. 
--- clang/test/Driver/fsanitize-alloc-token.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Driver/fsanitize-alloc-token.c b/clang/test/Driver/fsanitize-alloc-token.c index 6d8bda16dfb96..0ffe9abad8053 100644 --- a/clang/test/Driver/fsanitize-alloc-token.c +++ b/clang/test/Driver/fsanitize-alloc-token.c @@ -5,6 +5,7 @@ // CHECK-NO-TOKEN-ALLOC-NOT: "-fsanitize=alloc-token" // RUN: %clang --target=x86_64-linux-gnu -flto -fvisibility=hidden -fno-sanitize-ignorelist -fsanitize=alloc-token,undefined,cfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-COMPATIBLE +// RUN: %clang --target=aarch64-linux-android -march=armv8-a+memtag -flto -fvisibility=hidden -fsanitize=alloc-token,kcfi,memtag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-COMPATIBLE // CHECK-COMPATIBLE: "-fsanitize={{.*}}alloc-token" // RUN: %clang --target=x86_64-linux-gnu -fsanitize=alloc-token -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-MINIMAL From e1bb50b2845379ef696b26e78aba0f62a3e61fb1 Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Tue, 18 Nov 2025 12:08:03 -0800 Subject: [PATCH 41/57] [bazel] fix #168212 (#168598) --- utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index ddad2f4f7611d..b027d82d98177 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2359,6 +2359,10 @@ llvm_target_lib_list = [lib for lib in [ ["-gen-callingconv"], "lib/Target/ARM/ARMGenCallingConv.inc", ), + ( + ["-gen-sd-node-info"], + "lib/Target/ARM/ARMGenSDNodeInfo.inc", + ), ( ["-gen-subtarget"], "lib/Target/ARM/ARMGenSubtargetInfo.inc", From 56b1d42a65653b23ec9fb96d3cac13d54b4b32ba Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Tue, 18 Nov 2025 12:14:09 -0800 Subject: [PATCH 42/57] [CIR] Mark globals as constants (#168463) We previously added 
support for marking GlobalOp operations as constant, but the handling to actually do so was left mostly unimplemented. This fills in the missing pieces. --- clang/include/clang/CIR/MissingFeatures.h | 1 - clang/lib/CIR/CodeGen/CIRGenDecl.cpp | 3 +- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 11 +++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 +- clang/test/CIR/CodeGen/constant-inits.cpp | 60 +++++++++---------- clang/test/CIR/CodeGen/global-constant.c | 20 +++++++ .../CIR/CodeGen/record-zero-init-padding.c | 16 ++--- clang/test/CIR/CodeGen/vtt.cpp | 14 ++--- 8 files changed, 77 insertions(+), 52 deletions(-) create mode 100644 clang/test/CIR/CodeGen/global-constant.c diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 34c2476ffccce..6b5c34d28ce2a 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -29,7 +29,6 @@ struct MissingFeatures { // Unhandled global/linkage information. static bool opGlobalThreadLocal() { return false; } - static bool opGlobalConstant() { return false; } static bool opGlobalWeakRef() { return false; } static bool opGlobalUnnamedAddr() { return false; } static bool opGlobalSection() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 325875d10d6ea..e0e4f67df87b2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -526,7 +526,8 @@ cir::GlobalOp CIRGenFunction::addInitializerToStaticVarDecl( bool needsDtor = d.needsDestruction(getContext()) == QualType::DK_cxx_destructor; - assert(!cir::MissingFeatures::opGlobalConstant()); + gv.setConstant(d.getType().isConstantStorage( + getContext(), /*ExcludeCtor=*/true, !needsDtor)); gv.setInitialValueAttr(init); emitter.finalize(gv); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index c1f2581eb96e3..4a82ea3121b60 100644 --- 
a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -675,7 +675,10 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty, errorNYI(d->getSourceRange(), "OpenMP target global variable"); gv.setAlignmentAttr(getSize(astContext.getDeclAlign(d))); - assert(!cir::MissingFeatures::opGlobalConstant()); + // FIXME: This code is overly simple and should be merged with other global + // handling. + gv.setConstant(d->getType().isConstantStorage( + astContext, /*ExcludeCtor=*/false, /*ExcludeDtor=*/false)); setLinkageForGV(gv, d); @@ -864,7 +867,11 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, if (emitter) emitter->finalize(gv); - assert(!cir::MissingFeatures::opGlobalConstant()); + // If it is safe to mark the global 'constant', do so now. + gv.setConstant((vd->hasAttr() && langOpts.CUDAIsDevice) || + (!needsGlobalCtor && !needsGlobalDtor && + vd->getType().isConstantStorage( + astContext, /*ExcludeCtor=*/true, /*ExcludeDtor=*/true))); assert(!cir::MissingFeatures::opGlobalSection()); // Set CIR's linkage type as appropriate. diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index d43a462a25092..4912bd197dba4 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1995,7 +1995,6 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( // attributes are available on cir.global ops. This duplicates code // in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go // away when the placeholders are no longer needed. 
- assert(!cir::MissingFeatures::opGlobalConstant()); const bool isConst = op.getConstant(); assert(!cir::MissingFeatures::addressSpace()); const unsigned addrSpace = 0; @@ -2055,8 +2054,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. - assert(!cir::MissingFeatures::opGlobalConstant()); - const bool isConst = false; + const bool isConst = op.getConstant(); assert(!cir::MissingFeatures::addressSpace()); const unsigned addrSpace = 0; const bool isDsoLocal = op.getDsoLocal(); diff --git a/clang/test/CIR/CodeGen/constant-inits.cpp b/clang/test/CIR/CodeGen/constant-inits.cpp index d5a7bb9d57251..ef9802de405c1 100644 --- a/clang/test/CIR/CodeGen/constant-inits.cpp +++ b/clang/test/CIR/CodeGen/constant-inits.cpp @@ -105,57 +105,57 @@ void function() { // CIR-DAG: !rec_anon_struct = !cir.record // CIR-DAG: !rec_anon_struct1 = !cir.record}> -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1e = #cir.zero : !rec_empty -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1s = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE2p1 = #cir.const_record<{#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.const_array<[#cir.int<99> : !s8i, #cir.int<88> : !s8i, #cir.int<77> : !s8i]> : !cir.array, #cir.int<40> : !s32i}> : !rec_Point -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE2p2 = #cir.const_record<{#cir.int<123> : !s8i, #cir.int<456> : !s32i}> : !rec_packed -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE3paa = #cir.const_record<{#cir.int<1> : !s16i, #cir.int<2> : !s8i, #cir.fp<3.000000e+00> : !cir.float, #cir.zero : !u8i}> : !rec_packed_and_aligned +// CIR-DAG: cir.global "private" constant internal dso_local 
@_ZZ8functionvE1e = #cir.zero : !rec_empty +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE1s = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE2p1 = #cir.const_record<{#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.const_array<[#cir.int<99> : !s8i, #cir.int<88> : !s8i, #cir.int<77> : !s8i]> : !cir.array, #cir.int<40> : !s32i}> : !rec_Point +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE2p2 = #cir.const_record<{#cir.int<123> : !s8i, #cir.int<456> : !s32i}> : !rec_packed +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE3paa = #cir.const_record<{#cir.int<1> : !s16i, #cir.int<2> : !s8i, #cir.fp<3.000000e+00> : !cir.float, #cir.zero : !u8i}> : !rec_packed_and_aligned -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5array = #cir.const_array<[ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE5array = #cir.const_array<[ // CIR-DAG-SAME: #cir.const_record<{#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.const_array<[#cir.int<11> : !s8i, #cir.int<22> : !s8i, #cir.int<33> : !s8i]> : !cir.array, #cir.int<789> : !s32i}> : !rec_Point // CIR-DAG-SAME: #cir.const_record<{#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.zero : !cir.array, #cir.int<40> : !s32i}> : !rec_Point // CIR-DAG-SAME: ]> : !cir.array -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE12simple_array = #cir.const_array<[ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE12simple_array = #cir.const_array<[ // CIR-DAG-SAME: #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple, // CIR-DAG-SAME: #cir.const_record<{#cir.int<1111> : !s32i, #cir.int<2222> : !s32i}> : !rec_simple, // CIR-DAG-SAME: #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple // CIR-DAG-SAME: ]> : !cir.array -// 
CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE12packed_array = #cir.const_array<[ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE12packed_array = #cir.const_array<[ // CIR-DAG-SAME: #cir.const_record<{#cir.int<123> : !s8i, #cir.int<456> : !s32i}> : !rec_packed, // CIR-DAG-SAME: #cir.const_record<{#cir.int<123> : !s8i, #cir.int<456> : !s32i}> : !rec_packed // CIR-DAG-SAME: ]> : !cir.array -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE9paa_array = #cir.const_array<[ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE9paa_array = #cir.const_array<[ // CIR-DAG-SAME: #cir.const_record<{#cir.int<1> : !s16i, #cir.int<2> : !s8i, #cir.fp<3.000000e+00> : !cir.float, #cir.zero : !u8i}> : !rec_packed_and_aligned, // CIR-DAG-SAME: #cir.zero : !rec_packed_and_aligned // CIR-DAG-SAME: ]> : !cir.array -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf1 = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE6ba_bf1 = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<255> : !u8i, // CIR-DAG-SAME: #cir.int<170> : !u8i, // CIR-DAG-SAME: #cir.int<52> : !u8i, // CIR-DAG-SAME: #cir.int<18> : !u8i // CIR-DAG-SAME: }> : !rec_anon_struct -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf2 = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE6ba_bf2 = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<255> : !u8i, // CIR-DAG-SAME: #cir.int<127> : !u8i, // CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array // CIR-DAG-SAME: }> : !rec_anon_struct1 -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf3 = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE6ba_bf3 = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<42> : !u8i // CIR-DAG-SAME: }> : !rec_single_byte_bitfield -// 
CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf1 = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE5p_bf1 = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<17> : !u8i, // CIR-DAG-SAME: #cir.int<3> : !u8i, // CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array // CIR-DAG-SAME: }> : !rec_anon_struct1 -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf2 = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE5p_bf2 = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<127> : !u8i, // CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array // CIR-DAG-SAME: }> : !rec_signed_partial_bitfields -// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf3 = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @_ZZ8functionvE5p_bf3 = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<125> : !u8i // CIR-DAG-SAME: }> : !rec_mixed_partial_bitfields @@ -163,21 +163,21 @@ void function() { // CIR: cir.return -// LLVM-DAG: @_ZZ8functionvE12packed_array = internal global [2 x %struct.packed] [%struct.packed <{ i8 123, i32 456 }>, %struct.packed <{ i8 123, i32 456 }>] -// LLVM-DAG: @_ZZ8functionvE12simple_array = internal global [3 x %struct.simple] [%struct.simple { i32 0, i32 -1 }, %struct.simple { i32 1111, i32 2222 }, %struct.simple { i32 0, i32 -1 }] -// LLVM-DAG: @_ZZ8functionvE1e = internal global %struct.empty zeroinitializer -// LLVM-DAG: @_ZZ8functionvE1s = internal global %struct.simple { i32 0, i32 -1 } -// LLVM-DAG: @_ZZ8functionvE2p1 = internal global %struct.Point { i32 10, i32 20, [3 x i8] c"cXM", i32 40 } -// LLVM-DAG: @_ZZ8functionvE2p2 = internal global %struct.packed <{ i8 123, i32 456 }> -// LLVM-DAG: @_ZZ8functionvE3paa = internal global %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }> -// LLVM-DAG: 
@_ZZ8functionvE5array = internal global [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }] -// LLVM-DAG: @_ZZ8functionvE9paa_array = internal global [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>, %struct.packed_and_aligned zeroinitializer] -// LLVM-DAG: @_ZZ8functionvE6ba_bf1 = internal global { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 } -// LLVM-DAG: @_ZZ8functionvE6ba_bf2 = internal global { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] zeroinitializer } -// LLVM-DAG: @_ZZ8functionvE6ba_bf3 = internal global %struct.single_byte_bitfield { i8 42 } -// LLVM-DAG: @_ZZ8functionvE5p_bf1 = internal global { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] zeroinitializer } -// LLVM-DAG: @_ZZ8functionvE5p_bf2 = internal global %struct.signed_partial_bitfields { i8 127, [3 x i8] zeroinitializer } -// LLVM-DAG: @_ZZ8functionvE5p_bf3 = internal global %struct.mixed_partial_bitfields { i8 125 } +// LLVM-DAG: @_ZZ8functionvE12packed_array = internal constant [2 x %struct.packed] [%struct.packed <{ i8 123, i32 456 }>, %struct.packed <{ i8 123, i32 456 }>] +// LLVM-DAG: @_ZZ8functionvE12simple_array = internal constant [3 x %struct.simple] [%struct.simple { i32 0, i32 -1 }, %struct.simple { i32 1111, i32 2222 }, %struct.simple { i32 0, i32 -1 }] +// LLVM-DAG: @_ZZ8functionvE1e = internal constant %struct.empty zeroinitializer +// LLVM-DAG: @_ZZ8functionvE1s = internal constant %struct.simple { i32 0, i32 -1 } +// LLVM-DAG: @_ZZ8functionvE2p1 = internal constant %struct.Point { i32 10, i32 20, [3 x i8] c"cXM", i32 40 } +// LLVM-DAG: @_ZZ8functionvE2p2 = internal constant %struct.packed <{ i8 123, i32 456 }> +// LLVM-DAG: @_ZZ8functionvE3paa = internal constant %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }> +// LLVM-DAG: @_ZZ8functionvE5array = internal constant [2 x %struct.Point] [%struct.Point 
{ i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }] +// LLVM-DAG: @_ZZ8functionvE9paa_array = internal constant [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>, %struct.packed_and_aligned zeroinitializer] +// LLVM-DAG: @_ZZ8functionvE6ba_bf1 = internal constant { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 } +// LLVM-DAG: @_ZZ8functionvE6ba_bf2 = internal constant { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] zeroinitializer } +// LLVM-DAG: @_ZZ8functionvE6ba_bf3 = internal constant %struct.single_byte_bitfield { i8 42 } +// LLVM-DAG: @_ZZ8functionvE5p_bf1 = internal constant { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] zeroinitializer } +// LLVM-DAG: @_ZZ8functionvE5p_bf2 = internal constant %struct.signed_partial_bitfields { i8 127, [3 x i8] zeroinitializer } +// LLVM-DAG: @_ZZ8functionvE5p_bf3 = internal constant %struct.mixed_partial_bitfields { i8 125 } // LLVM-LABEL: define{{.*}} void @_Z8functionv // LLVM: ret void diff --git a/clang/test/CIR/CodeGen/global-constant.c b/clang/test/CIR/CodeGen/global-constant.c new file mode 100644 index 0000000000000..588642c0c3faa --- /dev/null +++ b/clang/test/CIR/CodeGen/global-constant.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +const int global_no_use = 12; +// CIR: cir.global constant {{.*}}@global_no_use +// LLVM: @global_no_use = constant +// OGCG: @global_no_use = constant + +const float global_used = 1.2f; +// CIR: cir.global constant {{.*}}@global_used +// LLVM: @global_used = 
constant +// OGCG: @global_used = constant + +float const * get_float_ptr() { + return &global_used; +} diff --git a/clang/test/CIR/CodeGen/record-zero-init-padding.c b/clang/test/CIR/CodeGen/record-zero-init-padding.c index f131c9bbd069f..9c8daccb21a53 100644 --- a/clang/test/CIR/CodeGen/record-zero-init-padding.c +++ b/clang/test/CIR/CodeGen/record-zero-init-padding.c @@ -41,28 +41,28 @@ void test_zero_init_padding(void) { // CIR-DAG: !rec_anon_struct3 = !cir.record, !s32i}> // paf: char + 3 bytes padding + int -> uses !rec_anon_struct3 -// CIR-DAG: cir.global "private" internal dso_local @test_zero_init_padding.paf = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @test_zero_init_padding.paf = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<1> : !s8i, // CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array, // CIR-DAG-SAME: #cir.int<42> : !s32i // CIR-DAG-SAME: }> : !rec_anon_struct3 // bfp: unsigned bitfield byte + 3 bytes padding + int -> uses !rec_anon_struct2 -// CIR-DAG: cir.global "private" internal dso_local @test_zero_init_padding.bfp = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @test_zero_init_padding.bfp = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<17> : !u8i, // CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array, // CIR-DAG-SAME: #cir.int<99> : !s32i // CIR-DAG-SAME: }> : !rec_anon_struct2 // tp: int + char + 3 bytes tail padding -> uses !rec_anon_struct1 -// CIR-DAG: cir.global "private" internal dso_local @test_zero_init_padding.tp = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @test_zero_init_padding.tp = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<10> : !s32i, // CIR-DAG-SAME: #cir.int<20> : !s8i, // CIR-DAG-SAME: #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array // CIR-DAG-SAME: }> : !rec_anon_struct1 // 
mp: char + 1 byte padding + short + 4 bytes padding + long long -> uses !rec_anon_struct -// CIR-DAG: cir.global "private" internal dso_local @test_zero_init_padding.mp = #cir.const_record<{ +// CIR-DAG: cir.global "private" constant internal dso_local @test_zero_init_padding.mp = #cir.const_record<{ // CIR-DAG-SAME: #cir.int<5> : !s8i, // CIR-DAG-SAME: #cir.zero : !u8i, // CIR-DAG-SAME: #cir.int<10> : !s16i, @@ -73,10 +73,10 @@ void test_zero_init_padding(void) { // CIR-LABEL: cir.func {{.*}}@test_zero_init_padding // CIR: cir.return -// LLVM-DAG: @test_zero_init_padding.paf = internal global { i8, [3 x i8], i32 } { i8 1, [3 x i8] zeroinitializer, i32 42 } -// LLVM-DAG: @test_zero_init_padding.bfp = internal global { i8, [3 x i8], i32 } { i8 17, [3 x i8] zeroinitializer, i32 99 } -// LLVM-DAG: @test_zero_init_padding.tp = internal global { i32, i8, [3 x i8] } { i32 10, i8 20, [3 x i8] zeroinitializer } -// LLVM-DAG: @test_zero_init_padding.mp = internal global { i8, i8, i16, [4 x i8], i64 } { i8 5, i8 0, i16 10, [4 x i8] zeroinitializer, i64 100 } +// LLVM-DAG: @test_zero_init_padding.paf = internal constant { i8, [3 x i8], i32 } { i8 1, [3 x i8] zeroinitializer, i32 42 } +// LLVM-DAG: @test_zero_init_padding.bfp = internal constant { i8, [3 x i8], i32 } { i8 17, [3 x i8] zeroinitializer, i32 99 } +// LLVM-DAG: @test_zero_init_padding.tp = internal constant { i32, i8, [3 x i8] } { i32 10, i8 20, [3 x i8] zeroinitializer } +// LLVM-DAG: @test_zero_init_padding.mp = internal constant { i8, i8, i16, [4 x i8], i64 } { i8 5, i8 0, i16 10, [4 x i8] zeroinitializer, i64 100 } // LLVM-LABEL: define{{.*}} void @test_zero_init_padding // LLVM: ret void diff --git a/clang/test/CIR/CodeGen/vtt.cpp b/clang/test/CIR/CodeGen/vtt.cpp index f9a62e37450cf..d0319b7adc126 100644 --- a/clang/test/CIR/CodeGen/vtt.cpp +++ b/clang/test/CIR/CodeGen/vtt.cpp @@ -5,12 +5,12 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-rtti -emit-llvm %s -o %t.ll // RUN: FileCheck 
--check-prefixes=OGCG-NO-RTTI,OGCG-COMMON --input-file=%t.ll %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -// RUN: FileCheck --check-prefixes=CIR-RTTI,CIR-COMMON --input-file=%t.cir %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll -// RUN: FileCheck --check-prefixes=LLVM-RTTI,LLVM-COMMON --input-file=%t-cir.ll %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll -// RUN: FileCheck --check-prefixes=OGCG-RTTI,OGCG-COMMON --input-file=%t.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t-rtti.cir +// RUN: FileCheck --check-prefixes=CIR-RTTI,CIR-COMMON --input-file=%t-rtti.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir-rtti.ll +// RUN: FileCheck --check-prefixes=LLVM-RTTI,LLVM-COMMON --input-file=%t-cir-rtti.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-rtti.ll +// RUN: FileCheck --check-prefixes=OGCG-RTTI,OGCG-COMMON --input-file=%t-rtti.ll %s // Note: This test will be expanded to verify VTT emission and VTT implicit // argument handling. For now, it's just test the record layout. @@ -170,7 +170,7 @@ void D::y() {} // CIR-RTTI: cir.global{{.*}} @_ZTI1B : !cir.ptr -// LLVM-RTTI: @_ZTI1B = external global ptr +// LLVM-RTTI: @_ZTI1B = external constant ptr // OGCG-RTTI: @_ZTI1B = external constant ptr From 1157a2213445199169f1f5bbe6edf8839f440498 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 18 Nov 2025 12:26:47 -0800 Subject: [PATCH 43/57] [GISel] Use getScalarSizeInBits in LegalizerHelper::lowerBitCount (#168584) For vectors, CTLZ, CTTZ, CTPOP all operate on individual elements. The lowering should be based on the element width. I noticed this by inspection. No tests in tree are currently affected, but I thought it would be good to fix so someone doesn't have to debug it in the future. 
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e9e05be2fcbd4..120c38ab8404c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7609,7 +7609,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { } case TargetOpcode::G_CTLZ: { auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); - unsigned Len = SrcTy.getSizeInBits(); + unsigned Len = SrcTy.getScalarSizeInBits(); if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. @@ -7657,7 +7657,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { case TargetOpcode::G_CTTZ: { auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); - unsigned Len = SrcTy.getSizeInBits(); + unsigned Len = SrcTy.getScalarSizeInBits(); if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. @@ -7695,7 +7695,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { case TargetOpcode::G_CTPOP: { Register SrcReg = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(SrcReg); - unsigned Size = Ty.getSizeInBits(); + unsigned Size = Ty.getScalarSizeInBits(); MachineIRBuilder &B = MIRBuilder; // Bail out on irregular type lengths. From 3e8dc4dc4d04fe4c42f139423a61802b1ba719fc Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 18 Nov 2025 12:31:31 -0800 Subject: [PATCH 44/57] [clang][deps] NFC: Use qualified names for function definitions (#168586) The compiler doesn't emit a diagnostics when the signature of a function defined in a namespace gets out-of-sync with its declaration. 
Let's use qualified names for function definitions instead of nesting them in a namespace so that mismatches are diagnosed by the compiler rather than by the (less understandable) linker. --- .../DependencyScannerImpl.cpp | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp index 4178d1fd352c3..a3deb907c23ed 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp @@ -358,9 +358,8 @@ void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) { } } // namespace -namespace clang::tooling::dependencies { std::unique_ptr -createDiagOptions(ArrayRef CommandLine) { +dependencies::createDiagOptions(ArrayRef CommandLine) { std::vector CLI; for (const std::string &Arg : CommandLine) CLI.push_back(Arg.c_str()); @@ -382,9 +381,10 @@ DignosticsEngineWithDiagOpts::DignosticsEngineWithDiagOpts( } std::pair, std::unique_ptr> -buildCompilation(ArrayRef ArgStrs, DiagnosticsEngine &Diags, - IntrusiveRefCntPtr FS, - llvm::BumpPtrAllocator &Alloc) { +dependencies::buildCompilation(ArrayRef ArgStrs, + DiagnosticsEngine &Diags, + IntrusiveRefCntPtr FS, + llvm::BumpPtrAllocator &Alloc) { SmallVector Argv; Argv.reserve(ArgStrs.size()); for (const std::string &Arg : ArgStrs) @@ -417,8 +417,8 @@ buildCompilation(ArrayRef ArgStrs, DiagnosticsEngine &Diags, } std::unique_ptr -createCompilerInvocation(ArrayRef CommandLine, - DiagnosticsEngine &Diags) { +dependencies::createCompilerInvocation(ArrayRef CommandLine, + DiagnosticsEngine &Diags) { llvm::opt::ArgStringList Argv; for (const std::string &Str : ArrayRef(CommandLine).drop_front()) Argv.push_back(Str.c_str()); @@ -432,10 +432,10 @@ createCompilerInvocation(ArrayRef CommandLine, } std::pair, std::vector> -initVFSForTUBuferScanning(IntrusiveRefCntPtr BaseFS, - ArrayRef CommandLine, - StringRef 
WorkingDirectory, - llvm::MemoryBufferRef TUBuffer) { +dependencies::initVFSForTUBuferScanning( + IntrusiveRefCntPtr BaseFS, + ArrayRef CommandLine, StringRef WorkingDirectory, + llvm::MemoryBufferRef TUBuffer) { // Reset what might have been modified in the previous worker invocation. BaseFS->setCurrentWorkingDirectory(WorkingDirectory); @@ -459,9 +459,10 @@ initVFSForTUBuferScanning(IntrusiveRefCntPtr BaseFS, std::pair, std::vector> -initVFSForByNameScanning(IntrusiveRefCntPtr BaseFS, - ArrayRef CommandLine, - StringRef WorkingDirectory, StringRef ModuleName) { +dependencies::initVFSForByNameScanning( + IntrusiveRefCntPtr BaseFS, + ArrayRef CommandLine, StringRef WorkingDirectory, + StringRef ModuleName) { // Reset what might have been modified in the previous worker invocation. BaseFS->setCurrentWorkingDirectory(WorkingDirectory); @@ -486,7 +487,7 @@ initVFSForByNameScanning(IntrusiveRefCntPtr BaseFS, return std::make_pair(OverlayFS, ModifiedCommandLine); } -bool initializeScanCompilerInstance( +bool dependencies::initializeScanCompilerInstance( CompilerInstance &ScanInstance, IntrusiveRefCntPtr FS, DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service, @@ -559,7 +560,7 @@ bool initializeScanCompilerInstance( } llvm::SmallVector -getInitialStableDirs(const CompilerInstance &ScanInstance) { +dependencies::getInitialStableDirs(const CompilerInstance &ScanInstance) { // Create a collection of stable directories derived from the ScanInstance // for determining whether module dependencies would fully resolve from // those directories. @@ -571,8 +572,8 @@ getInitialStableDirs(const CompilerInstance &ScanInstance) { } std::optional -computePrebuiltModulesASTMap(CompilerInstance &ScanInstance, - llvm::SmallVector &StableDirs) { +dependencies::computePrebuiltModulesASTMap( + CompilerInstance &ScanInstance, llvm::SmallVector &StableDirs) { // Store a mapping of prebuilt module files and their properties like header // search options. 
This will prevent the implicit build to create duplicate // modules and will force reuse of the existing prebuilt module files @@ -590,7 +591,8 @@ computePrebuiltModulesASTMap(CompilerInstance &ScanInstance, } std::unique_ptr -takeAndUpdateDependencyOutputOptionsFrom(CompilerInstance &ScanInstance) { +dependencies::takeAndUpdateDependencyOutputOptionsFrom( + CompilerInstance &ScanInstance) { // This function moves the existing dependency output options from the // invocation to the collector. The options in the invocation are reset, // which ensures that the compiler won't create new dependency collectors, @@ -607,7 +609,8 @@ takeAndUpdateDependencyOutputOptionsFrom(CompilerInstance &ScanInstance) { return Opts; } -std::shared_ptr initializeScanInstanceDependencyCollector( +std::shared_ptr +dependencies::initializeScanInstanceDependencyCollector( CompilerInstance &ScanInstance, std::unique_ptr DepOutputOpts, StringRef WorkingDirectory, DependencyConsumer &Consumer, @@ -633,7 +636,6 @@ std::shared_ptr initializeScanInstanceDependencyCollector( return MDC; } -} // namespace clang::tooling::dependencies bool DependencyScanningAction::runInvocation( std::unique_ptr Invocation, From d3c2973da0466408aa9cfe1081cd08125a3491a1 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 18 Nov 2025 12:39:48 -0800 Subject: [PATCH 45/57] [lldb/aarch64] Add STR/LDR instructions for FP registers to Emulator (#168187) A function prologue can begin with a pre-index STR instruction for a floating-point register. To construct an unwind plan from assembly correctly, the instruction emulator must support such instructions. 
--- .../ARM64/EmulateInstructionARM64.cpp | 43 +++++-- .../ARM64/TestArm64InstEmulation.cpp | 108 ++++++++++++++++++ 2 files changed, 140 insertions(+), 11 deletions(-) diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp index a8901beda3970..f124424a37f58 100644 --- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp +++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp @@ -346,6 +346,16 @@ EmulateInstructionARM64::GetOpcodeForInstruction(const uint32_t opcode) { &EmulateInstructionARM64::EmulateLDRSTRImm, "LDR , [{, #}]"}, + {0x3f200c00, 0x3c000400, No_VFP, + &EmulateInstructionARM64::EmulateLDRSTRImm, + "LDR|STR , [], #"}, + {0x3f200c00, 0x3c000c00, No_VFP, + &EmulateInstructionARM64::EmulateLDRSTRImm, + "LDR|STR , [, #]!"}, + {0x3f000000, 0x3d000000, No_VFP, + &EmulateInstructionARM64::EmulateLDRSTRImm, + "LDR|STR , [{, #}]"}, + {0xfc000000, 0x14000000, No_VFP, &EmulateInstructionARM64::EmulateB, "B