From 004b4ca5ff50a4db9d653f3983f8d4c4abb638cb Mon Sep 17 00:00:00 2001 From: aarongreig Date: Thu, 31 Jul 2025 16:33:44 +0100 Subject: [PATCH] =?UTF-8?q?Revert=20"[UR][SYCL]=20Introduce=20UR=20api=20t?= =?UTF-8?q?o=20set=20kernel=20args=20+=20launch=20in=20one=20call.=20?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 24f54eab96020184afcaf88f6205646b2143a58c. --- sycl/source/detail/scheduler/commands.cpp | 207 +++--------- .../Adapters/level_zero/batch_barrier.cpp | 4 +- .../Adapters/level_zero/batch_test.cpp | 65 ++-- .../batch_test_copy_with_compute.cpp | 66 ++-- sycl/test-e2e/Basic/fill_accessor_ur.cpp | 8 +- sycl/test-e2e/Basic/host-task-dependency.cpp | 4 +- .../Basic/kernel_bundle/kernel_bundle_api.cpp | 2 +- sycl/test-e2e/Basic/queue/release.cpp | 2 +- sycl/test-e2e/Basic/subdevice_pi.cpp | 14 +- .../enqueueLaunchCustom_check_event_deps.cpp | 2 +- .../discard_events_accessors.cpp | 8 +- .../discard_events_using_assert.cpp | 6 +- .../discard_events_usm_ooo_queue.cpp | 24 +- .../KernelAndProgram/disable-caching.cpp | 12 +- .../test-e2e/Scheduler/HostAccDestruction.cpp | 2 +- sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp | 6 +- .../SpecConstants/2020/image_selection.cpp | 30 +- .../XPTI/basic_event_collection_linux.cpp | 10 +- sycl/tools/xpti_helpers/usm_analyzer.hpp | 19 -- sycl/unittests/Extensions/DeviceGlobal.cpp | 7 +- .../Extensions/EnqueueFunctionsEvents.cpp | 94 +++--- .../FreeFunctionCommandsEvents.cpp | 95 +++--- .../FreeFunctionEventsHelpers.hpp | 13 +- sycl/unittests/Extensions/USMMemcpy2D.cpp | 25 +- .../WorkGroupMemoryBackendArgument.cpp | 20 +- sycl/unittests/assert/assert.cpp | 16 +- sycl/unittests/buffer/KernelArgMemObj.cpp | 31 +- .../handler/SetArgForLocalAccessor.cpp | 17 +- sycl/unittests/scheduler/FailedCommands.cpp | 7 +- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 15 +- .../scheduler/InOrderQueueHostTaskDeps.cpp | 15 +- sycl/unittests/scheduler/RequiredWGSize.cpp | 10 +- .../thread_safety/InteropKernelEnqueue.cpp | 32 +- .../unittests/xpti_trace/QueueApiFailures.cpp | 20 +- unified-runtime/include/ur_api.h | 184 ----------- unified-runtime/include/ur_api_funcs.def | 1 - unified-runtime/include/ur_ddi.h | 10 - unified-runtime/include/ur_print.h | 40 --- unified-runtime/include/ur_print.hpp | 308 +----------------- .../EXP-ENQUEUE-KERNEL-LAUNCH-WITH-ARGS.rst | 77 ----- .../exp-enqueue-kernel-launch-with-args.yml | 170 ---------- unified-runtime/scripts/core/registry.yml | 3 - unified-runtime/scripts/parse_specs.py | 4 +- unified-runtime/scripts/templates/helper.py | 7 +- .../scripts/templates/print.hpp.mako | 29 +- .../source/adapters/cuda/enqueue.cpp | 55 ---- .../adapters/cuda/ur_interface_loader.cpp | 1 - .../source/adapters/hip/enqueue.cpp | 55 ---- .../adapters/hip/ur_interface_loader.cpp | 1 - .../source/adapters/level_zero/kernel.cpp | 167 ---------- .../level_zero/ur_interface_loader.cpp | 2 - .../level_zero/ur_interface_loader.hpp | 9 - .../level_zero/v2/command_list_manager.cpp | 58 ---- .../level_zero/v2/command_list_manager.hpp | 10 - .../source/adapters/level_zero/v2/kernel.cpp | 29 +- .../adapters/level_zero/v2/queue_api.cpp | 16 - .../adapters/level_zero/v2/queue_api.hpp | 5 - .../v2/queue_immediate_in_order.hpp | 16 - .../v2/queue_immediate_out_of_order.hpp | 18 - .../source/adapters/mock/ur_mockddi.cpp | 104 ------ .../source/adapters/native_cpu/enqueue.cpp | 42 --- .../source/adapters/native_cpu/kernel.cpp | 27 +- .../source/adapters/native_cpu/kernel.hpp | 31 +- .../native_cpu/ur_interface_loader.cpp | 1 - .../source/adapters/offload/enqueue.cpp | 37 --- .../source/adapters/offload/kernel.cpp | 9 + .../source/adapters/offload/kernel.hpp | 11 +- .../adapters/offload/ur_interface_loader.cpp | 1 - .../source/adapters/opencl/enqueue.cpp | 103 ------ .../adapters/opencl/ur_interface_loader.cpp | 1 - .../source/common/stype_map_helpers.def | 3 - .../loader/layers/sanitizer/asan/asan_ddi.cpp | 134 -------- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 137 -------- .../loader/layers/sanitizer/tsan/tsan_ddi.cpp | 135 -------- .../loader/layers/tracing/ur_trcddi.cpp | 96 ------ .../loader/layers/validation/ur_valddi.cpp | 120 ------- unified-runtime/source/loader/loader.def.in | 5 - unified-runtime/source/loader/loader.map.in | 5 - unified-runtime/source/loader/ur_ldrddi.cpp | 61 ---- unified-runtime/source/loader/ur_libapi.cpp | 98 ------ unified-runtime/source/loader/ur_print.cpp | 32 -- unified-runtime/source/ur_api.cpp | 89 ----- .../test/conformance/CMakeLists.txt | 1 - .../CMakeLists.txt | 9 - .../urEnqueueKernelLaunchWithArgsExp.cpp | 303 ----------------- 85 files changed, 422 insertions(+), 3356 deletions(-) delete mode 100644 unified-runtime/scripts/core/EXP-ENQUEUE-KERNEL-LAUNCH-WITH-ARGS.rst delete mode 100644 unified-runtime/scripts/core/exp-enqueue-kernel-launch-with-args.yml delete mode 100644 unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/CMakeLists.txt delete mode 100644 unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/urEnqueueKernelLaunchWithArgsExp.cpp diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 70f12d0a59ef7..ea5dfbacf4553 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2310,14 +2310,14 @@ ur_mem_flags_t AccessModeToUr(access::mode AccessorMode) { } } -// Gets UR argument struct for a given kernel and device based on the argument -// type. Refactored from SetKernelParamsAndLaunch to allow it to be used in -// the graphs extension (LaunchWithArgs for graphs is planned future work). -static void GetUrArgsBasedOnType( +// Sets arguments for a given kernel and device based on the argument type. +// Refactored from SetKernelParamsAndLaunch to allow it to be used in the graphs +// extension. +static void SetArgBasedOnType( + adapter_impl &Adapter, ur_kernel_handle_t Kernel, device_image_impl *DeviceImageImpl, const std::function &getMemAllocationFunc, - context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex, - std::vector &UrArgs) { + context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { case kernel_param_kind_t::kind_dynamic_work_group_memory: break; @@ -2337,61 +2337,52 @@ static void GetUrArgsBasedOnType( getMemAllocationFunc ? reinterpret_cast(getMemAllocationFunc(Req)) : nullptr; - ur_exp_kernel_arg_value_t Value = {}; - Value.memObjTuple = {MemArg, AccessModeToUr(Req->MAccessMode)}; - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ, - static_cast(NextTrueIndex), sizeof(MemArg), - Value}); + ur_kernel_arg_mem_obj_properties_t MemObjData{}; + MemObjData.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; + MemObjData.memoryAccess = AccessModeToUr(Req->MAccessMode); + Adapter.call(Kernel, NextTrueIndex, + &MemObjData, MemArg); break; } case kernel_param_kind_t::kind_std_layout: { - ur_exp_kernel_arg_type_t Type; if (Arg.MPtr) { - Type = UR_EXP_KERNEL_ARG_TYPE_VALUE; + Adapter.call( + Kernel, NextTrueIndex, Arg.MSize, nullptr, Arg.MPtr); } else { - Type = UR_EXP_KERNEL_ARG_TYPE_LOCAL; + Adapter.call(Kernel, NextTrueIndex, + Arg.MSize, nullptr); } - ur_exp_kernel_arg_value_t Value = {}; - Value.value = {Arg.MPtr}; - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - Type, static_cast(NextTrueIndex), - static_cast(Arg.MSize), Value}); break; } case kernel_param_kind_t::kind_sampler: { sampler *SamplerPtr = (sampler *)Arg.MPtr; - ur_exp_kernel_arg_value_t Value = {}; - Value.sampler = (ur_sampler_handle_t)detail::getSyclObjImpl(*SamplerPtr) - ->getOrCreateSampler(ContextImpl); - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_SAMPLER, - static_cast(NextTrueIndex), - sizeof(ur_sampler_handle_t), Value}); + ur_sampler_handle_t Sampler = + (ur_sampler_handle_t)detail::getSyclObjImpl(*SamplerPtr) + ->getOrCreateSampler(ContextImpl); + Adapter.call(Kernel, NextTrueIndex, + nullptr, Sampler); break; } case kernel_param_kind_t::kind_pointer: { - ur_exp_kernel_arg_value_t Value = {}; - // We need to de-rerence to get the actual USM allocation - that's the + // We need to de-rerence this to get the actual USM allocation - that's the // pointer UR is expecting. - Value.pointer = *static_cast(Arg.MPtr); - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_POINTER, - static_cast(NextTrueIndex), sizeof(Arg.MPtr), - Value}); + const void *Ptr = *static_cast(Arg.MPtr); + Adapter.call(Kernel, NextTrueIndex, + nullptr, Ptr); break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { assert(DeviceImageImpl != nullptr); ur_mem_handle_t SpecConstsBuffer = DeviceImageImpl->get_spec_const_buffer_ref(); - ur_exp_kernel_arg_value_t Value = {}; - Value.memObjTuple = {SpecConstsBuffer, UR_MEM_FLAG_READ_ONLY}; - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ, - static_cast(NextTrueIndex), - sizeof(SpecConstsBuffer), Value}); + + ur_kernel_arg_mem_obj_properties_t MemObjProps{}; + MemObjProps.pNext = nullptr; + MemObjProps.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; + MemObjProps.memoryAccess = UR_MEM_FLAG_READ_ONLY; + Adapter.call( + Kernel, NextTrueIndex, &MemObjProps, SpecConstsBuffer); break; } case kernel_param_kind_t::kind_invalid: @@ -2424,32 +2415,22 @@ static ur_result_t SetKernelParamsAndLaunch( DeviceImageImpl ? DeviceImageImpl->get_spec_const_blob_ref() : Empty); } - std::vector UrArgs; - UrArgs.reserve(Args.size()); - if (KernelFuncPtr && !KernelHasSpecialCaptures) { - auto setFunc = [&UrArgs, + auto setFunc = [&Adapter, Kernel, KernelFuncPtr](const detail::kernel_param_desc_t &ParamDesc, size_t NextTrueIndex) { const void *ArgPtr = (const char *)KernelFuncPtr + ParamDesc.offset; switch (ParamDesc.kind) { case kernel_param_kind_t::kind_std_layout: { int Size = ParamDesc.info; - ur_exp_kernel_arg_value_t Value = {}; - Value.value = ArgPtr; - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_VALUE, - static_cast(NextTrueIndex), - static_cast(Size), Value}); + Adapter.call(Kernel, NextTrueIndex, + Size, nullptr, ArgPtr); break; } case kernel_param_kind_t::kind_pointer: { - ur_exp_kernel_arg_value_t Value = {}; - Value.pointer = *static_cast(ArgPtr); - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_POINTER, - static_cast(NextTrueIndex), - sizeof(Value.pointer), Value}); + const void *Ptr = *static_cast(ArgPtr); + Adapter.call(Kernel, NextTrueIndex, + nullptr, Ptr); break; } default: @@ -2459,10 +2440,10 @@ static ur_result_t SetKernelParamsAndLaunch( applyFuncOnFilteredArgs(EliminatedArgMask, KernelNumArgs, KernelParamDescGetter, setFunc); } else { - auto setFunc = [&DeviceImageImpl, &getMemAllocationFunc, &Queue, - &UrArgs](detail::ArgDesc &Arg, size_t NextTrueIndex) { - GetUrArgsBasedOnType(DeviceImageImpl, getMemAllocationFunc, - Queue.getContextImpl(), Arg, NextTrueIndex, UrArgs); + auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc, + &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { + SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc, + Queue.getContextImpl(), Arg, NextTrueIndex); }; applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } @@ -2475,12 +2456,8 @@ static ur_result_t SetKernelParamsAndLaunch( // CUDA-style local memory setting. Note that we may have -1 as a position, // this indicates the buffer is actually unused and was elided. if (ImplicitLocalArg.has_value() && ImplicitLocalArg.value() != -1) { - UrArgs.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, - nullptr, - UR_EXP_KERNEL_ARG_TYPE_LOCAL, - static_cast(ImplicitLocalArg.value()), - WorkGroupMemorySize, - {nullptr}}); + Adapter.call( + Kernel, ImplicitLocalArg.value(), WorkGroupMemorySize, nullptr); } adjustNDRangePerKernel(NDRDesc, Kernel, Queue.getDeviceImpl()); @@ -2538,15 +2515,13 @@ static ur_result_t SetKernelParamsAndLaunch( {{WorkGroupMemorySize}}}); } ur_event_handle_t UREvent = nullptr; - ur_result_t Error = - Adapter.call_nocheck( - Queue.getHandleRef(), Kernel, NDRDesc.Dims, - HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr, - &NDRDesc.GlobalSize[0], LocalSize, UrArgs.size(), UrArgs.data(), - property_list.size(), - property_list.empty() ? nullptr : property_list.data(), - RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], - OutEventImpl ? &UREvent : nullptr); + ur_result_t Error = Adapter.call_nocheck( + Queue.getHandleRef(), Kernel, NDRDesc.Dims, + HasOffset ? &NDRDesc.GlobalOffset[0] : nullptr, &NDRDesc.GlobalSize[0], + LocalSize, property_list.size(), + property_list.empty() ? nullptr : property_list.data(), RawEvents.size(), + RawEvents.empty() ? nullptr : &RawEvents[0], + OutEventImpl ? &UREvent : nullptr); if (Error == UR_RESULT_SUCCESS && OutEventImpl) { OutEventImpl->setHandle(UREvent); } @@ -2554,88 +2529,6 @@ static ur_result_t SetKernelParamsAndLaunch( return Error; } -// Sets arguments for a given kernel and device based on the argument type. -// This is a legacy path which the graphs extension still uses. -static void SetArgBasedOnType( - adapter_impl &Adapter, ur_kernel_handle_t Kernel, - device_image_impl *DeviceImageImpl, - const std::function &getMemAllocationFunc, - context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex) { - switch (Arg.MType) { - case kernel_param_kind_t::kind_dynamic_work_group_memory: - break; - case kernel_param_kind_t::kind_work_group_memory: - break; - case kernel_param_kind_t::kind_stream: - break; - case kernel_param_kind_t::kind_dynamic_accessor: - case kernel_param_kind_t::kind_accessor: { - Requirement *Req = (Requirement *)(Arg.MPtr); - - // getMemAllocationFunc is nullptr when there are no requirements. However, - // we may pass default constructed accessors to a command, which don't add - // requirements. In such case, getMemAllocationFunc is nullptr, but it's a - // valid case, so we need to properly handle it. - ur_mem_handle_t MemArg = - getMemAllocationFunc - ? reinterpret_cast(getMemAllocationFunc(Req)) - : nullptr; - ur_kernel_arg_mem_obj_properties_t MemObjData{}; - MemObjData.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; - MemObjData.memoryAccess = AccessModeToUr(Req->MAccessMode); - Adapter.call(Kernel, NextTrueIndex, - &MemObjData, MemArg); - break; - } - case kernel_param_kind_t::kind_std_layout: { - if (Arg.MPtr) { - Adapter.call( - Kernel, NextTrueIndex, Arg.MSize, nullptr, Arg.MPtr); - } else { - Adapter.call(Kernel, NextTrueIndex, - Arg.MSize, nullptr); - } - - break; - } - case kernel_param_kind_t::kind_sampler: { - sampler *SamplerPtr = (sampler *)Arg.MPtr; - ur_sampler_handle_t Sampler = - (ur_sampler_handle_t)detail::getSyclObjImpl(*SamplerPtr) - ->getOrCreateSampler(ContextImpl); - Adapter.call(Kernel, NextTrueIndex, - nullptr, Sampler); - break; - } - case kernel_param_kind_t::kind_pointer: { - // We need to de-rerence this to get the actual USM allocation - that's the - // pointer UR is expecting. - const void *Ptr = *static_cast(Arg.MPtr); - Adapter.call(Kernel, NextTrueIndex, - nullptr, Ptr); - break; - } - case kernel_param_kind_t::kind_specialization_constants_buffer: { - assert(DeviceImageImpl != nullptr); - ur_mem_handle_t SpecConstsBuffer = - DeviceImageImpl->get_spec_const_buffer_ref(); - - ur_kernel_arg_mem_obj_properties_t MemObjProps{}; - MemObjProps.pNext = nullptr; - MemObjProps.stype = UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES; - MemObjProps.memoryAccess = UR_MEM_FLAG_READ_ONLY; - Adapter.call( - Kernel, NextTrueIndex, &MemObjProps, SpecConstsBuffer); - break; - } - case kernel_param_kind_t::kind_invalid: - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Invalid kernel param kind " + - codeToString(UR_RESULT_ERROR_INVALID_VALUE)); - break; - } -} - static std::tuple getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, diff --git a/sycl/test-e2e/Adapters/level_zero/batch_barrier.cpp b/sycl/test-e2e/Adapters/level_zero/batch_barrier.cpp index d62437d7b9068..88a51f579dc0b 100644 --- a/sycl/test-e2e/Adapters/level_zero/batch_barrier.cpp +++ b/sycl/test-e2e/Adapters/level_zero/batch_barrier.cpp @@ -24,7 +24,7 @@ int main(int argc, char *argv[]) { queue q; submit_kernel(q); // starts a batch - // CHECK: ---> urEnqueueKernelLaunchWithArgsExp + // CHECK: ---> urEnqueueKernelLaunch // CHECK-NOT: zeCommandQueueExecuteCommandLists // Initialize Level Zero driver is required if this test is linked @@ -41,7 +41,7 @@ int main(int argc, char *argv[]) { // CHECK-NOT: zeCommandQueueExecuteCommandLists submit_kernel(q); - // CHECK: ---> urEnqueueKernelLaunchWithArgsExp + // CHECK: ---> urEnqueueKernelLaunch // CHECK-NOT: zeCommandQueueExecuteCommandLists // interop should close the batch diff --git a/sycl/test-e2e/Adapters/level_zero/batch_test.cpp b/sycl/test-e2e/Adapters/level_zero/batch_test.cpp index 1db60a36e1265..8f6e4e0f6a563 100644 --- a/sycl/test-e2e/Adapters/level_zero/batch_test.cpp +++ b/sycl/test-e2e/Adapters/level_zero/batch_test.cpp @@ -55,54 +55,55 @@ // variable SYCL_PI_LEVEL_ZEOR+BATCH_SIZE=N. // This test enqueues 8 kernels and then does a wait. And it does this 3 times. // Expected output is that for batching =1 you will see zeCommandListClose, -// and zeCommandQueueExecuteCommandLists after every -// urEnqueueKernelLaunchWithArgsExp. For batching=3 you will see that after 3rd -// and 6th enqueues, and then after urQueueFinish. For 5, after 5th urEnqueue, -// and then after urQueueFinish. For 4 you will see these after 4th and 8th -// Enqueue, and for 8, only after the 8th enqueue. And lastly for 9, you will -// see the Close and Execute calls only after the urQueueFinish. Since the test -// does this 3 times, this pattern will repeat 2 more times, and then the test -// will print Test Passed 8 times, once for each kernel validation check. +// and zeCommandQueueExecuteCommandLists after every urEnqueueKernelLaunch. +// For batching=3 you will see that after 3rd and 6th enqueues, and then after +// urQueueFinish. For 5, after 5th urEnqueue, and then after urQueueFinish. For +// 4 you will see these after 4th and 8th Enqueue, and for 8, only after the +// 8th enqueue. And lastly for 9, you will see the Close and Execute calls +// only after the urQueueFinish. +// Since the test does this 3 times, this pattern will repeat 2 more times, +// and then the test will print Test Passed 8 times, once for each kernel +// validation check. // Pattern starts first set of kernel executions. -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB4: zeCommandListClose( // CKB4: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB5: zeCommandListClose( // CKB5: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB7: zeCommandListClose( // CKB7: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( @@ -120,45 +121,45 @@ // CKB9: zeCommandListClose( // CKB9: zeCommandQueueExecuteCommandLists( // Pattern starts 2nd set of kernel executions -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB4: zeCommandListClose( // CKB4: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB5: zeCommandListClose( // CKB5: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB7: zeCommandListClose( // CKB7: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( @@ -176,45 +177,45 @@ // CKB9: zeCommandListClose( // CKB9: zeCommandQueueExecuteCommandLists( // Pattern starts 3rd set of kernel executions -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB4: zeCommandListClose( // CKB4: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB5: zeCommandListClose( // CKB5: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB7: zeCommandListClose( // CKB7: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( diff --git a/sycl/test-e2e/Adapters/level_zero/batch_test_copy_with_compute.cpp b/sycl/test-e2e/Adapters/level_zero/batch_test_copy_with_compute.cpp index 85634616c67a9..8470c7abd2bfa 100644 --- a/sycl/test-e2e/Adapters/level_zero/batch_test_copy_with_compute.cpp +++ b/sycl/test-e2e/Adapters/level_zero/batch_test_copy_with_compute.cpp @@ -31,53 +31,55 @@ // variable SYCL_PI_LEVEL_ZERO_{COPY_}BATCH_SIZE=N. // This test enqueues 8 kernels and then does a wait. And it does this 3 times. // Expected output is that for batching =1 you will see zeCommandListClose, -// and zeCommandQueueExecuteCommandLists after every -// urEnqueueKernelLaunchWithArgsExp. For batching=3 you will see that after 3rd -// and 6th enqueues, and then after urEventWait. For 5, after 5th urEnqueue, and -// then after urEventWait. For 4 you will see these after 4th and 8th Enqueue, -// and for 8, only after the 8th enqueue. And lastly for 9, you will see the -// Close and Execute calls only after the urEventWait. Since the test does this -// 3 times, this pattern will repeat 2 more times, and then the test will print -// Test Passed 8 times, once for each kernel validation check. Pattern starts -// first set of kernel executions. CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// and zeCommandQueueExecuteCommandLists after every urEnqueueKernelLaunch. +// For batching=3 you will see that after 3rd and 6th enqueues, and then after +// urEventWait. For 5, after 5th urEnqueue, and then after urEventWait. For +// 4 you will see these after 4th and 8th Enqueue, and for 8, only after the +// 8th enqueue. And lastly for 9, you will see the Close and Execute calls +// only after the urEventWait. +// Since the test does this 3 times, this pattern will repeat 2 more times, +// and then the test will print Test Passed 8 times, once for each kernel +// validation check. +// Pattern starts first set of kernel executions. +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB4: zeCommandListClose( // CKB4: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB5: zeCommandListClose( // CKB5: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB7: zeCommandListClose( // CKB7: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( @@ -95,45 +97,45 @@ // CKB9: zeCommandListClose( // CKB9: zeCommandQueueExecuteCommandLists( // Pattern starts 2nd set of kernel executions -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB4: zeCommandListClose( // CKB4: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB5: zeCommandListClose( // CKB5: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB7: zeCommandListClose( // CKB7: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( @@ -151,45 +153,45 @@ // CKB9: zeCommandListClose( // CKB9: zeCommandQueueExecuteCommandLists( // Pattern starts 3rd set of kernel executions -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB4: zeCommandListClose( // CKB4: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB5: zeCommandListClose( // CKB5: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB3: zeCommandListClose( // CKB3: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( // CKB7: zeCommandListClose( // CKB7: zeCommandQueueExecuteCommandLists( -// CKALL: ---> urEnqueueKernelLaunchWithArgsExp +// CKALL: ---> urEnqueueKernelLaunch // CKALL: zeCommandListAppendLaunchKernel( // CKB1: zeCommandListClose( // CKB1: zeCommandQueueExecuteCommandLists( diff --git a/sycl/test-e2e/Basic/fill_accessor_ur.cpp b/sycl/test-e2e/Basic/fill_accessor_ur.cpp index e632948eee65c..ce8ae917eaab6 100644 --- a/sycl/test-e2e/Basic/fill_accessor_ur.cpp +++ b/sycl/test-e2e/Basic/fill_accessor_ur.cpp @@ -63,7 +63,7 @@ void testFill_Buffer2D() { auto acc2D = buffer_2D.get_access(cgh, {8, 12}, {2, 2}); // "ranged accessor" will have to be handled by custom kernel: - // urEnqueueKernelLaunchWithArgsExp + // urEnqueueKernelLaunch cgh.fill(acc2D, float{4}); }); q.wait(); @@ -94,7 +94,7 @@ void testFill_Buffer3D() { auto acc3D = buffer_3D.get_access( cgh, {4, 8, 12}, {3, 3, 3}); // "ranged accessor" will have to be handled by custom kernel: - // urEnqueueKernelLaunchWithArgsExp + // urEnqueueKernelLaunch cgh.fill(acc3D, float{6}); }); q.wait(); @@ -139,12 +139,12 @@ int main() { // CHECK: start testFill_Buffer2D // CHECK: <--- urEnqueueMemBufferFill // CHECK: start testFill_Buffer2D -- OFFSET -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK: <--- urEnqueueKernelLaunch // CHECK: start testFill_Buffer3D // CHECK: <--- urEnqueueMemBufferFill // CHECK: start testFill_Buffer3D -- OFFSET -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK: <--- urEnqueueKernelLaunch // CHECK: start testFill_ZeroDim // CHECK: <--- urEnqueueMemBufferFill diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 2c29034de4af8..3b015051377c9 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -179,13 +179,13 @@ int main() { // launch of Gen kernel // CHECK: <--- urKernelCreate // CHECK: NameGen -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK: <--- urEnqueueKernelLaunch // prepare for host task // CHECK: <--- urEnqueueMemBuffer{{Map|Read}} // launch of Copier kernel // CHECK: <--- urKernelCreate // CHECK: Copier -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK: <--- urEnqueueKernelLaunch // CHECK:Third buffer [ 0] = 0 // CHECK:Third buffer [ 1] = 1 diff --git a/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp b/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp index 91a5bd1363268..8eb13cf226c2c 100644 --- a/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp +++ b/sycl/test-e2e/Basic/kernel_bundle/kernel_bundle_api.cpp @@ -227,7 +227,7 @@ int main() { // CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] // CHECK-SAME:-> UR_RESULT_SUCCESS; // - // CHECK:<--- urEnqueueKernelLaunchWithArgsExp( + // CHECK:<--- urEnqueueKernelLaunch( // CHECK-SAME: .hKernel = [[KERNEL_HANDLE]] // // CHECK:<--- urKernelRelease( diff --git a/sycl/test-e2e/Basic/queue/release.cpp b/sycl/test-e2e/Basic/queue/release.cpp index 5f4dc58c92575..13ee5d6ee22bf 100644 --- a/sycl/test-e2e/Basic/queue/release.cpp +++ b/sycl/test-e2e/Basic/queue/release.cpp @@ -11,7 +11,7 @@ int main() { return 0; } -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp( +// CHECK: <--- urEnqueueKernelLaunch( // FIXME the order of these 2 varies between adapters due to a Level Zero // specific queue workaround. // CHECK-DAG: <--- urEventRelease( diff --git a/sycl/test-e2e/Basic/subdevice_pi.cpp b/sycl/test-e2e/Basic/subdevice_pi.cpp index fc9fdbffabec3..4a0e4a06e016d 100644 --- a/sycl/test-e2e/Basic/subdevice_pi.cpp +++ b/sycl/test-e2e/Basic/subdevice_pi.cpp @@ -64,7 +64,7 @@ static bool check_separate(device dev, buffer buf, // CHECK-SEPARATE: <--- urContextCreate // CHECK-SEPARATE: <--- urQueueCreate // CHECK-SEPARATE: <--- urMemBufferCreate - // CHECK-SEPARATE: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-SEPARATE: <--- urEnqueueKernelLaunch // CHECK-SEPARATE: <--- urQueueFinish log_pi("Test sub device 1"); @@ -81,7 +81,7 @@ static bool check_separate(device dev, buffer buf, // CHECK-SEPARATE: <--- urEnqueueMemBuffer{{Map|Read}} // CHECK-SEPARATE: <--- urEnqueueMemBufferWrite // - // CHECK-SEPARATE: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-SEPARATE: <--- urEnqueueKernelLaunch // CHECK-SEPARATE: <--- urQueueFinish return true; @@ -116,7 +116,7 @@ static bool check_shared_context(device dev, buffer buf, // Make sure that a single buffer is created (and shared between subdevices): // see --implicit-check-not above. // - // CHECK-SHARED: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-SHARED: <--- urEnqueueKernelLaunch // CHECK-SHARED: <--- urQueueFinish log_pi("Test sub device 1"); @@ -126,7 +126,7 @@ static bool check_shared_context(device dev, buffer buf, } // CHECK-SHARED: Test sub device 1 // CHECK-SHARED: <--- urQueueCreate - // CHECK-SHARED: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-SHARED: <--- urEnqueueKernelLaunch // CHECK-SHARED: <--- urQueueFinish // CHECK-SHARED: <--- urEnqueueMemBufferRead @@ -165,7 +165,7 @@ static bool check_fused_context(device dev, buffer buf, // Make sure that a single buffer is created (and shared between subdevices // *and* the root device): see --implicit-check-not above. // - // CHECK-FUSED: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-FUSED: <--- urEnqueueKernelLaunch // CHECK-FUSED: <--- urQueueFinish log_pi("Test sub device 0"); @@ -175,7 +175,7 @@ static bool check_fused_context(device dev, buffer buf, } // CHECK-FUSED: Test sub device 0 // CHECK-FUSED: <--- urQueueCreate - // CHECK-FUSED: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-FUSED: <--- urEnqueueKernelLaunch // CHECK-FUSED: <--- urQueueFinish log_pi("Test sub device 1"); @@ -185,7 +185,7 @@ static bool check_fused_context(device dev, buffer buf, } // CHECK-FUSED: Test sub device 1 // CHECK-FUSED: <--- urQueueCreate - // CHECK-FUSED: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-FUSED: <--- urEnqueueKernelLaunch // CHECK-FUSED: <--- urQueueFinish // CHECK-FUSED: <--- urEnqueueMemBufferRead diff --git a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp index 50a488d861874..7928e5da66bac 100644 --- a/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp +++ b/sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp @@ -1,5 +1,5 @@ // Checks whether or not event Dependencies are honored by -// urEnqueueKernelLaunchWithArgsExp with cluster dimensions +// urEnqueueKernelLaunch with cluster dimensions // REQUIRES: target-nvidia, aspect-ext_oneapi_cuda_cluster_group // RUN: %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_90 -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_accessors.cpp b/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_accessors.cpp index fffd5d22625f7..5cdbd0f73e683 100644 --- a/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_accessors.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_accessors.cpp @@ -3,12 +3,12 @@ // RUN: env SYCL_UR_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt // // The test checks that the last parameter is not `nullptr` for -// urEnqueueKernelLaunchWithArgsExp for a kernel using buffer accessor. +// urEnqueueKernelLaunch for a kernel using buffer accessor. // -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK: <--- urEnqueueKernelLaunch // -// CHECK-NOT: <--- urEnqueueKernelLaunchWithArgsExp({{.*}}.phEvent = nullptr -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-NOT: <--- urEnqueueKernelLaunch({{.*}}.phEvent = nullptr +// CHECK: <--- urEnqueueKernelLaunch // CHECK: -> UR_RESULT_SUCCESS // // CHECK: The test passed. diff --git a/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_using_assert.cpp b/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_using_assert.cpp index df9bb56b6d280..5d965b1c27ff1 100644 --- a/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_using_assert.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_using_assert.cpp @@ -6,10 +6,10 @@ // RUN: env SYCL_UR_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt // // The test checks that the last parameter is not `nullptr` for -// urEnqueueKernelLaunchWithArgsExp. +// urEnqueueKernelLaunch. // -// CHECK-NOT: <--- urEnqueueKernelLaunchWithArgsExp({{.*}}.phEvent = nullptr -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-NOT: <--- urEnqueueKernelLaunch({{.*}}.phEvent = nullptr +// CHECK: <--- urEnqueueKernelLaunch // CHECK: -> UR_RESULT_SUCCESS // // CHECK: The test passed. diff --git a/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_usm_ooo_queue.cpp b/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_usm_ooo_queue.cpp index 79ba3d79f5800..3aa91ed17cd32 100644 --- a/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_usm_ooo_queue.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/DiscardEvents/discard_events_usm_ooo_queue.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // -// On level_zero Q.fill uses urEnqueueKernelLaunchWithArgsExp and not -// urEnqueueUSMFill due to https://github.com/intel/llvm/issues/13787 +// On level_zero Q.fill uses urEnqueueKernelLaunch and not urEnqueueUSMFill +// due to https://github.com/intel/llvm/issues/13787 // // RUN: env SYCL_UR_TRACE=2 %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt --check-prefixes=CHECK%if level_zero %{,CHECK-L0%} %else %{,CHECK-OTHER%} // @@ -25,7 +25,7 @@ // CHECK: -> UR_RESULT_SUCCESS // // Level-zero backend doesn't use urEnqueueUSMFill -// CHECK-L0: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-L0: <--- urEnqueueKernelLaunch // CHECK-L0: .phEvent = {{[0-9a-f]+}} // CHECK-OTHER: <--- urEnqueueUSMFill({{.*}} .phEvent = {{[0-9a-f]+}} // CHECK: -> UR_RESULT_SUCCESS @@ -47,12 +47,12 @@ // CHECK: <--- urEnqueueEventsWaitWithBarrier // CHECK: -> UR_RESULT_SUCCESS // -// CHECK-NOT: <--- urEnqueueKernelLaunchWithArgsExp({{.*}} .phEvent = nullptr -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-NOT: <--- urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: <--- urEnqueueKernelLaunch // CHECK: -> UR_RESULT_SUCCESS // -// CHECK-NOT: <--- urEnqueueKernelLaunchWithArgsExp({{.*}} .phEvent = nullptr -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-NOT: <--- urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: <--- urEnqueueKernelLaunch // CHECK: -> UR_RESULT_SUCCESS // // RegularQueue @@ -74,7 +74,7 @@ // CHECK: -> UR_RESULT_SUCCESS // // Level-zero backend doesn't use urEnqueueUSMFill -// CHECK-L0: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-L0: <--- urEnqueueKernelLaunch // CHECK-L0: .phEvent = {{[0-9a-f]+}} // CHECK-OTHER: <--- urEnqueueUSMFill({{.*}} .phEvent = {{[0-9a-f]+}} // CHECK: -> UR_RESULT_SUCCESS @@ -96,12 +96,12 @@ // CHECK: <--- urEnqueueEventsWaitWithBarrier // CHECK: -> UR_RESULT_SUCCESS // -// CHECK-NOT: <--- urEnqueueKernelLaunchWithArgsExp({{.*}} .phEvent = nullptr -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-NOT: <--- urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: <--- urEnqueueKernelLaunch // CHECK: -> UR_RESULT_SUCCESS // -// CHECK-NOT: <--- urEnqueueKernelLaunchWithArgsExp({{.*}} .phEvent = nullptr -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK-NOT: <--- urEnqueueKernelLaunch({{.*}} .phEvent = nullptr +// CHECK: <--- urEnqueueKernelLaunch // CHECK: -> UR_RESULT_SUCCESS // // RegularQueue diff --git a/sycl/test-e2e/KernelAndProgram/disable-caching.cpp b/sycl/test-e2e/KernelAndProgram/disable-caching.cpp index 09a73c3036831..814a87cd5cac8 100644 --- a/sycl/test-e2e/KernelAndProgram/disable-caching.cpp +++ b/sycl/test-e2e/KernelAndProgram/disable-caching.cpp @@ -23,7 +23,7 @@ int main() { // CHECK-NOT: <--- urProgramRetain // CHECK: <--- urKernelCreate // CHECK-NOT: <--- urKernelRetain - // CHECK: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK: <--- urEnqueueKernelLaunch // CHECK: <--- urProgramRelease // CHECK: <--- urKernelRelease // CHECK: <--- urEventWait @@ -34,7 +34,7 @@ int main() { // CHECK-CACHE: <--- urKernelCreate // CHECK-CACHE: <--- urKernelRetain // CHECK-CACHE-NOT: <--- urKernelCreate - // CHECK-CACHE: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-CACHE: <--- urEnqueueKernelLaunch // CHECK-CACHE: <--- urEventWait q.single_task([] {}).wait(); @@ -42,7 +42,7 @@ int main() { // CHECK-NOT: <--- urProgramRetain // CHECK: <--- urKernelCreate // CHECK-NOT: <--- urKernelRetain - // CHECK: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK: <--- urEnqueueKernelLaunch // CHECK: <--- urKernelRelease // CHECK: <--- urProgramRelease // CHECK: <--- urEventWait @@ -53,7 +53,7 @@ int main() { // CHECK-CACHE: <--- urKernelCreate // CHECK-CACHE: <--- urKernelRetain // CHECK-CACHE-NOT: <--- urKernelCreate - // CHECK-CACHE: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-CACHE: <--- urEnqueueKernelLaunch // CHECK-CACHE: <--- urKernelRelease // CHECK-CACHE: <--- urProgramRelease // CHECK-CACHE: <--- urEventWait @@ -62,7 +62,7 @@ int main() { // CHECK-NOT: <--- urProgramRetain // CHECK: <--- urKernelCreate // CHECK-NOT: <--- urKernelRetain - // CHECK: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK: <--- urEnqueueKernelLaunch // CHECK: <--- urKernelRelease // CHECK: <--- urProgramRelease // CHECK: <--- urEventWait @@ -73,7 +73,7 @@ int main() { // CHECK-CACHE: <--- urKernelCreate // CHECK-CACHE: <--- urKernelRetain // CHECK-CACHE-NOT: <--- urKernelCreate - // CHECK-CACHE: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK-CACHE: <--- urEnqueueKernelLaunch // CHECK-CACHE: <--- urKernelRelease // CHECK-CACHE: <--- urProgramRelease // CHECK-CACHE: <--- urEventWait diff --git a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp index 30e98974b5f1d..fd9465935dfe1 100644 --- a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp +++ b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp @@ -32,5 +32,5 @@ int main() { } // CHECK:host acc destructor call -// CHECK: <--- urEnqueueKernelLaunchWithArgsExp +// CHECK: <--- urEnqueueKernelLaunch // CHECK:end of scope diff --git a/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp b/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp index df8062d165ccb..0c0e1750805f2 100644 --- a/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp +++ b/sycl/test-e2e/Scheduler/InOrderQueueDeps.cpp @@ -36,15 +36,15 @@ int main() { // Sequential submissions to the same in-order queue should not result in any // event dependencies. - // CHECK: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK: <--- urEnqueueKernelLaunch // CHECK-SAME: .numEventsInWaitList = 0 submitKernel(InOrderQueueA, Buf); - // CHECK: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK: <--- urEnqueueKernelLaunch // CHECK-SAME: .numEventsInWaitList = 0 submitKernel(InOrderQueueA, Buf); // Submisssion to a different in-order queue should explicitly depend on the // previous command group. - // CHECK: <--- urEnqueueKernelLaunchWithArgsExp + // CHECK: <--- urEnqueueKernelLaunch // CHECK-SAME: .numEventsInWaitList = 1 submitKernel(InOrderQueueB, Buf); diff --git a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp index 99e170d91cac6..7cb68b3d63fb7 100644 --- a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp +++ b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp @@ -68,40 +68,40 @@ int main() { // submission depending on whether spec const value was set or not. a. In the // case when we select image where specialization constants are replaced with // default value - specialization constant buffer is not created and we set - // nullptr in urEnqueueKernelLaunchWithArgsExp. In the case when we select - // regular image - specialization constant buffer is created and we set a - // real pointer in urEnqueueKernelLaunchWithArgsExp. + // nullptr in urKernelSetArgMemObj (4th parameter) b. In the case when we + // select regular image - specialization constant buffer is created and we set + // a real pointer in urKernelSetArgMemObj. // CHECK-DEFAULT: Submission 0 - // CHECK-DEFAULT: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: Default value of specialization constant was used. // CHECK-DEFAULT: Submission 1 - // CHECK-DEFAULT: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: New specialization constant value was set. // CHECK-DEFAULT: Submission 2 - // CHECK-DEFAULT: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: Default value of specialization constant was used. // CHECK-DEFAULT: Submission 3 - // CHECK-DEFAULT: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: New specialization constant value was set. // CHECK-ENABLED: Submission 0 - // CHECK-ENABLED: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = nullptr{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-ENABLED: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: Default value of specialization constant was used. // CHECK-ENABLED: Submission 1 - // CHECK-ENABLED: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}}-> UR_RESULT_SUCCESS; + // CHECK-ENABLED: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: New specialization constant value was set. // CHECK-ENABLED: Submission 2 - // CHECK-ENABLED: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = nullptr{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-ENABLED: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: Default value of specialization constant was used. // CHECK-ENABLED: Submission 3 - // CHECK-ENABLED: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-ENABLED: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: New specialization constant value was set. // CHECK-MIX: Submission 0 @@ -141,11 +141,11 @@ int main() { // default, that's why nullptr is set as 4th parameter of // urKernelSetArgMemObj. // CHECK-DEFAULT: Kernel bundle - // CHECK-DEFAULT: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = {{(0x)?[0-9,a-f,A-F]+}}{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = {{(0x)?[0-9,a-f,A-F]+}}) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT: Default value of specialization constant was used. // CHECK-ENABLED: Kernel bundle - // CHECK-ENABLED: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = nullptr{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-ENABLED: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-ENABLED: Default value of specialization constant was used. // CHECK-MIX: Kernel bundle @@ -173,7 +173,7 @@ int main() { // constants. We are verifying that by checking the 4th parameter is set to // zero. // CHECK-DEFAULT-EXPLICIT-SET: Default value was explicitly set - // CHECK-DEFAULT-EXPLICIT-SET: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = nullptr{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT-EXPLICIT-SET: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT-EXPLICIT-SET: Default value of specialization constant was used. std::cout << "Default value was explicitly set" << std::endl; Q.submit([&](sycl::handler &cgh) { @@ -196,7 +196,7 @@ int main() { // values of specialization constants. We are verifying that by checking the // 4th parameter is set to zero. // CHECK-DEFAULT-BACK-TO-DEFAULT: Changed to new value and then default value was explicitly set - // CHECK-DEFAULT-BACK-TO-DEFAULT: <--- urEnqueueKernelLaunchWithArgsExp{{.*}}KERNEL_ARG_TYPE_MEM_OBJ,{{.*}}.hMem = nullptr{{.*}} -> UR_RESULT_SUCCESS; + // CHECK-DEFAULT-BACK-TO-DEFAULT: <--- urKernelSetArgMemObj({{.*}}, .hArgValue = nullptr) -> UR_RESULT_SUCCESS; // CHECK-DEFAULT-BACK-TO-DEFAULT: Default value of specialization constant was used. std::cout << "Changed to new value and then default value was explicitly set" << std::endl; diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 24edf795dfc77..b8dceca5367a9 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -18,8 +18,9 @@ // CHECK-NEXT: UR Call Begin : urPlatformGetInfo // CHECK-NEXT: UR Call Begin : urKernelSetExecInfo // CHECK-NEXT: UR Call Begin : urKernelRetain -// CHECK: UR Call Begin : urKernelGetGroupInfo -// CHECK-NEXT: UR Call Begin : urEnqueueKernelLaunchWithArgsExp +// CHECK: UR Call Begin : urKernelSetArgPointer +// CHECK-NEXT: UR Call Begin : urKernelGetGroupInfo +// CHECK-NEXT: UR Call Begin : urEnqueueKernelLaunch // CHECK: UR Call Begin : urKernelCreate // CHECK-NEXT: UR Call Begin : urPlatformGetInfo // CHECK-NEXT: UR Call Begin : urPlatformGetInfo @@ -41,8 +42,9 @@ // CHECK-DAG: from_source : false // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} -// CHECK: UR Call Begin : urKernelGetGroupInfo -// CHECK-NEXT: UR Call Begin : urEnqueueKernelLaunchWithArgsExp +// CHECK: UR Call Begin : urKernelSetArgPointer +// CHECK-NEXT: UR Call Begin : urKernelGetGroupInfo +// CHECK-NEXT: UR Call Begin : urEnqueueKernelLaunch // CHECK-NEXT: Signal // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} diff --git a/sycl/tools/xpti_helpers/usm_analyzer.hpp b/sycl/tools/xpti_helpers/usm_analyzer.hpp index 0dc46427557bb..6df1c522899d9 100644 --- a/sycl/tools/xpti_helpers/usm_analyzer.hpp +++ b/sycl/tools/xpti_helpers/usm_analyzer.hpp @@ -254,11 +254,6 @@ class USMAnalyzer { handleKernelSetArgPointer( static_cast(Data->args_data)); return; - case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP: - handleEnqueueKernelLaunchWithArgsExp( - static_cast( - Data->args_data)); - return; default: return; } @@ -426,18 +421,4 @@ class USMAnalyzer { "kernel parameter with index = " + std::to_string(*Params->pargIndex), Ptr, 0 /*no data how it will be used in kernel*/, "kernel"); } - - static void handleEnqueueKernelLaunchWithArgsExp( - const ur_enqueue_kernel_launch_with_args_exp_params_t *Params) { - // Search for pointer args and validate the pointers - for (uint32_t i = 0; i < *Params->pnumArgs; i++) { - if ((*Params->ppArgs)[i].type == UR_EXP_KERNEL_ARG_TYPE_POINTER) { - void *Ptr = (const_cast((*Params->ppArgs)[i].value.pointer)); - CheckPointerValidness("kernel parameter with index = " + - std::to_string((*Params->ppArgs)[i].index), - Ptr, 0 /*no data how it will be used in kernel*/, - "kernel"); - } - } - } }; diff --git a/sycl/unittests/Extensions/DeviceGlobal.cpp b/sycl/unittests/Extensions/DeviceGlobal.cpp index 6aa8c32405830..b6fbc9bd8ab3e 100644 --- a/sycl/unittests/Extensions/DeviceGlobal.cpp +++ b/sycl/unittests/Extensions/DeviceGlobal.cpp @@ -191,9 +191,8 @@ ur_result_t after_urEventGetInfo(void *pParams) { return UR_RESULT_SUCCESS; } -ur_result_t after_urEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); +ur_result_t after_urEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); ++KernelCallCounter; EXPECT_TRUE(DeviceGlobalInitEvent.has_value()) << "DeviceGlobalInitEvent has not been set. Kernel call " @@ -276,7 +275,7 @@ TEST_F(DeviceGlobalTest, DeviceGlobalInitBeforeUse) { REDEFINE_AFTER(urEnqueueUSMMemcpy); REDEFINE_AFTER_TEMPLATED(urEnqueueDeviceGlobalVariableWrite, true); REDEFINE_AFTER(urEventGetInfo); - REDEFINE_AFTER(urEnqueueKernelLaunchWithArgsExp); + REDEFINE_AFTER(urEnqueueKernelLaunch); // Kernel call 1. // First launch should create both init events. diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp index 9e6366ce16abf..a1014adbff686 100644 --- a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -13,7 +13,6 @@ #include using namespace sycl; -using namespace FreeFunctionEventsHelpers; namespace oneapiext = ext::oneapi::experimental; @@ -27,7 +26,7 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { protected: void SetUp() override { - counter_urEnqueueKernelLaunchWithArgsExp = 0; + counter_urEnqueueKernelLaunch = 0; counter_urUSMEnqueueMemcpy = 0; counter_urUSMEnqueueFill = 0; counter_urUSMEnqueuePrefetch = 0; @@ -40,31 +39,28 @@ class EnqueueFunctionsEventsTests : public ::testing::Test { }; TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { oneapiext::single_task(CGH, []() {}); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::single_task(Q, []() {}); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -78,13 +74,12 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { oneapiext::submit(Q, [&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -98,35 +93,32 @@ TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { oneapiext::single_task(Q, Kernel); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { oneapiext::parallel_for(CGH, range<1>{32}, [](item<1>) {}); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::parallel_for(Q, range<1>{32}, [](item<1>) {}); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -141,13 +133,12 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { oneapiext::parallel_for(CGH, range<1>{32}, Kernel); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -161,37 +152,34 @@ TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { oneapiext::parallel_for(Q, range<1>{32}, Kernel); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::submit(Q, [&](handler &CGH) { oneapiext::nd_launch( CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -206,13 +194,12 @@ TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -226,7 +213,7 @@ TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { @@ -388,9 +375,8 @@ TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { // Special test for case where host_task need an event after, so a barrier is // enqueued to create a usable event. - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback( "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); @@ -402,7 +388,7 @@ TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); }).wait(); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); } diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp index aaa84dd4429ff..1f6a73df055c2 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp +++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp @@ -15,8 +15,6 @@ #define __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS #include -using namespace FreeFunctionEventsHelpers; - class TestFunctor { public: void operator()() const {} @@ -55,7 +53,7 @@ class FreeFunctionCommandsEventsTests : public ::testing::Test { protected: void SetUp() override { - counter_urEnqueueKernelLaunchWithArgsExp = 0; + counter_urEnqueueKernelLaunch = 0; counter_urUSMEnqueueMemcpy = 0; counter_urUSMEnqueueFill = 0; counter_urUSMEnqueuePrefetch = 0; @@ -68,29 +66,26 @@ class FreeFunctionCommandsEventsTests : public ::testing::Test { }; TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); sycl::khr::submit(Queue, [&](sycl::handler &Handler) { sycl::khr::launch_task(Handler, TestFunctor()); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); sycl::khr::launch_task(Queue, TestFunctor()); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); auto KID = sycl::get_kernel_id(); @@ -104,13 +99,12 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchTaskKernelNoEvent) { sycl::khr::launch_task(Handler, Kernel); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -124,36 +118,33 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchTaskShortcutKernelNoEvent) { sycl::khr::launch_task(Queue, Kernel); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); sycl::khr::submit(Queue, [&](sycl::handler &Handler) { sycl::khr::launch(Handler, sycl::range<1>{32}, TestFunctor()); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); sycl::khr::launch(Queue, sycl::range<1>{32}, TestFunctor()); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -168,13 +159,12 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchForKernelNoEvent) { sycl::khr::launch(Handler, sycl::range<1>{32}, Kernel); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -188,37 +178,34 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchForShortcutKernelNoEvent) { sycl::khr::launch(Queue, sycl::range<1>{32}, Kernel); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); sycl::khr::submit(Queue, [&](sycl::handler &Handler) { sycl::khr::launch_grouped(Handler, sycl::range<1>{32}, sycl::range<1>{32}, TestFunctor()); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32}, TestFunctor()); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -234,13 +221,12 @@ TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) { Kernel); }); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutKernelNoEvent) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urKernelGetInfo", &after_urKernelGetInfo); @@ -255,7 +241,7 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutKernelNoEvent) { sycl::khr::launch_grouped(Queue, sycl::range<1>{32}, sycl::range<1>{32}, Kernel); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); } TEST_F(FreeFunctionCommandsEventsTests, SubmitMemcpyNoEvent) { @@ -418,9 +404,8 @@ TEST_F(FreeFunctionCommandsEventsTests, MemAdviseShortcutNoEvent) { TEST_F(FreeFunctionCommandsEventsTests, BarrierBeforeHostTask) { // Special test for case where host_task need an event after, so a barrier is // enqueued to create a usable event. - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefined_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback( "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); @@ -434,7 +419,7 @@ TEST_F(FreeFunctionCommandsEventsTests, BarrierBeforeHostTask) { }) .wait(); - ASSERT_EQ(counter_urEnqueueKernelLaunchWithArgsExp, size_t{1}); + ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1}); ASSERT_EQ(counter_urEnqueueEventsWaitWithBarrier, size_t{1}); ASSERT_TRUE(HostTaskTimestamp > timestamp_urEnqueueEventsWaitWithBarrier); } diff --git a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp index 3da107a8c2ce2..c45d72ea4c343 100644 --- a/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp +++ b/sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp @@ -9,8 +9,6 @@ #include #include -namespace FreeFunctionEventsHelpers { - inline ur_result_t after_urKernelGetInfo(void *pParams) { auto params = *static_cast(pParams); constexpr char MockKernel[] = "TestKernel"; @@ -25,11 +23,10 @@ inline ur_result_t after_urKernelGetInfo(void *pParams) { return UR_RESULT_SUCCESS; } -static thread_local size_t counter_urEnqueueKernelLaunchWithArgsExp = 0; -inline ur_result_t redefined_urEnqueueKernelLaunchWithArgsExp(void *pParams) { - ++counter_urEnqueueKernelLaunchWithArgsExp; - auto params = - *static_cast(pParams); +static thread_local size_t counter_urEnqueueKernelLaunch = 0; +inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) { + ++counter_urEnqueueKernelLaunch; + auto params = *static_cast(pParams); EXPECT_EQ(*params.pphEvent, nullptr); return UR_RESULT_SUCCESS; } @@ -74,5 +71,3 @@ inline ur_result_t after_urEnqueueEventsWaitWithBarrier(void *pParams) { timestamp_urEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); return UR_RESULT_SUCCESS; } - -} // namespace FreeFunctionEventsHelpers diff --git a/sycl/unittests/Extensions/USMMemcpy2D.cpp b/sycl/unittests/Extensions/USMMemcpy2D.cpp index bf8b5df0c4793..e05164d2ac66d 100644 --- a/sycl/unittests/Extensions/USMMemcpy2D.cpp +++ b/sycl/unittests/Extensions/USMMemcpy2D.cpp @@ -264,9 +264,8 @@ ur_result_t after_urKernelCreate(void *pParams) { std::string LastEnqueuedKernel; -ur_result_t after_urEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); +ur_result_t after_urEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); auto KernelIt = KernelToNameMap.find(*params.phKernel); EXPECT_TRUE(KernelIt != KernelToNameMap.end()); LastEnqueuedKernel = KernelIt->second; @@ -348,9 +347,8 @@ TEST(USMMemcpy2DTest, USMMemops2DUnsupported) { &after_urDeviceGetInfo); mock::getCallbacks().set_after_callback("urKernelCreate", &after_urKernelCreate); - mock::getCallbacks().set_after_callback( - "urEnqueueKernelLaunchWithArgsExp", - &after_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback( "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); @@ -390,9 +388,8 @@ TEST(USMMemcpy2DTest, USMFillSupportedOnly) { &after_urDeviceGetInfo); mock::getCallbacks().set_after_callback("urKernelCreate", &after_urKernelCreate); - mock::getCallbacks().set_after_callback( - "urEnqueueKernelLaunchWithArgsExp", - &after_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); mock::getCallbacks().set_replace_callback("urEnqueueUSMFill2D", &redefine_urEnqueueUSMFill2D); mock::getCallbacks().set_after_callback( @@ -438,9 +435,8 @@ TEST(USMMemcpy2DTest, USMMemsetSupportedOnly) { &after_urDeviceGetInfo); mock::getCallbacks().set_after_callback("urKernelCreate", &after_urKernelCreate); - mock::getCallbacks().set_after_callback( - "urEnqueueKernelLaunchWithArgsExp", - &after_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); mock::getCallbacks().set_after_callback( "urUSMGetMemAllocInfo", &after_urUSMGetMemAllocInfo); mock::getCallbacks().set_replace_callback("urEnqueueUSMFill2D", @@ -484,9 +480,8 @@ TEST(USMMemcpy2DTest, USMMemcpySupportedOnly) { &after_urDeviceGetInfo); mock::getCallbacks().set_after_callback("urKernelCreate", &after_urKernelCreate); - mock::getCallbacks().set_after_callback( - "urEnqueueKernelLaunchWithArgsExp", - &after_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &after_urEnqueueKernelLaunch); mock::getCallbacks().set_replace_callback("urEnqueueUSMMemcpy2D", &redefine_urEnqueueUSMMemcpy2D); mock::getCallbacks().set_after_callback( diff --git a/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp b/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp index 9af42d8e1a9ef..8febd9676fb9f 100644 --- a/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp +++ b/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp @@ -44,24 +44,16 @@ static sycl::unittest::MockDeviceImage Img = sycl::unittest::generateDefaultImage({"WorkGroupMemoryKernel"}); static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; -static int LocalMemArgs = 0; -inline ur_result_t redefined_urEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); - auto Args = *params.ppArgs; - for (uint32_t i = 0; i < *params.pnumArgs; i++) { - if (Args[i].type == UR_EXP_KERNEL_ARG_TYPE_LOCAL) { - ++LocalMemArgs; - } - } +static int urKernelSetArgLocalCalls = 0; +inline ur_result_t redefined_urKernelSetArgLocal(void *) { + ++urKernelSetArgLocalCalls; return UR_RESULT_SUCCESS; } TEST(URArgumentTest, URArgumentTest) { sycl::unittest::UrMock<> Mock; - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urKernelSetArgLocal", + &redefined_urKernelSetArgLocal); sycl::platform Platform = sycl::platform(); const sycl::device dev = Platform.get_devices()[0]; sycl::queue q{dev}; @@ -72,5 +64,5 @@ TEST(URArgumentTest, URArgumentTest) { kernel); }); q.wait(); - ASSERT_EQ(LocalMemArgs, 1); + ASSERT_EQ(urKernelSetArgLocalCalls, 1); } diff --git a/sycl/unittests/assert/assert.cpp b/sycl/unittests/assert/assert.cpp index c81f71ddd27b9..59b0764d60619 100644 --- a/sycl/unittests/assert/assert.cpp +++ b/sycl/unittests/assert/assert.cpp @@ -170,9 +170,8 @@ static ur_result_t redefinedKernelGetGroupInfoAfter(void *pParams) { } #ifndef _WIN32 -static ur_result_t redefinedEnqueueKernelLaunchWithArgsExpAfter(void *pParams) { - auto params = - *static_cast(pParams); +static ur_result_t redefinedEnqueueKernelLaunchAfter(void *pParams) { + auto params = *static_cast(pParams); static ur_event_handle_t UserKernelEvent = **params.pphEvent; int Val = KernelLaunchCounter++; // This output here is to reduce amount of time requried to debug/reproduce a @@ -233,9 +232,8 @@ static void setupMock(sycl::unittest::UrMock<> &Mock) { using namespace sycl::detail; mock::getCallbacks().set_after_callback("urKernelGetGroupInfo", &redefinedKernelGetGroupInfoAfter); - mock::getCallbacks().set_after_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithArgsExpAfter); + mock::getCallbacks().set_after_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunchAfter); mock::getCallbacks().set_after_callback("urEnqueueMemBufferMap", &redefinedEnqueueMemBufferMapAfter); mock::getCallbacks().set_before_callback("urEventWait", @@ -292,7 +290,7 @@ static ur_result_t redefinedKernelGetInfo(void *pParams) { return UR_RESULT_ERROR_UNKNOWN; } -static ur_result_t redefinedEnqueueKernelLaunchWithArgsExp(void *pParms) { +static ur_result_t redefinedEnqueueKernelLaunch(void *pParms) { int Val = KernelLaunchCounter++; // This output here is to reduce amount of time requried to debug/reproduce a // failing test upon feature break @@ -376,8 +374,8 @@ static void setupMockForInterop(sycl::unittest::UrMock<> &Mock, mock::getCallbacks().set_after_callback("urKernelGetGroupInfo", &redefinedKernelGetGroupInfoAfter); mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &TestInteropKernel::redefinedEnqueueKernelLaunchWithArgsExp); + "urEnqueueKernelLaunch", + &TestInteropKernel::redefinedEnqueueKernelLaunch); mock::getCallbacks().set_after_callback("urEnqueueMemBufferMap", &redefinedEnqueueMemBufferMapAfter); mock::getCallbacks().set_before_callback("urEventWait", diff --git a/sycl/unittests/buffer/KernelArgMemObj.cpp b/sycl/unittests/buffer/KernelArgMemObj.cpp index 136f5b73843c4..b826e89a128b8 100644 --- a/sycl/unittests/buffer/KernelArgMemObj.cpp +++ b/sycl/unittests/buffer/KernelArgMemObj.cpp @@ -42,21 +42,13 @@ static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; using namespace sycl; bool PropertyPresent = false; -ur_mem_flags_t MemFlags{}; - -ur_result_t redefinedEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); - auto Args = *params.ppArgs; - for (uint32_t i = 0; i < *params.pnumArgs; i++) { - if (Args[i].type != UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ) { - continue; - } - PropertyPresent = Args[i].value.memObjTuple.flags != 0; - if (PropertyPresent) { - MemFlags = Args[i].value.memObjTuple.flags; - } - } +ur_kernel_arg_mem_obj_properties_t PropsCopy{}; + +ur_result_t redefinedKernelSetArgMemObj(void *pParams) { + auto params = *static_cast(pParams); + PropertyPresent = *params.ppProperties != nullptr; + if (PropertyPresent) + PropsCopy = **params.ppProperties; return UR_RESULT_SUCCESS; } @@ -67,10 +59,9 @@ class BuferTestUrArgs : public ::testing::Test { protected: void SetUp() override { PropertyPresent = false; - MemFlags = 0; - mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithArgsExp); + PropsCopy = {}; + mock::getCallbacks().set_before_callback("urKernelSetArgMemObj", + &redefinedKernelSetArgMemObj); } template @@ -89,7 +80,7 @@ class BuferTestUrArgs : public ::testing::Test { }) .wait(); ASSERT_TRUE(PropertyPresent); - EXPECT_EQ(MemFlags, ExpectedAccessMode); + EXPECT_EQ(PropsCopy.memoryAccess, ExpectedAccessMode); } protected: diff --git a/sycl/unittests/handler/SetArgForLocalAccessor.cpp b/sycl/unittests/handler/SetArgForLocalAccessor.cpp index aef26577bc50b..7a9079872ce36 100644 --- a/sycl/unittests/handler/SetArgForLocalAccessor.cpp +++ b/sycl/unittests/handler/SetArgForLocalAccessor.cpp @@ -21,15 +21,9 @@ namespace { size_t LocalBufferArgSize = 0; -ur_result_t redefined_urEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); - auto Args = *params.ppArgs; - for (uint32_t i = 0; i < *params.pnumArgs; i++) { - if (Args[i].type == UR_EXP_KERNEL_ARG_TYPE_LOCAL) { - LocalBufferArgSize = Args[i].size; - } - } +ur_result_t redefined_urKernelSetArgLocal(void *pParams) { + auto params = *static_cast(pParams); + LocalBufferArgSize = *params.pargSize; return UR_RESULT_SUCCESS; } @@ -37,9 +31,8 @@ ur_result_t redefined_urEnqueueKernelLaunchWithArgsExp(void *pParams) { TEST(HandlerSetArg, LocalAccessor) { sycl::unittest::UrMock<> Mock; redefineMockForKernelInterop(Mock); - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urKernelSetArgLocal", + &redefined_urKernelSetArgLocal); constexpr size_t Size = 128; sycl::queue Q; diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 9104c1d9e4a10..8207ade72f0d0 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -76,7 +76,7 @@ ur_result_t failingUrCall(void *) { return UR_RESULT_ERROR_UNKNOWN; } TEST_F(SchedulerTest, FailedKernelException) { unittest::UrMock<> Mock; - mock::getCallbacks().set_before_callback("urEnqueueKernelLaunchWithArgsExp", + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", &failingUrCall); RunWithFailedCommandsAndCheck(true, 0); } @@ -94,8 +94,7 @@ ur_event_handle_t DummyEvent = mock::createDummyHandle(); inline ur_result_t failedEnqueueKernelLaunchWithDummy(void *pParams) { DummyEventReturned = true; - auto params = - *static_cast(pParams); + auto params = *static_cast(pParams); **params.pphEvent = DummyEvent; return UR_RESULT_ERROR_UNKNOWN; } @@ -121,7 +120,7 @@ TEST(FailedCommandsTest, CheckUREventReleaseWithKernel) { DummyEventReleaseAttempt = false; DummyEventReturned = false; sycl::unittest::UrMock<> Mock; - mock::getCallbacks().set_before_callback("urEnqueueKernelLaunchWithArgsExp", + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", &failedEnqueueKernelLaunchWithDummy); mock::getCallbacks().set_before_callback("urEventRelease", &checkDummyInEventRelease); diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index e6cde1b133a59..dc9bd42de84a7 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -128,9 +128,8 @@ TEST_F(SchedulerTest, InOrderQueueIsolatedDeps) { std::vector KernelEventListSize; -inline ur_result_t customEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); +inline ur_result_t customEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); KernelEventListSize.push_back(*params.pnumEventsInWaitList); return UR_RESULT_SUCCESS; } @@ -138,9 +137,8 @@ inline ur_result_t customEnqueueKernelLaunchWithArgsExp(void *pParams) { TEST_F(SchedulerTest, TwoInOrderQueuesOnSameContext) { KernelEventListSize.clear(); sycl::unittest::UrMock<> Mock; - mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &customEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &customEnqueueKernelLaunch); sycl::platform Plt = sycl::platform(); @@ -167,9 +165,8 @@ TEST_F(SchedulerTest, TwoInOrderQueuesOnSameContext) { TEST_F(SchedulerTest, InOrderQueueNoSchedulerPath) { KernelEventListSize.clear(); sycl::unittest::UrMock<> Mock; - mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &customEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &customEnqueueKernelLaunch); sycl::platform Plt = sycl::platform(); diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 30308584dc107..31a2914e2c803 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -51,9 +51,8 @@ TEST_F(SchedulerTest, InOrderQueueHostTaskDeps) { enum class CommandType { KERNEL = 1, MEMSET = 2 }; std::vector> ExecutedCommands; -inline ur_result_t customEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); +inline ur_result_t customEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); ExecutedCommands.push_back( {CommandType::KERNEL, *params.pnumEventsInWaitList}); return UR_RESULT_SUCCESS; @@ -69,9 +68,8 @@ inline ur_result_t customEnqueueUSMFill(void *pParams) { TEST_F(SchedulerTest, InOrderQueueCrossDeps) { ExecutedCommands.clear(); sycl::unittest::UrMock<> Mock; - mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &customEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &customEnqueueKernelLaunch); mock::getCallbacks().set_before_callback("urEnqueueUSMFill", &customEnqueueUSMFill); @@ -123,9 +121,8 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { ExecutedCommands.clear(); sycl::unittest::UrMock<> Mock; - mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &customEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &customEnqueueKernelLaunch); mock::getCallbacks().set_before_callback("urEnqueueUSMFill", &customEnqueueUSMFill); diff --git a/sycl/unittests/scheduler/RequiredWGSize.cpp b/sycl/unittests/scheduler/RequiredWGSize.cpp index 851ee3ee32d38..675036ec30081 100644 --- a/sycl/unittests/scheduler/RequiredWGSize.cpp +++ b/sycl/unittests/scheduler/RequiredWGSize.cpp @@ -37,9 +37,8 @@ static ur_result_t redefinedKernelGetGroupInfo(void *pParams) { return UR_RESULT_SUCCESS; } -static ur_result_t redefinedEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); +static ur_result_t redefinedEnqueueKernelLaunch(void *pParams) { + auto params = *static_cast(pParams); if (*params.ppLocalWorkSize) { IncomingLocalSize[0] = (*params.ppLocalWorkSize)[0]; IncomingLocalSize[1] = (*params.ppLocalWorkSize)[1]; @@ -57,9 +56,8 @@ static void reset() { static void performChecks() { sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); - mock::getCallbacks().set_before_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); mock::getCallbacks().set_before_callback("urKernelGetGroupInfo", &redefinedKernelGetGroupInfo); diff --git a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp b/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp index 935ca1eba18ac..ca54cf0d908d6 100644 --- a/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp +++ b/sycl/unittests/thread_safety/InteropKernelEnqueue.cpp @@ -24,33 +24,23 @@ constexpr std::size_t LaunchCount = 8; uint32_t LastArgSet = -1; std::size_t LastThread = -1; -ur_result_t redefined_urEnqueueKernelLaunchWithArgsExp(void *pParams) { - auto params = - *static_cast(pParams); - auto Args = *params.ppArgs; - for (uint32_t i = 0; i < *params.pnumArgs; i++) { - if (Args[i].type != UR_EXP_KERNEL_ARG_TYPE_VALUE) { - continue; - } - auto ArgIndex = Args[i].index; - EXPECT_EQ((LastArgSet + 1) % NArgs, ArgIndex); - LastArgSet = ArgIndex; - std::size_t ArgValue = - *static_cast(Args[i].value.pointer); - if (ArgIndex == 0) - LastThread = ArgValue; - else - EXPECT_EQ(LastThread, ArgValue); - } +ur_result_t redefined_urKernelSetArgValue(void *pParams) { + auto params = *static_cast(pParams); + EXPECT_EQ((LastArgSet + 1) % NArgs, *params.pargIndex); + LastArgSet = *params.pargIndex; + std::size_t ArgValue = *static_cast(*params.ppArgValue); + if (*params.pargIndex == 0) + LastThread = ArgValue; + else + EXPECT_EQ(LastThread, ArgValue); return UR_RESULT_SUCCESS; } TEST(KernelEnqueue, InteropKernel) { unittest::UrMock<> Mock; redefineMockForKernelInterop(Mock); - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefined_urEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urKernelSetArgValue", + &redefined_urKernelSetArgValue); platform Plt = sycl::platform(); queue Q; diff --git a/sycl/unittests/xpti_trace/QueueApiFailures.cpp b/sycl/unittests/xpti_trace/QueueApiFailures.cpp index 303567ddcf953..1e88143774e21 100644 --- a/sycl/unittests/xpti_trace/QueueApiFailures.cpp +++ b/sycl/unittests/xpti_trace/QueueApiFailures.cpp @@ -30,7 +30,7 @@ inline ur_result_t redefinedAdapterGetLastError(void *) { return UR_RESULT_ERROR_INVALID_VALUE; } -ur_result_t redefinedEnqueueKernelLaunchWithArgsExp(void *) { +ur_result_t redefinedEnqueueKernelLaunch(void *) { return UR_RESULT_ERROR_ADAPTER_SPECIFIC; } @@ -92,9 +92,8 @@ class QueueApiFailures : public ::testing::Test { }; TEST_F(QueueApiFailures, QueueSubmit) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); mock::getCallbacks().set_replace_callback("urAdapterGetLastError", &redefinedAdapterGetLastError); sycl::queue Q; @@ -117,9 +116,8 @@ TEST_F(QueueApiFailures, QueueSubmit) { } TEST_F(QueueApiFailures, QueueSingleTask) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); mock::getCallbacks().set_replace_callback("urAdapterGetLastError", &redefinedAdapterGetLastError); sycl::queue Q; @@ -321,9 +319,8 @@ TEST_F(QueueApiFailures, QueueMemAdvise) { } TEST_F(QueueApiFailures, QueueParallelFor) { - mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithArgsExp); + mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch", + &redefinedEnqueueKernelLaunch); mock::getCallbacks().set_replace_callback("urAdapterGetLastError", &redefinedAdapterGetLastError); sycl::queue Q; @@ -454,8 +451,7 @@ ur_result_t redefinedEnqueueKernelLaunchWithStatus(void *) { TEST_F(QueueApiFailures, QueueKernelAsync) { mock::getCallbacks().set_replace_callback( - "urEnqueueKernelLaunchWithArgsExp", - &redefinedEnqueueKernelLaunchWithStatus); + "urEnqueueKernelLaunch", &redefinedEnqueueKernelLaunchWithStatus); mock::getCallbacks().set_replace_callback("urAdapterGetLastError", &redefinedAdapterGetLastError); diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 5882015c55972..1bba8a950e75f 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -475,8 +475,6 @@ typedef enum ur_function_t { UR_FUNCTION_MEMORY_EXPORT_EXPORT_MEMORY_HANDLE_EXP = 287, /// Enumerator for ::urBindlessImagesSupportsImportingHandleTypeExp UR_FUNCTION_BINDLESS_IMAGES_SUPPORTS_IMPORTING_HANDLE_TYPE_EXP = 288, - /// Enumerator for ::urEnqueueKernelLaunchWithArgsExp - UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP = 289, /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -590,8 +588,6 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, /// ::ur_exp_enqueue_ext_properties_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES = 0x4000, - /// ::ur_exp_kernel_arg_properties_t - UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES = 0x5000, /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -12847,166 +12843,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( /// propName. size_t *pPropSizeRet); -#if !defined(__GNUC__) -#pragma endregion -#endif -// Intel 'oneAPI' Unified Runtime Experimental API for setting args at kernel -// launch -#if !defined(__GNUC__) -#pragma region enqueue_kernel_launch_with_args_(experimental) -#endif -/////////////////////////////////////////////////////////////////////////////// -/// @brief What kind of kernel arg is this -typedef enum ur_exp_kernel_arg_type_t { - /// Kernel arg is a value. - UR_EXP_KERNEL_ARG_TYPE_VALUE = 0, - /// Kernel arg is a pointer. - UR_EXP_KERNEL_ARG_TYPE_POINTER = 1, - /// Kernel arg is a memory object. - UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ = 2, - /// Kernel arg is a local allocation. - UR_EXP_KERNEL_ARG_TYPE_LOCAL = 3, - /// Kernel arg is a sampler. - UR_EXP_KERNEL_ARG_TYPE_SAMPLER = 4, - /// @cond - UR_EXP_KERNEL_ARG_TYPE_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_exp_kernel_arg_type_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Mem obj/properties tuple -typedef struct ur_exp_kernel_arg_mem_obj_tuple_t { - /// [in] Handle of a memory object - ur_mem_handle_t hMem; - /// [in] Memory flags to associate with `hMem`. Allowed values are: - /// ::UR_MEM_FLAG_READ_WRITE, ::UR_MEM_FLAG_WRITE_ONLY, - /// ::UR_MEM_FLAG_READ_ONLY. - ur_mem_flags_t flags; - -} ur_exp_kernel_arg_mem_obj_tuple_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Typesafe container for a kernel argument value -typedef union ur_exp_kernel_arg_value_t { - /// [in] argument value represented as matching arg type. - /// The data pointed to will be copied and therefore can be reused on return. - const void *value; - /// [in] Allocation obtained by USM allocation or virtual memory mapping - /// operation, or pointer to a literal value. - const void *pointer; - /// [in] Struct containing a memory object and associated flags. - ur_exp_kernel_arg_mem_obj_tuple_t memObjTuple; - /// [in] Handle of a sampler object. - ur_sampler_handle_t sampler; - -} ur_exp_kernel_arg_value_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Kernel arg properties -typedef struct ur_exp_kernel_arg_properties_t { - /// [in] type of this structure, must be - /// ::UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES - ur_structure_type_t stype; - /// [in,out][optional] pointer to extension-specific structure - void *pNext; - /// [in] type of the kernel arg - ur_exp_kernel_arg_type_t type; - /// [in] index of the kernel arg - uint32_t index; - /// [in] size of the kernel arg - size_t size; - /// [in][tagged_by(type)] Union containing the argument value. - ur_exp_kernel_arg_value_t value; - -} ur_exp_kernel_arg_properties_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Enqueue a command to execute a kernel -/// -/// @remarks -/// _Analogues_ -/// - **clEnqueueNDRangeKernel** -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hQueue` -/// + `NULL == hKernel` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pGlobalWorkSize` -/// + `launchPropList == NULL && numPropsInLaunchPropList > 0` -/// + `pArgs == NULL && numArgs > 0` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `NULL != pArgs && ::UR_EXP_KERNEL_ARG_TYPE_SAMPLER < pArgs->type` -/// - ::UR_RESULT_ERROR_INVALID_QUEUE -/// - ::UR_RESULT_ERROR_INVALID_KERNEL -/// - ::UR_RESULT_ERROR_INVALID_EVENT -/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST -/// + `phEventWaitList == NULL && numEventsInWaitList > 0` -/// + `phEventWaitList != NULL && numEventsInWaitList == 0` -/// + If event objects in phEventWaitList are not valid events. -/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS -/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. -/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION -/// + `pGlobalWorkSize[0] == 0 || pGlobalWorkSize[1] == 0 || -/// pGlobalWorkSize[2] == 0` -/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE -/// + `pLocalWorkSize && (pLocalWorkSize[0] == 0 || pLocalWorkSize[1] == -/// 0 || pLocalWorkSize[2] == 0)` -/// - ::UR_RESULT_ERROR_INVALID_VALUE -/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values -/// have not been specified." -/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -/// - ::UR_RESULT_ERROR_INVALID_OPERATION -/// + If any property in `launchPropList` isn't supported by the device. -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent); - #if !defined(__GNUC__) #pragma endregion #endif @@ -14609,26 +14445,6 @@ typedef struct ur_enqueue_write_host_pipe_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_write_host_pipe_params_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urEnqueueKernelLaunchWithArgsExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_enqueue_kernel_launch_with_args_exp_params_t { - ur_queue_handle_t *phQueue; - ur_kernel_handle_t *phKernel; - uint32_t *pworkDim; - const size_t **ppGlobalWorkOffset; - const size_t **ppGlobalWorkSize; - const size_t **ppLocalWorkSize; - uint32_t *pnumArgs; - const ur_exp_kernel_arg_properties_t **ppArgs; - uint32_t *pnumPropsInLaunchPropList; - const ur_kernel_launch_property_t **plaunchPropList; - uint32_t *pnumEventsInWaitList; - const ur_event_handle_t **pphEventWaitList; - ur_event_handle_t **pphEvent; -} ur_enqueue_kernel_launch_with_args_exp_params_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueEventsWaitWithBarrierExt /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/unified-runtime/include/ur_api_funcs.def b/unified-runtime/include/ur_api_funcs.def index 97092258a5a46..f0c92445b9238 100644 --- a/unified-runtime/include/ur_api_funcs.def +++ b/unified-runtime/include/ur_api_funcs.def @@ -133,7 +133,6 @@ _UR_API(urEnqueueDeviceGlobalVariableRead) _UR_API(urEnqueueReadHostPipe) _UR_API(urEnqueueWriteHostPipe) _UR_API(urEnqueueEventsWaitWithBarrierExt) -_UR_API(urEnqueueKernelLaunchWithArgsExp) _UR_API(urEnqueueUSMDeviceAllocExp) _UR_API(urEnqueueUSMSharedAllocExp) _UR_API(urEnqueueUSMHostAllocExp) diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index b1033a027a223..8ab686aa583cc 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -1097,15 +1097,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueProcAddrTable_t)( ur_api_version_t, ur_enqueue_dditable_t *); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urEnqueueKernelLaunchWithArgsExp -typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunchWithArgsExp_t)( - ur_queue_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, - const size_t *, const size_t *, uint32_t, - const ur_exp_kernel_arg_properties_t *, uint32_t, - const ur_kernel_launch_property_t *, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *); - /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEnqueueUSMDeviceAllocExp typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMDeviceAllocExp_t)( @@ -1156,7 +1147,6 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueNativeCommandExp_t)( /////////////////////////////////////////////////////////////////////////////// /// @brief Table of EnqueueExp functions pointers typedef struct ur_enqueue_exp_dditable_t { - ur_pfnEnqueueKernelLaunchWithArgsExp_t pfnKernelLaunchWithArgsExp; ur_pfnEnqueueUSMDeviceAllocExp_t pfnUSMDeviceAllocExp; ur_pfnEnqueueUSMSharedAllocExp_t pfnUSMSharedAllocExp; ur_pfnEnqueueUSMHostAllocExp_t pfnUSMHostAllocExp; diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index 3e1f03a3aafbc..8130df0c5bec4 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -1415,36 +1415,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_kernel_arg_type_t enum -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL -urPrintExpKernelArgType(enum ur_exp_kernel_arg_type_t value, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_kernel_arg_mem_obj_tuple_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpKernelArgMemObjTuple( - const struct ur_exp_kernel_arg_mem_obj_tuple_t params, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_kernel_arg_properties_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpKernelArgProperties( - const struct ur_exp_kernel_arg_properties_t params, char *buffer, - const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_enqueue_ext_flag_t enum /// @returns @@ -2714,16 +2684,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams( const struct ur_enqueue_write_host_pipe_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_enqueue_kernel_launch_with_args_exp_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueKernelLaunchWithArgsExpParams( - const struct ur_enqueue_kernel_launch_with_args_exp_params_t *params, - char *buffer, const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_enqueue_events_wait_with_barrier_ext_params_t struct /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 93cc0d5f2b6fb..91c9973a3a6e9 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -50,8 +50,6 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template inline ur_result_t printTagged(std::ostream &os, const void *ptr, T value, size_t size); -template -inline ur_result_t printArray(std::ostream &os, const T *ptr); inline ur_result_t printStruct(std::ostream &os, const void *ptr); @@ -265,10 +263,6 @@ template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); -inline ur_result_t printUnion(std::ostream &os, - const union ur_exp_kernel_arg_value_t params, - const enum ur_exp_kernel_arg_type_t tag); - template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -598,14 +592,6 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_kernel_arg_type_t value); -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_exp_kernel_arg_mem_obj_tuple_t params); -inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_exp_kernel_arg_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t value); inline std::ostream &operator<<( @@ -1288,9 +1274,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_SUPPORTS_IMPORTING_HANDLE_TYPE_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_SUPPORTS_IMPORTING_HANDLE_TYPE_EXP"; break; - case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP: - os << "UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP"; - break; default: os << "unknown enumerator"; break; @@ -1460,9 +1443,6 @@ inline std::ostream &operator<<(std::ostream &os, case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES"; break; - case UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES: - os << "UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES"; - break; default: os << "unknown enumerator"; break; @@ -1779,12 +1759,6 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { (const ur_exp_enqueue_ext_properties_t *)ptr; printPtr(os, pstruct); } break; - - case UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES: { - const ur_exp_kernel_arg_properties_t *pstruct = - (const ur_exp_kernel_arg_properties_t *)ptr; - printPtr(os, pstruct); - } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -10920,7 +10894,13 @@ printUnion(std::ostream &os, case UR_KERNEL_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION: os << ".clusterDim = {"; - ur::details::printArray<3>(os, params.clusterDim); + for (auto i = 0; i < 3; i++) { + if (i != 0) { + os << ", "; + } + + os << (params.clusterDim[i]); + } os << "}"; break; @@ -11565,7 +11545,13 @@ operator<<(std::ostream &os, const struct ur_exp_sampler_addr_modes_t params) { os << ", "; os << ".addrModes = {"; - ur::details::printArray<3>(os, params.addrModes); + for (auto i = 0; i < 3; i++) { + if (i != 0) { + os << ", "; + } + + os << (params.addrModes[i]); + } os << "}"; os << "}"; @@ -12284,141 +12270,6 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, } } // namespace ur::details -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_kernel_arg_type_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_kernel_arg_type_t value) { - switch (value) { - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - os << "UR_EXP_KERNEL_ARG_TYPE_VALUE"; - break; - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - os << "UR_EXP_KERNEL_ARG_TYPE_POINTER"; - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: - os << "UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ"; - break; - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: - os << "UR_EXP_KERNEL_ARG_TYPE_LOCAL"; - break; - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: - os << "UR_EXP_KERNEL_ARG_TYPE_SAMPLER"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_kernel_arg_mem_obj_tuple_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_exp_kernel_arg_mem_obj_tuple_t params) { - os << "(struct ur_exp_kernel_arg_mem_obj_tuple_t){"; - - os << ".hMem = "; - - ur::details::printPtr(os, (params.hMem)); - - os << ", "; - os << ".flags = "; - - ur::details::printFlag(os, (params.flags)); - - os << "}"; - return os; -} -namespace ur::details { - -/////////////////////////////////////////////////////////////////////////////// -// @brief Print ur_exp_kernel_arg_value_t union -inline ur_result_t printUnion(std::ostream &os, - const union ur_exp_kernel_arg_value_t params, - const enum ur_exp_kernel_arg_type_t tag) { - os << "(union ur_exp_kernel_arg_value_t){"; - - switch (tag) { - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - - os << ".value = "; - - ur::details::printPtr(os, (params.value)); - - break; - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - - os << ".pointer = "; - - ur::details::printPtr(os, (params.pointer)); - - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: - - os << ".memObjTuple = "; - - os << (params.memObjTuple); - - break; - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: - - os << ".sampler = "; - - ur::details::printPtr(os, (params.sampler)); - - break; - default: - os << ""; - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - os << "}"; - return UR_RESULT_SUCCESS; -} -} // namespace ur::details -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_kernel_arg_properties_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - const struct ur_exp_kernel_arg_properties_t params) { - os << "(struct ur_exp_kernel_arg_properties_t){"; - - os << ".stype = "; - - os << (params.stype); - - os << ", "; - os << ".pNext = "; - - ur::details::printStruct(os, (params.pNext)); - - os << ", "; - os << ".type = "; - - os << (params.type); - - os << ", "; - os << ".index = "; - - os << (params.index); - - os << ", "; - os << ".size = "; - - os << (params.size); - - os << ", "; - os << ".value = "; - ur::details::printUnion(os, (params.value), params.type); - - os << "}"; - return os; -} /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_enqueue_ext_flag_t type /// @returns @@ -17068,114 +16919,6 @@ inline std::ostream &operator<<( return os; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the -/// ur_enqueue_kernel_launch_with_args_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, [[maybe_unused]] const struct - ur_enqueue_kernel_launch_with_args_exp_params_t *params) { - - os << ".hQueue = "; - - ur::details::printPtr(os, *(params->phQueue)); - - os << ", "; - os << ".hKernel = "; - - ur::details::printPtr(os, *(params->phKernel)); - - os << ", "; - os << ".workDim = "; - - os << *(params->pworkDim); - - os << ", "; - os << ".pGlobalWorkOffset = "; - - ur::details::printPtr(os, *(params->ppGlobalWorkOffset)); - - os << ", "; - os << ".pGlobalWorkSize = "; - - ur::details::printPtr(os, *(params->ppGlobalWorkSize)); - - os << ", "; - os << ".pLocalWorkSize = "; - - ur::details::printPtr(os, *(params->ppLocalWorkSize)); - - os << ", "; - os << ".numArgs = "; - - os << *(params->pnumArgs); - - os << ", "; - os << ".pArgs = "; - ur::details::printPtr(os, reinterpret_cast(*(params->ppArgs))); - if (*(params->ppArgs) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumArgs; ++i) { - if (i != 0) { - os << ", "; - } - - os << (*(params->ppArgs))[i]; - } - os << "}"; - } - - os << ", "; - os << ".numPropsInLaunchPropList = "; - - os << *(params->pnumPropsInLaunchPropList); - - os << ", "; - os << ".launchPropList = "; - ur::details::printPtr( - os, reinterpret_cast(*(params->plaunchPropList))); - if (*(params->plaunchPropList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumPropsInLaunchPropList; ++i) { - if (i != 0) { - os << ", "; - } - - os << (*(params->plaunchPropList))[i]; - } - os << "}"; - } - - os << ", "; - os << ".numEventsInWaitList = "; - - os << *(params->pnumEventsInWaitList); - - os << ", "; - os << ".phEventWaitList = "; - ur::details::printPtr( - os, reinterpret_cast(*(params->pphEventWaitList))); - if (*(params->pphEventWaitList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } - - ur::details::printPtr(os, (*(params->pphEventWaitList))[i]); - } - os << "}"; - } - - os << ", "; - os << ".phEvent = "; - - ur::details::printPtr(os, *(params->pphEvent)); - - return os; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the /// ur_enqueue_events_wait_with_barrier_ext_params_t type @@ -21304,25 +21047,6 @@ inline ur_result_t printPtr(std::ostream &os, const T *ptr) { return UR_RESULT_SUCCESS; } - -/////////////////////////////////////////////////////////////////////////////// -// @brief Print array of literals -template -inline ur_result_t printArray(std::ostream &os, const T *ptr) { - if (ptr == NULL) { - return printPtr(os, ptr); - } - - for (size_t i = 0; i < size; i++) { - if (i != 0) { - os << ", "; - } - - os << ptr[i]; - } - - return UR_RESULT_SUCCESS; -} } // namespace ur::details namespace ur::extras { @@ -21717,10 +21441,6 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, case UR_FUNCTION_ENQUEUE_WRITE_HOST_PIPE: { os << (const struct ur_enqueue_write_host_pipe_params_t *)params; } break; - case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP: { - os << (const struct ur_enqueue_kernel_launch_with_args_exp_params_t *) - params; - } break; case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: { os << (const struct ur_enqueue_events_wait_with_barrier_ext_params_t *) params; diff --git a/unified-runtime/scripts/core/EXP-ENQUEUE-KERNEL-LAUNCH-WITH-ARGS.rst b/unified-runtime/scripts/core/EXP-ENQUEUE-KERNEL-LAUNCH-WITH-ARGS.rst deleted file mode 100644 index 703cd1e935592..0000000000000 --- a/unified-runtime/scripts/core/EXP-ENQUEUE-KERNEL-LAUNCH-WITH-ARGS.rst +++ /dev/null @@ -1,77 +0,0 @@ -<% - OneApi=tags['$OneApi'] - x=tags['$x'] - X=x.upper() -%> - -.. _experimental-enqueue-kernel-launch-with-args: - -================================================================================ -Enqueue Kernel Launch With Args -================================================================================ - -.. warning:: - - Experimental features: - - * May be replaced, updated, or removed at any time. - * Do not require maintaining API/ABI stability of their own additions over - time. - * Do not require conformance testing of their own additions. - - - -Motivation --------------------------------------------------------------------------------- - -If an application is setting a kernel's args and launching that kernel in the -same place, we can eliminate some overhead by allowing this to be accomplished -with one API call, rather than requiring one call for each argument and one to -launch. This also aligns with developments in the Level Zero backend, as well -as how CUDA and HIP handle kernel args. - -API --------------------------------------------------------------------------------- - -Enums -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* ${x}_structure_type_t - ${X}_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES - -* ${x}_exp_kernel_arg_type_t - ${X}_EXP_KERNEL_ARG_TYPE_VALUE - ${X}_EXP_KERNEL_ARG_TYPE_POINTER - ${X}_EXP_KERNEL_ARG_TYPE_MEM_OBJ - ${X}_EXP_KERNEL_ARG_TYPE_LOCAL - ${X}_EXP_KERNEL_ARG_TYPE_SAMPLER - -Types -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* ${x}_exp_kernel_arg_mem_obj_tuple_t -* ${x}_exp_kernel_arg_value_t -* ${x}_exp_kernel_arg_properties_t - -Functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* ${x}EnqueueKernelLaunchWithArgsExp - -Changelog --------------------------------------------------------------------------------- - -+-----------+---------------------------------------------+ -| Revision | Changes | -+===========+=============================================+ -| 1.0 | Initial Draft | -+-----------+---------------------------------------------+ - -Support --------------------------------------------------------------------------------- - -Adapters must support this feature. A naive implementation can easily be -constructed as a wrapper around the existing APIs for setting kernel args and -launching. - -Contributors --------------------------------------------------------------------------------- - -* Aaron Greig `aaron.greig@codeplay.com `_ diff --git a/unified-runtime/scripts/core/exp-enqueue-kernel-launch-with-args.yml b/unified-runtime/scripts/core/exp-enqueue-kernel-launch-with-args.yml deleted file mode 100644 index 6656b6a6d0299..0000000000000 --- a/unified-runtime/scripts/core/exp-enqueue-kernel-launch-with-args.yml +++ /dev/null @@ -1,170 +0,0 @@ -# -# Copyright (C) 2025 Intel Corporation -# -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# See YaML.md for syntax definition -# ---- #-------------------------------------------------------------------------- -type: header -desc: "Intel $OneApi Unified Runtime Experimental API for setting args at kernel launch" -ordinal: "100" ---- #-------------------------------------------------------------------------- -type: enum -desc: "What kind of kernel arg is this" -class: $xKernel -name: $x_exp_kernel_arg_type_t -etors: - - name: VALUE - desc: "Kernel arg is a value." - - name: POINTER - desc: "Kernel arg is a pointer." - - name: MEM_OBJ - desc: "Kernel arg is a memory object." - - name: LOCAL - desc: "Kernel arg is a local allocation." - - name: SAMPLER - desc: "Kernel arg is a sampler." ---- #-------------------------------------------------------------------------- -type: struct -desc: "Mem obj/properties tuple" -name: $x_exp_kernel_arg_mem_obj_tuple_t -members: - - type: $x_mem_handle_t - name: hMem - desc: "[in] Handle of a memory object" - - type: $x_mem_flags_t - name: flags - desc: "[in] Memory flags to associate with `hMem`. Allowed values are: $X_MEM_FLAG_READ_WRITE, $X_MEM_FLAG_WRITE_ONLY, $X_MEM_FLAG_READ_ONLY." ---- #-------------------------------------------------------------------------- -# We have redundant entries in the union (value + pointer) to make printing work -# as it relies on the tags and we can currently only have one tag per member. -type: union -desc: "Typesafe container for a kernel argument value" -name: $x_exp_kernel_arg_value_t -tag: $x_exp_kernel_arg_type_t -members: - - type: "const void*" - name: value - desc: | - [in] argument value represented as matching arg type. - The data pointed to will be copied and therefore can be reused on return. - tag: $X_EXP_KERNEL_ARG_TYPE_VALUE - - type: "const void*" - name: pointer - desc: "[in] Allocation obtained by USM allocation or virtual memory mapping operation, or pointer to a literal value." - tag: $X_EXP_KERNEL_ARG_TYPE_POINTER - - type: $x_exp_kernel_arg_mem_obj_tuple_t - name: memObjTuple - desc: "[in] Struct containing a memory object and associated flags." - tag: $X_EXP_KERNEL_ARG_TYPE_MEM_OBJ - - type: $x_sampler_handle_t - name: sampler - desc: "[in] Handle of a sampler object." - tag: $X_EXP_KERNEL_ARG_TYPE_SAMPLER ---- #-------------------------------------------------------------------------- -type: struct -desc: "Kernel arg properties" -name: $x_exp_kernel_arg_properties_t -base: $x_base_properties_t -members: - - type: $x_exp_kernel_arg_type_t - name: type - desc: "[in] type of the kernel arg" - - type: uint32_t - name: index - desc: "[in] index of the kernel arg" - - type: size_t - name: size - desc: "[in] size of the kernel arg" - - type: $x_exp_kernel_arg_value_t - name: value - desc: "[in][tagged_by(type)] Union containing the argument value." ---- #-------------------------------------------------------------------------- -type: enum -extend: true -desc: "Structure Type experimental enumerations." -name: $x_structure_type_t -etors: - - name: EXP_KERNEL_ARG_PROPERTIES - desc: $x_exp_kernel_arg_properties_t - value: "0x5000" ---- #-------------------------------------------------------------------------- -type: function -desc: "Enqueue a command to execute a kernel" -class: $xEnqueue -name: KernelLaunchWithArgsExp -ordinal: "0" -analogue: - - "**clEnqueueNDRangeKernel**" -params: - - type: $x_queue_handle_t - name: hQueue - desc: "[in] handle of the queue object" - - type: $x_kernel_handle_t - name: hKernel - desc: "[in] handle of the kernel object" - - type: uint32_t - name: workDim - desc: "[in] number of dimensions, from 1 to 3, to specify the global and work-group work-items" - - type: "const size_t*" - name: pGlobalWorkOffset - desc: "[in][optional] pointer to an array of workDim unsigned values that specify the offset used to calculate the global ID of a work-item" - - type: "const size_t*" - name: pGlobalWorkSize - desc: "[in] pointer to an array of workDim unsigned values that specify the number of global work-items in workDim that will execute the kernel function" - - type: "const size_t*" - name: pLocalWorkSize - desc: | - [in][optional] pointer to an array of workDim unsigned values that specify the number of local work-items forming a work-group that will execute the kernel function. - If nullptr, the runtime implementation will choose the work-group size. - - type: uint32_t - name: numArgs - desc: "[in] Number of entries in pArgs" - - type: "const $x_exp_kernel_arg_properties_t*" - name: pArgs - desc: "[in][optional][range(0, numArgs)] pointer to a list of kernel arg properties." - - type: uint32_t - name: numPropsInLaunchPropList - desc: "[in] size of the launch prop list" - - type: const $x_kernel_launch_property_t* - name: launchPropList - desc: "[in][optional][range(0, numPropsInLaunchPropList)] pointer to a list of launch properties" - - type: uint32_t - name: numEventsInWaitList - desc: "[in] size of the event wait list" - - type: "const $x_event_handle_t*" - name: phEventWaitList - desc: | - [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. - If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. - - type: $x_event_handle_t* - name: phEvent - desc: | - [out][optional][alloc] return an event object that identifies this particular kernel execution instance. If phEventWaitList and phEvent are not NULL, phEvent must not refer to an element of the phEventWaitList array. -returns: - - $X_RESULT_ERROR_INVALID_QUEUE - - $X_RESULT_ERROR_INVALID_KERNEL - - $X_RESULT_ERROR_INVALID_EVENT - - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: - - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - - "If event objects in phEventWaitList are not valid events." - - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: - - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - - $X_RESULT_ERROR_INVALID_WORK_DIMENSION: - - "`pGlobalWorkSize[0] == 0 || pGlobalWorkSize[1] == 0 || pGlobalWorkSize[2] == 0`" - - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE: - - "`pLocalWorkSize && (pLocalWorkSize[0] == 0 || pLocalWorkSize[1] == 0 || pLocalWorkSize[2] == 0)`" - - $X_RESULT_ERROR_INVALID_VALUE - - $X_RESULT_ERROR_INVALID_KERNEL_ARGS - - "The kernel argument values have not been specified." - - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - - $X_RESULT_ERROR_OUT_OF_RESOURCES - - $X_RESULT_ERROR_INVALID_OPERATION: - - "If any property in `launchPropList` isn't supported by the device." - - $X_RESULT_ERROR_INVALID_NULL_POINTER: - - "`launchPropList == NULL && numPropsInLaunchPropList > 0`" - - "`pArgs == NULL && numArgs > 0`" diff --git a/unified-runtime/scripts/core/registry.yml b/unified-runtime/scripts/core/registry.yml index 0646945b082f5..349ac97a27ae4 100644 --- a/unified-runtime/scripts/core/registry.yml +++ b/unified-runtime/scripts/core/registry.yml @@ -670,9 +670,6 @@ etors: - name: BINDLESS_IMAGES_SUPPORTS_IMPORTING_HANDLE_TYPE_EXP desc: Enumerator for $xBindlessImagesSupportsImportingHandleTypeExp value: '288' -- name: ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP - desc: Enumerator for $xEnqueueKernelLaunchWithArgsExp - value: '289' --- type: enum desc: Defines structure types diff --git a/unified-runtime/scripts/parse_specs.py b/unified-runtime/scripts/parse_specs.py index 8adbc6de19f4a..9fdb69eedc473 100644 --- a/unified-runtime/scripts/parse_specs.py +++ b/unified-runtime/scripts/parse_specs.py @@ -909,9 +909,7 @@ def _append(lst, key, val): rets[idx][key].append(val) def append_nullchecks(param, accessor: str): - if type_traits.is_pointer(param["type"]) or type_traits.is_array( - param["type"] - ): + if type_traits.is_pointer(param["type"]): _append( rets, "$X_RESULT_ERROR_INVALID_NULL_POINTER", diff --git a/unified-runtime/scripts/templates/helper.py b/unified-runtime/scripts/templates/helper.py index 5b4fc0c18c2c5..00de01e34753a 100644 --- a/unified-runtime/scripts/templates/helper.py +++ b/unified-runtime/scripts/templates/helper.py @@ -987,17 +987,12 @@ def make_param_lines( words = [] if "type*" in format: - ptname = tname + "*" - if type_traits.is_array(item["type"]): - ptname += "*" - words.append(ptname) + words.append(tname + "*") name = "p" + name elif "type" in format: words.append(tname) if "name" in format: words.append(name) - if type_traits.is_array(item["type"]) and "type" in format: - words.append(f"[{type_traits.get_array_length(item['type'])}]") prologue = " ".join(words) if "delim" in format: diff --git a/unified-runtime/scripts/templates/print.hpp.mako b/unified-runtime/scripts/templates/print.hpp.mako index ed94cd9227996..4481847130cbe 100644 --- a/unified-runtime/scripts/templates/print.hpp.mako +++ b/unified-runtime/scripts/templates/print.hpp.mako @@ -99,7 +99,14 @@ def findMemberType(_item): ${x}::details::printUnion(os, ${deref}(params${access}${item['name']}), params${access}${th.param_traits.tagged_member(item)}); %elif th.type_traits.is_array(item['type']): os << ".${iname} = {"; - ${x}::details::printArray<${th.type_traits.get_array_length(item['type'])}>(os, ${deref}params${access}${pname}); + for(auto i = 0; i < ${th.type_traits.get_array_length(item['type'])}; i++){ + if(i != 0){ + os << ", "; + } + <%call expr="member(iname, itype, True)"> + ${deref}(params${access}${item['name']}[i]) + + } os << "}"; %elif typename is not None: os << ".${iname} = "; @@ -133,7 +140,6 @@ inline constexpr bool is_handle_v = is_handle::value; template inline ${x}_result_t printPtr(std::ostream &os, const T *ptr); template inline ${x}_result_t printFlag(std::ostream &os, uint32_t flag); template inline ${x}_result_t printTagged(std::ostream &os, const void *ptr, T value, size_t size); -template inline ur_result_t printArray(std::ostream &os, const T *ptr); %for spec in specs: %for obj in spec['objects']: @@ -558,25 +564,6 @@ template inline ${x}_result_t printPtr(std::ostream &os, const T *p return ${X}_RESULT_SUCCESS; } - -/////////////////////////////////////////////////////////////////////////////// -// @brief Print array of literals -template -inline ur_result_t printArray(std::ostream &os, const T *ptr) { - if(ptr == NULL) { - return printPtr(os, ptr); - } - - for (size_t i = 0; i < size; i++) { - if (i != 0) { - os << ", "; - } - - os << ptr[i]; - } - - return ${X}_RESULT_SUCCESS; -} } // namespace ${x}::details namespace ${x}::extras { diff --git a/unified-runtime/source/adapters/cuda/enqueue.cpp b/unified-runtime/source/adapters/cuda/enqueue.cpp index 091e8e9d53d44..8eb00ccab2ca1 100644 --- a/unified-runtime/source/adapters/cuda/enqueue.cpp +++ b/unified-runtime/source/adapters/cuda/enqueue.cpp @@ -15,7 +15,6 @@ #include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" -#include "sampler.hpp" #include #include @@ -620,60 +619,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( #endif // CUDA_VERSION >= 11080 } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - try { - for (uint32_t i = 0; i < numArgs; i++) { - switch (pArgs[i].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: { - hKernel->setKernelLocalArg(pArgs[i].index, pArgs[i].size); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_VALUE: { - hKernel->setKernelArg(pArgs[i].index, pArgs[i].size, - pArgs[i].value.value); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_POINTER: { - // setKernelArg is expecting a pointer to our argument - hKernel->setKernelArg(pArgs[i].index, pArgs[i].size, - &pArgs[i].value.pointer); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - ur_kernel_arg_mem_obj_properties_t Props = { - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr, - pArgs[i].value.memObjTuple.flags}; - UR_CALL(urKernelSetArgMemObj(hKernel, pArgs[i].index, &Props, - pArgs[i].value.memObjTuple.hMem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - uint32_t SamplerProps = pArgs[i].value.sampler->Props; - hKernel->setKernelArg(pArgs[i].index, sizeof(uint32_t), - (void *)&SamplerProps); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - } catch (ur_result_t Err) { - return Err; - } - return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} - /// Set parameters for general 3D memory copy. /// If the source and/or destination is on the device, SrcPtr and/or DstPtr /// must be a pointer to a CUdeviceptr diff --git a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp index a9b072472b7c9..8430df0ab0678 100644 --- a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp @@ -454,7 +454,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnUSMHostAllocExp = urEnqueueUSMHostAllocExp; pDdiTable->pfnUSMFreeExp = urEnqueueUSMFreeExp; pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp; - pDdiTable->pfnKernelLaunchWithArgsExp = urEnqueueKernelLaunchWithArgsExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/hip/enqueue.cpp b/unified-runtime/source/adapters/hip/enqueue.cpp index 54ea1ca91a71d..dc0fac8050eb9 100644 --- a/unified-runtime/source/adapters/hip/enqueue.cpp +++ b/unified-runtime/source/adapters/hip/enqueue.cpp @@ -16,7 +16,6 @@ #include "logger/ur_logger.hpp" #include "memory.hpp" #include "queue.hpp" -#include "sampler.hpp" #include "ur_api.h" #include @@ -341,60 +340,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - try { - for (uint32_t i = 0; i < numArgs; i++) { - switch (pArgs[i].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: { - hKernel->setKernelLocalArg(pArgs[i].index, pArgs[i].size); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_VALUE: { - hKernel->setKernelArg(pArgs[i].index, pArgs[i].size, - pArgs[i].value.value); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_POINTER: { - // setKernelArg is expecting a pointer to our argument - hKernel->setKernelArg(pArgs[i].index, pArgs[i].size, - &pArgs[i].value.pointer); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - ur_kernel_arg_mem_obj_properties_t Props = { - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr, - pArgs[i].value.memObjTuple.flags}; - UR_CALL(urKernelSetArgMemObj(hKernel, pArgs[i].index, &Props, - pArgs[i].value.memObjTuple.hMem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - uint32_t SamplerProps = pArgs[i].value.sampler->Props; - hKernel->setKernelArg(pArgs[i].index, sizeof(uint32_t), - (void *)&SamplerProps); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - } catch (ur_result_t Err) { - return Err; - } - return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} - /// Enqueues a wait on the given queue for all events. /// See \ref enqueueEventWait /// diff --git a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp index d8ec6bb3b50c9..dfb4382cad828 100644 --- a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp @@ -447,7 +447,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp; - pDdiTable->pfnKernelLaunchWithArgsExp = urEnqueueKernelLaunchWithArgsExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/level_zero/kernel.cpp b/unified-runtime/source/adapters/level_zero/kernel.cpp index 06d1366a119b5..b6d3d2e64ce4e 100644 --- a/unified-runtime/source/adapters/level_zero/kernel.cpp +++ b/unified-runtime/source/adapters/level_zero/kernel.cpp @@ -56,173 +56,6 @@ ur_result_t urKernelGetSuggestedLocalWorkSize( return UR_RESULT_SUCCESS; } -inline ur_result_t KernelSetArgValueHelper( - ur_kernel_handle_t Kernel, - /// [in] argument index in range [0, num args - 1] - uint32_t ArgIndex, - /// [in] size of argument type - size_t ArgSize, - /// [in] argument value represented as matching arg type. - const void *PArgValue) { - // OpenCL: "the arg_value pointer can be NULL or point to a NULL value - // in which case a NULL value will be used as the value for the argument - // declared as a pointer to global or constant memory in the kernel" - // - // We don't know the type of the argument but it seems that the only time - // SYCL RT would send a pointer to NULL in 'arg_value' is when the argument - // is a NULL pointer. Treat a pointer to NULL in 'arg_value' as a NULL. - if (ArgSize == sizeof(void *) && PArgValue && - *(void **)(const_cast(PArgValue)) == nullptr) { - PArgValue = nullptr; - } - - if (ArgIndex > Kernel->ZeKernelProperties->numKernelArgs - 1) { - return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; - } - - ze_result_t ZeResult = ZE_RESULT_SUCCESS; - if (Kernel->ZeKernelMap.empty()) { - auto ZeKernel = Kernel->ZeKernel; - ZeResult = ZE_CALL_NOCHECK(zeKernelSetArgumentValue, - (ZeKernel, ArgIndex, ArgSize, PArgValue)); - } else { - for (auto It : Kernel->ZeKernelMap) { - auto ZeKernel = It.second; - ZeResult = ZE_CALL_NOCHECK(zeKernelSetArgumentValue, - (ZeKernel, ArgIndex, ArgSize, PArgValue)); - } - } - - if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) { - return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE; - } - - return ze2urResult(ZeResult); -} - -inline ur_result_t KernelSetArgMemObjHelper( - /// [in] handle of the kernel object - ur_kernel_handle_t Kernel, - /// [in] argument index in range [0, num args - 1] - uint32_t ArgIndex, - /// [in][optional] pointer to Memory object properties. - const ur_kernel_arg_mem_obj_properties_t *Properties, - /// [in][optional] handle of Memory object. - ur_mem_handle_t ArgValue) { - // The ArgValue may be a NULL pointer in which case a NULL value is used for - // the kernel argument declared as a pointer to global or constant memory. - - if (ArgIndex > Kernel->ZeKernelProperties->numKernelArgs - 1) { - return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; - } - - ur_mem_handle_t_ *UrMem = ur_cast(ArgValue); - - ur_mem_handle_t_::access_mode_t UrAccessMode = ur_mem_handle_t_::read_write; - if (Properties) { - switch (Properties->memoryAccess) { - case UR_MEM_FLAG_READ_WRITE: - UrAccessMode = ur_mem_handle_t_::read_write; - break; - case UR_MEM_FLAG_WRITE_ONLY: - UrAccessMode = ur_mem_handle_t_::write_only; - break; - case UR_MEM_FLAG_READ_ONLY: - UrAccessMode = ur_mem_handle_t_::read_only; - break; - case 0: - break; - default: - return UR_RESULT_ERROR_INVALID_ARGUMENT; - } - } - auto Arg = UrMem ? UrMem : nullptr; - Kernel->PendingArguments.push_back( - {ArgIndex, sizeof(void *), Arg, UrAccessMode}); - - return UR_RESULT_SUCCESS; -} - -ur_result_t urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t Queue, - /// [in] handle of the kernel object - ur_kernel_handle_t Kernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *GlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *GlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *LocalWorkSize, - /// [in] size of the event wait list - uint32_t NumArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *Args, - /// [in] size of the launch prop list - uint32_t NumPropsInLaunchPropList, - /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch - /// properties - const ur_kernel_launch_property_t *LaunchPropList, - uint32_t NumEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. If - /// nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *EventWaitList, - /// [in,out][optional] return an event object that identifies this - /// particular kernel execution instance. - ur_event_handle_t *OutEvent) { - { - std::scoped_lock Guard(Kernel->Mutex); - for (uint32_t i = 0; i < NumArgs; i++) { - switch (Args[i].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: - UR_CALL(KernelSetArgValueHelper(Kernel, Args[i].index, Args[i].size, - nullptr)); - break; - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - UR_CALL(KernelSetArgValueHelper(Kernel, Args[i].index, Args[i].size, - Args[i].value.value)); - break; - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - UR_CALL(KernelSetArgValueHelper(Kernel, Args[i].index, Args[i].size, - &Args[i].value.pointer)); - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - ur_kernel_arg_mem_obj_properties_t Properties = { - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr, - Args[i].value.memObjTuple.flags}; - UR_CALL(KernelSetArgMemObjHelper(Kernel, Args[i].index, &Properties, - Args[i].value.memObjTuple.hMem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - UR_CALL(KernelSetArgValueHelper(Kernel, Args[i].index, Args[i].size, - &Args[i].value.sampler->ZeSampler)); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - } - // Normalize so each dimension has at least one work item - return level_zero::urEnqueueKernelLaunch( - Queue, Kernel, workDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize, - NumPropsInLaunchPropList, LaunchPropList, NumEventsInWaitList, - EventWaitList, OutEvent); -} - ur_result_t urEnqueueKernelLaunch( /// [in] handle of the queue object ur_queue_handle_t Queue, diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp index 4276f97f5dd29..13d7274e7aebf 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp @@ -225,8 +225,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( return result; } - pDdiTable->pfnKernelLaunchWithArgsExp = - ur::level_zero::urEnqueueKernelLaunchWithArgsExp; pDdiTable->pfnUSMDeviceAllocExp = ur::level_zero::urEnqueueUSMDeviceAllocExp; pDdiTable->pfnUSMSharedAllocExp = ur::level_zero::urEnqueueUSMSharedAllocExp; pDdiTable->pfnUSMHostAllocExp = ur::level_zero::urEnqueueUSMHostAllocExp; diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index b0d683e7a5667..df8e93c1f768a 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -804,15 +804,6 @@ ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet); -ur_result_t urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent); ur_result_t urEnqueueEventsWaitWithBarrierExt( ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *pProperties, diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index 04e202265d05c..728db1360b0bd 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -11,7 +11,6 @@ #include "command_list_manager.hpp" #include "../helpers/kernel_helpers.hpp" #include "../helpers/memory_helpers.hpp" -#include "../sampler.hpp" #include "../ur_interface_loader.hpp" #include "command_buffer.hpp" #include "context.hpp" @@ -976,60 +975,3 @@ ur_result_t ur_command_list_manager::releaseSubmittedKernels() { submittedKernels.clear(); return UR_RESULT_SUCCESS; } - -ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t phEvent) { - TRACK_SCOPE_LATENCY( - "ur_queue_immediate_in_order_t::enqueueKernelLaunchWithArgsExp"); - { - std::scoped_lock guard(hKernel->Mutex); - for (uint32_t argIndex = 0; argIndex < numArgs; argIndex++) { - switch (pArgs[argIndex].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: - UR_CALL(hKernel->setArgValue(pArgs[argIndex].index, - pArgs[argIndex].size, nullptr, nullptr)); - break; - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - UR_CALL(hKernel->setArgValue(pArgs[argIndex].index, - pArgs[argIndex].size, nullptr, - pArgs[argIndex].value.value)); - break; - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - UR_CALL(hKernel->setArgPointer(pArgs[argIndex].index, nullptr, - pArgs[argIndex].value.pointer)); - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: - // TODO: import helper for converting ur flags to internal equivalent - UR_CALL(hKernel->addPendingMemoryAllocation( - {pArgs[argIndex].value.memObjTuple.hMem, - ur_mem_buffer_t::device_access_mode_t::read_write, - pArgs[argIndex].index})); - break; - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - UR_CALL( - hKernel->setArgValue(argIndex, sizeof(void *), nullptr, - &pArgs[argIndex].value.sampler->ZeSampler)); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - } - - UR_CALL(appendKernelLaunch(hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent)); - - recordSubmittedKernel(hKernel); - - return UR_RESULT_SUCCESS; -} diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp index 2a18f9b919662..a7eafa8f9cecc 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp @@ -231,16 +231,6 @@ struct ur_command_list_manager { const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent); - ur_result_t appendKernelLaunchWithArgsExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t phEvent); - private: ur_result_t appendGenericCommandListsExp( uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, diff --git a/unified-runtime/source/adapters/level_zero/v2/kernel.cpp b/unified-runtime/source/adapters/level_zero/v2/kernel.cpp index f48a41154e0f7..173b51ffc42a5 100644 --- a/unified-runtime/source/adapters/level_zero/v2/kernel.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/kernel.cpp @@ -417,17 +417,19 @@ ur_result_t urKernelSetArgPointer( return exceptionToResult(std::current_exception()); } -static ur_mem_buffer_t::device_access_mode_t -memAccessFromKernelProperties(const ur_mem_flags_t &Flags) { - switch (Flags) { - case UR_MEM_FLAG_READ_WRITE: - return ur_mem_buffer_t::device_access_mode_t::read_write; - case UR_MEM_FLAG_WRITE_ONLY: - return ur_mem_buffer_t::device_access_mode_t::write_only; - case UR_MEM_FLAG_READ_ONLY: - return ur_mem_buffer_t::device_access_mode_t::read_only; - default: - return ur_mem_buffer_t::device_access_mode_t::read_write; +static ur_mem_buffer_t::device_access_mode_t memAccessFromKernelProperties( + const ur_kernel_arg_mem_obj_properties_t *pProperties) { + if (pProperties) { + switch (pProperties->memoryAccess) { + case UR_MEM_FLAG_READ_WRITE: + return ur_mem_buffer_t::device_access_mode_t::read_write; + case UR_MEM_FLAG_WRITE_ONLY: + return ur_mem_buffer_t::device_access_mode_t::write_only; + case UR_MEM_FLAG_READ_ONLY: + return ur_mem_buffer_t::device_access_mode_t::read_only; + default: + return ur_mem_buffer_t::device_access_mode_t::read_write; + } } return ur_mem_buffer_t::device_access_mode_t::read_write; } @@ -441,10 +443,7 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, std::scoped_lock guard(hKernel->Mutex); UR_CALL(hKernel->addPendingMemoryAllocation( - {hArgValue, - memAccessFromKernelProperties(pProperties ? pProperties->memoryAccess - : 0), - argIndex})); + {hArgValue, memAccessFromKernelProperties(pProperties), argIndex})); return UR_RESULT_SUCCESS; } catch (...) { diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp index 660ed54406b72..d043a68dcaec7 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.cpp @@ -440,22 +440,6 @@ ur_result_t urEnqueueTimestampRecordingExp( } catch (...) { return exceptionToResult(std::current_exception()); } -ur_result_t urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) try { - return hQueue->get().enqueueKernelLaunchWithArgsExp( - hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numArgs, pArgs, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} catch (...) { - return exceptionToResult(std::current_exception()); -} ur_result_t urEnqueueEventsWaitWithBarrierExt( ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *pProperties, diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp index 47425c5772d3d..b710f9d56b50d 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_api.hpp @@ -162,11 +162,6 @@ struct ur_queue_t_ { virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueKernelLaunchWithArgsExp( - ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, - const size_t *, uint32_t, const ur_exp_kernel_arg_properties_t *, - uint32_t, const ur_kernel_launch_property_t *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; virtual ur_result_t enqueueEventsWaitWithBarrierExt(const ur_exp_enqueue_ext_properties_t *, uint32_t, const ur_event_handle_t *, diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 7b5f36da47f4c..74b37d1b40eb3 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -453,22 +453,6 @@ struct ur_queue_immediate_in_order_t : ur_object, ur_queue_t_ { createEventIfRequested(eventPool.get(), phEvent, this)); } - ur_result_t enqueueKernelLaunchWithArgsExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override { - return commandListManager.lock()->appendKernelLaunchWithArgsExp( - hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numArgs, pArgs, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, - createEventIfRequested(eventPool.get(), phEvent, this)); - } - ur::RefCount RefCount; }; diff --git a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp index 5712375a84dc6..07e8743154ded 100644 --- a/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp +++ b/unified-runtime/source/adapters/level_zero/v2/queue_immediate_out_of_order.hpp @@ -505,24 +505,6 @@ struct ur_queue_immediate_out_of_order_t : ur_object, ur_queue_t_ { createEventIfRequested(eventPool.get(), phEvent, this)); } - ur_result_t enqueueKernelLaunchWithArgsExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override { - auto commandListId = getNextCommandListId(); - return commandListManagers.lock()[commandListId] - .appendKernelLaunchWithArgsExp( - hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numArgs, pArgs, numPropsInLaunchPropList, - launchPropList, numEventsInWaitList, phEventWaitList, - createEventIfRequested(eventPool.get(), phEvent, this)); - } - ur::RefCount RefCount; }; diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 74cb1accfa448..39d67fff43aa6 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -11915,107 +11915,6 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunchWithArgsExp -__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent) try { - ur_result_t result = UR_RESULT_SUCCESS; - - ur_enqueue_kernel_launch_with_args_exp_params_t params = { - &hQueue, - &hKernel, - &workDim, - &pGlobalWorkOffset, - &pGlobalWorkSize, - &pLocalWorkSize, - &numArgs, - &pArgs, - &numPropsInLaunchPropList, - &launchPropList, - &numEventsInWaitList, - &phEventWaitList, - &phEvent}; - - auto beforeCallback = reinterpret_cast( - mock::getCallbacks().get_before_callback( - "urEnqueueKernelLaunchWithArgsExp")); - if (beforeCallback) { - result = beforeCallback(¶ms); - if (result != UR_RESULT_SUCCESS) { - return result; - } - } - - auto replaceCallback = reinterpret_cast( - mock::getCallbacks().get_replace_callback( - "urEnqueueKernelLaunchWithArgsExp")); - if (replaceCallback) { - result = replaceCallback(¶ms); - } else { - - // optional output handle - if (phEvent) { - *phEvent = mock::createDummyHandle(); - } - result = UR_RESULT_SUCCESS; - } - - if (result != UR_RESULT_SUCCESS) { - return result; - } - - auto afterCallback = reinterpret_cast( - mock::getCallbacks().get_after_callback( - "urEnqueueKernelLaunchWithArgsExp")); - if (afterCallback) { - return afterCallback(¶ms); - } - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( @@ -12521,9 +12420,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - pDdiTable->pfnKernelLaunchWithArgsExp = - driver::urEnqueueKernelLaunchWithArgsExp; - pDdiTable->pfnUSMDeviceAllocExp = driver::urEnqueueUSMDeviceAllocExp; pDdiTable->pfnUSMSharedAllocExp = driver::urEnqueueUSMSharedAllocExp; diff --git a/unified-runtime/source/adapters/native_cpu/enqueue.cpp b/unified-runtime/source/adapters/native_cpu/enqueue.cpp index 4c780031f8cf7..5fecdc5b8f67d 100644 --- a/unified-runtime/source/adapters/native_cpu/enqueue.cpp +++ b/unified-runtime/source/adapters/native_cpu/enqueue.cpp @@ -621,45 +621,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( const ur_event_handle_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - for (uint32_t argIndex = 0; argIndex < numArgs; argIndex++) { - switch (pArgs[argIndex].type) { - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - UR_CALL(hKernel->addArg(pArgs[argIndex].value.value, - pArgs[argIndex].index, pArgs[argIndex].size)); - break; - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - UR_CALL( - hKernel->addPtrArg(const_cast(pArgs[argIndex].value.pointer), - pArgs[argIndex].index)); - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - auto MemObj = pArgs[argIndex].value.memObjTuple.hMem; - UR_CALL(hKernel->addMemObjArg(MemObj, pArgs[argIndex].index)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: - UR_CALL( - hKernel->addLocalArg(pArgs[argIndex].index, pArgs[argIndex].size)); - break; - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - break; - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} diff --git a/unified-runtime/source/adapters/native_cpu/kernel.cpp b/unified-runtime/source/adapters/native_cpu/kernel.cpp index f67c7653d0981..ac11331357f39 100644 --- a/unified-runtime/source/adapters/native_cpu/kernel.cpp +++ b/unified-runtime/source/adapters/native_cpu/kernel.cpp @@ -61,14 +61,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( // TODO: error checking UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); - return hKernel->addArg(pArgValue, argIndex, argSize); + hKernel->addArg(pArgValue, argIndex, argSize); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t * /*pProperties*/) { - return hKernel->addLocalArg(argIndex, argSize); + // emplace a placeholder kernel arg, gets replaced with a pointer to the + // memory pool before enqueueing the kernel. + hKernel->addPtrArg(nullptr, argIndex); + hKernel->_localArgInfo.emplace_back(argIndex, argSize); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, @@ -204,8 +211,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( const void *pArgValue) { UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pArgValue, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + hKernel->addPtrArg(const_cast(pArgValue), argIndex); - return hKernel->addPtrArg(const_cast(pArgValue), argIndex); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL @@ -232,7 +242,16 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - return hKernel->addMemObjArg(hArgValue, argIndex); + // Taken from ur/adapters/cuda/kernel.cpp + // zero-sized buffers are expected to be null. + if (hArgValue == nullptr) { + hKernel->addPtrArg(nullptr, argIndex); + return UR_RESULT_SUCCESS; + } + + hKernel->addArgReference(hArgValue); + hKernel->addPtrArg(hArgValue->_mem, argIndex); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( diff --git a/unified-runtime/source/adapters/native_cpu/kernel.hpp b/unified-runtime/source/adapters/native_cpu/kernel.hpp index 285b1c00d0e63..8daf23feb65f5 100644 --- a/unified-runtime/source/adapters/native_cpu/kernel.hpp +++ b/unified-runtime/source/adapters/native_cpu/kernel.hpp @@ -181,44 +181,17 @@ struct ur_kernel_handle_t_ : RefCounted { return Result; } - inline ur_result_t addArg(const void *Ptr, size_t Index, size_t Size) { - UR_ASSERT(Size, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); + void addArg(const void *Ptr, size_t Index, size_t Size) { Args.addArg(Index, Size, Ptr); - return UR_RESULT_SUCCESS; } - inline ur_result_t addPtrArg(void *Ptr, size_t Index) { - UR_ASSERT(Ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); - Args.addPtrArg(Index, Ptr); - return UR_RESULT_SUCCESS; - } + void addPtrArg(void *Ptr, size_t Index) { Args.addPtrArg(Index, Ptr); } void addArgReference(ur_mem_handle_t Arg) { Arg->incrementReferenceCount(); ReferencedArgs.push_back(Arg); } - inline ur_result_t addMemObjArg(ur_mem_handle_t ArgValue, size_t Index) { - // Taken from ur/adapters/cuda/kernel.cpp - // zero-sized buffers are expected to be null. - if (ArgValue == nullptr) { - addPtrArg(nullptr, Index); - return UR_RESULT_SUCCESS; - } - - addArgReference(ArgValue); - addPtrArg(ArgValue->_mem, Index); - return UR_RESULT_SUCCESS; - } - - inline ur_result_t addLocalArg(size_t Index, size_t Size) { - // emplace a placeholder kernel arg, gets replaced with a pointer to the - // memory pool before enqueueing the kernel. - Args.addPtrArg(Index, nullptr); - _localArgInfo.emplace_back(Index, Size); - return UR_RESULT_SUCCESS; - } - private: void removeArgReferences() { for (auto arg : ReferencedArgs) diff --git a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp index 7bc2f999a00cd..3f6fe061b4917 100644 --- a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp @@ -431,7 +431,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp; - pDdiTable->pfnKernelLaunchWithArgsExp = urEnqueueKernelLaunchWithArgsExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/offload/enqueue.cpp b/unified-runtime/source/adapters/offload/enqueue.cpp index cd89280c5a5e1..b1a1edac522b2 100644 --- a/unified-runtime/source/adapters/offload/enqueue.cpp +++ b/unified-runtime/source/adapters/offload/enqueue.cpp @@ -274,40 +274,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( return Result; } - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - for (uint32_t i = 0; i < numArgs; i++) { - switch (pArgs[i].type) { - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - hKernel->Args.addArg(pArgs[i].index, sizeof(pArgs[i].value.pointer), - &pArgs[i].value.pointer); - break; - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - hKernel->Args.addArg(pArgs[i].index, pArgs[i].size, pArgs[i].value.value); - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: - hKernel->Args.addMemObjArg(pArgs[i].index, - pArgs[i].value.memObjTuple.hMem, - pArgs[i].value.memObjTuple.flags); - break; - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - return urEnqueueKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} diff --git a/unified-runtime/source/adapters/offload/kernel.cpp b/unified-runtime/source/adapters/offload/kernel.cpp index 02a7ee3a3f7b2..58c4f6cf7ffc7 100644 --- a/unified-runtime/source/adapters/offload/kernel.cpp +++ b/unified-runtime/source/adapters/offload/kernel.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "kernel.hpp" +#include "memory.hpp" #include "program.hpp" #include "ur2offload.hpp" #include @@ -87,11 +88,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *Properties, ur_mem_handle_t hArgValue) { + // Handle zero-sized buffers + if (hArgValue == nullptr) { + hKernel->Args.addArg(argIndex, 0, nullptr); + return UR_RESULT_SUCCESS; + } + ur_mem_flags_t MemAccess = Properties ? Properties->memoryAccess : static_cast(UR_MEM_FLAG_READ_WRITE); hKernel->Args.addMemObjArg(argIndex, hArgValue, MemAccess); + auto Ptr = std::get(hArgValue->Mem).Ptr; + hKernel->Args.addArg(argIndex, sizeof(void *), &Ptr); return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/offload/kernel.hpp b/unified-runtime/source/adapters/offload/kernel.hpp index a5e7f16f2bfe8..83866b5974b94 100644 --- a/unified-runtime/source/adapters/offload/kernel.hpp +++ b/unified-runtime/source/adapters/offload/kernel.hpp @@ -18,7 +18,6 @@ #include #include "common.hpp" -#include "memory.hpp" struct ur_kernel_handle_t_ : RefCounted { @@ -57,12 +56,7 @@ struct ur_kernel_handle_t_ : RefCounted { } void addMemObjArg(int Index, ur_mem_handle_t hMem, ur_mem_flags_t Flags) { - // Handle zero-sized buffers - if (hMem == nullptr) { - addArg(Index, 0, nullptr); - return; - } - + assert(hMem && "Invalid mem handle"); // If a memobj is already set at this index, update the entry rather // than adding a duplicate one for (auto &Arg : MemObjArgs) { @@ -72,9 +66,6 @@ struct ur_kernel_handle_t_ : RefCounted { } } MemObjArgs.push_back(MemObjArg{hMem, Index, Flags}); - - auto Ptr = std::get(hMem->Mem).Ptr; - addArg(Index, sizeof(void *), &Ptr); } const args_ptr_t &getPointers() const noexcept { return Pointers; } diff --git a/unified-runtime/source/adapters/offload/ur_interface_loader.cpp b/unified-runtime/source/adapters/offload/ur_interface_loader.cpp index e8a4fa60f7a2a..02de9df99fddc 100644 --- a/unified-runtime/source/adapters/offload/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/offload/ur_interface_loader.cpp @@ -383,7 +383,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnTimestampRecordingExp = nullptr; pDdiTable->pfnNativeCommandExp = nullptr; - pDdiTable->pfnKernelLaunchWithArgsExp = urEnqueueKernelLaunchWithArgsExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/opencl/enqueue.cpp b/unified-runtime/source/adapters/opencl/enqueue.cpp index 1bd75b6b56aaf..63b7b45426632 100644 --- a/unified-runtime/source/adapters/opencl/enqueue.cpp +++ b/unified-runtime/source/adapters/opencl/enqueue.cpp @@ -16,10 +16,6 @@ #include "memory.hpp" #include "program.hpp" #include "queue.hpp" -#include "sampler.hpp" - -#include -#include cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { cl_map_flags CLFlags = 0; @@ -505,102 +501,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( return UR_RESULT_SUCCESS; } - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numArgs, - const ur_exp_kernel_arg_properties_t *pArgs, - uint32_t numPropsInLaunchPropList, - const ur_kernel_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - for (uint32_t propIndex = 0; propIndex < numPropsInLaunchPropList; - propIndex++) { - // Adapters that don't support cooperative kernels are currently expected - // to ignore COOPERATIVE launch properties. Ideally we should avoid passing - // these at the SYCL RT level instead, see - // https://github.com/intel/llvm/issues/18421 - if (launchPropList[propIndex].id == UR_KERNEL_LAUNCH_PROPERTY_ID_IGNORE || - launchPropList[propIndex].id == - UR_KERNEL_LAUNCH_PROPERTY_ID_COOPERATIVE) { - continue; - } - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - clSetKernelArgMemPointerINTEL_fn SetKernelArgMemPointerPtr = nullptr; - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - hQueue->Context->CLContext, - ur::cl::getAdapter()->fnCache.clSetKernelArgMemPointerINTELCache, - cl_ext::SetKernelArgMemPointerName, &SetKernelArgMemPointerPtr)); - - for (uint32_t i = 0; i < numArgs; i++) { - switch (pArgs[i].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: - CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel, - static_cast(pArgs[i].index), - pArgs[i].size, nullptr)); - break; - case UR_EXP_KERNEL_ARG_TYPE_VALUE: - CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel, - static_cast(pArgs[i].index), - pArgs[i].size, pArgs[i].value.value)); - break; - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - cl_mem mem = pArgs[i].value.memObjTuple.hMem - ? pArgs[i].value.memObjTuple.hMem->CLMemory - : nullptr; - CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel, - static_cast(pArgs[i].index), - pArgs[i].size, &mem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_POINTER: - CL_RETURN_ON_FAILURE(SetKernelArgMemPointerPtr( - hKernel->CLKernel, static_cast(pArgs[i].index), - pArgs[i].value.pointer)); - break; - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - CL_RETURN_ON_FAILURE(clSetKernelArg( - hKernel->CLKernel, static_cast(pArgs[i].index), - pArgs[i].size, &pArgs[i].value.sampler->CLSampler)); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - std::vector compiledLocalWorksize; - if (!pLocalWorkSize) { - cl_device_id device = nullptr; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - hQueue->CLQueue, CL_QUEUE_DEVICE, sizeof(device), &device, nullptr)); - // This query always returns size_t[3], if nothing was specified it - // returns all zeroes. - size_t queriedLocalWorkSize[3] = {0, 0, 0}; - CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - hKernel->CLKernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, - sizeof(size_t[3]), queriedLocalWorkSize, nullptr)); - if (queriedLocalWorkSize[0] != 0) { - for (uint32_t i = 0; i < 3; i++) { - compiledLocalWorksize.push_back(queriedLocalWorkSize[i]); - } - } - } - - cl_event Event; - std::vector CLWaitEvents(numEventsInWaitList); - MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); - CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, - compiledLocalWorksize.empty() ? pLocalWorkSize - : compiledLocalWorksize.data(), - numEventsInWaitList, CLWaitEvents.data(), ifUrEvent(phEvent, Event))); - - UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); - return UR_RESULT_SUCCESS; -} diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp index 18cc6a79651be..c619fa36b1ab0 100644 --- a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp @@ -434,7 +434,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; pDdiTable->pfnCommandBufferExp = urEnqueueCommandBufferExp; - pDdiTable->pfnKernelLaunchWithArgsExp = urEnqueueKernelLaunchWithArgsExp; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/common/stype_map_helpers.def b/unified-runtime/source/common/stype_map_helpers.def index efd69e6ae4cb3..79705826395b9 100644 --- a/unified-runtime/source/common/stype_map_helpers.def +++ b/unified-runtime/source/common/stype_map_helpers.def @@ -168,6 +168,3 @@ struct stype_map template <> struct stype_map : stype_map_impl {}; -template <> -struct stype_map - : stype_map_impl {}; diff --git a/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp index a8d26f5498530..899ff6a850dbe 100644 --- a/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -1637,119 +1637,6 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return pfnGetInfo(hDevice, propName, propSize, pPropValue, pPropSizeRet); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunch -ur_result_t urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch - /// properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. If - /// nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional] return an event object that identifies this - /// particular kernel execution instance. - ur_event_handle_t *phEvent) { - // This mutex is to prevent concurrent kernel launches across different queues - // as the DeviceASAN local/private shadow memory does not support concurrent - // kernel launches now. - std::scoped_lock Guard( - getAsanInterceptor()->KernelLaunchMutex); - - UR_LOG_L(getContext()->logger, DEBUG, - "==== urEnqueueKernelLaunchWithArgsExp"); - - // We need to set all the args now rather than letting LaunchWithArgs handle - // them. This is because some implementations of - // urKernelGetSuggestedLocalWorkSize, which is used in preLaunchKernel, rely - // on all the args being set. - for (uint32_t ArgPropIndex = 0; ArgPropIndex < numArgs; ArgPropIndex++) { - switch (pArgs[ArgPropIndex].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: { - UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgLocal( - hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, - nullptr)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_POINTER: { - UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgPointer( - hKernel, pArgs[ArgPropIndex].index, nullptr, - pArgs[ArgPropIndex].value.pointer)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_VALUE: { - UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgValue( - hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, nullptr, - pArgs[ArgPropIndex].value.value)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - ur_kernel_arg_mem_obj_properties_t Properties = { - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr, - pArgs[ArgPropIndex].value.memObjTuple.flags}; - UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgMemObj( - hKernel, pArgs[ArgPropIndex].index, &Properties, - pArgs[ArgPropIndex].value.memObjTuple.hMem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - auto pfnKernelSetArgSampler = - getContext()->urDdiTable.Kernel.pfnSetArgSampler; - UR_CALL(pfnKernelSetArgSampler(hKernel, pArgs[ArgPropIndex].index, - nullptr, - pArgs[ArgPropIndex].value.sampler)); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), pGlobalWorkSize, - pLocalWorkSize, pGlobalWorkOffset, 3); - UR_CALL(LaunchInfo.Data.syncToDevice(hQueue)); - - UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo)); - - UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - LaunchInfo.LocalWorkSize.data(), 0, nullptr, numPropsInLaunchPropList, - launchPropList, numEventsInWaitList, phEventWaitList, phEvent)); - - UR_CALL(getAsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo)); - - return UR_RESULT_SUCCESS; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Adapter table /// with current process' addresses @@ -2065,22 +1952,6 @@ __urdlllocal ur_result_t UR_APICALL urGetDeviceProcAddrTable( return result; } -/// @brief Exported function for filling application's ProgramExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -ur_result_t urGetEnqueueExpProcAddrTable( - /// [in,out] pointer to table of DDI function pointers - ur_enqueue_exp_dditable_t *pDdiTable) { - ur_result_t result = UR_RESULT_SUCCESS; - - pDdiTable->pfnKernelLaunchWithArgsExp = - ur_sanitizer_layer::asan::urEnqueueKernelLaunchWithArgsExp; - - return result; -} template struct NotSupportedApi; @@ -2277,11 +2148,6 @@ ur_result_t initAsanDDITable(ur_dditable_t *dditable) { UR_API_VERSION_CURRENT, &dditable->VirtualMem); } - if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::asan::urGetEnqueueExpProcAddrTable( - &dditable->EnqueueExp); - } - if (result != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Initialize ASAN DDI table failed: {}", result); diff --git a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 13868606dc9fa..e2bbb166a5feb 100644 --- a/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1809,122 +1809,6 @@ ur_result_t urEnqueueUSMMemcpy2D( return UR_RESULT_SUCCESS; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunch -ur_result_t urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch - /// properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. If - /// nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional] return an event object that identifies this - /// particular kernel execution instance. - ur_event_handle_t *phEvent) { - // This mutex is to prevent concurrent kernel launches across different queues - // as the DeviceMSAN local/private shadow memory does not support concurrent - // kernel launches now. - std::scoped_lock Guard( - getMsanInterceptor()->KernelLaunchMutex); - - UR_LOG_L(getContext()->logger, DEBUG, - "==== urEnqueueKernelLaunchWithArgsExp"); - - // We need to set all the args now rather than letting LaunchWithArgs handle - // them. This is because some implementations of - // urKernelGetSuggestedLocalWorkSize, which is used in preLaunchKernel, rely - // on all the args being set. - for (uint32_t ArgPropIndex = 0; ArgPropIndex < numArgs; ArgPropIndex++) { - switch (pArgs[ArgPropIndex].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: { - UR_CALL(ur_sanitizer_layer::msan::urKernelSetArgLocal( - hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, - nullptr)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_POINTER: { - auto pfnKernelSetArgPointer = - getContext()->urDdiTable.Kernel.pfnSetArgPointer; - UR_CALL(pfnKernelSetArgPointer(hKernel, pArgs[ArgPropIndex].index, - nullptr, - pArgs[ArgPropIndex].value.pointer)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_VALUE: { - UR_CALL(ur_sanitizer_layer::msan::urKernelSetArgValue( - hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, nullptr, - pArgs[ArgPropIndex].value.value)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - ur_kernel_arg_mem_obj_properties_t Properties = { - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr, - pArgs[ArgPropIndex].value.memObjTuple.flags}; - UR_CALL(ur_sanitizer_layer::msan::urKernelSetArgMemObj( - hKernel, pArgs[ArgPropIndex].index, &Properties, - pArgs[ArgPropIndex].value.memObjTuple.hMem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - auto pfnKernelSetArgSampler = - getContext()->urDdiTable.Kernel.pfnSetArgSampler; - UR_CALL(pfnKernelSetArgSampler(hKernel, pArgs[ArgPropIndex].index, - nullptr, - pArgs[ArgPropIndex].value.sampler)); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - USMLaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), - pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset, - 3); - UR_CALL(LaunchInfo.initialize()); - - UR_CALL(getMsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo)); - - UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - LaunchInfo.LocalWorkSize.data(), 0, nullptr, numPropsInLaunchPropList, - launchPropList, numEventsInWaitList, phEventWaitList, phEvent)); - - UR_CALL(getMsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo)); - - return UR_RESULT_SUCCESS; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Adapter table /// with current process' addresses @@ -2103,22 +1987,6 @@ ur_result_t urGetUSMProcAddrTable( return result; } -/// @brief Exported function for filling application's ProgramExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -ur_result_t urGetEnqueueExpProcAddrTable( - /// [in,out] pointer to table of DDI function pointers - ur_enqueue_exp_dditable_t *pDdiTable) { - ur_result_t result = UR_RESULT_SUCCESS; - - pDdiTable->pfnKernelLaunchWithArgsExp = - ur_sanitizer_layer::msan::urEnqueueKernelLaunchWithArgsExp; - - return result; -} ur_result_t urCheckVersion(ur_api_version_t version) { if (UR_MAJOR_VERSION(ur_sanitizer_layer::getContext()->version) != @@ -2184,11 +2052,6 @@ ur_result_t initMsanDDITable(ur_dditable_t *dditable) { result = ur_sanitizer_layer::msan::urGetUSMProcAddrTable(&dditable->USM); } - if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::msan::urGetEnqueueExpProcAddrTable( - &dditable->EnqueueExp); - } - if (result != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Initialize MSAN DDI table failed: {}", result); diff --git a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp index 7934dbf596e40..61849ac0b363a 100644 --- a/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/tsan/tsan_ddi.cpp @@ -1337,120 +1337,6 @@ ur_result_t urEnqueueKernelLaunch( return UR_RESULT_SUCCESS; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunch -ur_result_t urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch - /// properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. If - /// nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional] return an event object that identifies this - /// particular kernel execution instance. - ur_event_handle_t *phEvent) { - // This mutex is to prevent concurrent kernel launches across different queues - // as the DeviceTSAN local shadow memory does not support concurrent - // kernel launches now. - std::scoped_lock Guard( - getTsanInterceptor()->KernelLaunchMutex); - - UR_LOG_L(getContext()->logger, DEBUG, - "==== urEnqueueKernelLaunchWithArgsExp"); - - // We need to set all the args now rather than letting LaunchWithArgs handle - // them. This is because some implementations of - // urKernelGetSuggestedLocalWorkSize, which is used in preLaunchKernel, rely - // on all the args being set. - for (uint32_t ArgPropIndex = 0; ArgPropIndex < numArgs; ArgPropIndex++) { - switch (pArgs[ArgPropIndex].type) { - case UR_EXP_KERNEL_ARG_TYPE_LOCAL: { - UR_CALL(ur_sanitizer_layer::tsan::urKernelSetArgLocal( - hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, - nullptr)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_POINTER: { - auto pfnKernelSetArgPointer = - getContext()->urDdiTable.Kernel.pfnSetArgPointer; - UR_CALL(pfnKernelSetArgPointer(hKernel, pArgs[ArgPropIndex].index, - nullptr, - pArgs[ArgPropIndex].value.pointer)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_VALUE: { - UR_CALL(ur_sanitizer_layer::tsan::urKernelSetArgValue( - hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, nullptr, - pArgs[ArgPropIndex].value.value)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: { - ur_kernel_arg_mem_obj_properties_t Properties = { - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr, - pArgs[ArgPropIndex].value.memObjTuple.flags}; - UR_CALL(ur_sanitizer_layer::tsan::urKernelSetArgMemObj( - hKernel, pArgs[ArgPropIndex].index, &Properties, - pArgs[ArgPropIndex].value.memObjTuple.hMem)); - break; - } - case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: { - auto pfnKernelSetArgSampler = - getContext()->urDdiTable.Kernel.pfnSetArgSampler; - UR_CALL(pfnKernelSetArgSampler(hKernel, pArgs[ArgPropIndex].index, - nullptr, - pArgs[ArgPropIndex].value.sampler)); - break; - } - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - } - - LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), pGlobalWorkSize, - pLocalWorkSize, pGlobalWorkOffset, 3); - - UR_CALL(getTsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo)); - - UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 0, nullptr, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent)); - - UR_CALL(getTsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo)); - - return UR_RESULT_SUCCESS; -} - ur_result_t urCheckVersion(ur_api_version_t version) { if (UR_MAJOR_VERSION(ur_sanitizer_layer::getContext()->version) != UR_MAJOR_VERSION(version) || @@ -1661,22 +1547,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable( return UR_RESULT_SUCCESS; } -/// @brief Exported function for filling application's ProgramExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -ur_result_t urGetEnqueueExpProcAddrTable( - /// [in,out] pointer to table of DDI function pointers - ur_enqueue_exp_dditable_t *pDdiTable) { - ur_result_t result = UR_RESULT_SUCCESS; - - pDdiTable->pfnKernelLaunchWithArgsExp = - ur_sanitizer_layer::tsan::urEnqueueKernelLaunchWithArgsExp; - - return result; -} } // namespace tsan ur_result_t initTsanDDITable(ur_dditable_t *dditable) { @@ -1726,11 +1596,6 @@ ur_result_t initTsanDDITable(ur_dditable_t *dditable) { ur_sanitizer_layer::tsan::urGetEnqueueProcAddrTable(&dditable->Enqueue); } - if (UR_RESULT_SUCCESS == result) { - result = ur_sanitizer_layer::tsan::urGetEnqueueExpProcAddrTable( - &dditable->EnqueueExp); - } - if (result != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Initialize TSAN DDI table failed: {}", result); diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index 1cac607be8559..e96e1cbffda8d 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -10093,98 +10093,6 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunchWithArgsExp -__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent) { - auto pfnKernelLaunchWithArgsExp = - getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchWithArgsExp; - - if (nullptr == pfnKernelLaunchWithArgsExp) - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - - ur_enqueue_kernel_launch_with_args_exp_params_t params = { - &hQueue, - &hKernel, - &workDim, - &pGlobalWorkOffset, - &pGlobalWorkSize, - &pLocalWorkSize, - &numArgs, - &pArgs, - &numPropsInLaunchPropList, - &launchPropList, - &numEventsInWaitList, - &phEventWaitList, - &phEvent}; - uint64_t instance = getContext()->notify_begin( - UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP, - "urEnqueueKernelLaunchWithArgsExp", ¶ms); - - auto &logger = getContext()->logger; - UR_LOG_L(logger, INFO, " ---> urEnqueueKernelLaunchWithArgsExp\n"); - - ur_result_t result = pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numArgs, pArgs, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); - - getContext()->notify_end(UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP, - "urEnqueueKernelLaunchWithArgsExp", ¶ms, &result, - instance); - - if (logger.getLevel() <= UR_LOGGER_LEVEL_INFO) { - std::ostringstream args_str; - ur::extras::printFunctionParams( - args_str, UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_WITH_ARGS_EXP, ¶ms); - UR_LOG_L(logger, INFO, - " <--- urEnqueueKernelLaunchWithArgsExp({}) -> {};\n", - args_str.str(), result); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( @@ -10795,10 +10703,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - dditable.pfnKernelLaunchWithArgsExp = pDdiTable->pfnKernelLaunchWithArgsExp; - pDdiTable->pfnKernelLaunchWithArgsExp = - ur_tracing_layer::urEnqueueKernelLaunchWithArgsExp; - dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; pDdiTable->pfnUSMDeviceAllocExp = ur_tracing_layer::urEnqueueUSMDeviceAllocExp; diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 9dd572ecd315a..6f33aaa856e41 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -10867,122 +10867,6 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunchWithArgsExp -__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent) { - auto pfnKernelLaunchWithArgsExp = - getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchWithArgsExp; - - if (nullptr == pfnKernelLaunchWithArgsExp) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (getContext()->enableParameterValidation) { - if (NULL == pGlobalWorkSize) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (launchPropList == NULL && numPropsInLaunchPropList > 0) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (pArgs == NULL && numArgs > 0) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == hQueue) - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - - if (NULL == hKernel) - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - - if (NULL != pArgs && UR_EXP_KERNEL_ARG_TYPE_SAMPLER < pArgs->type) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (phEventWaitList == NULL && numEventsInWaitList > 0) - return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; - - if (phEventWaitList != NULL && numEventsInWaitList == 0) - return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; - - if (pGlobalWorkSize[0] == 0 || pGlobalWorkSize[1] == 0 || - pGlobalWorkSize[2] == 0) - return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; - - if (pLocalWorkSize && (pLocalWorkSize[0] == 0 || pLocalWorkSize[1] == 0 || - pLocalWorkSize[2] == 0)) - return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - - if (phEventWaitList != NULL && numEventsInWaitList > 0) { - for (uint32_t i = 0; i < numEventsInWaitList; ++i) { - if (phEventWaitList[i] == NULL) { - return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; - } - } - } - } - - if (getContext()->enableLifetimeValidation && - !getContext()->refCountContext->isReferenceValid(hQueue)) { - URLOG_CTX_INVALID_REFERENCE(hQueue); - } - - if (getContext()->enableLifetimeValidation && - !getContext()->refCountContext->isReferenceValid(hKernel)) { - URLOG_CTX_INVALID_REFERENCE(hKernel); - } - - ur_result_t result = pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numArgs, pArgs, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); - - if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS && - phEvent) { - getContext()->refCountContext->createRefCount(*phEvent); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( @@ -11618,10 +11502,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; - dditable.pfnKernelLaunchWithArgsExp = pDdiTable->pfnKernelLaunchWithArgsExp; - pDdiTable->pfnKernelLaunchWithArgsExp = - ur_validation_layer::urEnqueueKernelLaunchWithArgsExp; - dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; pDdiTable->pfnUSMDeviceAllocExp = ur_validation_layer::urEnqueueUSMDeviceAllocExp; diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 516f46584012c..3ad47149315ee 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -76,7 +76,6 @@ EXPORTS urEnqueueEventsWaitWithBarrier urEnqueueEventsWaitWithBarrierExt urEnqueueKernelLaunch - urEnqueueKernelLaunchWithArgsExp urEnqueueMemBufferCopy urEnqueueMemBufferCopyRect urEnqueueMemBufferFill @@ -289,7 +288,6 @@ EXPORTS urPrintEnqueueEventsWaitWithBarrierExtParams urPrintEnqueueEventsWaitWithBarrierParams urPrintEnqueueKernelLaunchParams - urPrintEnqueueKernelLaunchWithArgsExpParams urPrintEnqueueMemBufferCopyParams urPrintEnqueueMemBufferCopyRectParams urPrintEnqueueMemBufferFillParams @@ -350,9 +348,6 @@ EXPORTS urPrintExpImageCopyFlags urPrintExpImageCopyRegion urPrintExpImageMemType - urPrintExpKernelArgMemObjTuple - urPrintExpKernelArgProperties - urPrintExpKernelArgType urPrintExpPeerInfo urPrintExpSamplerAddrModes urPrintExpSamplerCubemapFilterMode diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index a0e5b81244026..fde803f9aa45a 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -76,7 +76,6 @@ urEnqueueEventsWaitWithBarrier; urEnqueueEventsWaitWithBarrierExt; urEnqueueKernelLaunch; - urEnqueueKernelLaunchWithArgsExp; urEnqueueMemBufferCopy; urEnqueueMemBufferCopyRect; urEnqueueMemBufferFill; @@ -289,7 +288,6 @@ urPrintEnqueueEventsWaitWithBarrierExtParams; urPrintEnqueueEventsWaitWithBarrierParams; urPrintEnqueueKernelLaunchParams; - urPrintEnqueueKernelLaunchWithArgsExpParams; urPrintEnqueueMemBufferCopyParams; urPrintEnqueueMemBufferCopyRectParams; urPrintEnqueueMemBufferFillParams; @@ -350,9 +348,6 @@ urPrintExpImageCopyFlags; urPrintExpImageCopyRegion; urPrintExpImageMemType; - urPrintExpKernelArgMemObjTuple; - urPrintExpKernelArgProperties; - urPrintExpKernelArgType; urPrintExpPeerInfo; urPrintExpSamplerAddrModes; urPrintExpSamplerCubemapFilterMode; diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 0a09a3072cd48..5c2c3a41af738 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -5734,65 +5734,6 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( pPropValue, pPropSizeRet); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urEnqueueKernelLaunchWithArgsExp -__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent) { - - auto *dditable = *reinterpret_cast(hQueue); - - auto *pfnKernelLaunchWithArgsExp = - dditable->EnqueueExp.pfnKernelLaunchWithArgsExp; - if (nullptr == pfnKernelLaunchWithArgsExp) - return UR_RESULT_ERROR_UNINITIALIZED; - - // forward to device-platform - return pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numArgs, pArgs, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( @@ -6303,8 +6244,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( if (ur_loader::getContext()->platforms.size() != 1 || ur_loader::getContext()->forceIntercept) { // return pointers to loader's DDIs - pDdiTable->pfnKernelLaunchWithArgsExp = - ur_loader::urEnqueueKernelLaunchWithArgsExp; pDdiTable->pfnUSMDeviceAllocExp = ur_loader::urEnqueueUSMDeviceAllocExp; pDdiTable->pfnUSMSharedAllocExp = ur_loader::urEnqueueUSMSharedAllocExp; pDdiTable->pfnUSMHostAllocExp = ur_loader::urEnqueueUSMHostAllocExp; diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 59edc89920e92..a31b639ae5948 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -10560,104 +10560,6 @@ ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Enqueue a command to execute a kernel -/// -/// @remarks -/// _Analogues_ -/// - **clEnqueueNDRangeKernel** -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hQueue` -/// + `NULL == hKernel` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pGlobalWorkSize` -/// + `launchPropList == NULL && numPropsInLaunchPropList > 0` -/// + `pArgs == NULL && numArgs > 0` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `NULL != pArgs && ::UR_EXP_KERNEL_ARG_TYPE_SAMPLER < pArgs->type` -/// - ::UR_RESULT_ERROR_INVALID_QUEUE -/// - ::UR_RESULT_ERROR_INVALID_KERNEL -/// - ::UR_RESULT_ERROR_INVALID_EVENT -/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST -/// + `phEventWaitList == NULL && numEventsInWaitList > 0` -/// + `phEventWaitList != NULL && numEventsInWaitList == 0` -/// + If event objects in phEventWaitList are not valid events. -/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS -/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. -/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION -/// + `pGlobalWorkSize[0] == 0 || pGlobalWorkSize[1] == 0 || -/// pGlobalWorkSize[2] == 0` -/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE -/// + `pLocalWorkSize && (pLocalWorkSize[0] == 0 || pLocalWorkSize[1] == -/// 0 || pLocalWorkSize[2] == 0)` -/// - ::UR_RESULT_ERROR_INVALID_VALUE -/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values -/// have not been specified." -/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -/// - ::UR_RESULT_ERROR_INVALID_OPERATION -/// + If any property in `launchPropList` isn't supported by the device. -ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent) try { - auto pfnKernelLaunchWithArgsExp = - ur_lib::getContext()->urDdiTable.EnqueueExp.pfnKernelLaunchWithArgsExp; - if (nullptr == pfnKernelLaunchWithArgsExp) - return UR_RESULT_ERROR_UNINITIALIZED; - - return pfnKernelLaunchWithArgsExp( - hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numArgs, pArgs, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitList, phEvent); -} catch (...) { - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a barrier command which waits a list of events to complete /// before it completes, with optional extended properties diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index 0fee8b9ee2ee9..f3d5c96e376ca 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -1138,30 +1138,6 @@ ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, return str_copy(&ss, buffer, buff_size, out_size); } -ur_result_t urPrintExpKernelArgType(enum ur_exp_kernel_arg_type_t value, - char *buffer, const size_t buff_size, - size_t *out_size) { - std::stringstream ss; - ss << value; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t urPrintExpKernelArgMemObjTuple( - const struct ur_exp_kernel_arg_mem_obj_tuple_t params, char *buffer, - const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t urPrintExpKernelArgProperties( - const struct ur_exp_kernel_arg_properties_t params, char *buffer, - const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - ur_result_t urPrintExpEnqueueExtFlags(enum ur_exp_enqueue_ext_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1893,14 +1869,6 @@ ur_result_t urPrintEnqueueWriteHostPipeParams( return str_copy(&ss, buffer, buff_size, out_size); } -ur_result_t urPrintEnqueueKernelLaunchWithArgsExpParams( - const struct ur_enqueue_kernel_launch_with_args_exp_params_t *params, - char *buffer, const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - ur_result_t urPrintEnqueueEventsWaitWithBarrierExtParams( const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index 771e27c3b8d6f..da84b7f50f71b 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -9190,95 +9190,6 @@ ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Enqueue a command to execute a kernel -/// -/// @remarks -/// _Analogues_ -/// - **clEnqueueNDRangeKernel** -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hQueue` -/// + `NULL == hKernel` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pGlobalWorkSize` -/// + `launchPropList == NULL && numPropsInLaunchPropList > 0` -/// + `pArgs == NULL && numArgs > 0` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `NULL != pArgs && ::UR_EXP_KERNEL_ARG_TYPE_SAMPLER < pArgs->type` -/// - ::UR_RESULT_ERROR_INVALID_QUEUE -/// - ::UR_RESULT_ERROR_INVALID_KERNEL -/// - ::UR_RESULT_ERROR_INVALID_EVENT -/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST -/// + `phEventWaitList == NULL && numEventsInWaitList > 0` -/// + `phEventWaitList != NULL && numEventsInWaitList == 0` -/// + If event objects in phEventWaitList are not valid events. -/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS -/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. -/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION -/// + `pGlobalWorkSize[0] == 0 || pGlobalWorkSize[1] == 0 || -/// pGlobalWorkSize[2] == 0` -/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE -/// + `pLocalWorkSize && (pLocalWorkSize[0] == 0 || pLocalWorkSize[1] == -/// 0 || pLocalWorkSize[2] == 0)` -/// - ::UR_RESULT_ERROR_INVALID_VALUE -/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values -/// have not been specified." -/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES -/// - ::UR_RESULT_ERROR_INVALID_OPERATION -/// + If any property in `launchPropList` isn't supported by the device. -ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp( - /// [in] handle of the queue object - ur_queue_handle_t hQueue, - /// [in] handle of the kernel object - ur_kernel_handle_t hKernel, - /// [in] number of dimensions, from 1 to 3, to specify the global and - /// work-group work-items - uint32_t workDim, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the offset used to calculate the global ID of a work-item - const size_t *pGlobalWorkOffset, - /// [in] pointer to an array of workDim unsigned values that specify the - /// number of global work-items in workDim that will execute the kernel - /// function - const size_t *pGlobalWorkSize, - /// [in][optional] pointer to an array of workDim unsigned values that - /// specify the number of local work-items forming a work-group that will - /// execute the kernel function. - /// If nullptr, the runtime implementation will choose the work-group size. - const size_t *pLocalWorkSize, - /// [in] Number of entries in pArgs - uint32_t numArgs, - /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg - /// properties. - const ur_exp_kernel_arg_properties_t *pArgs, - /// [in] size of the launch prop list - uint32_t numPropsInLaunchPropList, - /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list - /// of launch properties - const ur_kernel_launch_property_t *launchPropList, - /// [in] size of the event wait list - uint32_t numEventsInWaitList, - /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of - /// events that must be complete before the kernel execution. - /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait - /// event. - const ur_event_handle_t *phEventWaitList, - /// [out][optional][alloc] return an event object that identifies this - /// particular kernel execution instance. If phEventWaitList and phEvent - /// are not NULL, phEvent must not refer to an element of the - /// phEventWaitList array. - ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a barrier command which waits a list of events to complete /// before it completes, with optional extended properties diff --git a/unified-runtime/test/conformance/CMakeLists.txt b/unified-runtime/test/conformance/CMakeLists.txt index c1ca49f8e992a..5d579dbbf506b 100644 --- a/unified-runtime/test/conformance/CMakeLists.txt +++ b/unified-runtime/test/conformance/CMakeLists.txt @@ -79,7 +79,6 @@ set(TEST_SUBDIRECTORIES_DPCXX "integration" "exp_command_buffer" "exp_enqueue_native" - "exp_enqueue_kernel_launch_with_args" "exp_usm_p2p" "memory-migrate" "usm" diff --git a/unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/CMakeLists.txt b/unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/CMakeLists.txt deleted file mode 100644 index d03e5ef7c072f..0000000000000 --- a/unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -add_conformance_kernels_test( - exp_kernel_launch_with_args - urEnqueueKernelLaunchWithArgsExp.cpp -) diff --git a/unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/urEnqueueKernelLaunchWithArgsExp.cpp b/unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/urEnqueueKernelLaunchWithArgsExp.cpp deleted file mode 100644 index 093bc56004e9a..0000000000000 --- a/unified-runtime/test/conformance/exp_enqueue_kernel_launch_with_args/urEnqueueKernelLaunchWithArgsExp.cpp +++ /dev/null @@ -1,303 +0,0 @@ -// Copyright (C) 2025 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include - -#include - -// This test runs a kernel with a mix of local memory, pointer and value args. -struct urEnqueueKernelLaunchWithArgsTest : uur::urKernelExecutionTest { - void SetUp() override { - program_name = "saxpy_usm_local_mem"; - UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); - - ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, - sizeof(backend), &backend, nullptr)); - - // HIP has extra args for local memory so we define an offset for arg - // indices here for updating - hip_arg_offset = backend == UR_BACKEND_HIP ? 3 : 0; - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - const size_t allocation_size = - sizeof(uint32_t) * global_size[0] * local_size[0]; - for (auto &shared_ptr : shared_ptrs) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - - std::vector pattern(allocation_size); - uur::generateMemFillPattern(pattern); - std::memcpy(shared_ptr, pattern.data(), allocation_size); - } - uint32_t current_index = 0; - // Index 0 is local_mem_a arg - args.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, - nullptr, - UR_EXP_KERNEL_ARG_TYPE_LOCAL, - current_index++, - local_mem_a_size, - {nullptr}}); - - // Hip has extra args for local mem at index 1-3 - ur_exp_kernel_arg_value_t argValue = {}; - if (backend == UR_BACKEND_HIP) { - argValue.value = &hip_local_offset; - ur_exp_kernel_arg_properties_t local_offset = { - UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, - nullptr, - UR_EXP_KERNEL_ARG_TYPE_VALUE, - current_index++, - sizeof(hip_local_offset), - argValue}; - args.push_back(local_offset); - local_offset.index = current_index++; - args.push_back(local_offset); - local_offset.index = current_index++; - args.push_back(local_offset); - } - - // Index 1 is local_mem_b arg - args.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, - nullptr, - UR_EXP_KERNEL_ARG_TYPE_LOCAL, - current_index++, - local_mem_b_size, - {nullptr}}); - - if (backend == UR_BACKEND_HIP) { - argValue.value = &hip_local_offset; - ur_exp_kernel_arg_properties_t local_offset = { - UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, - nullptr, - UR_EXP_KERNEL_ARG_TYPE_VALUE, - current_index++, - sizeof(hip_local_offset), - argValue}; - args.push_back(local_offset); - local_offset.index = current_index++; - args.push_back(local_offset); - local_offset.index = current_index++; - args.push_back(local_offset); - } - - // Index 2 is output - argValue.pointer = shared_ptrs[0]; - args.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_POINTER, current_index++, - sizeof(shared_ptrs[0]), argValue}); - // Index 3 is A - argValue.value = &A; - args.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_VALUE, current_index++, sizeof(A), - argValue}); - // Index 4 is X - argValue.pointer = shared_ptrs[1]; - args.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_POINTER, current_index++, - sizeof(shared_ptrs[1]), argValue}); - // Index 5 is Y - argValue.pointer = shared_ptrs[2]; - args.push_back({UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, nullptr, - UR_EXP_KERNEL_ARG_TYPE_POINTER, current_index++, - sizeof(shared_ptrs[2]), argValue}); - } - - void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, - size_t length, size_t local_size) { - for (size_t i = 0; i < length; i++) { - uint32_t result = A * X[i] + Y[i] + local_size; - ASSERT_EQ(result, output[i]); - } - } - - virtual void TearDown() override { - for (auto &shared_ptr : shared_ptrs) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - } - - UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); - } - - static constexpr size_t local_size[3] = {4, 1, 1}; - static constexpr size_t local_mem_a_size = local_size[0] * sizeof(uint32_t); - static constexpr size_t local_mem_b_size = local_mem_a_size * 2; - static constexpr size_t global_size[3] = {16, 1, 1}; - static constexpr size_t global_offset[3] = {0, 0, 0}; - static constexpr uint32_t workDim = 3; - static constexpr uint32_t A = 42; - std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr, - nullptr}; - - uint32_t hip_arg_offset = 0; - static constexpr uint64_t hip_local_offset = 0; - ur_backend_t backend{}; - std::vector args; -}; -UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueKernelLaunchWithArgsTest); - -TEST_P(urEnqueueKernelLaunchWithArgsTest, Success) { - ASSERT_SUCCESS(urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, global_offset, global_size, local_size, - args.size(), args.data(), 0, nullptr, 0, nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptrs[0]; - uint32_t *X = (uint32_t *)shared_ptrs[1]; - uint32_t *Y = (uint32_t *)shared_ptrs[2]; - Validate(output, X, Y, A, global_size[0], local_size[0]); -} - -TEST_P(urEnqueueKernelLaunchWithArgsTest, InvalidNullHandleQueue) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urEnqueueKernelLaunchWithArgsExp( - nullptr, kernel, workDim, global_offset, global_size, - local_size, args.size(), args.data(), 0, nullptr, 0, - nullptr, nullptr)); -} - -TEST_P(urEnqueueKernelLaunchWithArgsTest, InvalidNullHandleKernel) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urEnqueueKernelLaunchWithArgsExp( - queue, nullptr, workDim, global_offset, global_size, - local_size, args.size(), args.data(), 0, nullptr, 0, - nullptr, nullptr)); -} - -TEST_P(urEnqueueKernelLaunchWithArgsTest, InvalidNullPointerGlobalSize) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, global_offset, nullptr, - local_size, args.size(), args.data(), 0, nullptr, 0, - nullptr, nullptr)); -} - -TEST_P(urEnqueueKernelLaunchWithArgsTest, InvalidNullPointerProperties) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, global_offset, global_size, - local_size, args.size(), args.data(), 1, nullptr, 0, - nullptr, nullptr)); -} - -TEST_P(urEnqueueKernelLaunchWithArgsTest, InvalidNullPointerArgs) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, global_offset, global_size, - local_size, args.size(), nullptr, 0, nullptr, 0, nullptr, - nullptr)); -} - -TEST_P(urEnqueueKernelLaunchWithArgsTest, InvalidEventWaitList) { - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST, - urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, global_offset, global_size, - local_size, args.size(), args.data(), 0, nullptr, 1, - nullptr, nullptr)); - ur_event_handle_t event = nullptr; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST, - urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, global_offset, global_size, - local_size, args.size(), args.data(), 0, nullptr, 0, - &event, nullptr)); -} - -// This test runs a kernel with a buffer (MEM_OBJ) arg. -struct urEnqueueKernelLaunchWithArgsMemObjTest : uur::urKernelExecutionTest { - void SetUp() override { - program_name = "fill"; - UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); - - ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, - sizeof(backend), &backend, nullptr)); - - ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, - sizeof(val) * global_size[0], nullptr, - &buffer)); - - char zero = 0; - ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffer, &zero, sizeof(zero), 0, - buffer_size, 0, nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // First argument is buffer to fill - unsigned current_arg_index = 0; - ur_exp_kernel_arg_mem_obj_tuple_t buffer_and_properties = {buffer, 0}; - ur_exp_kernel_arg_properties_t arg = { - UR_STRUCTURE_TYPE_EXP_KERNEL_ARG_PROPERTIES, - nullptr, - UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ, - current_arg_index++, - sizeof(buffer), - {nullptr}}; - arg.value.memObjTuple = buffer_and_properties; - args.push_back(arg); - - // Add accessor arguments depending on backend. - // HIP has 3 offset parameters and other backends only have 1. - if (backend == UR_BACKEND_HIP) { - arg.type = UR_EXP_KERNEL_ARG_TYPE_VALUE; - arg.size = sizeof(hip_local_offset); - arg.value.value = &hip_local_offset; - arg.index = current_arg_index++; - args.push_back(arg); - arg.index = current_arg_index++; - args.push_back(arg); - arg.index = current_arg_index++; - args.push_back(arg); - } else { - arg.type = UR_EXP_KERNEL_ARG_TYPE_VALUE; - arg.index = current_arg_index++; - arg.size = sizeof(accessor); - arg.value.value = &accessor; - args.push_back(arg); - } - - // Second user defined argument is scalar to fill with. - arg.type = UR_EXP_KERNEL_ARG_TYPE_VALUE; - arg.index = current_arg_index++; - arg.size = sizeof(val); - arg.value.value = &val; - args.push_back(arg); - } - - void TearDown() override { - if (buffer) { - EXPECT_SUCCESS(urMemRelease(buffer)); - } - - UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); - } - - static constexpr uint32_t val = 42; - static constexpr size_t global_size[3] = {32, 1, 1}; - static constexpr uint32_t workDim = 3; - static constexpr size_t buffer_size = sizeof(val) * global_size[0]; - static constexpr uint64_t hip_local_offset = 0; - ur_backend_t backend{}; - ur_mem_handle_t buffer = nullptr; - // This is the accessor offset struct sycl kernels expect to accompany buffer args. - struct { - size_t offsets[1] = {0}; - } accessor; - std::vector args; -}; -UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueKernelLaunchWithArgsMemObjTest); - -TEST_P(urEnqueueKernelLaunchWithArgsMemObjTest, Success) { - ASSERT_SUCCESS(urEnqueueKernelLaunchWithArgsExp( - queue, kernel, workDim, nullptr, global_size, nullptr, args.size(), - args.data(), 0, nullptr, 0, nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - ValidateBuffer(buffer, buffer_size, val); -}