//===------- Offload API tests - olLaunchKernel --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "../common/Fixtures.hpp"
#include <OffloadAPI.h>
#include <gtest/gtest.h>

// Base fixture: loads a device binary, creates a program from it, and sets up
// default launch dimensions (one group of 64 work items).
struct LaunchKernelTestBase : OffloadQueueTest {
  void SetUpProgram(const char *program) {
    RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());

    ASSERT_TRUE(TestEnvironment::loadDeviceBinary(program, Device, DeviceBin));
    ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
    ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                   DeviceBin->getBufferSize(), &Program));

    LaunchArgs.Dimensions = 1;
    LaunchArgs.GroupSize = {64, 1, 1};
    LaunchArgs.NumGroups = {1, 1, 1};
    LaunchArgs.DynSharedMemory = 0;
  }

  void TearDown() override {
    if (Program) {
      olDestroyProgram(Program);
    }
    RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
  }

  std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
  ol_program_handle_t Program = nullptr;
  ol_kernel_launch_size_args_t LaunchArgs{};
};

struct LaunchSingleKernelTestBase : LaunchKernelTestBase {
  void SetUpKernel(const char *kernel) {
    RETURN_ON_FATAL_FAILURE(SetUpProgram(kernel));
    ASSERT_SUCCESS(
        olGetSymbol(Program, kernel, OL_SYMBOL_KIND_KERNEL, &Kernel));
  }

  ol_symbol_handle_t Kernel = nullptr;
};

#define KERNEL_TEST(NAME, KERNEL)                                              \
  struct olLaunchKernel##NAME##Test : LaunchSingleKernelTestBase {             \
    void SetUp() override { SetUpKernel(#KERNEL); }                            \
  };                                                                           \
  OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);

KERNEL_TEST(Foo, foo)
KERNEL_TEST(NoArgs, noargs)
KERNEL_TEST(LocalMem, localmem)
KERNEL_TEST(LocalMemReduction, localmem_reduction)
KERNEL_TEST(LocalMemStatic, localmem_static)
KERNEL_TEST(GlobalCtor, global_ctor)
KERNEL_TEST(GlobalDtor, global_dtor)

struct LaunchMultipleKernelTestBase : LaunchKernelTestBase {
  void SetUpKernels(const char *program, std::vector<const char *> kernels) {
    RETURN_ON_FATAL_FAILURE(SetUpProgram(program));

    Kernels.resize(kernels.size());
    size_t I = 0;
    for (auto K : kernels)
      ASSERT_SUCCESS(
          olGetSymbol(Program, K, OL_SYMBOL_KIND_KERNEL, &Kernels[I++]));
  }

  std::vector<ol_symbol_handle_t> Kernels;
};

#define KERNEL_MULTI_TEST(NAME, PROGRAM, ...)                                  \
  struct olLaunchKernel##NAME##Test : LaunchMultipleKernelTestBase {           \
    void SetUp() override { SetUpKernels(#PROGRAM, {__VA_ARGS__}); }           \
  };                                                                           \
  OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);

KERNEL_MULTI_TEST(Global, global, "write", "read")

TEST_P(olLaunchKernelFooTest, Success) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));

  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < 64; i++) {
    ASSERT_EQ(Data[i], i);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelNoArgsTest, Success) {
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
}

TEST_P(olLaunchKernelFooTest, SuccessSynchronous) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));

  struct {
    void *Mem;
  } Args{Mem};

  // Passing a null queue launches synchronously, so no olSyncQueue is needed
  // before inspecting the results.
  ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args),
                                &LaunchArgs));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < 64; i++) {
    ASSERT_EQ(Data[i], i);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelLocalMemTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x *
                                sizeof(uint32_t),
                            &Mem));
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));

  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x; i++)
    ASSERT_EQ(Data[i], (i % 64) * 2);

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));

  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++)
    ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 0;

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));

  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < LaunchArgs.NumGroups.x; i++)
    ASSERT_EQ(Data[i], 2 * LaunchArgs.GroupSize.x);

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelGlobalTest, Success) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[1], &Args, sizeof(Args),
                                &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < 64; i++) {
    ASSERT_EQ(Data[i], i * 2);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) {
  ol_symbol_handle_t Global = nullptr;
  ASSERT_SUCCESS(
      olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global));

  ASSERT_ERROR(OL_ERRC_SYMBOL_KIND,
               olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs));
}

TEST_P(olLaunchKernelGlobalCtorTest, Success) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};

  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));

  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Data = (uint32_t *)Mem;
  for (uint32_t i = 0; i < 64; i++) {
    ASSERT_EQ(Data[i], i + 100);
  }

  ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelGlobalDtorTest, Success) {
  // TODO: We can't inspect the result of a destructor yet; once we
  // find/implement a way, update this test. For now we just check that nothing
  // crashes.
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
}
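
// Illustrative sketch (an assumption, not part of the upstream suite): the
// pattern above generalizes to enqueueing the same kernel several times on
// one queue and synchronizing once at the end. This only reuses the fixture
// and entry points already exercised above and checks nothing beyond
// successful completion.
TEST_P(olLaunchKernelNoArgsTest, SuccessRepeatedLaunches) {
  // Enqueue a handful of launches back to back on the same queue.
  for (uint32_t Repeat = 0; Repeat < 4; Repeat++)
    ASSERT_SUCCESS(
        olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));

  // A single sync waits for all of the enqueued launches.
  ASSERT_SUCCESS(olSyncQueue(Queue));
}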