diff options
-rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 12 | ||||
-rw-r--r-- | llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll | 99 | ||||
-rw-r--r-- | openmp/libomptarget/DeviceRTL/include/Synchronization.h | 6 | ||||
-rw-r--r-- | openmp/libomptarget/DeviceRTL/src/Mapping.cpp | 6 | ||||
-rw-r--r-- | openmp/libomptarget/DeviceRTL/src/Parallelism.cpp | 5 | ||||
-rw-r--r-- | openmp/libomptarget/DeviceRTL/src/State.cpp | 4 | ||||
-rw-r--r-- | openmp/libomptarget/DeviceRTL/src/Synchronization.cpp | 6 |
7 files changed, 127 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index ef2384f..0b42fc1 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -499,6 +499,18 @@ struct OMPInformationCache : public InformationCache { } #include "llvm/Frontend/OpenMP/OMPKinds.def" + // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` + // functions, except if `optnone` is present. + if (isOpenMPDevice(M)) { + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.hasFnAttribute(Attribute::NoInline) && + F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } + } + // TODO: We should attach the attributes defined in OMPKinds.def. } diff --git a/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll b/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll new file mode 100644 index 0000000..349e279 --- /dev/null +++ b/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; RUN: opt < %s -S -openmp-opt-cgscc | FileCheck %s +; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s + +declare void @unknown() + +; __kmpc functions +define void @__kmpc_noinline() noinline nounwind { +; CHECK: Function Attrs: nounwind +; CHECK-LABEL: @__kmpc_noinline( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +; omp_X functions +define void @omp_noinline() noinline nounwind { +; CHECK: Function Attrs: nounwind +; CHECK-LABEL: @omp_noinline( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +; _OMP namespace +define void @_ZN4_OMP_noinline() noinline nounwind { +; CHECK: Function Attrs: nounwind +; CHECK-LABEL: @_ZN4_OMP_noinline( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret 
void +; + call void @unknown() + ret void +} + +; Negative tests: + +define void @__kmpc_noinline_optnone() noinline optnone nounwind { +; CHECK: Function Attrs: noinline nounwind optnone +; CHECK-LABEL: @__kmpc_noinline_optnone( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +define void @omp_noinline_optnone() noinline optnone nounwind { +; CHECK: Function Attrs: noinline nounwind optnone +; CHECK-LABEL: @omp_noinline_optnone( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +; _OMP namespace +define void @_ZN4_OMP_noinline_optnone() noinline optnone nounwind { +; CHECK: Function Attrs: noinline nounwind optnone +; CHECK-LABEL: @_ZN4_OMP_noinline_optnone( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +define void @a___kmpc_noinline() noinline nounwind { +; CHECK: Function Attrs: noinline nounwind +; CHECK-LABEL: @a___kmpc_noinline( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +define void @a_omp_noinline() noinline nounwind { +; CHECK: Function Attrs: noinline nounwind +; CHECK-LABEL: @a_omp_noinline( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} +define void @a__ZN4_OMP_noinline() noinline nounwind { +; CHECK: Function Attrs: noinline nounwind +; CHECK-LABEL: @a__ZN4_OMP_noinline( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + ret void +} + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 7, !"openmp", i32 50} +!1 = !{i32 7, !"openmp-device", i32 50} diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h index 4b8898f..e33f37a 100644 --- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h +++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h @@ -29,13 +29,15 @@ void threads(); 
/// Synchronizing threads is allowed even if they all hit different instances of /// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more -/// restrictive in that it requires all threads to hit the same instance. +/// restrictive in that it requires all threads to hit the same instance. The +/// noinline is removed by the openmp-opt pass and helps to preserve the +/// information till then. ///{ #pragma omp begin assumes ext_aligned_barrier /// Synchronize all threads in a block, they are reaching the same /// instruction (hence all threads in the block are "aligned"). -void threadsAligned(); +__attribute__((noinline)) void threadsAligned(); #pragma omp end assumes ///} diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp index b161c55..172bbbf 100644 --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -289,17 +289,17 @@ bool mapping::isGenericMode() { return !isSPMDMode(); } ///} extern "C" { -uint32_t __kmpc_get_hardware_thread_id_in_block() { +__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() { FunctionTracingRAII(); return mapping::getThreadIdInBlock(); } -uint32_t __kmpc_get_hardware_num_threads_in_block() { +__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() { FunctionTracingRAII(); return impl::getNumHardwareThreadsInBlock(); } -uint32_t __kmpc_get_warp_size() { +__attribute__((noinline)) uint32_t __kmpc_get_warp_size() { FunctionTracingRAII(); return impl::getWarpSize(); } diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp index 5b133b0..27d1ff2 100644 --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -243,7 +243,8 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, __kmpc_end_sharing_variables(); } -bool 
__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { +__attribute__((noinline)) bool +__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { FunctionTracingRAII(); // Work function and arguments for L1 parallel region. *WorkFn = state::ParallelRegionFn; @@ -258,7 +259,7 @@ bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { return ThreadIsActive; } -void __kmpc_kernel_end_parallel() { +__attribute__((noinline)) void __kmpc_kernel_end_parallel() { FunctionTracingRAII(); // In case we have modified an ICV for this thread before a ThreadState was // created. We drop it now to not contaminate the next parallel region. diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index 92847f7..7a73330 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -393,12 +393,12 @@ int omp_get_initial_device(void) { return -1; } } extern "C" { -void *__kmpc_alloc_shared(uint64_t Bytes) { +__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) { FunctionTracingRAII(); return memory::allocShared(Bytes, "Frontend alloc shared"); } -void __kmpc_free_shared(void *Ptr, uint64_t Bytes) { +__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) { FunctionTracingRAII(); memory::freeShared(Ptr, Bytes, "Frontend free shared"); } diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp index 350da0b..4327871 100644 --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -358,12 +358,14 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) { impl::namedBarrier(); } -void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) { +__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc, + int32_t TId) { FunctionTracingRAII(); synchronize::threadsAligned(); } -void __kmpc_barrier_simple_generic(IdentTy *Loc, int32_t TId) 
{ +__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc, + int32_t TId) { FunctionTracingRAII(); synchronize::threads(); } |