diff options
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 590 |
1 files changed, 100 insertions, 490 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index d9ef6c2..dbd24d33c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -28,96 +28,6 @@ using namespace CodeGen; using namespace llvm::omp; namespace { -enum OpenMPRTLFunctionNVPTX { - /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_kernel_init, - /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - OMPRTL_NVPTX__kmpc_kernel_deinit, - /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, - /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function); - OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function); - OMPRTL_NVPTX__kmpc_kernel_parallel, - /// Call to void __kmpc_kernel_end_parallel(); - OMPRTL_NVPTX__kmpc_kernel_end_parallel, - /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_serialized_parallel, - /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_end_serialized_parallel, - /// Call to int32_t __kmpc_shuffle_int32(int32_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int32, - /// Call to int64_t __kmpc_shuffle_int64(int64_t element, - /// int16_t lane_offset, int16_t warp_size); - OMPRTL_NVPTX__kmpc_shuffle_int64, - /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, - /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - /// global_tid, void *global_buffer, int32_t num_of_records, void* - /// reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - /// *buffer, int idx, void *reduce_data)); - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, - /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); - OMPRTL_NVPTX__kmpc_end_reduce_nowait, - /// Call to void __kmpc_data_sharing_init_stack(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack, - /// Call to void __kmpc_data_sharing_init_stack_spmd(); - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, - /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, - /// int16_t UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, - /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t - /// UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_push_stack, - /// Call to void __kmpc_data_sharing_pop_stack(void *a); - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, - /// Call to void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - OMPRTL_NVPTX__kmpc_begin_sharing_variables, - /// Call to void __kmpc_end_sharing_variables(); - OMPRTL_NVPTX__kmpc_end_sharing_variables, - /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs) - OMPRTL_NVPTX__kmpc_get_shared_variables, - /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL_NVPTX__kmpc_parallel_level, - /// Call to int8_t __kmpc_is_spmd_exec_mode(); - OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, - /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - /// const void *buf, size_t size, int16_t is_shared, const void **res); - OMPRTL_NVPTX__kmpc_get_team_static_memory, - /// Call to void __kmpc_restore_team_static_memory(int16_t - /// isSPMDExecutionMode, int16_t is_shared); - OMPRTL_NVPTX__kmpc_restore_team_static_memory, - /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - /// global_tid); - OMPRTL__kmpc_barrier_simple_spmd, - /// Call to int32_t __kmpc_warp_active_thread_mask(void); - OMPRTL_NVPTX__kmpc_warp_active_thread_mask, - /// Call to void __kmpc_syncwarp(int32_t Mask); - OMPRTL_NVPTX__kmpc_syncwarp, -}; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. class NVPTXActionTy final : public PrePostActionTy { @@ -1243,13 +1153,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {getThreadLimit(CGF), Bld.getInt16(/*RequiresOMPRuntime=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_init), + Args); // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack)); emitGenericVarsProlog(CGF, WST.Loc); } @@ -1272,8 +1182,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF, // Signal termination condition. // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_deinit), + Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. @@ -1347,13 +1258,14 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader( /*RequiresOMPRuntime=*/ Bld.getInt16(RequiresFullRuntime ? 1 : 0), /*RequiresDataSharing=*/Bld.getInt16(0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init), + Args); if (RequiresFullRuntime) { // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd)); } CGF.EmitBranch(ExecuteBB); @@ -1379,9 +1291,9 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF, // DeInitialize the OMP state in the runtime; called by all active threads. llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2), + Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1415,7 +1327,7 @@ void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) { } void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, - WorkerFunctionState &WST) { + WorkerFunctionState &WST) { // // The workers enter this loop and wait for parallel work from the master. // When the master encounters a parallel region it sets up the work + variable @@ -1450,8 +1362,10 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {WorkFn.getPointer()}; - llvm::Value *Ret = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); + llvm::Value *Ret = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_parallel), + Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. @@ -1516,9 +1430,9 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, // Signal end of parallel region. CGF.EmitBlock(TerminateBB); - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), - llvm::None); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel), + llvm::None); CGF.EmitBranch(BarrierBB); // All active and inactive workers wait at a barrier after parallel region. @@ -1533,328 +1447,6 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF, clearLocThreadIdInsertPt(CGF); } -/// Returns specified OpenMP runtime function for the current OpenMP -/// implementation. Specialized for the NVPTX device. -/// \param Function OpenMP runtime function. -/// \return Specified function. -llvm::FunctionCallee -CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) { - llvm::FunctionCallee RTLFn = nullptr; - switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { - case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t - // RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_deinit: { - // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { - // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); - break; - } - case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { - // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); - llvm::Type *TypeParams[] = {CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { - /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; - llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); - auto *FnTy = - llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { - /// Build void __kmpc_kernel_end_parallel(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_serialized_parallel: { - // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { - // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int32: { - // Build int32_t __kmpc_shuffle_int32(int32_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); - break; - } - case OMPRTL_NVPTX__kmpc_shuffle_int64: { - // Build int64_t __kmpc_shuffle_int64(int64_t element, - // int16_t lane_offset, int16_t warp_size); - llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { - // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, - // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* - // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t - // lane_id, int16_t lane_offset, int16_t Algorithm Version), void - // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { - // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 - // global_tid, void *global_buffer, int32_t num_of_records, void* - // reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t shortCircuit), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void - // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), - // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, - // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, - // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void - // *buffer, int idx, void *reduce_data)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, - CGM.VoidPtrTy}; - auto *GlobalListFnTy = - llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo(), - GlobalListFnTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { - /// Build void __kmpc_data_sharing_init_stack(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { - /// Build void __kmpc_data_sharing_init_stack_spmd(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { - // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, - // int16_t UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { - // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t - // UseSharedMemory); - llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { - // Build void __kmpc_data_sharing_pop_stack(void *a); - llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, - /*Name=*/"__kmpc_data_sharing_pop_stack"); - break; - } - case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { - /// Build void __kmpc_begin_sharing_variables(void ***args, - /// size_t n_args); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_end_sharing_variables: { - /// Build void __kmpc_end_sharing_variables(); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_get_shared_variables: { - /// Build void __kmpc_get_shared_variables(void ***GlobalArgs); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); - break; - } - case OMPRTL_NVPTX__kmpc_parallel_level: { - // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); - break; - } - case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { - // Build int8_t __kmpc_is_spmd_exec_mode(); - auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); - break; - } - case OMPRTL_NVPTX__kmpc_get_team_static_memory: { - // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, - // const void *buf, size_t size, int16_t is_shared, const void **res); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, - CGM.Int16Ty, CGM.VoidPtrPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); - break; - } - case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { - // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, - // int16_t is_shared); - llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); - break; - } - case OMPRTL__kmpc_barrier: { - // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = - CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - break; - } - case OMPRTL__kmpc_barrier_simple_spmd: { - // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateConvergentRuntimeFunction( - FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - break; - } - case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { - // Build int32_t __kmpc_warp_active_thread_mask(void); - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); - break; - } - case OMPRTL_NVPTX__kmpc_syncwarp: { - // Build void __kmpc_syncwarp(kmp_int32 Mask); - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); - RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); - break; - } - } - return RTLFn; -} - void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t, @@ -2157,12 +1749,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); IsTTD = Bld.CreateIsNull(PL); } - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2196,8 +1790,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2259,9 +1853,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, CGM.Int16Ty, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_get_team_static_memory), - GlobalRecordSizeArg); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_team_static_memory), + GlobalRecordSizeArg); GlobalizedRecords.back().Buffer = StaticGlobalized; GlobalizedRecords.back().RecSize = RecSize; GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; @@ -2288,10 +1883,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - IsInTTDRegion - ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack - : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack + : OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2390,8 +1985,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -2419,7 +2014,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, for (llvm::Value *Addr : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), Addr); } if (I->getSecond().GlobalRecordAddr) { @@ -2434,8 +2030,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(NonSPMDBB); CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { @@ -2456,14 +2052,15 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0), IsInSharedMemory}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_restore_team_static_memory), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory), Args); } } else { - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); } } } @@ -2535,9 +2132,11 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2553,7 +2152,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( // Prepare for parallel region. Indicate the outlined function. llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel), Args); // Create a private scope that will globalize the arguments @@ -2570,9 +2170,10 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *DataSharingArgs[] = { SharedArgsPtr, llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_begin_sharing_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables), + DataSharingArgs); // Store variable address in a list of references to pass to workers. unsigned Idx = 0; @@ -2606,8 +2207,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( syncCTAThreads(CGF); if (!CapturedVars.empty()) - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_sharing_variables)); // Remember for post-processing in worker loop. Work.emplace_back(WFn); @@ -2631,8 +2232,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); - llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + llvm::Value *IsSPMD = Bld.CreateIsNotNull( + CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); @@ -2640,7 +2242,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall( llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *PL = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); Bld.CreateCondBr(Res, SeqBB, MasterBB); @@ -2704,9 +2307,11 @@ void CGOpenMPRuntimeGPU::emitSPMDParallelCall( llvm::Value *Args[] = {RTLoc, ThreadID}; NVPTXActionTy Action( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_serialized_parallel), Args, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel), Args); RCG.setAction(Action); RCG(CGF); @@ -2736,9 +2341,9 @@ void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) { llvm::ConstantPointerNull::get( cast<llvm::PointerType>(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); - Call->setConvergent(); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd), + Args); } void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, @@ -2752,9 +2357,10 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF, unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - llvm::CallInst *Call = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); - Call->setConvergent(); + + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier), + Args); } void CGOpenMPRuntimeGPU::emitCriticalRegion( @@ -2770,8 +2376,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); // Get the mask of active threads in the warp. - llvm::Value *Mask = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); + llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = RT.getGPUThreadID(CGF); @@ -2813,8 +2419,9 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); // Reconverge active threads in the warp. - (void)CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); + (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_syncwarp), + Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2864,14 +2471,15 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; CGOpenMPRuntimeGPU &RT = *(static_cast<CGOpenMPRuntimeGPU *>(&CGM.getOpenMPRuntime())); + llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder(); CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); assert(Size.getQuantity() <= 8 && "Unsupported bitwidth in shuffle instruction."); - OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 - ? OMPRTL_NVPTX__kmpc_shuffle_int32 - : OMPRTL_NVPTX__kmpc_shuffle_int64; + RuntimeFunction ShuffleFn = Size.getQuantity() <= 4 + ? OMPRTL___kmpc_shuffle_int32 + : OMPRTL___kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. QualType CastTy = CGF.getContext().getIntTypeForBitwidth( @@ -2881,7 +2489,8 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( - RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn), + {ElemCast, Offset, WarpSize}); return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); } @@ -4391,8 +4000,8 @@ void CGOpenMPRuntimeGPU::emitReduction( InterWarpCopyFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2), Args); } else { assert(TeamsReduction && "expected teams reduction."); @@ -4441,8 +4050,8 @@ void CGOpenMPRuntimeGPU::emitReduction( BufferToGlobalRedFn}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -4477,7 +4086,8 @@ void CGOpenMPRuntimeGPU::emitReduction( RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), EndArgs); RCG.setAction(Action); RCG(CGF); @@ -4488,7 +4098,7 @@ void CGOpenMPRuntimeGPU::emitReduction( const VarDecl * CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD, - const VarDecl *NativeParam) const { + const VarDecl *NativeParam) const { if (!NativeParam->getType()->isReferenceType()) return NativeParam; QualType ArgType = NativeParam->getType(); @@ -4638,9 +4248,9 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper( CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_shared_variables), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. |