; See ./README.md for how to maintain the LLVM IR in this test. ; REQUIRES: nvptx-registered-target ; RUN: opt -pass-remarks=kernel-info -passes=kernel-info \ ; RUN: -disable-output %s 2>&1 | \ ; RUN: FileCheck -match-full-lines %s ; CHECK-NOT: remark: ; CHECK: remark: test.c:0:0: in artificial function '[[OFF_FUNC:__omp_offloading_[a-f0-9_]*_h_l12]]_debug__', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes ; CHECK-NEXT: remark: test.c:14:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'i' with static size of 4 bytes ; CHECK-NEXT: remark: test.c:15:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'a' with static size of 8 bytes ; CHECK-NEXT: remark: :0:0: in artificial function '[[OFF_FUNC]]_debug__', 'store' instruction accesses memory in flat address space ; CHECK-NEXT: remark: test.c:13:3: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is '@__kmpc_target_init' ; CHECK-NEXT: remark: test.c:16:5: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@f' ; CHECK-NEXT: remark: test.c:17:5: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is 'g' ; CHECK-NEXT: remark: test.c:18:3: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is '@__kmpc_target_deinit' ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', ExternalNotKernel = 0 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Allocas = 3 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasStaticSizeSum = 20 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasDyn = 0 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCalls = 4 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', IndirectCalls = 0 ; CHECK-NEXT: remark: 
test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCallsToDefinedFunctions = 3 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', InlineAssemblyCalls = 0 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Invokes = 0 ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', FlatAddrspaceAccesses = 1 ; CHECK-NEXT: remark: test.c:0:0: in artificial function '[[OFF_FUNC]]', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes ; CHECK-NEXT: remark: :0:0: in artificial function '[[OFF_FUNC]]', 'store' instruction accesses memory in flat address space ; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', 'load' instruction ('%[[#]]') accesses memory in flat address space ; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', direct call to defined function, callee is artificial '[[OFF_FUNC]]_debug__' ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', ExternalNotKernel = 0 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', omp_target_thread_limit = 128 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', maxntidx = 128 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Allocas = 1 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasStaticSizeSum = 8 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasDyn = 0 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCalls = 1 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', IndirectCalls = 0 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCallsToDefinedFunctions = 1 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', InlineAssemblyCalls = 0 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Invokes = 
0 ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', FlatAddrspaceAccesses = 2 ; CHECK-NEXT: remark: test.c:4:7: in function 'g', alloca ('%[[#]]') for 'i' with static size of 4 bytes ; CHECK-NEXT: remark: test.c:5:7: in function 'g', alloca ('%[[#]]') for 'a' with static size of 8 bytes ; CHECK-NEXT: remark: test.c:6:3: in function 'g', direct call, callee is '@f' ; CHECK-NEXT: remark: test.c:7:3: in function 'g', direct call to defined function, callee is 'g' ; CHECK-NEXT: remark: test.c:3:0: in function 'g', ExternalNotKernel = 1 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', Allocas = 2 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasStaticSizeSum = 12 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasDyn = 0 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCalls = 2 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', IndirectCalls = 0 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCallsToDefinedFunctions = 1 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', InlineAssemblyCalls = 0 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', Invokes = 0 ; CHECK-NEXT: remark: test.c:3:0: in function 'g', FlatAddrspaceAccesses = 0 ; CHECK-NOT: remark: {{.*: in function 'g',.*}} ; A lot of internal functions (e.g., __kmpc_target_init) come next, but we don't ; want to maintain a list of their allocas, calls, etc. in this test. 
; ModuleID = 'test-openmp-nvptx64-nvidia-cuda-sm_70.bc'
; Module-level declarations: source/target description, the named struct
; types, and the first group of globals referenced by the functions defined
; later in this file.
source_filename = "test.c"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; Named struct types used by the globals and functions below.
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.DynamicEnvironmentTy = type { i16 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.DeviceMemoryPoolTy = type { ptr, i64 }
%struct.DeviceMemoryPoolTrackingTy = type { i64, i64, i64, i64 }
%struct.DeviceEnvironmentTy = type { i32, i32, i32, i32, i64, i64, i64, i64 }
%"struct.rpc::Client" = type { %"struct.rpc::Process" }
%"struct.rpc::Process" = type { i32, ptr, ptr, ptr, ptr, [128 x i32] }
%"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] }
%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr }
%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32, i32 }

@__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
@__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
; @0 is a ';'-separated location string naming test.c and the _debug__
; function; @1 is an ident_t whose last field points at @0 and whose fourth
; field (57) equals the length of @0 without its trailing NUL.
@0 = private unnamed_addr constant [58 x i8] c";test.c;__omp_offloading_fd02_1116d6_h_l12_debug__;13;3;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 57, ptr @0 }, align 8
; Per-kernel globals. The kernel environment embeds a
; ConfigurationEnvironmentTy initializer and points at @1 and at the dynamic
; environment; the _debug__ function passes its address as the first argument
; of @__kmpc_target_init.
@__omp_offloading_fd02_1116d6_h_l12_dynamic_environment = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
@__omp_offloading_fd02_1116d6_h_l12_kernel_environment = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 128, i32 -1, i32 -1, i32 0, i32 0 }, ptr @1, ptr @__omp_offloading_fd02_1116d6_h_l12_dynamic_environment }
; Appending list (section "llvm.metadata") of four globals; the addrspace(4)
; device environment is cast to a generic pointer for the list.
@llvm.used = appending global [4 x ptr] [ptr @__llvm_rpc_client, ptr addrspacecast (ptr addrspace(4) @__omp_rtl_device_environment to ptr), ptr @__omp_rtl_device_memory_pool, ptr @__omp_rtl_device_memory_pool_tracker], section "llvm.metadata"
@__omp_rtl_device_memory_pool = weak protected global %struct.DeviceMemoryPoolTy zeroinitializer, align 8
@__omp_rtl_device_memory_pool_tracker = weak protected global %struct.DeviceMemoryPoolTrackingTy zeroinitializer, align 8
; @__omp_rtl_debug_kind is loaded by the runtime functions below, masked with
; 1, and ANDed with the first i32 of @__omp_rtl_device_environment; the result
; gates their calls to @__assert_fail_internal.
@__omp_rtl_debug_kind = weak_odr hidden constant i32 0
@__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
@__omp_rtl_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 8
; String constants for assertion reporting: two printf-style format strings
; (@.str, @.str1), source file paths, asserted-condition text, and
; __PRETTY_FUNCTION__ signatures. Each array length includes the trailing NUL.
@.str = private unnamed_addr constant [40 x i8] c"%s:%u: %s: Assertion %s (`%s`) failed.\0A\00", align 1
@.str1 = private unnamed_addr constant [35 x i8] c"%s:%u: %s: Assertion `%s` failed.\0A\00", align 1
@.str15 = private unnamed_addr constant [43 x i8] c"/tmp/llvm/offload/DeviceRTL/src/Kernel.cpp\00", align 1
@__PRETTY_FUNCTION__._ZL19genericStateMachineP7IdentTy = private unnamed_addr constant [36 x i8] c"void genericStateMachine(IdentTy *)\00", align 1
@.str2 = private unnamed_addr constant [18 x i8] c"WorkFn == nullptr\00", align 1
@__PRETTY_FUNCTION__.__kmpc_target_deinit = private unnamed_addr constant [28 x i8] c"void __kmpc_target_deinit()\00", align 1
; Written by @__kmpc_target_init (1 on one path, 0 on the other) and read back
; by the assertion checks in the runtime functions.
@IsSPMDMode = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
@__llvm_rpc_client = weak protected global %"struct.rpc::Client" zeroinitializer, align 8
@.str1027 = private unnamed_addr constant [48 x i8] c"/tmp/llvm/offload/DeviceRTL/src/Parallelism.cpp\00", align 1
@.str12 = private unnamed_addr constant [23 x i8] c"!mapping::isSPMDMode()\00", align 1
@__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel = private unnamed_addr constant [34 x i8] c"void __kmpc_kernel_end_parallel()\00", align 1
; @__kmpc_target_init stores its two pointer arguments into these two
; addrspace(3) globals.
@_ZL20KernelEnvironmentPtr = internal unnamed_addr addrspace(3) global ptr undef, align 8
@_ZL26KernelLaunchEnvironmentPtr = internal unnamed_addr addrspace(3) global ptr undef, align 8
; Second group of globals: runtime state objects in addrspace(3) and the
; strings used by the state assertions inside @__kmpc_target_init.

; Two-field byte-array object; @__kmpc_target_init stores zero bytes into its
; second field (byte offset 512), indexed by the thread id it reads.
@_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16
; Source path, asserted-condition text, and __PRETTY_FUNCTION__ signatures
; passed to @__assert_fail_internal from @__kmpc_target_init. Each array
; length includes the trailing NUL.
@.str444 = private unnamed_addr constant [42 x i8] c"/tmp/llvm/offload/DeviceRTL/src/State.cpp\00", align 1
@.str747 = private unnamed_addr constant [33 x i8] c"NThreadsVar == Other.NThreadsVar\00", align 1
@__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_ = private unnamed_addr constant [68 x i8] c"void ompx::state::ICVStateTy::assertEqual(const ICVStateTy &) const\00", align 1
@.str848 = private unnamed_addr constant [27 x i8] c"LevelVar == Other.LevelVar\00", align 1
@.str949 = private unnamed_addr constant [39 x i8] c"ActiveLevelVar == Other.ActiveLevelVar\00", align 1
@.str1050 = private unnamed_addr constant [47 x i8] c"MaxActiveLevelsVar == Other.MaxActiveLevelsVar\00", align 1
@.str1151 = private unnamed_addr constant [33 x i8] c"RunSchedVar == Other.RunSchedVar\00", align 1
@.str1252 = private unnamed_addr constant [43 x i8] c"RunSchedChunkVar == Other.RunSchedChunkVar\00", align 1
@.str13 = private unnamed_addr constant [43 x i8] c"ParallelTeamSize == Other.ParallelTeamSize\00", align 1
@__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_ = private unnamed_addr constant [64 x i8] c"void ompx::state::TeamStateTy::assertEqual(TeamStateTy &) const\00", align 1
@.str14 = private unnamed_addr constant [39 x i8] c"HasThreadState == Other.HasThreadState\00", align 1
@.str23 = private unnamed_addr constant [32 x i8] c"mapping::isSPMDMode() == IsSPMD\00", align 1
@__PRETTY_FUNCTION__._ZN4ompx5state18assumeInitialStateEb = private unnamed_addr constant [43 x i8] c"void ompx::state::assumeInitialState(bool)\00", align 1
; State objects that @__kmpc_target_init initializes: @_ZL9ThreadDST and
; @_ZN4ompx5state12ThreadStatesE receive null, and the team state receives a
; 16-byte memset followed by per-field stores.
@_ZL9ThreadDST = internal unnamed_addr addrspace(3) global ptr undef, align 8
@_ZN4ompx5state9TeamStateE = internal local_unnamed_addr addrspace(3) global %"struct.ompx::state::TeamStateTy" undef, align 8
@_ZN4ompx5state12ThreadStatesE = internal addrspace(3) global ptr
undef, align 8
; (The line above completes the @_ZN4ompx5state12ThreadStatesE global whose
; declaration begins on the preceding line.)

; Body of the offloaded region. It holds three static allocas (ptr, i32,
; [2 x i32]; 8 + 4 + 8 = 20 bytes) and four direct calls, which is what the
; expected remarks in the file header report for this function. The result of
; @__kmpc_target_init decides whether the region body runs: -1 branches to
; block 7, any other value returns immediately through block 8.
; Function Attrs: convergent noinline norecurse nounwind optnone
define internal void @__omp_offloading_fd02_1116d6_h_l12_debug__(ptr noalias noundef %0) #0 !dbg !18 {
  %2 = alloca ptr, align 8
  %3 = alloca i32, align 4
  %4 = alloca [2 x i32], align 4
  ; This store has no !dbg attachment; the expected remark for it is the one
  ; with an empty source location (":0:0").
  store ptr %0, ptr %2, align 8
    #dbg_declare(ptr %2, !25, !DIExpression(), !26)
  %5 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_1116d6_h_l12_kernel_environment, ptr %0), !dbg !27
  %6 = icmp eq i32 %5, -1, !dbg !27
  br i1 %6, label %7, label %8, !dbg !27

7:                                                ; preds = %1
    #dbg_declare(ptr %3, !28, !DIExpression(), !31)
    #dbg_declare(ptr %4, !32, !DIExpression(), !36)
  ; @f is only declared in the visible part of this file, while @g,
  ; @__kmpc_target_init and @__kmpc_target_deinit are reported as calls to
  ; defined functions in the expected remarks.
  call void @f() #19, !dbg !37
  call void @g() #19, !dbg !38
  call void @__kmpc_target_deinit(), !dbg !39
  ret void, !dbg !40

8:                                                ; preds = %1
  ret void, !dbg !27
}

; Kernel entry point (ptx_kernel calling convention). It spills its argument
; to a single 8-byte alloca, reloads it, and forwards it to the _debug__
; function above: one store and one load through generic pointers, one direct
; call.
; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
define weak_odr protected ptx_kernel void @__omp_offloading_fd02_1116d6_h_l12(ptr noalias noundef %0) #1 !dbg !41 {
  %2 = alloca ptr, align 8
  ; As in the _debug__ function, this store carries no !dbg attachment.
  store ptr %0, ptr %2, align 8
    #dbg_declare(ptr %2, !42, !DIExpression(), !43)
  %3 = load ptr, ptr %2, align 8, !dbg !44
  call void @__omp_offloading_fd02_1116d6_h_l12_debug__(ptr %3) #20, !dbg !44
  ret void, !dbg !44
}

; Variadic declaration of @f; its attribute-group reference follows on the
; next line of the file.
; Function Attrs: convergent
declare void @f(...)
#2
; (The attribute-group reference above completes the declaration of @f that
; begins on the preceding line.)

; Non-kernel function with hidden visibility. It holds two static allocas
; (i32 and [2 x i32]; 4 + 8 = 12 bytes) and two direct calls: one to the
; declared-only @f and one to @g itself. These are the figures listed for
; function 'g' in the expected remarks in the file header.
; Function Attrs: convergent noinline nounwind optnone
define hidden void @g() #3 !dbg !45 {
  %1 = alloca i32, align 4
  %2 = alloca [2 x i32], align 4
    #dbg_declare(ptr %1, !48, !DIExpression(), !49)
    #dbg_declare(ptr %2, !50, !DIExpression(), !51)
  call void @f() #19, !dbg !52
  ; Self-recursive call.
  call void @g() #19, !dbg !53
  ret void, !dbg !54
}

; Runtime initialization routine called from the _debug__ function.
;   %0 - pointer to the kernel environment (dereferenceable(48)).
;   %1 - second environment pointer (dereferenceable(16)); it is stored into
;        @_ZL26KernelLaunchEnvironmentPtr further down.
; Returns an i32 in [-1, 1024); the final phi of the function yields either -1
; or a value read from llvm.nvvm.read.ptx.sreg.tid.x.
; The definition continues past the end of this line of the file.
; Function Attrs: convergent mustprogress nounwind
define internal noundef range(i32 -1, 1024) i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(48) %0, ptr nofree noundef nonnull align 8 dereferenceable(16) %1) #4 {
  %3 = alloca ptr, align 8
  ; %7 is true when bit 0x2 of the byte at offset 2 of %0 is clear; %9 is true
  ; when the byte at offset 0 of %0 is nonzero.
  %4 = getelementptr inbounds nuw i8, ptr %0, i64 2
  %5 = load i8, ptr %4, align 2, !tbaa !55
  %6 = and i8 %5, 2
  %7 = icmp eq i8 %6, 0
  %8 = load i8, ptr %0, align 8, !tbaa !61
  %9 = icmp ne i8 %8, 0
  ; Bit set -> block 10 (which stores 1 to @IsSPMDMode); bit clear -> block 21.
  br i1 %7, label %21, label %10

10:                                               ; preds = %2
  %11 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  %12 = icmp eq i32 %11, 0
  ; Only the thread whose tid.x is 0 runs the state initialization in block 13.
  br i1 %12, label %13, label %14

13:                                               ; preds = %10
  store i32 1, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62
  store i8 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512) to ptr addrspace(3)), align 1, !tbaa !63
  ; Zero the first 16 bytes of the team state, then store the remaining
  ; fields one at a time at fixed byte offsets.
  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(48) addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i8 noundef 0, i64 noundef 16, i1 noundef false)
  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !64
  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !69
  store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3)
@_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !70 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71 store i32 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72 store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !73 store ptr null, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 store ptr %0, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76 store ptr %1, ptr addrspace(3) @_ZL26KernelLaunchEnvironmentPtr, align 8, !tbaa !78 br label %18 14: ; preds = %10 %15 = zext nneg i32 %11 to i64 %16 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %15 %17 = addrspacecast ptr %16 to ptr addrspace(3) store i8 0, ptr addrspace(3) %17, align 1, !tbaa !63 br label %18 18: ; preds = %14, %13 br i1 %12, label %19, label %20 19: ; preds = %18 store ptr null, ptr addrspace(3) @_ZL9ThreadDST, align 8, !tbaa !80 br label %20 20: ; preds = %18, %19 tail call void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 poison) #21 br label %37 21: ; preds = %2 %22 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 %23 = add nsw i32 %22, -1 %24 = and i32 %23, -32 %25 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() %26 = icmp eq i32 %25, %24 br i1 %26, label %27, label %31 27: ; preds = %21 store i32 0, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 %28 = zext nneg i32 %25 to i64 %29 = 
getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %28 %30 = addrspacecast ptr %29 to ptr addrspace(3) store i8 0, ptr addrspace(3) %30, align 1, !tbaa !63 tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(48) addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i8 noundef 0, i64 noundef 16, i1 noundef false) store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !64 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !69 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !70 store i32 1, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71 store i32 0, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72 store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !73 store ptr null, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 store ptr %0, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76 store ptr %1, ptr addrspace(3) @_ZL26KernelLaunchEnvironmentPtr, align 8, !tbaa !78 br label %35 31: ; preds = %21 %32 = zext nneg 
i32 %25 to i64 %33 = getelementptr inbounds nuw [1024 x i8], ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE to ptr), i64 512), i64 0, i64 %32 %34 = addrspacecast ptr %33 to ptr addrspace(3) store i8 0, ptr addrspace(3) %34, align 1, !tbaa !63 br label %35 35: ; preds = %31, %27 br i1 %26, label %36, label %37 36: ; preds = %35 store ptr null, ptr addrspace(3) @_ZL9ThreadDST, align 8, !tbaa !80 br label %37 37: ; preds = %36, %35, %20 br i1 %7, label %100, label %38 38: ; preds = %37 %39 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62 %40 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83 %41 = and i32 %39, 1 %42 = and i32 %41, %40 %43 = icmp ne i32 %42, 0 %44 = load i32, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 8, !tbaa !86 %45 = icmp ne i32 %44, 0 %46 = select i1 %43, i1 %45, i1 false br i1 %46, label %47, label %48 47: ; preds = %38 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(33) @.str747, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 193, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 unreachable 48: ; preds = %38 %49 = icmp eq i32 %44, 0 tail call void @llvm.assume(i1 noundef %49) #23 %50 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 4) to ptr addrspace(3)), align 4, !tbaa !87 br i1 %43, label %51, label %54 51: ; preds = %48 %52 = icmp eq i32 %50, 0 br i1 %52, label %54, label %53 53: ; preds = %51 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(27) @.str848, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 194, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 unreachable 54: 
; preds = %51, %48 %55 = phi i32 [ 0, %51 ], [ %50, %48 ] %56 = icmp eq i32 %55, 0 tail call void @llvm.assume(i1 noundef %56) #23 %57 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 8) to ptr addrspace(3)), align 8, !tbaa !88 br i1 %43, label %58, label %61 58: ; preds = %54 %59 = icmp eq i32 %57, 0 br i1 %59, label %61, label %60 60: ; preds = %58 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(39) @.str949, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 195, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 unreachable 61: ; preds = %58, %54 %62 = phi i32 [ 0, %58 ], [ %57, %54 ] %63 = icmp eq i32 %62, 0 tail call void @llvm.assume(i1 noundef %63) #23 %64 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 16) to ptr addrspace(3)), align 8, !tbaa !89 br i1 %43, label %65, label %68 65: ; preds = %61 %66 = icmp eq i32 %64, 1 br i1 %66, label %68, label %67 67: ; preds = %65 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(47) @.str1050, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 196, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 unreachable 68: ; preds = %65, %61 %69 = phi i32 [ 1, %65 ], [ %64, %61 ] %70 = icmp eq i32 %69, 1 tail call void @llvm.assume(i1 noundef %70) #23 %71 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 20) to ptr addrspace(3)), align 4, !tbaa !90 br i1 %43, label %72, label %93 72: ; preds = %68 %73 = icmp eq i32 %71, 1 br i1 %73, label %75, label %74 74: ; preds = 
%72 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(33) @.str1151, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 197, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 unreachable 75: ; preds = %72 %76 = icmp eq i32 1, 1 tail call void @llvm.assume(i1 noundef %76) #23 br i1 %43, label %77, label %95 77: ; preds = %75 %78 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 24) to ptr addrspace(3)), align 8, !tbaa !91 %79 = icmp eq i32 %78, 1 br i1 %79, label %81, label %80 80: ; preds = %77 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(43) @.str1252, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 198, ptr noundef nonnull dereferenceable(68) @__PRETTY_FUNCTION__._ZNK4ompx5state10ICVStateTy11assertEqualERKS1_) #22 unreachable 81: ; preds = %77 %82 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !71 %83 = icmp eq i32 %82, 1 br i1 %83, label %85, label %84 84: ; preds = %81 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(43) @.str13, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 222, ptr noundef nonnull dereferenceable(64) @__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_) #22 unreachable 85: ; preds = %81 %86 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8, !tbaa !72 %87 = icmp eq i32 %86, 0 br i1 %87, label %89, label %88 88: ; preds = %85 tail call fastcc void @__assert_fail_internal(ptr noundef 
nonnull dereferenceable(39) @.str14, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 223, ptr noundef nonnull dereferenceable(64) @__PRETTY_FUNCTION__._ZNK4ompx5state11TeamStateTy11assertEqualERS1_) #22 unreachable 89: ; preds = %85 %90 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 %91 = icmp eq i32 %90, 0 br i1 %91, label %92, label %98 92: ; preds = %89 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(32) @.str23, ptr noundef null, ptr noundef nonnull dereferenceable(66) @.str444, i32 noundef 326, ptr noundef nonnull dereferenceable(43) @__PRETTY_FUNCTION__._ZN4ompx5state18assumeInitialStateEb) #22 unreachable 93: ; preds = %68 %94 = icmp eq i32 %71, 1 tail call void @llvm.assume(i1 noundef %94) #23 br label %95 95: ; preds = %75, %93 %96 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 %97 = icmp ne i32 %96, 0 br label %98 98: ; preds = %89, %95 %99 = phi i1 [ %97, %95 ], [ true, %89 ] tail call void @llvm.assume(i1 noundef %99) #23 tail call void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 poison) #21 br label %130 100: ; preds = %37 %101 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 %102 = add nsw i32 %101, -1 %103 = and i32 %102, -32 %104 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !92 %105 = icmp eq i32 %104, %103 br i1 %105, label %130, label %106 106: ; preds = %100 %107 = add nsw i32 %101, -32 %108 = icmp ult i32 %104, %107 %109 = select i1 %9, i1 %108, i1 false br i1 %109, label %110, label %130 110: ; preds = %106 %111 = load i32, ptr @__omp_rtl_debug_kind, align 4 %112 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8 %113 = and i32 %111, 1 %114 = and i32 %113, %112 %115 = icmp ne i32 %114, 0 br label %116 116: ; preds = %110, %128 call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %3) #20 tail call void 
@llvm.nvvm.barrier.sync(i32 noundef 8) %117 = call zeroext i1 @__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %3) #20 %118 = load ptr, ptr %3, align 8, !tbaa !93 %119 = icmp eq ptr %118, null br i1 %119, label %129, label %120 120: ; preds = %116 br i1 %117, label %121, label %128 121: ; preds = %120 %122 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 %123 = icmp ne i32 %122, 0 %124 = select i1 %115, i1 %123, i1 false br i1 %124, label %125, label %126 125: ; preds = %121 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(67) @.str15, i32 noundef 60, ptr noundef nonnull dereferenceable(36) @__PRETTY_FUNCTION__._ZL19genericStateMachineP7IdentTy) #22 unreachable 126: ; preds = %121 %127 = icmp eq i32 %122, 0 tail call void @llvm.assume(i1 noundef %127) #23 tail call void %118(i32 noundef 0, i32 noundef %104) #24 tail call void @__kmpc_kernel_end_parallel() #24 br label %128 128: ; preds = %126, %120 tail call void @llvm.nvvm.barrier.sync(i32 noundef 8) call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20 br label %116, !llvm.loop !94 129: ; preds = %116 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %3) #20 br label %130 130: ; preds = %106, %129, %100, %98 %131 = phi i32 [ -1, %98 ], [ -1, %100 ], [ %104, %129 ], [ %104, %106 ] ret i32 %131 } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #5 ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #6 ; Function Attrs: convergent mustprogress noinline norecurse nounwind define internal void @_ZN4ompx11synchronize14threadsAlignedENS_6atomic10OrderingTyE(i32 %0) local_unnamed_addr #7 { tail call void @llvm.nvvm.barrier0() #25 ret 
; ---------------------------------------------------------------------------
; This span starts with the closing 'void }' of the previous define.
;
; @__assert_fail_internal(%0, %1, %2, %3, %4) is the cold, noreturn failure
; path:
;   * %1 == null -> @_ZN4ompx6printfEPKcz(@.str1, %2, %3, %4, %0)
;   * %1 != null -> @_ZN4ompx6printfEPKcz(@.str,  %2, %3, %4, %1, %0)
;   * both paths join in block 11, which calls @llvm.trap() and is followed by
;     'unreachable'.
; The format strings @.str and @.str1 are defined outside this chunk.
;
; Declarations in this span: llvm.nvvm.read.ptx.sreg.ntid.x, llvm.assume,
; llvm.lifetime.start.p0, llvm.nvvm.barrier.sync.
;
; The header of @__kmpc_kernel_parallel(ptr %0) -> i1 begins at the end of
; this span; its first instruction wraps onto the following line.
; ---------------------------------------------------------------------------
void } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare noundef i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5 ; Function Attrs: cold convergent mustprogress noreturn nounwind define internal fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(8) %0, ptr noundef %1, ptr noundef nonnull dereferenceable(66) %2, i32 noundef range(i32 60, 905) %3, ptr noundef nonnull dereferenceable(20) %4) unnamed_addr #8 { %6 = icmp eq ptr %1, null br i1 %6, label %9, label %7 7: ; preds = %5 %8 = tail call noundef i32 (ptr, ...) @_ZN4ompx6printfEPKcz(ptr noundef nonnull dereferenceable(40) @.str, ptr noundef nonnull dereferenceable(66) %2, i32 noundef %3, ptr noundef nonnull dereferenceable(20) %4, ptr noundef nonnull %1, ptr noundef nonnull dereferenceable(8) %0) #24 br label %11 9: ; preds = %5 %10 = tail call noundef i32 (ptr, ...) @_ZN4ompx6printfEPKcz(ptr noundef nonnull dereferenceable(35) @.str1, ptr noundef nonnull dereferenceable(66) %2, i32 noundef %3, ptr noundef nonnull dereferenceable(20) %4, ptr noundef nonnull dereferenceable(8) %0) #24 br label %11 11: ; preds = %9, %7 tail call void @llvm.trap() #26 unreachable } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) declare void @llvm.assume(i1 noundef) #9 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #10 ; Function Attrs: convergent nocallback nounwind declare void @llvm.nvvm.barrier.sync(i32) #11 ; Function Attrs: convergent mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: write, inaccessiblemem: none) define internal noundef zeroext i1 @__kmpc_kernel_parallel(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) initializes((0, 8)) %0) local_unnamed_addr #12 { %2 = load ptr, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr 
; ---------------------------------------------------------------------------
; Continuation of @__kmpc_kernel_parallel(ptr %0) -> i1:
;   * %2 is the pointer loaded from @_ZN4ompx5state9TeamStateE + 40
;     (addrspace 3); it is stored to *%0 unconditionally.
;   * %2 == null -> the function returns false.
;   * Otherwise it returns (tid.x <u %13), where %13 is the i32 at
;     TeamState + 28 if that value is non-zero, else
;     ntid.x + (-32 when @IsSPMDMode == 0, 0 otherwise).
;
; @__kmpc_kernel_end_parallel() starts in this span:
;   * %5 = ((@__omp_rtl_debug_kind & 1) & i32 at
;     @__omp_rtl_device_environment) != 0.
;   * If %5 and @IsSPMDMode != 0, block 9 calls @__assert_fail_internal with
;     i32 299 and never returns.
;   * Block 10 assumes @IsSPMDMode == 0 and loads
;     @__omp_rtl_assume_no_thread_state; the function continues on the
;     following line.
; ---------------------------------------------------------------------------
addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !93 store ptr %2, ptr %0, align 8, !tbaa !93 %3 = icmp eq ptr %2, null br i1 %3, label %15, label %4 4: ; preds = %1 %5 = tail call noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #27, !range !92 %6 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 28) to ptr addrspace(3)), align 4, !tbaa !62 %7 = icmp eq i32 %6, 0 %8 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 %9 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 %10 = icmp eq i32 %9, 0 %11 = select i1 %10, i32 -32, i32 0 %12 = add nsw i32 %11, %8 %13 = select i1 %7, i32 %12, i32 %6 %14 = icmp ult i32 %5, %13 br label %15 15: ; preds = %4, %1 %16 = phi i1 [ %14, %4 ], [ false, %1 ] ret i1 %16 } ; Function Attrs: convergent mustprogress noinline nounwind define internal void @__kmpc_kernel_end_parallel() local_unnamed_addr #13 { %1 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62 %2 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83 %3 = and i32 %1, 1 %4 = and i32 %3, %2 %5 = icmp ne i32 %4, 0 %6 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 %7 = icmp ne i32 %6, 0 %8 = select i1 %5, i1 %7, i1 false br i1 %8, label %9, label %10 9: ; preds = %0 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(72) @.str1027, i32 noundef 299, ptr noundef nonnull dereferenceable(34) @__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel) #22 unreachable 10: ; preds = %0 %11 = icmp eq i32 %6, 0 tail call void @llvm.assume(i1 noundef %11) #23 %12 = load i32, ptr @__omp_rtl_assume_no_thread_state, align 4, !tbaa !62 %13 = icmp eq i32 %12, 0 %14 = load i32, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast 
; ---------------------------------------------------------------------------
; Rest of @__kmpc_kernel_end_parallel:
;   * %16 = (@__omp_rtl_assume_no_thread_state == 0) &&
;           (i32 at @_ZN4ompx5state9TeamStateE + 32 != 0).
;     When false, control goes straight to block 30.
;   * Block 17 loads entry [tid.x] of the pointer table read from
;     @_ZN4ompx5state12ThreadStatesE. A null entry also goes to block 30;
;     !prof !98 weights that edge 2000:1.
;   * Block 24 loads the pointer stored at offset 32 of the entry, passes the
;     entry to @free, stores the loaded pointer back into the same table
;     slot, and reloads @IsSPMDMode.
;   * Block 30 repeats the debug-only @IsSPMDMode check; the failure path
;     passes i32 302 to @__assert_fail_internal.
;
; @_ZN4ompx6printfEPKcz(ptr %0, ...): runs va_start on a local ptr slot,
; passes the loaded value together with %0 to @vprintf, and returns
; @vprintf's result.
;
; @__kmpc_target_deinit() starts in this span:
;   * It returns immediately when @IsSPMDMode != 0.
;   * The thread with tid.x == ((ntid.x - 1) & -32) stores null to
;     TeamState + 40 and returns.
;   * Other threads load the first byte behind @_ZL20KernelEnvironmentPtr and
;     continue in block 15 only if it is zero. Block 15 continues on the
;     following line.
;
; Declarations in this span: llvm.lifetime.end.p0, free (extern_weak),
; llvm.trap, llvm.va_start.p0, vprintf, llvm.nvvm.barrier0.
; ---------------------------------------------------------------------------
(ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 32) to ptr addrspace(3)), align 8 %15 = icmp ne i32 %14, 0 %16 = select i1 %13, i1 %15, i1 false br i1 %16, label %17, label %30 17: ; preds = %10 %18 = tail call noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x() #27, !range !92 %19 = load ptr, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 %20 = zext nneg i32 %18 to i64 %21 = getelementptr inbounds nuw ptr, ptr %19, i64 %20 %22 = load ptr, ptr %21, align 8, !tbaa !96 %23 = icmp eq ptr %22, null br i1 %23, label %30, label %24, !prof !98 24: ; preds = %17 %25 = getelementptr inbounds nuw i8, ptr %22, i64 32 %26 = load ptr, ptr %25, align 8, !tbaa !99 tail call void @free(ptr noundef nonnull dereferenceable(40) %22) #28 %27 = load ptr, ptr addrspace(3) @_ZN4ompx5state12ThreadStatesE, align 8, !tbaa !74 %28 = getelementptr inbounds nuw ptr, ptr %27, i64 %20 store ptr %26, ptr %28, align 8, !tbaa !96 %29 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 br label %30 30: ; preds = %10, %17, %24 %31 = phi i32 [ 0, %10 ], [ 0, %17 ], [ %29, %24 ] %32 = icmp ne i32 %31, 0 %33 = select i1 %5, i1 %32, i1 false br i1 %33, label %34, label %35 34: ; preds = %30 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(23) @.str12, ptr noundef null, ptr noundef nonnull dereferenceable(72) @.str1027, i32 noundef 302, ptr noundef nonnull dereferenceable(34) @__PRETTY_FUNCTION__.__kmpc_kernel_end_parallel) #22 unreachable 35: ; preds = %30 %36 = icmp eq i32 %31, 0 tail call void @llvm.assume(i1 noundef %36) #23 ret void } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #10 ; Function Attrs: convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) declare extern_weak void @free(ptr allocptr nocapture noundef) local_unnamed_addr #14 ; Function Attrs: 
convergent mustprogress nounwind define internal noundef i32 @_ZN4ompx6printfEPKcz(ptr noundef %0, ...) local_unnamed_addr #15 { %2 = alloca ptr, align 8 call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 %2) #29 call void @llvm.va_start.p0(ptr noundef nonnull align 8 %2) #27 %3 = load ptr, ptr %2, align 8, !tbaa !101 %4 = call i32 @vprintf(ptr noundef %0, ptr noundef %3) #24 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %2) #20 ret i32 %4 } ; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write) declare void @llvm.trap() #16 ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.va_start.p0(ptr) #17 ; Function Attrs: convergent nounwind declare i32 @vprintf(ptr noundef, ptr noundef) local_unnamed_addr #18 ; Function Attrs: convergent nocallback nounwind declare void @llvm.nvvm.barrier0() #11 ; Function Attrs: convergent mustprogress nounwind define internal void @__kmpc_target_deinit() #4 { %1 = alloca ptr, align 8 %2 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !62 %3 = icmp eq i32 %2, 0 br i1 %3, label %4, label %27 4: ; preds = %0 %5 = tail call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range !82 %6 = add nsw i32 %5, -1 %7 = and i32 %6, -32 %8 = tail call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !92 %9 = icmp eq i32 %8, %7 br i1 %9, label %10, label %11 10: ; preds = %4 store ptr null, ptr addrspace(3) addrspacecast (ptr getelementptr inbounds nuw (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 40) to ptr addrspace(3)), align 8, !tbaa !93 br label %27 11: ; preds = %4 %12 = load ptr, ptr addrspace(3) @_ZL20KernelEnvironmentPtr, align 8, !tbaa !76 %13 = load i8, ptr %12, align 8, !tbaa !103 %14 = icmp eq i8 %13, 0 br i1 %14, label %15, label %27 15: ; preds = %11 call void @llvm.lifetime.start.p0(i64 noundef 8, ptr noundef nonnull align 8 dereferenceable(8) %1) #29 %16 = call zeroext i1 
; ---------------------------------------------------------------------------
; Rest of @__kmpc_target_deinit (block 15 onwards):
;   * Calls @__kmpc_kernel_parallel on the local slot %1; the i1 result %16 is
;     not used afterwards.
;   * %20 = (@__omp_rtl_debug_kind & 1) & i32 at @__omp_rtl_device_environment.
;   * If %20 != 0 and the pointer read back from %1 is non-null, block 25
;     calls @__assert_fail_internal with @.str2 and i32 152.
;   * Otherwise block 26 assumes the pointer is null, ends the lifetime of %1
;     and falls through to the common 'ret void' in block 27.
;
; Attribute groups #0 to #4 follow (the start of #5 wraps onto the next
; line):
;   * All five carry "target-cpu"="sm_70".
;   * #0 to #3 use "target-features"="+ptx83,+sm_70"; #4 additionally lists
;     +ptx63.
;   * #1 is the only group with "kernel" and "omp_target_thread_limit"="128".
;   * #0, #1 and #3 include optnone/noinline; #2 and #4 do not.
; ---------------------------------------------------------------------------
@__kmpc_kernel_parallel(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) %1) #20 %17 = load i32, ptr @__omp_rtl_debug_kind, align 4, !tbaa !62 %18 = load i32, ptr addrspace(4) @__omp_rtl_device_environment, align 8, !tbaa !83 %19 = and i32 %17, 1 %20 = and i32 %19, %18 %21 = icmp eq i32 %20, 0 %22 = load ptr, ptr %1, align 8 %23 = icmp eq ptr %22, null %24 = select i1 %21, i1 true, i1 %23 br i1 %24, label %26, label %25 25: ; preds = %15 tail call fastcc void @__assert_fail_internal(ptr noundef nonnull dereferenceable(18) @.str2, ptr noundef null, ptr noundef nonnull dereferenceable(67) @.str15, i32 noundef 152, ptr noundef nonnull dereferenceable(28) @__PRETTY_FUNCTION__.__kmpc_target_deinit) #22 unreachable 26: ; preds = %15 tail call void @llvm.assume(i1 noundef %23) #23 call void @llvm.lifetime.end.p0(i64 noundef 8, ptr noundef nonnull %1) #20 br label %27 27: ; preds = %26, %11, %10, %0 ret void } attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="128" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } attributes #3 = { convergent noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx83,+sm_70" } attributes #4 = { convergent mustprogress nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #5 = { nocallback 
; ---------------------------------------------------------------------------
; Attribute groups #5 to #17. The tail of #5 and the head of #17 wrap across
; the line boundaries of this span.
;   * #5, #6, #9, #10, #11, #16 and #17 carry no string attributes. In this
;     file they are attached to the llvm.* intrinsic declarations (tid.x and
;     ntid.x, memset, assume, lifetime.start/end, barrier.sync and barrier0,
;     trap, va_start).
;   * #7, #8, #12, #13, #14 and #15 belong to functions defined or declared
;     above. They all use "target-cpu"="sm_70" and
;     "target-features"="+ptx63,+ptx83,+sm_70".
;   * #7 is the only group here with "llvm.assume"="ompx_aligned_barrier".
;   * #14 is the only one with "alloc-family"="malloc".
;   * #15 is the only one with "no-builtin-printf".
; ---------------------------------------------------------------------------
nofree nosync nounwind speculatable willreturn memory(none) } attributes #6 = { nocallback nofree nounwind willreturn memory(argmem: write) } attributes #7 = { convergent mustprogress noinline norecurse nounwind "frame-pointer"="all" "llvm.assume"="ompx_aligned_barrier" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #8 = { cold convergent mustprogress noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #9 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } attributes #10 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #11 = { convergent nocallback nounwind } attributes #12 = { convergent mustprogress nofree noinline norecurse nosync nounwind willreturn memory(read, argmem: write, inaccessiblemem: none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #13 = { convergent mustprogress noinline nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #14 = { convergent mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #15 = { convergent mustprogress nounwind "frame-pointer"="all" "no-builtin-printf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #16 = { cold noreturn nounwind memory(inaccessiblemem: write) } attributes #17 = { nocallback nofree nosync nounwind 
; ---------------------------------------------------------------------------
; End of the attribute groups, then module-level metadata.
;   * #18 is the attribute group on the @vprintf declaration. #20 to #29 carry
;     no target strings and appear on call sites in the functions above.
;   * Named metadata: llvm.module.flags (!0 to !10), llvm.dbg.cu (!11),
;     nvvm.annotations (!13), omp_offload.info (!14), llvm.ident (!15 and
;     !16), nvvmir.version (!17).
;   * !13 attaches "maxntidx" = 128 to @__omp_offloading_fd02_1116d6_h_l12.
;   * !14 is the offload entry for "h" at line 12; 64770 and 1119958 are the
;     decimal forms of the fd02 and 1116d6 fields in that function's name.
;   * !18 and !41 are the DISubprograms (both DIFlagArtificial) for the
;     "_debug__" function (line 13) and the kernel entry (line 12).
;   * !25 to !40 describe the locals 'dyn_ptr', 'i' and 'a' and the source
;     locations used inside the "_debug__" function; !42 to !44 do the same
;     for the kernel entry.
;   * !45, the DISubprogram for "g", begins here and wraps onto the next
;     line.
; NOTE(review): the remark locations checked at the top of the file
; presumably derive from these DILocation/DISubprogram nodes; re-verify the
; CHECK lines if any line or column here is changed.
; ---------------------------------------------------------------------------
willreturn } attributes #18 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx63,+ptx83,+sm_70" } attributes #19 = { convergent } attributes #20 = { nounwind } attributes #21 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier" } attributes #22 = { convergent noreturn nounwind } attributes #23 = { memory(write) } attributes #24 = { convergent nounwind } attributes #25 = { "llvm.assume"="ompx_aligned_barrier" } attributes #26 = { noreturn } attributes #27 = { nofree willreturn } attributes #28 = { convergent nounwind willreturn } attributes #29 = { nofree nounwind willreturn } !llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10} !llvm.dbg.cu = !{!11} !nvvm.annotations = !{!13} !omp_offload.info = !{!14} !llvm.ident = !{!15, !16, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15, !15} !nvvmir.version = !{!17} !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 12, i32 3]} !1 = !{i32 7, !"Dwarf Version", i32 2} !2 = !{i32 2, !"Debug Info Version", i32 3} !3 = !{i32 1, !"wchar_size", i32 4} !4 = !{i32 4, !"nvvm-reflect-ftz", i32 0} !5 = !{i32 7, !"openmp", i32 51} !6 = !{i32 7, !"openmp-device", i32 51} !7 = !{i32 8, !"PIC Level", i32 2} !8 = !{i32 7, !"frame-pointer", i32 2} !9 = !{i32 1, !"ThinLTO", i32 0} !10 = !{i32 1, !"EnableSplitLTOUnit", i32 1} !11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !12, producer: "clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) !12 = !DIFile(filename: "test.c", directory: "/tmp") !13 = !{ptr @__omp_offloading_fd02_1116d6_h_l12, !"maxntidx", i32 128} !14 = !{i32 0, i32 64770, i32 1119958, !"h", i32 12, i32 0, i32 0} !15 = !{!"clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)"} !16 = !{!"clang version 3.8.0 
(tags/RELEASE_380/final)"} !17 = !{i32 2, i32 0} !18 = distinct !DISubprogram(name: "__omp_offloading_fd02_1116d6_h_l12_debug__", scope: !12, file: !12, line: 13, type: !19, scopeLine: 13, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !11, retainedNodes: !24) !19 = !DISubroutineType(types: !20) !20 = !{null, !21} !21 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !22) !22 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !23) !23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) !24 = !{} !25 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !18, type: !21, flags: DIFlagArtificial) !26 = !DILocation(line: 0, scope: !18) !27 = !DILocation(line: 13, column: 3, scope: !18) !28 = !DILocalVariable(name: "i", scope: !29, file: !12, line: 14, type: !30) !29 = distinct !DILexicalBlock(scope: !18, file: !12, line: 13, column: 3) !30 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !31 = !DILocation(line: 14, column: 9, scope: !29) !32 = !DILocalVariable(name: "a", scope: !29, file: !12, line: 15, type: !33) !33 = !DICompositeType(tag: DW_TAG_array_type, baseType: !30, size: 64, elements: !34) !34 = !{!35} !35 = !DISubrange(count: 2) !36 = !DILocation(line: 15, column: 9, scope: !29) !37 = !DILocation(line: 16, column: 5, scope: !29) !38 = !DILocation(line: 17, column: 5, scope: !29) !39 = !DILocation(line: 18, column: 3, scope: !29) !40 = !DILocation(line: 18, column: 3, scope: !18) !41 = distinct !DISubprogram(name: "__omp_offloading_fd02_1116d6_h_l12", scope: !12, file: !12, line: 12, type: !19, scopeLine: 12, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !11, retainedNodes: !24) !42 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !41, type: !21, flags: DIFlagArtificial) !43 = !DILocation(line: 0, scope: !41) !44 = !DILocation(line: 12, column: 1, scope: !41) !45 = distinct !DISubprogram(name: "g", scope: !12, file: 
; ---------------------------------------------------------------------------
; Debug-info tail for "g", then type-based alias analysis (TBAA) metadata.
;   * The first tokens complete !45, the DISubprogram for "g" (line 3, not
;     artificial). !46 to !54 are its subroutine type, its locals 'i' and 'a',
;     and the source locations on lines 4 to 8.
;   * !57 ("omnipotent char") and !58 ("Simple C++ TBAA") are the base nodes
;     of the TBAA tree. !60 ("int"), !68 ("any pointer") and !85 ("long") are
;     scalar type nodes built on !57.
;   * !56, !65, !66 and !84 are struct type descriptors given as
;     (member type, byte offset) pairs, for ConfigurationEnvironmentTy,
;     ompx::state::TeamStateTy, ompx::state::ICVStateTy and
;     DeviceEnvironmentTy.
;   * !62, !74, !76 and !83 are among the access tags referenced by the
;     '!tbaa' attachments on loads and stores in the functions above.
;   * !82 = {1, 1025} is the '!range' attached to the ntid.x reads.
; ---------------------------------------------------------------------------
!12, line: 3, type: !46, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !11, retainedNodes: !24) !46 = !DISubroutineType(types: !47) !47 = !{null} !48 = !DILocalVariable(name: "i", scope: !45, file: !12, line: 4, type: !30) !49 = !DILocation(line: 4, column: 7, scope: !45) !50 = !DILocalVariable(name: "a", scope: !45, file: !12, line: 5, type: !33) !51 = !DILocation(line: 5, column: 7, scope: !45) !52 = !DILocation(line: 6, column: 3, scope: !45) !53 = !DILocation(line: 7, column: 3, scope: !45) !54 = !DILocation(line: 8, column: 1, scope: !45) !55 = !{!56, !59, i64 2} !56 = !{!"_ZTS26ConfigurationEnvironmentTy", !57, i64 0, !57, i64 1, !59, i64 2, !60, i64 4, !60, i64 8, !60, i64 12, !60, i64 16, !60, i64 20, !60, i64 24} !57 = !{!"omnipotent char", !58, i64 0} !58 = !{!"Simple C++ TBAA"} !59 = !{!"_ZTSN4llvm3omp19OMPTgtExecModeFlagsE", !57, i64 0} !60 = !{!"int", !57, i64 0} !61 = !{!56, !57, i64 0} !62 = !{!60, !60, i64 0} !63 = !{!57, !57, i64 0} !64 = !{!65, !60, i64 16} !65 = !{!"_ZTSN4ompx5state11TeamStateTyE", !66, i64 0, !60, i64 28, !60, i64 32, !67, i64 40} !66 = !{!"_ZTSN4ompx5state10ICVStateTyE", !60, i64 0, !60, i64 4, !60, i64 8, !60, i64 12, !60, i64 16, !60, i64 20, !60, i64 24} !67 = !{!"p1 void", !68, i64 0} !68 = !{!"any pointer", !57, i64 0} !69 = !{!65, !60, i64 20} !70 = !{!65, !60, i64 24} !71 = !{!65, !60, i64 28} !72 = !{!65, !60, i64 32} !73 = !{!65, !67, i64 40} !74 = !{!75, !75, i64 0} !75 = !{!"p2 _ZTSN4ompx5state13ThreadStateTyE", !68, i64 0} !76 = !{!77, !77, i64 0} !77 = !{!"p1 _ZTS19KernelEnvironmentTy", !68, i64 0} !78 = !{!79, !79, i64 0} !79 = !{!"p1 _ZTS25KernelLaunchEnvironmentTy", !68, i64 0} !80 = !{!81, !81, i64 0} !81 = !{!"p2 _ZTS22DynamicScheduleTracker", !68, i64 0} !82 = !{i32 1, i32 1025} !83 = !{!84, !60, i64 0} !84 = !{!"_ZTS19DeviceEnvironmentTy", !60, i64 0, !60, i64 4, !60, i64 8, !60, i64 12, !85, i64 16, !85, i64 24, !85, i64 32, !85, i64 40} !85 = !{!"long", !57, i64 0} !86 = !{!66, !60, i64 0} !87 = !{!66, 
; ---------------------------------------------------------------------------
; Remaining metadata nodes (!87 is completed by the first tokens):
;   * !87 to !91 are TBAA access tags for i32 members of ICVStateTy (!66) at
;     offsets 4, 8, 16, 20 and 24.
;   * !92 = {0, 1024} is the '!range' attached to the tid.x reads.
;   * !93, !96, !99, !101 and !103 are TBAA access tags used by pointer and
;     byte loads/stores above. !97, !100, !102, !104, !105 and !106 are the
;     type descriptors they refer to.
;   * !94 and !95 are the loop metadata ("llvm.loop.mustprogress") on the
;     back-edge branch 'br label %116, !llvm.loop !94'.
;   * !98 holds the branch weights (2000 : 1, marked "expected") on the null
;     check in block 17 of @__kmpc_kernel_end_parallel.
; ---------------------------------------------------------------------------
!60, i64 4} !88 = !{!66, !60, i64 8} !89 = !{!66, !60, i64 16} !90 = !{!66, !60, i64 20} !91 = !{!66, !60, i64 24} !92 = !{i32 0, i32 1024} !93 = !{!67, !67, i64 0} !94 = distinct !{!94, !95} !95 = !{!"llvm.loop.mustprogress"} !96 = !{!97, !97, i64 0} !97 = !{!"p1 _ZTSN4ompx5state13ThreadStateTyE", !68, i64 0} !98 = !{!"branch_weights", !"expected", i32 2000, i32 1} !99 = !{!100, !97, i64 32} !100 = !{!"_ZTSN4ompx5state13ThreadStateTyE", !66, i64 0, !97, i64 32} !101 = !{!102, !102, i64 0} !102 = !{!"p1 omnipotent char", !68, i64 0} !103 = !{!104, !57, i64 0} !104 = !{!"_ZTS19KernelEnvironmentTy", !56, i64 0, !105, i64 32, !106, i64 40} !105 = !{!"p1 _ZTS7IdentTy", !68, i64 0} !106 = !{!"p1 _ZTS20DynamicEnvironmentTy", !68, i64 0}