diff options
| author | Joseph Huber <jhuber6@vols.utk.edu> | 2021-10-04 17:42:46 -0400 |
|---|---|---|
| committer | Joseph Huber <jhuber6@vols.utk.edu> | 2021-11-09 17:39:21 -0500 |
| commit | e52937eba022c78545d7783693f0fb0e56b8cd35 (patch) | |
| tree | 839c519da25b1479ee4da1788f52a34f6911f87b | |
| parent | b8a825b4838f96d70488a4def728a47a5eaaf96e (diff) | |
| download | llvm-e52937eba022c78545d7783693f0fb0e56b8cd35.zip llvm-e52937eba022c78545d7783693f0fb0e56b8cd35.tar.gz llvm-e52937eba022c78545d7783693f0fb0e56b8cd35.tar.bz2 | |
[OpenMP] Use AAAssumptionInfo to get assumptions in OpenMPOpt
This patch uses the abstract attributor introduced in D111054 to get the
assumption values instead of the `hasAssumption` function. This also
calls it so assumption information should propagate through the device
where applicable.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D111445
| -rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 12 | ||||
| -rw-r--r-- | llvm/test/Transforms/OpenMP/spmdization.ll | 75 |
2 files changed, 53 insertions, 34 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 33ca121..4dfb98c 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3844,13 +3844,11 @@ struct AAKernelInfoCallSite : AAKernelInfo { CallBase &CB = cast<CallBase>(getAssociatedValue()); Function *Callee = getAssociatedFunction(); - // Helper to lookup an assumption string. - auto HasAssumption = [](CallBase &CB, StringRef AssumptionStr) { - return hasAssumption(CB, AssumptionStr); - }; + auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>( + *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); // Check for SPMD-mode assumptions. - if (HasAssumption(CB, "ompx_spmd_amenable")) { + if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) { SPMDCompatibilityTracker.indicateOptimisticFixpoint(); indicateOptimisticFixpoint(); } @@ -3875,8 +3873,8 @@ struct AAKernelInfoCallSite : AAKernelInfo { // Unknown callees might contain parallel regions, except if they have // an appropriate assumption attached. 
- if (!(HasAssumption(CB, "omp_no_openmp") || - HasAssumption(CB, "omp_no_parallelism"))) + if (!(AssumptionAA.hasAssumption("omp_no_openmp") || + AssumptionAA.hasAssumption("omp_no_parallelism"))) ReachedUnknownParallelRegions.insert(&CB); // If SPMDCompatibilityTracker is not fixed, we need to give up on the diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index ad3dc48..85b593e 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -199,13 +199,11 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { ; AMDGPU-DISABLED: worker_state_machine.is_active.check: ; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: -; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) -; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; AMDGPU-DISABLED-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; AMDGPU-DISABLED: worker_state_machine.parallel_region.check1: ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] ; AMDGPU-DISABLED: 
worker_state_machine.parallel_region.end: ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() @@ -247,13 +245,11 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { ; NVPTX-DISABLED: worker_state_machine.is_active.check: ; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.check: -; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) -; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; NVPTX-DISABLED-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: -; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; NVPTX-DISABLED: worker_state_machine.parallel_region.check1: ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] ; NVPTX-DISABLED: worker_state_machine.parallel_region.end: ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() @@ -322,7 +318,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; AMDGPU: for.end: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; AMDGPU-NEXT: call void @indirection() 
#[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -350,7 +346,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; NVPTX: for.end: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; NVPTX-NEXT: call void @indirection() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -378,7 +374,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; AMDGPU-DISABLED: for.end: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @indirection() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -406,7 +402,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; NVPTX-DISABLED: for.end: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @indirection() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -438,7 +434,32 @@ for.inc: ; preds = %for.body br label %for.cond, !llvm.loop !13 for.end: ; preds = %for.cond - call void @spmd_amenable() #4 + call void @indirection() #4 + ret void +} + +define internal void @indirection() { +; AMDGPU-LABEL: define {{[^@]+}}@indirection +; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR1]] +; AMDGPU-NEXT: ret void +; +; NVPTX-LABEL: define {{[^@]+}}@indirection +; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX-NEXT: call void @spmd_amenable() 
#[[ATTR1]] +; NVPTX-NEXT: ret void +; +; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@indirection +; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR1]] +; AMDGPU-DISABLED-NEXT: ret void +; +; NVPTX-DISABLED-LABEL: define {{[^@]+}}@indirection +; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR1]] +; NVPTX-DISABLED-NEXT: ret void +; + call void @spmd_amenable() ret void } @@ -564,7 +585,7 @@ declare void @__kmpc_get_shared_variables(i8***) declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) -declare void @spmd_amenable() #2 +declare void @spmd_amenable() declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 @@ -887,7 +908,7 @@ for.end: ; preds = %for.cond declare i8* @__kmpc_alloc_shared(i64) #3 -declare void @use(i32* nocapture) #2 +declare void @use(i32* nocapture) define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; @@ -2257,9 +2278,9 @@ attributes #5 = { convergent } !17 = distinct !{!17, !14} ;. 
; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } -; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } ; AMDGPU: attributes #[[ATTR4]] = { nounwind } ; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } @@ -2267,9 +2288,9 @@ attributes #5 = { convergent } ; AMDGPU: attributes #[[ATTR8]] = { convergent } ;. 
; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } -; NVPTX: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" } +; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } ; NVPTX: attributes #[[ATTR4]] = { nounwind } ; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; NVPTX: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } @@ -2277,9 +2298,9 @@ attributes #5 = { convergent } ; NVPTX: attributes #[[ATTR8]] = { convergent } ;. 
; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } ; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind } ; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } @@ -2287,9 +2308,9 @@ attributes #5 = { convergent } ; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent } ;. 
; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR1]] = { "llvm.assume"="ompx_spmd_amenable" } +; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } ; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind } ; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } |
