diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 | ||||
-rw-r--r-- | llvm/test/Assembler/amdgcn-intrinsic-attributes.ll | 8 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll | 4 |
3 files changed, 15 insertions, 5 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 7506871..a57eb4a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -234,9 +234,11 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[], def int_amdgcn_init_whole_wave : Intrinsic<[llvm_i1_ty], [], [ IntrHasSideEffects, IntrNoMem, IntrConvergent]>; -def int_amdgcn_wavefrontsize : - ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>; +def int_amdgcn_wavefrontsize + : ClangBuiltin<"__builtin_amdgcn_wavefrontsize">, + DefaultAttrsIntrinsic<[llvm_i32_ty], [], + [NoUndef<RetIndex>, Range<RetIndex, 32, 65>, + IntrNoMem, IntrSpeculatable]>; // Represent a relocation constant. def int_amdgcn_reloc_constant : DefaultAttrsIntrinsic< diff --git a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll index bd5ce2d..744c94a 100644 --- a/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll +++ b/llvm/test/Assembler/amdgcn-intrinsic-attributes.ll @@ -18,4 +18,12 @@ define i32 @ds_consume(ptr addrspace(3) %ptr) { ret i32 %ret } +; Test assumed range +; CHECK: declare noundef range(i32 32, 65) i32 @llvm.amdgcn.wavefrontsize() #1 +define i32 @wavefrontsize() { + %ret = call i32 @llvm.amdgcn.wavefrontsize() + ret i32 %ret +} + ; CHECK: attributes #0 = { convergent nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll index 92f0af3..e065d96 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -39,7 +39,7 @@ define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) noca ; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) { ; OPT-NEXT: [[BB:.*:]] ; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] -; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32 ; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1 ; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4 ; OPT-NEXT: ret void @@ -69,7 +69,7 @@ define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) n ; OPT-SAME: ptr addrspace(1) captures(none) [[ARG:%.*]]) { ; OPT-NEXT: [[BB:.*:]] ; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] -; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: [[TMP1:%.*]] = icmp samesign ugt i32 [[TMP]], 32 ; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] ; OPT: [[BB2]]: ; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 |