aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2024-06-18 19:17:42 +0200
committerGitHub <noreply@github.com>2024-06-18 19:17:42 +0200
commitb932da16b76f905e05520c473e3ae01c2f89e594 (patch)
treed193d4dd0f5755f08be8c7d70ed38a5d8c9f10f2
parent8570685d3b5a71d9a65a8c37a88fb0184d9b131c (diff)
downloadllvm-b932da16b76f905e05520c473e3ae01c2f89e594.zip
llvm-b932da16b76f905e05520c473e3ae01c2f89e594.tar.gz
llvm-b932da16b76f905e05520c473e3ae01c2f89e594.tar.bz2
AMDGPU: Fix vector handling in pown libcall simplification (#95832)
The isIntegerTy check would not work as you would hope in the vector case.
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp10
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll9
2 files changed, 7 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index c515138..456f3cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1129,15 +1129,11 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
if (needcopysign) {
- Value *opr_n;
- Type* rTy = opr0->getType();
Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
- Type *nTy = nTyS;
- if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
- nTy = FixedVectorType::get(nTyS, vTy);
+ Type *nTy = FPOp->getType()->getWithNewType(nTyS);
unsigned size = nTy->getScalarSizeInBits();
- opr_n = FPOp->getOperand(1);
- if (opr_n->getType()->isIntegerTy())
+ Value *opr_n = FPOp->getOperand(1);
+ if (opr_n->getType()->getScalarType()->isIntegerTy())
opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
else
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index e298226..77db224a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -698,8 +698,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
-; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[POWNI2F]] to <2 x i32>
-; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
@@ -744,8 +743,8 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x double>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
-; CHECK-NEXT: [[__YTOU1:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
-; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU1]], <i64 63, i64 63>
+; CHECK-NEXT: [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
+; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], <i64 63, i64 63>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
@@ -790,7 +789,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
-; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x half> [[POWNI2F]] to <2 x i16>
+; CHECK-NEXT: [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16>
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]