aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Popov <npopov@redhat.com>2025-07-28 09:46:00 +0200
committerGitHub <noreply@github.com>2025-07-28 09:46:00 +0200
commitfe0dbe0f2950d95071be7140c7b4680f17a7ac4e (patch)
tree825044dd1f255ee88464be8790dcc755a794ff13
parent3d994468098027f9cf550c78a1c91bb266040f61 (diff)
downloadllvm-fe0dbe0f2950d95071be7140c7b4680f17a7ac4e.zip
llvm-fe0dbe0f2950d95071be7140c7b4680f17a7ac4e.tar.gz
llvm-fe0dbe0f2950d95071be7140c7b4680f17a7ac4e.tar.bz2
[CodeGen] More consistently expand float ops by default (#150597)
These float operations were expanded for scalar f32/f64/f128, but not for f16 and more problematically, not for vectors. A small subset of them was separately set to expand for vectors. Change these to always expand by default, and adjust targets to mark these as legal where necessary instead. This is a much safer default, and avoids unnecessary legalization failures because a target failed to manually mark them as expand. Fixes https://github.com/llvm/llvm-project/issues/110753. Fixes https://github.com/llvm/llvm-project/issues/121390.
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp10
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp11
-rw-r--r--llvm/test/CodeGen/PowerPC/froundeven-legalization.ll111
4 files changed, 145 insertions, 21 deletions
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index d4a3455..68b8a00 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
ISD::SDIVFIX, ISD::SDIVFIXSAT,
ISD::UDIVFIX, ISD::UDIVFIXSAT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
- ISD::IS_FPCLASS},
+ ISD::IS_FPCLASS, ISD::FCBRT,
+ ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FEXP10,
+ ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FCEIL, ISD::FRINT,
+ ISD::FTRUNC, ISD::FROUNDEVEN,
+ ISD::FTAN, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN,
+ ISD::FCOSH, ISD::FSINH,
+ ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Overflow operations default to expand
@@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector())
- setOperationAction(
- {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
- ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
- ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
- VT, Expand);
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR,
+ ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
+ VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
Expand);
- // These library functions default to expand.
- setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
- ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
- ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
- ISD::FATAN2},
- {MVT::f32, MVT::f64, MVT::f128}, Expand);
-
// Insert custom handling default for llvm.canonicalize.*.
setOperationAction(ISD::FCANONICALIZE,
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e3ca09e..f25ce87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -391,8 +391,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
- ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
- MVT::f32, Legal);
+ ISD::FROUNDEVEN, ISD::FTRUNC},
+ {MVT::f16, MVT::f32}, Legal);
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@@ -412,9 +413,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- if (Subtarget->has16BitInsts())
+ if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
- else {
+ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
+ } else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fca5dff..066b392 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
+ setOperationAction(ISD::FRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f16, Legal);
}
if (Subtarget->hasNEON()) {
diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
new file mode 100644
index 0000000..238e200
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
+
+define void @test(ptr %p1, ptr %p2) nounwind {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stdu 1, -224(1)
+; CHECK-NEXT: li 5, 48
+; CHECK-NEXT: std 0, 240(1)
+; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 27, 16
+; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 29, 32
+; CHECK-NEXT: li 28, 48
+; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 64
+; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 80
+; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 96
+; CHECK-NEXT: lxvd2x 58, 0, 3
+; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 112
+; CHECK-NEXT: lxvd2x 59, 3, 27
+; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 128
+; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 144
+; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 160
+; CHECK-NEXT: lxvd2x 62, 3, 28
+; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: lxvd2x 63, 3, 29
+; CHECK-NEXT: xxswapd 57, 58
+; CHECK-NEXT: xxswapd 1, 59
+; CHECK-NEXT: xxswapd 60, 62
+; CHECK-NEXT: xxswapd 61, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 56, 1
+; CHECK-NEXT: xxlor 1, 59, 59
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 60, 60
+; CHECK-NEXT: xxmrgld 59, 0, 56
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 60, 1
+; CHECK-NEXT: xxlor 1, 62, 62
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 61, 61
+; CHECK-NEXT: xxmrgld 62, 0, 60
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 63, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 57, 57
+; CHECK-NEXT: xxmrgld 63, 0, 61
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 58, 58
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: li 3, 160
+; CHECK-NEXT: stxvd2x 63, 30, 29
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: stxvd2x 62, 30, 28
+; CHECK-NEXT: stxvd2x 59, 30, 27
+; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 144
+; CHECK-NEXT: xxmrgld 0, 0, 61
+; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 128
+; CHECK-NEXT: stxvd2x 0, 0, 30
+; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 112
+; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 96
+; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 80
+; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 64
+; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 48
+; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 224
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+ %v = load <8 x double>, ptr %p1, align 64
+ %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
+ store <8 x double> %res, ptr %p2, align 64
+ ret void
+}
+
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)