diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-03-11 17:12:20 -0400 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-03-27 12:48:58 -0700 |
commit | 0ab5b5b8581d9f2951575f7245824e6e4fc57dec (patch) | |
tree | 5334b5ee4a349e9250c036fbcbf2fb9468be0657 /llvm/lib/CodeGen/CommandFlags.cpp | |
parent | c579a5b1d92a9bc2046d00ee2d427832e0f5ddec (diff) | |
download | llvm-0ab5b5b8581d9f2951575f7245824e6e4fc57dec.zip llvm-0ab5b5b8581d9f2951575f7245824e6e4fc57dec.tar.gz llvm-0ab5b5b8581d9f2951575f7245824e6e4fc57dec.tar.bz2 |
Fix denormal-fp-math flag and attribute interaction
Make these behave the same way as unsafe-fp-math and co. The command-line
flag should add the attribute to functions that do not already have
it, and leave existing attributes. The attribute is the actual
implementation, but the flag is useful in some testing situations.
AMDGPU has a variety of tests with denormals enabled/disabled that
would require a painful level of test duplication without a flag. This
doesn't expose setting the separate input/output modes, or add a flag
for the f32 version yet.
Tests will be included in a future patch.
Diffstat (limited to 'llvm/lib/CodeGen/CommandFlags.cpp')
-rw-r--r-- | llvm/lib/CodeGen/CommandFlags.cpp | 28 |
1 files changed, 21 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index d5dc49a..d1540af 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -54,7 +54,7 @@ CGOPT(bool, EnableNoInfsFPMath) CGOPT(bool, EnableNoNaNsFPMath) CGOPT(bool, EnableNoSignedZerosFPMath) CGOPT(bool, EnableNoTrappingFPMath) -CGOPT(FPDenormal::DenormalMode, DenormalFPMath) +CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath) CGOPT(bool, EnableHonorSignDependentRoundingFPMath) CGOPT(FloatABI::ABIType, FloatABIForCalls) CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) @@ -212,17 +212,17 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(EnableNoTrappingFPMath); - static cl::opt<FPDenormal::DenormalMode> DenormalFPMath( + static cl::opt<DenormalMode::DenormalModeKind> DenormalFPMath( "denormal-fp-math", cl::desc( "Select which denormal numbers the code is permitted to require"), - cl::init(FPDenormal::IEEE), + cl::init(DenormalMode::IEEE), cl::values( - clEnumValN(FPDenormal::IEEE, "ieee", "IEEE 754 denormal numbers"), - clEnumValN(FPDenormal::PreserveSign, "preserve-sign", + clEnumValN(DenormalMode::IEEE, "ieee", "IEEE 754 denormal numbers"), + clEnumValN(DenormalMode::PreserveSign, "preserve-sign", "the sign of a flushed-to-zero number is preserved " "in the sign of 0"), - clEnumValN(FPDenormal::PositiveZero, "positive-zero", + clEnumValN(DenormalMode::PositiveZero, "positive-zero", "denormals are flushed to positive zero"))); CGBINDOPT(DenormalFPMath); @@ -425,7 +425,12 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { Options.NoNaNsFPMath = getEnableNoNaNsFPMath(); Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath(); Options.NoTrappingFPMath = getEnableNoTrappingFPMath(); - Options.FPDenormalMode = getDenormalFPMath(); + + DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath(); + + // FIXME: Should have separate input and output flags + 
Options.setFPDenormalMode(DenormalMode(DenormKind, DenormKind)); + Options.HonorSignDependentRoundingFPMathOption = getEnableHonorSignDependentRoundingFPMath(); if (getFloatABIForCalls() != FloatABI::Default) @@ -563,6 +568,15 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math"); HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math"); + if (DenormalFPMathView->getNumOccurrences() > 0 && + !F.hasFnAttribute("denormal-fp-math")) { + DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath(); + + // FIXME: Command line flag should expose separate input/output modes. + NewAttrs.addAttribute("denormal-fp-math", + DenormalMode(DenormKind, DenormKind).str()); + } + if (TrapFuncNameView->getNumOccurrences() > 0) for (auto &B : F) for (auto &I : B) |