aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpaperchalice <liujunchang97@outlook.com>2025-10-15 09:03:15 +0800
committerGitHub <noreply@github.com>2025-10-15 09:03:15 +0800
commitdd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d (patch)
treed344005b0c80e76d2d184f995fd1da19ce766c66
parent2f50a9913552d41ae93af5e9a8c1927b0f4b3833 (diff)
downloadllvm-dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d.zip
llvm-dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d.tar.gz
llvm-dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d.tar.bz2
[DAGCombiner] Use `FlagInserter` in `visitFSQRT` (#163301)
Propagate fast-math flags for TLI.getSqrtEstimate etc.
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp72
-rw-r--r--llvm/test/CodeGen/PowerPC/fmf-propagation.ll90
-rw-r--r--llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll4
3 files changed, 76 insertions, 90 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 787a81a..358e060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -658,13 +658,13 @@ namespace {
bool InexpensiveOnly = false,
std::optional<EVT> OutVT = std::nullopt);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
- SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
- SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+ SDValue buildRsqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimate(SDValue Op);
+ SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal);
+ bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -18590,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV =
- buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -18635,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
SDValue AAZ =
DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
// Estimate creation failed. Clean up speculatively created nodes.
@@ -18645,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// We found a FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
- if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
@@ -18742,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
- return buildSqrtEstimate(N0, Flags);
+ return buildSqrtEstimate(N0);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -29743,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
}
// If non-reciprocal square root is requested, multiply the result by Arg.
if (!Reciprocal)
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
return Est;
}
@@ -29775,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags Flags, bool Reciprocal) {
+ unsigned Iterations, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -29789,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// Newton iterations for reciprocal square root:
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
- SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -29799,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
SDValue LHS;
if (Reciprocal || (i + 1) < Iterations) {
// RSQRT: LHS = (E * -0.5)
- LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
} else {
// SQRT: LHS = (A * E) * -0.5
- LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
}
- Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
}
return Est;
@@ -29814,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
- bool Reciprocal) {
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
if (LegalDAG)
return SDValue();
@@ -29843,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (Iterations > 0)
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target specific test first.
@@ -29862,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
return SDValue();
}
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, true);
}
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
- return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+ return buildSqrtEstimateImpl(Op, false);
}
/// Return true if there is any possibility that the two addresses overlap.
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index e71f59c..cad684e 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 {
;
; GLOBAL-LABEL: sqrt_afn_ieee:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
-; GLOBAL-NEXT: xsabsdp 0, 1
-; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
-; GLOBAL-NEXT: fcmpu 0, 0, 2
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB11_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsabsdp 1, 1
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB11_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
+; GLOBAL-NEXT: xssubsp 1, 1, 2
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 1, 1, 0, 2
; GLOBAL-NEXT: blr
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB13_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB13_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 2, 1, 2, 0
+; GLOBAL-NEXT: xsnegdp 1, 1
+; GLOBAL-NEXT: fsel 1, 1, 2, 0
; GLOBAL-NEXT: blr
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 {
;
; GLOBAL-LABEL: sqrt_fast_ieee:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
-; GLOBAL-NEXT: xsabsdp 0, 1
-; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
-; GLOBAL-NEXT: fcmpu 0, 0, 2
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB15_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsabsdp 1, 1
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB15_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
+; GLOBAL-NEXT: xssubsp 1, 1, 2
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 1, 1, 0, 2
; GLOBAL-NEXT: blr
%rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
;
; GLOBAL-LABEL: sqrt_fast_preserve_sign:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB16_2
-; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha
-; GLOBAL-NEXT: xvcvsxwdp 2, 34
-; GLOBAL-NEXT: xsmulsp 1, 1, 0
-; GLOBAL-NEXT: xsmaddasp 2, 1, 0
+; GLOBAL-NEXT: xvcvsxwdp 3, 34
+; GLOBAL-NEXT: xsmulsp 2, 1, 0
+; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3)
-; GLOBAL-NEXT: xsmulsp 0, 1, 0
-; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB16_2:
-; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: xsmulsp 0, 2, 0
+; GLOBAL-NEXT: xxlxor 2, 2, 2
+; GLOBAL-NEXT: xsmulsp 0, 0, 3
+; GLOBAL-NEXT: fsel 2, 1, 2, 0
+; GLOBAL-NEXT: xsnegdp 1, 1
+; GLOBAL-NEXT: fsel 1, 1, 2, 0
; GLOBAL-NEXT: blr
%rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
ret float %rt
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 42617c1..18588aa 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+ ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]]
; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr
@@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+ ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]]
; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr