diff options
author | paperchalice <liujunchang97@outlook.com> | 2025-10-15 09:03:15 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-10-15 09:03:15 +0800 |
commit | dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d (patch) | |
tree | d344005b0c80e76d2d184f995fd1da19ce766c66 | |
parent | 2f50a9913552d41ae93af5e9a8c1927b0f4b3833 (diff) | |
download | llvm-dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d.zip llvm-dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d.tar.gz llvm-dd44e63c8ed04b4f9d62e7d104b5339e1cf18b9d.tar.bz2 |
[DAGCombiner] Use `FlagInserter` in `visitFSQRT` (#163301)
Propagate fast-math flags to the nodes created by TLI.getSqrtEstimate and the other square-root estimate helpers reached from `visitFSQRT`.
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 72 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 90 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll | 4 |
3 files changed, 76 insertions, 90 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 787a81a..358e060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -658,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -18590,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. 
if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18635,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. @@ -18645,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18742,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. 
+ SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -29743,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. 
if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29775,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29789,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29799,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29814,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). 
In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29843,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. @@ -29862,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, true); +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, false); +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, false); } /// Return true if there is any possibility that the two addresses overlap. 
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index e71f59c..cad684e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_afn_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB11_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_afn_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB13_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) 
-; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB13_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_fast_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB15_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB15_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_fast_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB16_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; 
GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB16_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ret float %rt diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 42617c1..18588aa 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr @@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit 
$mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr |