diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 63 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 919 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.h | 21 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/ctselect-i386-fp.ll | 272 |
4 files changed, 663 insertions, 612 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 401c1953323f..7a3bb3c648fb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26170,6 +26170,69 @@ SDValue X86TargetLowering::LowerCT_SELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getBitcast(VT, CtSelect); } + // Handle f80 types by splitting into three 32-bit chunks + if (VT == MVT::f80) { + SDValue Chain = DAG.getEntryNode(); + + // Create temporary stack slots for input f80 values + SDValue TrueSlot = DAG.CreateStackTemporary(MVT::f80); + SDValue FalseSlot = DAG.CreateStackTemporary(MVT::f80); + + // Store f80 values to memory + SDValue StoreTrueF80 = + DAG.getStore(Chain, DL, TrueOp, TrueSlot, MachinePointerInfo()); + SDValue StoreFalseF80 = + DAG.getStore(Chain, DL, FalseOp, FalseSlot, MachinePointerInfo()); + + // Load i32 parts from memory (3 chunks for 96-bit f80 storage) + SDValue TruePart0 = + DAG.getLoad(MVT::i32, DL, StoreTrueF80, TrueSlot, MachinePointerInfo()); + SDValue TruePart1Ptr = + DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(4), DL); + SDValue TruePart1 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TruePart1Ptr, + MachinePointerInfo()); + SDValue TruePart2Ptr = + DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(8), DL); + SDValue TruePart2 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TruePart2Ptr, + MachinePointerInfo()); + + SDValue FalsePart0 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalseSlot, + MachinePointerInfo()); + SDValue FalsePart1Ptr = + DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(4), DL); + SDValue FalsePart1 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalsePart1Ptr, + MachinePointerInfo()); + SDValue FalsePart2Ptr = + DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(8), DL); + SDValue FalsePart2 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalsePart2Ptr, + MachinePointerInfo()); + + // Perform CT_SELECT on each 32-bit chunk + SDValue Part0Ops[] = {FalsePart0, TruePart0, CC, ProcessedCond}; + SDValue Part0Select = DAG.getNode(X86ISD::CT_SELECT, DL, MVT::i32, Part0Ops); + SDValue Part1Ops[] = {FalsePart1, TruePart1, CC, ProcessedCond}; + SDValue Part1Select = DAG.getNode(X86ISD::CT_SELECT, DL, MVT::i32, Part1Ops); + SDValue Part2Ops[] = {FalsePart2, TruePart2, CC, ProcessedCond}; + SDValue Part2Select = DAG.getNode(X86ISD::CT_SELECT, DL, MVT::i32, Part2Ops); + + // Create result stack slot and store the selected parts + SDValue ResultSlot = DAG.CreateStackTemporary(MVT::f80); + SDValue StorePart0 = + DAG.getStore(Chain, DL, Part0Select, ResultSlot, MachinePointerInfo()); + SDValue ResPart1Ptr = + DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(4), DL); + SDValue StorePart1 = DAG.getStore(StorePart0, DL, Part1Select, ResPart1Ptr, + MachinePointerInfo()); + SDValue ResPart2Ptr = + DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(8), DL); + SDValue StorePart2 = DAG.getStore(StorePart1, DL, Part2Select, ResPart2Ptr, + MachinePointerInfo()); + + // Load complete f80 result from memory + return DAG.getLoad(MVT::f80, DL, StorePart2, ResultSlot, + MachinePointerInfo()); + } + // Create final CT_SELECT node SDValue Ops[] = {FalseOp, TrueOp, CC, ProcessedCond}; return DAG.getNode(X86ISD::CT_SELECT, DL, Op.getValueType(), Ops, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d4a46048a1d2..f98501da8210 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -689,8 +689,7 @@ bool X86InstrInfo::expandCtSelectVector(MachineInstr &MI) const { .addImm(31)); } else { // Negate to convert 1 -> 0xFFFFFFFF, 0 -> 0x00000000 (negl %eax) - recordInstr(BuildMI(*MBB, MI, DL, get(X86::NEG32r), TmpGPR) - .addReg(TmpGPR)); + recordInstr(BuildMI(*MBB, MI, DL, get(X86::NEG32r), TmpGPR).addReg(TmpGPR)); } // Broadcast to TmpX (vector mask) @@ -847,7 +846,8 @@ bool X86InstrInfo::expandCtSelectVector(MachineInstr &MI) const { .setMIFlags(MachineInstr::MIFlag::NoMerge)); } - assert(FirstInstr && LastInstr && "Expected at least one expanded instruction"); + assert(FirstInstr && LastInstr && + "Expected at least one expanded instruction"); auto BundleEnd = LastInstr->getIterator(); finalizeBundle(*MBB, FirstInstr->getIterator(), std::next(BundleEnd)); @@ -915,25 +915,28 @@ bool X86InstrInfo::expandCtSelectWithCMOV(MachineInstr &MI) const { /// Expand i386-specific CT_SELECT pseudo instructions (post-RA, constant-time) /// These internal pseudos receive a pre-materialized condition byte from the -/// custom inserter, avoiding EFLAGS corruption issues during i64 type legalization. +/// custom inserter, avoiding EFLAGS corruption issues during i64 type +/// legalization. bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const { MachineBasicBlock *MBB = MI.getParent(); DebugLoc DL = MI.getDebugLoc(); // CT_SELECT_I386_INT_GRxxrr has operands: (outs dst, tmp_byte, tmp_mask), // (ins src1, src2, cond_byte) - // Note: cond_byte is pre-materialized by custom inserter, not EFLAGS-dependent + // Note: cond_byte is pre-materialized by custom inserter, not + // EFLAGS-dependent Register DstReg = MI.getOperand(0).getReg(); Register TmpByteReg = MI.getOperand(1).getReg(); Register TmpMaskReg = MI.getOperand(2).getReg(); Register Src1Reg = MI.getOperand(3).getReg(); Register Src2Reg = MI.getOperand(4).getReg(); - Register CondByteReg = MI.getOperand(5).getReg(); // Pre-materialized condition byte + Register CondByteReg = + MI.getOperand(5).getReg(); // Pre-materialized condition byte // Determine instruction opcodes based on register width unsigned MovZXOp, NegOp, MovOp, AndOp, NotOp, OrOp; if (MI.getOpcode() == X86::CT_SELECT_I386_INT_GR8rr) { - MovZXOp = 0; // No zero-extend needed for GR8 + MovZXOp = 0; // No zero-extend needed for GR8 NegOp = X86::NEG8r; MovOp = X86::MOV8rr; AndOp = X86::AND8rr; @@ -962,8 +965,8 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const { // Step 1: Copy pre-materialized condition byte to TmpByteReg // This allows the bundle to work with allocated temporaries auto I1 = BuildMI(*MBB, MI, DL, get(X86::MOV8rr), TmpByteReg) - .addReg(CondByteReg) - .setMIFlag(MachineInstr::MIFlag::NoMerge); + .addReg(CondByteReg) + .setMIFlag(MachineInstr::MIFlag::NoMerge); auto BundleStart = I1->getIterator(); // Step 2: Zero-extend condition byte to register width (0 or 1) @@ -974,7 +977,9 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const { } // Step 3: Convert condition to bitmask (NEG: 1 -> 0xFFFF..., 0 -> 0x0000...) - Register MaskReg = (MI.getOpcode() == X86::CT_SELECT_I386_INT_GR8rr) ? TmpByteReg : TmpMaskReg; + Register MaskReg = (MI.getOpcode() == X86::CT_SELECT_I386_INT_GR8rr) + ? TmpByteReg + : TmpMaskReg; BuildMI(*MBB, MI, DL, get(NegOp), MaskReg) .addReg(MaskReg) .setMIFlag(MachineInstr::MIFlag::NoMerge); @@ -1002,9 +1007,9 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const { // Step 8: Final result: (src1 & mask) | (src2 & ~mask) auto LI = BuildMI(*MBB, MI, DL, get(OrOp), DstReg) - .addReg(DstReg) - .addReg(MaskReg) - .setMIFlag(MachineInstr::MIFlag::NoMerge); + .addReg(DstReg) + .addReg(MaskReg) + .setMIFlag(MachineInstr::MIFlag::NoMerge); // Bundle all generated instructions for atomic execution before removing MI auto BundleEnd = std::next(LI->getIterator()); @@ -1013,11 +1018,12 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const { finalizeBundle(*MBB, BundleStart, BundleEnd); } - // TODO: Optimization opportunity - The register allocator may choose callee-saved - // registers (e.g., %ebx, %esi) for TmpByteReg/TmpMaskReg, causing unnecessary - // save/restore overhead. Consider constraining these to caller-saved register - // classes (e.g., GR8_AL, GR32_CallSaved) in the TableGen definitions to improve - // constant-time performance by eliminating prologue/epilogue instructions. + // TODO: Optimization opportunity - The register allocator may choose + // callee-saved registers (e.g., %ebx, %esi) for TmpByteReg/TmpMaskReg, + // causing unnecessary save/restore overhead. Consider constraining these to + // caller-saved register classes (e.g., GR8_AL, GR32_CallSaved) in the + // TableGen definitions to improve constant-time performance by eliminating + // prologue/epilogue instructions. // Remove the original pseudo instruction MI.eraseFromParent(); @@ -1305,8 +1311,7 @@ static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) { return isPICBase; } -bool X86InstrInfo::isReMaterializableImpl( - const MachineInstr &MI) const { +bool X86InstrInfo::isReMaterializableImpl(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable @@ -1823,32 +1828,32 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, switch (MIOpc) { default: llvm_unreachable("Unreachable!"); - CASE_NF(SHL8ri) - CASE_NF(SHL16ri) { - unsigned ShAmt = MI.getOperand(2).getImm(); - MIB.addReg(0) - .addImm(1LL << ShAmt) - .addReg(InRegLEA, RegState::Kill) - .addImm(0) - .addReg(0); - break; - } - CASE_NF(INC8r) - CASE_NF(INC16r) + CASE_NF(SHL8ri) + CASE_NF(SHL16ri) { + unsigned ShAmt = MI.getOperand(2).getImm(); + MIB.addReg(0) + .addImm(1LL << ShAmt) + .addReg(InRegLEA, RegState::Kill) + .addImm(0) + .addReg(0); + break; + } + CASE_NF(INC8r) + CASE_NF(INC16r) addRegOffset(MIB, InRegLEA, true, 1); break; - CASE_NF(DEC8r) - CASE_NF(DEC16r) + CASE_NF(DEC8r) + CASE_NF(DEC16r) addRegOffset(MIB, InRegLEA, true, -1); break; - CASE_NF(ADD8ri) - CASE_NF(ADD16ri) + CASE_NF(ADD8ri) + CASE_NF(ADD16ri) case X86::ADD8ri_DB: case X86::ADD16ri_DB: addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm()); break; - CASE_NF(ADD8rr) - CASE_NF(ADD16rr) + CASE_NF(ADD8rr) + CASE_NF(ADD16rr) case X86::ADD8rr_DB: case X86::ADD16rr_DB: { Src2 = MI.getOperand(2).getReg(); @@ -1986,128 +1991,129 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI, switch (MIOpc) { default: llvm_unreachable("Unreachable!"); - CASE_NF(SHL64ri) { - assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); - unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) - return nullptr; - - // LEA can't handle RSP. - if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass( - Src.getReg(), &X86::GR64_NOSPRegClass)) - return nullptr; + CASE_NF(SHL64ri) { + assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (!isTruncatedShiftCountForLEA(ShAmt)) + return nullptr; - NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) - .add(Dest) - .addReg(0) - .addImm(1LL << ShAmt) - .add(Src) - .addImm(0) - .addReg(0); - break; - } - CASE_NF(SHL32ri) { - assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); - unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) - return nullptr; + // LEA can't handle RSP. + if (Src.getReg().isVirtual() && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR64_NOSPRegClass)) + return nullptr; - unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; + NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)) + .add(Dest) + .addReg(0) + .addImm(1LL << ShAmt) + .add(Src) + .addImm(0) + .addReg(0); + break; + } + CASE_NF(SHL32ri) { + assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (!isTruncatedShiftCountForLEA(ShAmt)) + return nullptr; - // LEA can't handle ESP. - bool isKill; - MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); - if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg, - isKill, ImplicitOp, LV, LIS)) - return nullptr; + unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; - MachineInstrBuilder MIB = - BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .add(Dest) - .addReg(0) - .addImm(1LL << ShAmt) - .addReg(SrcReg, getKillRegState(isKill), SrcSubReg) - .addImm(0) - .addReg(0); - if (ImplicitOp.getReg() != 0) - MIB.add(ImplicitOp); - NewMI = MIB; + // LEA can't handle ESP. + bool isKill; + MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); + if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg, + isKill, ImplicitOp, LV, LIS)) + return nullptr; - // Add kills if classifyLEAReg created a new register. - if (LV && SrcReg != Src.getReg()) - LV->getVarInfo(SrcReg).Kills.push_back(NewMI); - break; - } - CASE_NF(SHL8ri) + MachineInstrBuilder MIB = + BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .addReg(0) + .addImm(1LL << ShAmt) + .addReg(SrcReg, getKillRegState(isKill), SrcSubReg) + .addImm(0) + .addReg(0); + if (ImplicitOp.getReg() != 0) + MIB.add(ImplicitOp); + NewMI = MIB; + + // Add kills if classifyLEAReg created a new register. + if (LV && SrcReg != Src.getReg()) + LV->getVarInfo(SrcReg).Kills.push_back(NewMI); + break; + } + CASE_NF(SHL8ri) Is8BitOp = true; [[fallthrough]]; - CASE_NF(SHL16ri) { - assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); - unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) - return nullptr; - return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); - } - CASE_NF(INC64r) - CASE_NF(INC32r) { - assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!"); - unsigned Opc = (MIOpc == X86::INC64r || MIOpc == X86::INC64r_NF) - ? X86::LEA64r - : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); - bool isKill; - MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); - if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg, - isKill, ImplicitOp, LV, LIS)) - return nullptr; - - MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .add(Dest) - .addReg(SrcReg, getKillRegState(isKill)); - if (ImplicitOp.getReg() != 0) - MIB.add(ImplicitOp); + CASE_NF(SHL16ri) { + assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!"); + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (!isTruncatedShiftCountForLEA(ShAmt)) + return nullptr; + return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); + } + CASE_NF(INC64r) + CASE_NF(INC32r) { + assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!"); + unsigned Opc = (MIOpc == X86::INC64r || MIOpc == X86::INC64r_NF) + ? X86::LEA64r + : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); + bool isKill; + MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); + if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg, + isKill, ImplicitOp, LV, LIS)) + return nullptr; - NewMI = addOffset(MIB, 1); + MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .addReg(SrcReg, getKillRegState(isKill)); + if (ImplicitOp.getReg() != 0) + MIB.add(ImplicitOp); - // Add kills if classifyLEAReg created a new register. - if (LV && SrcReg != Src.getReg()) - LV->getVarInfo(SrcReg).Kills.push_back(NewMI); - break; - } - CASE_NF(DEC64r) - CASE_NF(DEC32r) { - assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!"); - unsigned Opc = (MIOpc == X86::DEC64r || MIOpc == X86::DEC64r_NF) - ? X86::LEA64r - : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); + NewMI = addOffset(MIB, 1); - bool isKill; - MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); - if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg, - isKill, ImplicitOp, LV, LIS)) - return nullptr; + // Add kills if classifyLEAReg created a new register. + if (LV && SrcReg != Src.getReg()) + LV->getVarInfo(SrcReg).Kills.push_back(NewMI); + break; + } + CASE_NF(DEC64r) + CASE_NF(DEC32r) { + assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!"); + unsigned Opc = (MIOpc == X86::DEC64r || MIOpc == X86::DEC64r_NF) + ? X86::LEA64r + : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); + + bool isKill; + MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); + if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg, + isKill, ImplicitOp, LV, LIS)) + return nullptr; - MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .add(Dest) - .addReg(SrcReg, getKillRegState(isKill)); - if (ImplicitOp.getReg() != 0) - MIB.add(ImplicitOp); + MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .addReg(SrcReg, getKillRegState(isKill)); + if (ImplicitOp.getReg() != 0) + MIB.add(ImplicitOp); - NewMI = addOffset(MIB, -1); + NewMI = addOffset(MIB, -1); - // Add kills if classifyLEAReg created a new register. - if (LV && SrcReg != Src.getReg()) - LV->getVarInfo(SrcReg).Kills.push_back(NewMI); - break; - } - CASE_NF(DEC8r) - CASE_NF(INC8r) + // Add kills if classifyLEAReg created a new register. + if (LV && SrcReg != Src.getReg()) + LV->getVarInfo(SrcReg).Kills.push_back(NewMI); + break; + } + CASE_NF(DEC8r) + CASE_NF(INC8r) Is8BitOp = true; [[fallthrough]]; - CASE_NF(DEC16r) - CASE_NF(INC16r) + CASE_NF(DEC16r) + CASE_NF(INC16r) return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); - CASE_NF(ADD64rr) - CASE_NF(ADD32rr) + CASE_NF(ADD64rr) + CASE_NF(ADD32rr) case X86::ADD64rr_DB: case X86::ADD32rr_DB: { assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); @@ -2158,21 +2164,21 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI, NumRegOperands = 3; break; } - CASE_NF(ADD8rr) + CASE_NF(ADD8rr) case X86::ADD8rr_DB: Is8BitOp = true; [[fallthrough]]; - CASE_NF(ADD16rr) + CASE_NF(ADD16rr) case X86::ADD16rr_DB: return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); - CASE_NF(ADD64ri32) + CASE_NF(ADD64ri32) case X86::ADD64ri32_DB: assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addOffset( BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src), MI.getOperand(2)); break; - CASE_NF(ADD32ri) + CASE_NF(ADD32ri) case X86::ADD32ri_DB: { assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; @@ -2197,62 +2203,62 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI, LV->getVarInfo(SrcReg).Kills.push_back(NewMI); break; } - CASE_NF(ADD8ri) + CASE_NF(ADD8ri) case X86::ADD8ri_DB: Is8BitOp = true; [[fallthrough]]; - CASE_NF(ADD16ri) + CASE_NF(ADD16ri) case X86::ADD16ri_DB: return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp); - CASE_NF(SUB8ri) - CASE_NF(SUB16ri) + CASE_NF(SUB8ri) + CASE_NF(SUB16ri) /// FIXME: Support these similar to ADD8ri/ADD16ri*. return nullptr; - CASE_NF(SUB32ri) { - if (!MI.getOperand(2).isImm()) - return nullptr; - int64_t Imm = MI.getOperand(2).getImm(); - if (!isInt<32>(-Imm)) - return nullptr; + CASE_NF(SUB32ri) { + if (!MI.getOperand(2).isImm()) + return nullptr; + int64_t Imm = MI.getOperand(2).getImm(); + if (!isInt<32>(-Imm)) + return nullptr; - assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); - unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; + assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); + unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; - bool isKill; - MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); - if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, SrcSubReg, - isKill, ImplicitOp, LV, LIS)) - return nullptr; + bool isKill; + MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); + if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, SrcSubReg, + isKill, ImplicitOp, LV, LIS)) + return nullptr; - MachineInstrBuilder MIB = - BuildMI(MF, MI.getDebugLoc(), get(Opc)) - .add(Dest) - .addReg(SrcReg, getKillRegState(isKill), SrcSubReg); - if (ImplicitOp.getReg() != 0) - MIB.add(ImplicitOp); + MachineInstrBuilder MIB = + BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .addReg(SrcReg, getKillRegState(isKill), SrcSubReg); + if (ImplicitOp.getReg() != 0) + MIB.add(ImplicitOp); - NewMI = addOffset(MIB, -Imm); + NewMI = addOffset(MIB, -Imm); - // Add kills if classifyLEAReg created a new register. - if (LV && SrcReg != Src.getReg()) - LV->getVarInfo(SrcReg).Kills.push_back(NewMI); - break; - } + // Add kills if classifyLEAReg created a new register. + if (LV && SrcReg != Src.getReg()) + LV->getVarInfo(SrcReg).Kills.push_back(NewMI); + break; + } - CASE_NF(SUB64ri32) { - if (!MI.getOperand(2).isImm()) - return nullptr; - int64_t Imm = MI.getOperand(2).getImm(); - if (!isInt<32>(-Imm)) - return nullptr; + CASE_NF(SUB64ri32) { + if (!MI.getOperand(2).isImm()) + return nullptr; + int64_t Imm = MI.getOperand(2).getImm(); + if (!isInt<32>(-Imm)) + return nullptr; - assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!"); + assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!"); - MachineInstrBuilder MIB = - BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src); - NewMI = addOffset(MIB, -Imm); - break; - } + MachineInstrBuilder MIB = + BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src); + NewMI = addOffset(MIB, -Imm); + break; + } case X86::VMOVDQU8Z128rmk: case X86::VMOVDQU8Z256rmk: @@ -2852,17 +2858,17 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, case X86::OP##_ND: switch (Opc) { - // SHLD B, C, I <-> SHRD C, B, (BitWidth - I) - CASE_ND(SHRD16rri8) - CASE_ND(SHLD16rri8) - CASE_ND(SHRD32rri8) - CASE_ND(SHLD32rri8) - CASE_ND(SHRD64rri8) - CASE_ND(SHLD64rri8) { - unsigned Size; - switch (Opc) { - default: - llvm_unreachable("Unreachable!"); + // SHLD B, C, I <-> SHRD C, B, (BitWidth - I) + CASE_ND(SHRD16rri8) + CASE_ND(SHLD16rri8) + CASE_ND(SHRD32rri8) + CASE_ND(SHLD32rri8) + CASE_ND(SHRD64rri8) + CASE_ND(SHLD64rri8) { + unsigned Size; + switch (Opc) { + default: + llvm_unreachable("Unreachable!"); #define FROM_TO_SIZE(A, B, S) \ case X86::A: \ Opc = X86::B; \ @@ -2881,16 +2887,16 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, Size = S; \ break; - FROM_TO_SIZE(SHRD16rri8, SHLD16rri8, 16) - FROM_TO_SIZE(SHRD32rri8, SHLD32rri8, 32) - FROM_TO_SIZE(SHRD64rri8, SHLD64rri8, 64) + FROM_TO_SIZE(SHRD16rri8, SHLD16rri8, 16) + FROM_TO_SIZE(SHRD32rri8, SHLD32rri8, 32) + FROM_TO_SIZE(SHRD64rri8, SHLD64rri8, 64) #undef FROM_TO_SIZE + } + WorkingMI = CloneIfNew(MI); + WorkingMI->setDesc(get(Opc)); + WorkingMI->getOperand(3).setImm(Size - MI.getOperand(3).getImm()); + break; } - WorkingMI = CloneIfNew(MI); - WorkingMI->setDesc(get(Opc)); - WorkingMI->getOperand(3).setImm(Size - MI.getOperand(3).getImm()); - break; - } case X86::PFSUBrr: case X86::PFSUBRrr: // PFSUB x, y: x = x - y @@ -3174,15 +3180,16 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, WorkingMI = CloneIfNew(MI); WorkingMI->setDesc(get(Opc)); break; - CASE_ND(CMOV16rr) - CASE_ND(CMOV32rr) - CASE_ND(CMOV64rr) { - WorkingMI = CloneIfNew(MI); - unsigned OpNo = MI.getDesc().getNumOperands() - 1; - X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm()); - WorkingMI->getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); - break; - } + CASE_ND(CMOV16rr) + CASE_ND(CMOV32rr) + CASE_ND(CMOV64rr) { + WorkingMI = CloneIfNew(MI); + unsigned OpNo = MI.getDesc().getNumOperands() - 1; + X86::CondCode CC = + static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm()); + WorkingMI->getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC)); + break; + } case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi: case X86::VPTERNLOGDZ128rri: @@ -5391,29 +5398,29 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, CmpMask = CmpValue = 0; } return true; - // A SUB can be used to perform comparison. - CASE_ND(SUB64rm) - CASE_ND(SUB32rm) - CASE_ND(SUB16rm) - CASE_ND(SUB8rm) + // A SUB can be used to perform comparison. + CASE_ND(SUB64rm) + CASE_ND(SUB32rm) + CASE_ND(SUB16rm) + CASE_ND(SUB8rm) SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = 0; CmpValue = 0; return true; - CASE_ND(SUB64rr) - CASE_ND(SUB32rr) - CASE_ND(SUB16rr) - CASE_ND(SUB8rr) + CASE_ND(SUB64rr) + CASE_ND(SUB32rr) + CASE_ND(SUB16rr) + CASE_ND(SUB8rr) SrcReg = MI.getOperand(1).getReg(); SrcReg2 = MI.getOperand(2).getReg(); CmpMask = 0; CmpValue = 0; return true; - CASE_ND(SUB64ri32) - CASE_ND(SUB32ri) - CASE_ND(SUB16ri) - CASE_ND(SUB8ri) + CASE_ND(SUB64ri32) + CASE_ND(SUB32ri) + CASE_ND(SUB16ri) + CASE_ND(SUB8ri) SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; if (MI.getOperand(2).isImm()) { @@ -5468,27 +5475,27 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, case X86::CMP32rr: case X86::CMP16rr: case X86::CMP8rr: - CASE_ND(SUB64rr) - CASE_ND(SUB32rr) - CASE_ND(SUB16rr) - CASE_ND(SUB8rr) { - Register OISrcReg; - Register OISrcReg2; - int64_t OIMask; - int64_t OIValue; - if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) || - OIMask != ImmMask || OIValue != ImmValue) + CASE_ND(SUB64rr) + CASE_ND(SUB32rr) + CASE_ND(SUB16rr) + CASE_ND(SUB8rr) { + Register OISrcReg; + Register OISrcReg2; + int64_t OIMask; + int64_t OIValue; + if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) || + OIMask != ImmMask || OIValue != ImmValue) + return false; + if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) { + *IsSwapped = false; + return true; + } + if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) { + *IsSwapped = true; + return true; + } return false; - if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) { - *IsSwapped = false; - return true; } - if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) { - *IsSwapped = true; - return true; - } - return false; - } case X86::CMP64ri32: case X86::CMP32ri: case X86::CMP16ri: @@ -5497,10 +5504,10 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, case X86::TEST32ri: case X86::TEST16ri: case X86::TEST8ri: - CASE_ND(SUB64ri32) - CASE_ND(SUB32ri) - CASE_ND(SUB16ri) - CASE_ND(SUB8ri) + CASE_ND(SUB64ri32) + CASE_ND(SUB32ri) + CASE_ND(SUB16ri) + CASE_ND(SUB8ri) case X86::TEST64rr: case X86::TEST32rr: case X86::TEST16rr: @@ -5557,98 +5564,98 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, default: return false; - // The shift instructions only modify ZF if their shift count is non-zero. - // N.B.: The processor truncates the shift count depending on the encoding. - CASE_ND(SAR8ri) - CASE_ND(SAR16ri) - CASE_ND(SAR32ri) - CASE_ND(SAR64ri) - CASE_ND(SHR8ri) - CASE_ND(SHR16ri) - CASE_ND(SHR32ri) - CASE_ND(SHR64ri) + // The shift instructions only modify ZF if their shift count is non-zero. + // N.B.: The processor truncates the shift count depending on the encoding. + CASE_ND(SAR8ri) + CASE_ND(SAR16ri) + CASE_ND(SAR32ri) + CASE_ND(SAR64ri) + CASE_ND(SHR8ri) + CASE_ND(SHR16ri) + CASE_ND(SHR32ri) + CASE_ND(SHR64ri) return getTruncatedShiftCount(MI, 2) != 0; - // Some left shift instructions can be turned into LEA instructions but only - // if their flags aren't used. Avoid transforming such instructions. - CASE_ND(SHL8ri) - CASE_ND(SHL16ri) - CASE_ND(SHL32ri) - CASE_ND(SHL64ri) { - unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (isTruncatedShiftCountForLEA(ShAmt)) - return false; - return ShAmt != 0; - } + // Some left shift instructions can be turned into LEA instructions but only + // if their flags aren't used. Avoid transforming such instructions. + CASE_ND(SHL8ri) + CASE_ND(SHL16ri) + CASE_ND(SHL32ri) + CASE_ND(SHL64ri) { + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (isTruncatedShiftCountForLEA(ShAmt)) + return false; + return ShAmt != 0; + } - CASE_ND(SHRD16rri8) - CASE_ND(SHRD32rri8) - CASE_ND(SHRD64rri8) - CASE_ND(SHLD16rri8) - CASE_ND(SHLD32rri8) - CASE_ND(SHLD64rri8) + CASE_ND(SHRD16rri8) + CASE_ND(SHRD32rri8) + CASE_ND(SHRD64rri8) + CASE_ND(SHLD16rri8) + CASE_ND(SHLD32rri8) + CASE_ND(SHLD64rri8) return getTruncatedShiftCount(MI, 3) != 0; - CASE_ND(SUB64ri32) - CASE_ND(SUB32ri) - CASE_ND(SUB16ri) - CASE_ND(SUB8ri) - CASE_ND(SUB64rr) - CASE_ND(SUB32rr) - CASE_ND(SUB16rr) - CASE_ND(SUB8rr) - CASE_ND(SUB64rm) - CASE_ND(SUB32rm) - CASE_ND(SUB16rm) - CASE_ND(SUB8rm) - CASE_ND(DEC64r) - CASE_ND(DEC32r) - CASE_ND(DEC16r) - CASE_ND(DEC8r) - CASE_ND(ADD64ri32) - CASE_ND(ADD32ri) - CASE_ND(ADD16ri) - CASE_ND(ADD8ri) - CASE_ND(ADD64rr) - CASE_ND(ADD32rr) - CASE_ND(ADD16rr) - CASE_ND(ADD8rr) - CASE_ND(ADD64rm) - CASE_ND(ADD32rm) - CASE_ND(ADD16rm) - CASE_ND(ADD8rm) - CASE_ND(INC64r) - CASE_ND(INC32r) - CASE_ND(INC16r) - CASE_ND(INC8r) - CASE_ND(ADC64ri32) - CASE_ND(ADC32ri) - CASE_ND(ADC16ri) - CASE_ND(ADC8ri) - CASE_ND(ADC64rr) - CASE_ND(ADC32rr) - CASE_ND(ADC16rr) - CASE_ND(ADC8rr) - CASE_ND(ADC64rm) - CASE_ND(ADC32rm) - CASE_ND(ADC16rm) - CASE_ND(ADC8rm) - CASE_ND(SBB64ri32) - CASE_ND(SBB32ri) - CASE_ND(SBB16ri) - CASE_ND(SBB8ri) - CASE_ND(SBB64rr) - CASE_ND(SBB32rr) - CASE_ND(SBB16rr) - CASE_ND(SBB8rr) - CASE_ND(SBB64rm) - CASE_ND(SBB32rm) - CASE_ND(SBB16rm) - CASE_ND(SBB8rm) - CASE_ND(NEG8r) - CASE_ND(NEG16r) - CASE_ND(NEG32r) - CASE_ND(NEG64r) + CASE_ND(SUB64ri32) + CASE_ND(SUB32ri) + CASE_ND(SUB16ri) + CASE_ND(SUB8ri) + CASE_ND(SUB64rr) + CASE_ND(SUB32rr) + CASE_ND(SUB16rr) + CASE_ND(SUB8rr) + CASE_ND(SUB64rm) + CASE_ND(SUB32rm) + CASE_ND(SUB16rm) + CASE_ND(SUB8rm) + CASE_ND(DEC64r) + CASE_ND(DEC32r) + CASE_ND(DEC16r) + CASE_ND(DEC8r) + CASE_ND(ADD64ri32) + CASE_ND(ADD32ri) + CASE_ND(ADD16ri) + CASE_ND(ADD8ri) + CASE_ND(ADD64rr) + CASE_ND(ADD32rr) + CASE_ND(ADD16rr) + CASE_ND(ADD8rr) + CASE_ND(ADD64rm) + CASE_ND(ADD32rm) + CASE_ND(ADD16rm) + CASE_ND(ADD8rm) + CASE_ND(INC64r) + CASE_ND(INC32r) + CASE_ND(INC16r) + CASE_ND(INC8r) + CASE_ND(ADC64ri32) + CASE_ND(ADC32ri) + CASE_ND(ADC16ri) + CASE_ND(ADC8ri) + CASE_ND(ADC64rr) + CASE_ND(ADC32rr) + CASE_ND(ADC16rr) + CASE_ND(ADC8rr) + CASE_ND(ADC64rm) + CASE_ND(ADC32rm) + CASE_ND(ADC16rm) + CASE_ND(ADC8rm) + CASE_ND(SBB64ri32) + CASE_ND(SBB32ri) + CASE_ND(SBB16ri) + CASE_ND(SBB8ri) + CASE_ND(SBB64rr) + CASE_ND(SBB32rr) + CASE_ND(SBB16rr) + CASE_ND(SBB8rr) + CASE_ND(SBB64rm) + CASE_ND(SBB32rm) + CASE_ND(SBB16rm) + CASE_ND(SBB8rm) + CASE_ND(NEG8r) + CASE_ND(NEG16r) + CASE_ND(NEG32r) + CASE_ND(NEG64r) case X86::LZCNT16rr: case X86::LZCNT16rm: case X86::LZCNT32rr: @@ -5668,42 +5675,42 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, case X86::TZCNT64rr: case X86::TZCNT64rm: return true; - CASE_ND(AND64ri32) - CASE_ND(AND32ri) - CASE_ND(AND16ri) - CASE_ND(AND8ri) - CASE_ND(AND64rr) - CASE_ND(AND32rr) - CASE_ND(AND16rr) - CASE_ND(AND8rr) - CASE_ND(AND64rm) - CASE_ND(AND32rm) - CASE_ND(AND16rm) - CASE_ND(AND8rm) - CASE_ND(XOR64ri32) - CASE_ND(XOR32ri) - CASE_ND(XOR16ri) - CASE_ND(XOR8ri) - CASE_ND(XOR64rr) - CASE_ND(XOR32rr) - CASE_ND(XOR16rr) - CASE_ND(XOR8rr) - CASE_ND(XOR64rm) - CASE_ND(XOR32rm) - CASE_ND(XOR16rm) - CASE_ND(XOR8rm) - CASE_ND(OR64ri32) - CASE_ND(OR32ri) - CASE_ND(OR16ri) - CASE_ND(OR8ri) - CASE_ND(OR64rr) - CASE_ND(OR32rr) - CASE_ND(OR16rr) - CASE_ND(OR8rr) - CASE_ND(OR64rm) - CASE_ND(OR32rm) - CASE_ND(OR16rm) - CASE_ND(OR8rm) + CASE_ND(AND64ri32) + CASE_ND(AND32ri) + CASE_ND(AND16ri) + CASE_ND(AND8ri) + CASE_ND(AND64rr) + CASE_ND(AND32rr) + CASE_ND(AND16rr) + CASE_ND(AND8rr) + CASE_ND(AND64rm) + CASE_ND(AND32rm) + CASE_ND(AND16rm) + CASE_ND(AND8rm) + CASE_ND(XOR64ri32) + CASE_ND(XOR32ri) + CASE_ND(XOR16ri) + CASE_ND(XOR8ri) + CASE_ND(XOR64rr) + CASE_ND(XOR32rr) + CASE_ND(XOR16rr) + CASE_ND(XOR8rr) + CASE_ND(XOR64rm) + CASE_ND(XOR32rm) + CASE_ND(XOR16rm) + CASE_ND(XOR8rm) + CASE_ND(OR64ri32) + CASE_ND(OR32ri) + CASE_ND(OR16ri) + CASE_ND(OR8ri) + CASE_ND(OR64rr) + CASE_ND(OR32rr) + CASE_ND(OR16rr) + CASE_ND(OR8rr) + CASE_ND(OR64rm) + CASE_ND(OR32rm) + CASE_ND(OR16rm) + CASE_ND(OR8rm) case X86::ANDN32rr: case X86::ANDN32rm: case X86::ANDN64rr: @@ -5781,15 +5788,17 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, } /// Check whether the use can be converted to remove a comparison against zero. -/// Returns the EFLAGS condition and the operand that we are comparing against zero. -static std::pair<X86::CondCode, unsigned> isUseDefConvertible(const MachineInstr &MI) { +/// Returns the EFLAGS condition and the operand that we are comparing against +/// zero. +static std::pair<X86::CondCode, unsigned> +isUseDefConvertible(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return std::make_pair(X86::COND_INVALID, ~0U); - CASE_ND(NEG8r) - CASE_ND(NEG16r) - CASE_ND(NEG32r) - CASE_ND(NEG64r) + CASE_ND(NEG8r) + CASE_ND(NEG16r) + CASE_ND(NEG32r) + CASE_ND(NEG64r) return std::make_pair(X86::COND_AE, 1U); case X86::LZCNT16rr: case X86::LZCNT32rr: @@ -5833,51 +5842,53 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, switch (CmpInstr.getOpcode()) { default: break; - CASE_ND(SUB64ri32) - CASE_ND(SUB32ri) - CASE_ND(SUB16ri) - CASE_ND(SUB8ri) - CASE_ND(SUB64rm) - CASE_ND(SUB32rm) - CASE_ND(SUB16rm) - CASE_ND(SUB8rm) - CASE_ND(SUB64rr) - CASE_ND(SUB32rr) - CASE_ND(SUB16rr) - CASE_ND(SUB8rr) { - if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) - return false; - // There is no use of the destination register, we can replace SUB with CMP. - unsigned NewOpcode = 0; + CASE_ND(SUB64ri32) + CASE_ND(SUB32ri) + CASE_ND(SUB16ri) + CASE_ND(SUB8ri) + CASE_ND(SUB64rm) + CASE_ND(SUB32rm) + CASE_ND(SUB16rm) + CASE_ND(SUB8rm) + CASE_ND(SUB64rr) + CASE_ND(SUB32rr) + CASE_ND(SUB16rr) + CASE_ND(SUB8rr) { + if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) + return false; + // There is no use of the destination register, we can replace SUB with + // CMP. + unsigned NewOpcode = 0; #define FROM_TO(A, B) \ CASE_ND(A) NewOpcode = X86::B; \ break; - switch (CmpInstr.getOpcode()) { - default: - llvm_unreachable("Unreachable!"); - FROM_TO(SUB64rm, CMP64rm) - FROM_TO(SUB32rm, CMP32rm) - FROM_TO(SUB16rm, CMP16rm) - FROM_TO(SUB8rm, CMP8rm) - FROM_TO(SUB64rr, CMP64rr) - FROM_TO(SUB32rr, CMP32rr) - FROM_TO(SUB16rr, CMP16rr) - FROM_TO(SUB8rr, CMP8rr) - FROM_TO(SUB64ri32, CMP64ri32) - FROM_TO(SUB32ri, CMP32ri) - FROM_TO(SUB16ri, CMP16ri) - FROM_TO(SUB8ri, CMP8ri) - } + switch (CmpInstr.getOpcode()) { + default: + llvm_unreachable("Unreachable!"); + FROM_TO(SUB64rm, CMP64rm) + FROM_TO(SUB32rm, CMP32rm) + FROM_TO(SUB16rm, CMP16rm) + FROM_TO(SUB8rm, CMP8rm) + FROM_TO(SUB64rr, CMP64rr) + FROM_TO(SUB32rr, CMP32rr) + FROM_TO(SUB16rr, CMP16rr) + FROM_TO(SUB8rr, CMP8rr) + FROM_TO(SUB64ri32, CMP64ri32) + FROM_TO(SUB32ri, CMP32ri) + FROM_TO(SUB16ri, CMP16ri) + FROM_TO(SUB8ri, CMP8ri) + } #undef FROM_TO - CmpInstr.setDesc(get(NewOpcode)); - CmpInstr.removeOperand(0); - // Mutating this instruction invalidates any debug data associated with it. - CmpInstr.dropDebugNumber(); - // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. - if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || - NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) - return false; - } + CmpInstr.setDesc(get(NewOpcode)); + CmpInstr.removeOperand(0); + // Mutating this instruction invalidates any debug data associated with + // it. + CmpInstr.dropDebugNumber(); + // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. + if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || + NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) + return false; + } } // The following code tries to remove the comparison by re-using EFLAGS @@ -6234,14 +6245,14 @@ static bool canConvert2Copy(unsigned Opc) { switch (Opc) { default: return false; - CASE_ND(ADD64ri32) - CASE_ND(SUB64ri32) - CASE_ND(OR64ri32) - CASE_ND(XOR64ri32) - CASE_ND(ADD32ri) - CASE_ND(SUB32ri) - CASE_ND(OR32ri) - CASE_ND(XOR32ri) + CASE_ND(ADD64ri32) + CASE_ND(SUB64ri32) + CASE_ND(OR64ri32) + CASE_ND(XOR64ri32) + CASE_ND(ADD32ri) + CASE_ND(SUB32ri) + CASE_ND(OR32ri) + CASE_ND(XOR32ri) return true; } } @@ -9656,7 +9667,7 @@ Register X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { static const uint16_t *lookup(unsigned opcode, unsigned domain, ArrayRef<uint16_t[3]> Table) { - for (const uint16_t(&Row)[3] : Table) + for (const uint16_t (&Row)[3] : Table) if (Row[domain - 1] == opcode) return Row; return nullptr; @@ -9665,7 +9676,7 @@ static const uint16_t *lookup(unsigned opcode, unsigned domain, static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain, ArrayRef<uint16_t[4]> Table) { // If this is the integer domain make sure to check both integer columns. - for (const uint16_t(&Row)[4] : Table) + for (const uint16_t (&Row)[4] : Table) if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode)) return Row; return nullptr; @@ -10421,25 +10432,25 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, if (Invert) return false; switch (Inst.getOpcode()) { - CASE_ND(ADD8rr) - CASE_ND(ADD16rr) - CASE_ND(ADD32rr) - CASE_ND(ADD64rr) - CASE_ND(AND8rr) - CASE_ND(AND16rr) - CASE_ND(AND32rr) - CASE_ND(AND64rr) - CASE_ND(OR8rr) - CASE_ND(OR16rr) - CASE_ND(OR32rr) - CASE_ND(OR64rr) - CASE_ND(XOR8rr) - CASE_ND(XOR16rr) - CASE_ND(XOR32rr) - CASE_ND(XOR64rr) - CASE_ND(IMUL16rr) - CASE_ND(IMUL32rr) - CASE_ND(IMUL64rr) + CASE_ND(ADD8rr) + CASE_ND(ADD16rr) + CASE_ND(ADD32rr) + CASE_ND(ADD64rr) + CASE_ND(AND8rr) + CASE_ND(AND16rr) + CASE_ND(AND32rr) + CASE_ND(AND64rr) + CASE_ND(OR8rr) + CASE_ND(OR16rr) + CASE_ND(OR32rr) + CASE_ND(OR64rr) + CASE_ND(XOR8rr) + CASE_ND(XOR16rr) + CASE_ND(XOR32rr) + CASE_ND(XOR64rr) + CASE_ND(IMUL16rr) + CASE_ND(IMUL32rr) + CASE_ND(IMUL64rr) case X86::PANDrr: case X86::PORrr: case X86::PXORrr: @@ -11263,8 +11274,8 @@ bool X86InstrInfo::getMachineCombinerPatterns( break; } } - return TargetInstrInfo::getMachineCombinerPatterns(Root, - Patterns, DoRegPressureReduce); + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); } static void diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 76f18803c2e3..846bcc85b7ad 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -319,8 +319,7 @@ public: Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - Register isLoadFromStackSlot(const MachineInstr &MI, - int &FrameIndex, + Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const override; /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. This uses a heuristic so it isn't @@ -330,8 +329,7 @@ public: Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - Register isStoreToStackSlot(const MachineInstr &MI, - int &FrameIndex, + Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const override; /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. This uses a heuristic so it isn't @@ -491,12 +489,12 @@ public: /// is likely that the referenced instruction has been changed. /// /// \returns true on success. - MachineInstr * - foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, - ArrayRef<unsigned> Ops, - MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr, - VirtRegMap *VRM = nullptr) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. @@ -745,8 +743,7 @@ private: /// /// If IsIntrinsic is set, operand 1 will be ignored for commuting. bool findThreeSrcCommutedOpIndices(const MachineInstr &MI, - unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, bool IsIntrinsic = false) const; /// Returns true when instruction \p FlagI produces the same flags as \p OI. diff --git a/llvm/test/CodeGen/X86/ctselect-i386-fp.ll b/llvm/test/CodeGen/X86/ctselect-i386-fp.ll index b88ec72a3792..4b5f31bad831 100644 --- a/llvm/test/CodeGen/X86/ctselect-i386-fp.ll +++ b/llvm/test/CodeGen/X86/ctselect-i386-fp.ll @@ -209,94 +209,84 @@ define double @test_ctselect_f64_basic(i1 %cond, double %a, double %b) nounwind define x86_fp80 @test_ctselect_f80_basic(i1 %cond, x86_fp80 %a, x86_fp80 %b) nounwind { ; I386-NOCMOV-LABEL: test_ctselect_f80_basic: ; I386-NOCMOV: # %bb.0: +; I386-NOCMOV-NEXT: pushl %ebp +; I386-NOCMOV-NEXT: pushl %ebx ; I386-NOCMOV-NEXT: pushl %edi ; I386-NOCMOV-NEXT: pushl %esi -; I386-NOCMOV-NEXT: subl $12, %esp -; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; I386-NOCMOV-NEXT: sete %al -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: subl $40, %esp +; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: testb $1, %cl +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-NOCMOV-NEXT: movb %al, %ah -; I386-NOCMOV-NEXT: movzbl %ah, %edi -; I386-NOCMOV-NEXT: negl %edi -; I386-NOCMOV-NEXT: movl %edx, %esi -; I386-NOCMOV-NEXT: andl %edi, %esi -; I386-NOCMOV-NEXT: notl %edi -; I386-NOCMOV-NEXT: andl %ecx, %edi -; I386-NOCMOV-NEXT: orl %edi, %esi -; I386-NOCMOV-NEXT: movl %esi, (%esp) -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; I386-NOCMOV-NEXT: sete %ch +; I386-NOCMOV-NEXT: movb %ch, %al +; I386-NOCMOV-NEXT: movzbl %al, %ebp +; I386-NOCMOV-NEXT: negl %ebp +; I386-NOCMOV-NEXT: movl %edi, %ebx +; I386-NOCMOV-NEXT: andl %ebp, %ebx +; I386-NOCMOV-NEXT: notl %ebp +; I386-NOCMOV-NEXT: andl %edx, %ebp +; I386-NOCMOV-NEXT: orl %ebp, %ebx +; I386-NOCMOV-NEXT: testb $1, %cl +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-NOCMOV-NEXT: movb %al, %ah -; I386-NOCMOV-NEXT: movzbl %ah, %edi -; I386-NOCMOV-NEXT: negl %edi -; I386-NOCMOV-NEXT: movl %edx, %esi -; I386-NOCMOV-NEXT: andl %edi, %esi -; I386-NOCMOV-NEXT: notl %edi -; I386-NOCMOV-NEXT: andl %ecx, %edi -; I386-NOCMOV-NEXT: orl %edi, %esi -; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-NOCMOV-NEXT: movb %al, %ah -; I386-NOCMOV-NEXT: movzbl %ah, %edi +; I386-NOCMOV-NEXT: sete %ch +; I386-NOCMOV-NEXT: movb %ch, %cl +; I386-NOCMOV-NEXT: movzbl %cl, %ebp +; I386-NOCMOV-NEXT: negl %ebp +; I386-NOCMOV-NEXT: movl %edx, %edi +; I386-NOCMOV-NEXT: andl %ebp, %edi +; I386-NOCMOV-NEXT: notl %ebp +; I386-NOCMOV-NEXT: andl %eax, %ebp +; I386-NOCMOV-NEXT: orl %ebp, %edi +; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: movl %edi, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: sete %al +; I386-NOCMOV-NEXT: movl (%esp), %ebx # 4-byte Reload +; I386-NOCMOV-NEXT: movb %al, %dl +; I386-NOCMOV-NEXT: movzbl %dl, %edi ; I386-NOCMOV-NEXT: negl %edi -; I386-NOCMOV-NEXT: movl %edx, %esi -; I386-NOCMOV-NEXT: andl %edi, %esi +; I386-NOCMOV-NEXT: movl %esi, %ecx +; I386-NOCMOV-NEXT: andl %edi, %ecx ; I386-NOCMOV-NEXT: notl %edi -; I386-NOCMOV-NEXT: andl %ecx, %edi -; I386-NOCMOV-NEXT: orl %edi, %esi -; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I386-NOCMOV-NEXT: fldt (%esp) -; I386-NOCMOV-NEXT: addl $12, %esp +; I386-NOCMOV-NEXT: andl %ebx, %edi +; I386-NOCMOV-NEXT: orl %edi, %ecx +; I386-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: addl $40, %esp ; I386-NOCMOV-NEXT: popl %esi ; I386-NOCMOV-NEXT: popl %edi +; I386-NOCMOV-NEXT: popl %ebx +; I386-NOCMOV-NEXT: popl %ebp ; I386-NOCMOV-NEXT: retl ; ; I386-CMOV-LABEL: test_ctselect_f80_basic: ; I386-CMOV: # %bb.0: -; I386-CMOV-NEXT: pushl %edi -; I386-CMOV-NEXT: pushl %esi -; I386-CMOV-NEXT: subl $12, %esp +; I386-CMOV-NEXT: subl $36, %esp +; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; I386-CMOV-NEXT: sete %al +; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax +; I386-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-CMOV-NEXT: movb %al, %ah -; I386-CMOV-NEXT: movzbl %ah, %edi -; I386-CMOV-NEXT: negl %edi -; I386-CMOV-NEXT: movl %edx, %esi -; I386-CMOV-NEXT: andl %edi, %esi -; I386-CMOV-NEXT: notl %edi -; I386-CMOV-NEXT: andl %ecx, %edi -; I386-CMOV-NEXT: orl %edi, %esi -; I386-CMOV-NEXT: movl %esi, (%esp) -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-CMOV-NEXT: movb %al, %ah -; I386-CMOV-NEXT: movzbl %ah, %edi -; I386-CMOV-NEXT: negl %edi -; I386-CMOV-NEXT: movl %edx, %esi -; I386-CMOV-NEXT: andl %edi, %esi -; I386-CMOV-NEXT: notl %edi -; I386-CMOV-NEXT: andl %ecx, %edi -; I386-CMOV-NEXT: orl %edi, %esi -; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-CMOV-NEXT: movb %al, %ah -; I386-CMOV-NEXT: movzbl %ah, %edi -; I386-CMOV-NEXT: negl %edi -; I386-CMOV-NEXT: movl %edx, %esi -; I386-CMOV-NEXT: andl %edi, %esi -; I386-CMOV-NEXT: notl %edi -; I386-CMOV-NEXT: andl %ecx, %edi -; I386-CMOV-NEXT: orl %edi, %esi -; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %ecx +; I386-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax +; I386-CMOV-NEXT: movl %eax, (%esp) ; I386-CMOV-NEXT: fldt (%esp) -; I386-CMOV-NEXT: addl $12, %esp -; I386-CMOV-NEXT: popl %esi -; I386-CMOV-NEXT: popl %edi +; I386-CMOV-NEXT: addl $36, %esp ; I386-CMOV-NEXT: retl %result = call x86_fp80 @llvm.ct.select.f80(i1 %cond, x86_fp80 %a, x86_fp80 %b) ret x86_fp80 %result @@ -543,94 +533,84 @@ define float @test_ctselect_f32_nan(i1 %cond) nounwind { define x86_fp80 @test_ctselect_f80_alignment(i1 %cond, x86_fp80 %a, x86_fp80 %b) nounwind { ; I386-NOCMOV-LABEL: test_ctselect_f80_alignment: ; I386-NOCMOV: # %bb.0: +; I386-NOCMOV-NEXT: pushl %ebp +; I386-NOCMOV-NEXT: pushl %ebx ; I386-NOCMOV-NEXT: pushl %edi ; I386-NOCMOV-NEXT: pushl %esi -; I386-NOCMOV-NEXT: subl $12, %esp -; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; I386-NOCMOV-NEXT: sete %al -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: subl $40, %esp +; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: testb $1, %cl +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-NOCMOV-NEXT: movb %al, %ah -; I386-NOCMOV-NEXT: movzbl %ah, %edi -; I386-NOCMOV-NEXT: negl %edi -; I386-NOCMOV-NEXT: movl %edx, %esi -; I386-NOCMOV-NEXT: andl %edi, %esi -; I386-NOCMOV-NEXT: notl %edi -; I386-NOCMOV-NEXT: andl %ecx, %edi -; I386-NOCMOV-NEXT: orl %edi, %esi -; I386-NOCMOV-NEXT: movl %esi, (%esp) -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; I386-NOCMOV-NEXT: sete %ch +; I386-NOCMOV-NEXT: movb %ch, %al +; I386-NOCMOV-NEXT: movzbl %al, %ebp +; I386-NOCMOV-NEXT: negl %ebp +; I386-NOCMOV-NEXT: movl %edi, %ebx +; I386-NOCMOV-NEXT: andl %ebp, %ebx +; I386-NOCMOV-NEXT: notl %ebp +; I386-NOCMOV-NEXT: andl %edx, %ebp +; I386-NOCMOV-NEXT: orl %ebp, %ebx +; I386-NOCMOV-NEXT: testb $1, %cl +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-NOCMOV-NEXT: movb %al, %ah -; I386-NOCMOV-NEXT: movzbl %ah, %edi -; I386-NOCMOV-NEXT: negl %edi -; I386-NOCMOV-NEXT: movl %edx, %esi -; I386-NOCMOV-NEXT: andl %edi, %esi -; I386-NOCMOV-NEXT: notl %edi -; I386-NOCMOV-NEXT: andl %ecx, %edi -; I386-NOCMOV-NEXT: orl %edi, %esi -; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-NOCMOV-NEXT: movb %al, %ah -; I386-NOCMOV-NEXT: movzbl %ah, %edi +; I386-NOCMOV-NEXT: sete %ch +; I386-NOCMOV-NEXT: movb %ch, %cl +; I386-NOCMOV-NEXT: movzbl %cl, %ebp +; I386-NOCMOV-NEXT: negl %ebp +; I386-NOCMOV-NEXT: movl %edx, %edi +; I386-NOCMOV-NEXT: andl %ebp, %edi +; I386-NOCMOV-NEXT: notl %ebp +; I386-NOCMOV-NEXT: andl %eax, %ebp +; I386-NOCMOV-NEXT: orl %ebp, %edi +; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: movl %edi, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: sete %al +; I386-NOCMOV-NEXT: movl (%esp), %ebx # 4-byte Reload +; I386-NOCMOV-NEXT: movb %al, %dl +; I386-NOCMOV-NEXT: movzbl %dl, %edi ; I386-NOCMOV-NEXT: negl %edi -; I386-NOCMOV-NEXT: movl %edx, %esi -; I386-NOCMOV-NEXT: andl %edi, %esi +; I386-NOCMOV-NEXT: movl %esi, %ecx +; I386-NOCMOV-NEXT: andl %edi, %ecx ; I386-NOCMOV-NEXT: notl %edi -; I386-NOCMOV-NEXT: andl %ecx, %edi -; I386-NOCMOV-NEXT: orl %edi, %esi -; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I386-NOCMOV-NEXT: fldt (%esp) -; I386-NOCMOV-NEXT: addl $12, %esp +; I386-NOCMOV-NEXT: andl %ebx, %edi +; I386-NOCMOV-NEXT: orl %edi, %ecx +; I386-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-NOCMOV-NEXT: addl $40, %esp ; I386-NOCMOV-NEXT: popl %esi ; I386-NOCMOV-NEXT: popl %edi +; I386-NOCMOV-NEXT: popl %ebx +; I386-NOCMOV-NEXT: popl %ebp ; I386-NOCMOV-NEXT: retl ; ; I386-CMOV-LABEL: test_ctselect_f80_alignment: ; I386-CMOV: # %bb.0: -; I386-CMOV-NEXT: pushl %edi -; I386-CMOV-NEXT: pushl %esi -; I386-CMOV-NEXT: subl $12, %esp +; I386-CMOV-NEXT: subl $36, %esp +; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; I386-CMOV-NEXT: sete %al +; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax +; I386-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-CMOV-NEXT: movb %al, %ah -; I386-CMOV-NEXT: movzbl %ah, %edi -; I386-CMOV-NEXT: negl %edi -; I386-CMOV-NEXT: movl %edx, %esi -; I386-CMOV-NEXT: andl %edi, %esi -; I386-CMOV-NEXT: notl %edi -; I386-CMOV-NEXT: andl %ecx, %edi -; I386-CMOV-NEXT: orl %edi, %esi -; I386-CMOV-NEXT: movl %esi, (%esp) -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-CMOV-NEXT: movb %al, %ah -; I386-CMOV-NEXT: movzbl %ah, %edi -; I386-CMOV-NEXT: negl %edi -; I386-CMOV-NEXT: movl %edx, %esi -; I386-CMOV-NEXT: andl %edi, %esi -; I386-CMOV-NEXT: notl %edi -; I386-CMOV-NEXT: andl %ecx, %edi -; I386-CMOV-NEXT: orl %edi, %esi -; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; I386-CMOV-NEXT: movb %al, %ah -; I386-CMOV-NEXT: movzbl %ah, %edi -; I386-CMOV-NEXT: negl %edi -; I386-CMOV-NEXT: movl %edx, %esi -; I386-CMOV-NEXT: andl %edi, %esi -; I386-CMOV-NEXT: notl %edi -; I386-CMOV-NEXT: andl %ecx, %edi -; I386-CMOV-NEXT: orl %edi, %esi -; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %ecx +; I386-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax +; I386-CMOV-NEXT: movl %eax, (%esp) ; I386-CMOV-NEXT: fldt (%esp) -; I386-CMOV-NEXT: addl $12, %esp -; I386-CMOV-NEXT: popl %esi -; I386-CMOV-NEXT: popl %edi +; I386-CMOV-NEXT: addl $36, %esp ; I386-CMOV-NEXT: retl %result = call x86_fp80 @llvm.ct.select.f80(i1 %cond, x86_fp80 %a, x86_fp80 %b) ret x86_fp80 %result |
