aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp63
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp919
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h21
-rw-r--r--llvm/test/CodeGen/X86/ctselect-i386-fp.ll272
4 files changed, 663 insertions, 612 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 401c1953323f..7a3bb3c648fb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26170,6 +26170,69 @@ SDValue X86TargetLowering::LowerCT_SELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBitcast(VT, CtSelect);
}
+ // Handle f80 types by splitting into three 32-bit chunks
+ if (VT == MVT::f80) {
+ SDValue Chain = DAG.getEntryNode();
+
+ // Create temporary stack slots for input f80 values
+ SDValue TrueSlot = DAG.CreateStackTemporary(MVT::f80);
+ SDValue FalseSlot = DAG.CreateStackTemporary(MVT::f80);
+
+ // Store f80 values to memory
+ SDValue StoreTrueF80 =
+ DAG.getStore(Chain, DL, TrueOp, TrueSlot, MachinePointerInfo());
+ SDValue StoreFalseF80 =
+ DAG.getStore(Chain, DL, FalseOp, FalseSlot, MachinePointerInfo());
+
+ // Load i32 parts from memory (3 chunks for 96-bit f80 storage)
+ SDValue TruePart0 =
+ DAG.getLoad(MVT::i32, DL, StoreTrueF80, TrueSlot, MachinePointerInfo());
+ SDValue TruePart1Ptr =
+ DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(4), DL);
+ SDValue TruePart1 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TruePart1Ptr,
+ MachinePointerInfo());
+ SDValue TruePart2Ptr =
+ DAG.getMemBasePlusOffset(TrueSlot, TypeSize::getFixed(8), DL);
+ SDValue TruePart2 = DAG.getLoad(MVT::i32, DL, StoreTrueF80, TruePart2Ptr,
+ MachinePointerInfo());
+
+ SDValue FalsePart0 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalseSlot,
+ MachinePointerInfo());
+ SDValue FalsePart1Ptr =
+ DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(4), DL);
+ SDValue FalsePart1 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalsePart1Ptr,
+ MachinePointerInfo());
+ SDValue FalsePart2Ptr =
+ DAG.getMemBasePlusOffset(FalseSlot, TypeSize::getFixed(8), DL);
+ SDValue FalsePart2 = DAG.getLoad(MVT::i32, DL, StoreFalseF80, FalsePart2Ptr,
+ MachinePointerInfo());
+
+ // Perform CT_SELECT on each 32-bit chunk
+ SDValue Part0Ops[] = {FalsePart0, TruePart0, CC, ProcessedCond};
+ SDValue Part0Select = DAG.getNode(X86ISD::CT_SELECT, DL, MVT::i32, Part0Ops);
+ SDValue Part1Ops[] = {FalsePart1, TruePart1, CC, ProcessedCond};
+ SDValue Part1Select = DAG.getNode(X86ISD::CT_SELECT, DL, MVT::i32, Part1Ops);
+ SDValue Part2Ops[] = {FalsePart2, TruePart2, CC, ProcessedCond};
+ SDValue Part2Select = DAG.getNode(X86ISD::CT_SELECT, DL, MVT::i32, Part2Ops);
+
+ // Create result stack slot and store the selected parts
+ SDValue ResultSlot = DAG.CreateStackTemporary(MVT::f80);
+ SDValue StorePart0 =
+ DAG.getStore(Chain, DL, Part0Select, ResultSlot, MachinePointerInfo());
+ SDValue ResPart1Ptr =
+ DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(4), DL);
+ SDValue StorePart1 = DAG.getStore(StorePart0, DL, Part1Select, ResPart1Ptr,
+ MachinePointerInfo());
+ SDValue ResPart2Ptr =
+ DAG.getMemBasePlusOffset(ResultSlot, TypeSize::getFixed(8), DL);
+ SDValue StorePart2 = DAG.getStore(StorePart1, DL, Part2Select, ResPart2Ptr,
+ MachinePointerInfo());
+
+ // Load complete f80 result from memory
+ return DAG.getLoad(MVT::f80, DL, StorePart2, ResultSlot,
+ MachinePointerInfo());
+ }
+
// Create final CT_SELECT node
SDValue Ops[] = {FalseOp, TrueOp, CC, ProcessedCond};
return DAG.getNode(X86ISD::CT_SELECT, DL, Op.getValueType(), Ops,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d4a46048a1d2..f98501da8210 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -689,8 +689,7 @@ bool X86InstrInfo::expandCtSelectVector(MachineInstr &MI) const {
.addImm(31));
} else {
// Negate to convert 1 -> 0xFFFFFFFF, 0 -> 0x00000000 (negl %eax)
- recordInstr(BuildMI(*MBB, MI, DL, get(X86::NEG32r), TmpGPR)
- .addReg(TmpGPR));
+ recordInstr(BuildMI(*MBB, MI, DL, get(X86::NEG32r), TmpGPR).addReg(TmpGPR));
}
// Broadcast to TmpX (vector mask)
@@ -847,7 +846,8 @@ bool X86InstrInfo::expandCtSelectVector(MachineInstr &MI) const {
.setMIFlags(MachineInstr::MIFlag::NoMerge));
}
- assert(FirstInstr && LastInstr && "Expected at least one expanded instruction");
+ assert(FirstInstr && LastInstr &&
+ "Expected at least one expanded instruction");
auto BundleEnd = LastInstr->getIterator();
finalizeBundle(*MBB, FirstInstr->getIterator(), std::next(BundleEnd));
@@ -915,25 +915,28 @@ bool X86InstrInfo::expandCtSelectWithCMOV(MachineInstr &MI) const {
/// Expand i386-specific CT_SELECT pseudo instructions (post-RA, constant-time)
/// These internal pseudos receive a pre-materialized condition byte from the
-/// custom inserter, avoiding EFLAGS corruption issues during i64 type legalization.
+/// custom inserter, avoiding EFLAGS corruption issues during i64 type
+/// legalization.
bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
DebugLoc DL = MI.getDebugLoc();
// CT_SELECT_I386_INT_GRxxrr has operands: (outs dst, tmp_byte, tmp_mask),
// (ins src1, src2, cond_byte)
- // Note: cond_byte is pre-materialized by custom inserter, not EFLAGS-dependent
+ // Note: cond_byte is pre-materialized by custom inserter, not
+ // EFLAGS-dependent
Register DstReg = MI.getOperand(0).getReg();
Register TmpByteReg = MI.getOperand(1).getReg();
Register TmpMaskReg = MI.getOperand(2).getReg();
Register Src1Reg = MI.getOperand(3).getReg();
Register Src2Reg = MI.getOperand(4).getReg();
- Register CondByteReg = MI.getOperand(5).getReg(); // Pre-materialized condition byte
+ Register CondByteReg =
+ MI.getOperand(5).getReg(); // Pre-materialized condition byte
// Determine instruction opcodes based on register width
unsigned MovZXOp, NegOp, MovOp, AndOp, NotOp, OrOp;
if (MI.getOpcode() == X86::CT_SELECT_I386_INT_GR8rr) {
- MovZXOp = 0; // No zero-extend needed for GR8
+ MovZXOp = 0; // No zero-extend needed for GR8
NegOp = X86::NEG8r;
MovOp = X86::MOV8rr;
AndOp = X86::AND8rr;
@@ -962,8 +965,8 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
// Step 1: Copy pre-materialized condition byte to TmpByteReg
// This allows the bundle to work with allocated temporaries
auto I1 = BuildMI(*MBB, MI, DL, get(X86::MOV8rr), TmpByteReg)
- .addReg(CondByteReg)
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(CondByteReg)
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
auto BundleStart = I1->getIterator();
// Step 2: Zero-extend condition byte to register width (0 or 1)
@@ -974,7 +977,9 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
}
// Step 3: Convert condition to bitmask (NEG: 1 -> 0xFFFF..., 0 -> 0x0000...)
- Register MaskReg = (MI.getOpcode() == X86::CT_SELECT_I386_INT_GR8rr) ? TmpByteReg : TmpMaskReg;
+ Register MaskReg = (MI.getOpcode() == X86::CT_SELECT_I386_INT_GR8rr)
+ ? TmpByteReg
+ : TmpMaskReg;
BuildMI(*MBB, MI, DL, get(NegOp), MaskReg)
.addReg(MaskReg)
.setMIFlag(MachineInstr::MIFlag::NoMerge);
@@ -1002,9 +1007,9 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
// Step 8: Final result: (src1 & mask) | (src2 & ~mask)
auto LI = BuildMI(*MBB, MI, DL, get(OrOp), DstReg)
- .addReg(DstReg)
- .addReg(MaskReg)
- .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ .addReg(DstReg)
+ .addReg(MaskReg)
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
// Bundle all generated instructions for atomic execution before removing MI
auto BundleEnd = std::next(LI->getIterator());
@@ -1013,11 +1018,12 @@ bool X86InstrInfo::expandCtSelectIntWithoutCMOV(MachineInstr &MI) const {
finalizeBundle(*MBB, BundleStart, BundleEnd);
}
- // TODO: Optimization opportunity - The register allocator may choose callee-saved
- // registers (e.g., %ebx, %esi) for TmpByteReg/TmpMaskReg, causing unnecessary
- // save/restore overhead. Consider constraining these to caller-saved register
- // classes (e.g., GR8_AL, GR32_CallSaved) in the TableGen definitions to improve
- // constant-time performance by eliminating prologue/epilogue instructions.
+ // TODO: Optimization opportunity - The register allocator may choose
+ // callee-saved registers (e.g., %ebx, %esi) for TmpByteReg/TmpMaskReg,
+ // causing unnecessary save/restore overhead. Consider constraining these to
+ // caller-saved register classes (e.g., GR8_AL, GR32_CallSaved) in the
+ // TableGen definitions to improve constant-time performance by eliminating
+ // prologue/epilogue instructions.
// Remove the original pseudo instruction
MI.eraseFromParent();
@@ -1305,8 +1311,7 @@ static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
return isPICBase;
}
-bool X86InstrInfo::isReMaterializableImpl(
- const MachineInstr &MI) const {
+bool X86InstrInfo::isReMaterializableImpl(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
// This function should only be called for opcodes with the ReMaterializable
@@ -1823,32 +1828,32 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
switch (MIOpc) {
default:
llvm_unreachable("Unreachable!");
- CASE_NF(SHL8ri)
- CASE_NF(SHL16ri) {
- unsigned ShAmt = MI.getOperand(2).getImm();
- MIB.addReg(0)
- .addImm(1LL << ShAmt)
- .addReg(InRegLEA, RegState::Kill)
- .addImm(0)
- .addReg(0);
- break;
- }
- CASE_NF(INC8r)
- CASE_NF(INC16r)
+ CASE_NF(SHL8ri)
+ CASE_NF(SHL16ri) {
+ unsigned ShAmt = MI.getOperand(2).getImm();
+ MIB.addReg(0)
+ .addImm(1LL << ShAmt)
+ .addReg(InRegLEA, RegState::Kill)
+ .addImm(0)
+ .addReg(0);
+ break;
+ }
+ CASE_NF(INC8r)
+ CASE_NF(INC16r)
addRegOffset(MIB, InRegLEA, true, 1);
break;
- CASE_NF(DEC8r)
- CASE_NF(DEC16r)
+ CASE_NF(DEC8r)
+ CASE_NF(DEC16r)
addRegOffset(MIB, InRegLEA, true, -1);
break;
- CASE_NF(ADD8ri)
- CASE_NF(ADD16ri)
+ CASE_NF(ADD8ri)
+ CASE_NF(ADD16ri)
case X86::ADD8ri_DB:
case X86::ADD16ri_DB:
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
- CASE_NF(ADD8rr)
- CASE_NF(ADD16rr)
+ CASE_NF(ADD8rr)
+ CASE_NF(ADD16rr)
case X86::ADD8rr_DB:
case X86::ADD16rr_DB: {
Src2 = MI.getOperand(2).getReg();
@@ -1986,128 +1991,129 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MIOpc) {
default:
llvm_unreachable("Unreachable!");
- CASE_NF(SHL64ri) {
- assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
- unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt))
- return nullptr;
-
- // LEA can't handle RSP.
- if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
- Src.getReg(), &X86::GR64_NOSPRegClass))
- return nullptr;
+ CASE_NF(SHL64ri) {
+ assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
+ unsigned ShAmt = getTruncatedShiftCount(MI, 2);
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
- NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
- .add(Dest)
- .addReg(0)
- .addImm(1LL << ShAmt)
- .add(Src)
- .addImm(0)
- .addReg(0);
- break;
- }
- CASE_NF(SHL32ri) {
- assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
- unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt))
- return nullptr;
+ // LEA can't handle RSP.
+ if (Src.getReg().isVirtual() &&
+ !MF.getRegInfo().constrainRegClass(Src.getReg(),
+ &X86::GR64_NOSPRegClass))
+ return nullptr;
- unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
+ .add(Dest)
+ .addReg(0)
+ .addImm(1LL << ShAmt)
+ .add(Src)
+ .addImm(0)
+ .addReg(0);
+ break;
+ }
+ CASE_NF(SHL32ri) {
+ assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
+ unsigned ShAmt = getTruncatedShiftCount(MI, 2);
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
- // LEA can't handle ESP.
- bool isKill;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg,
- isKill, ImplicitOp, LV, LIS))
- return nullptr;
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .addReg(0)
- .addImm(1LL << ShAmt)
- .addReg(SrcReg, getKillRegState(isKill), SrcSubReg)
- .addImm(0)
- .addReg(0);
- if (ImplicitOp.getReg() != 0)
- MIB.add(ImplicitOp);
- NewMI = MIB;
+ // LEA can't handle ESP.
+ bool isKill;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg,
+ isKill, ImplicitOp, LV, LIS))
+ return nullptr;
- // Add kills if classifyLEAReg created a new register.
- if (LV && SrcReg != Src.getReg())
- LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
- break;
- }
- CASE_NF(SHL8ri)
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(0)
+ .addImm(1LL << ShAmt)
+ .addReg(SrcReg, getKillRegState(isKill), SrcSubReg)
+ .addImm(0)
+ .addReg(0);
+ if (ImplicitOp.getReg() != 0)
+ MIB.add(ImplicitOp);
+ NewMI = MIB;
+
+ // Add kills if classifyLEAReg created a new register.
+ if (LV && SrcReg != Src.getReg())
+ LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
+ break;
+ }
+ CASE_NF(SHL8ri)
Is8BitOp = true;
[[fallthrough]];
- CASE_NF(SHL16ri) {
- assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
- unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt))
- return nullptr;
- return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- }
- CASE_NF(INC64r)
- CASE_NF(INC32r) {
- assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = (MIOpc == X86::INC64r || MIOpc == X86::INC64r_NF)
- ? X86::LEA64r
- : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
- bool isKill;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg,
- isKill, ImplicitOp, LV, LIS))
- return nullptr;
-
- MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .addReg(SrcReg, getKillRegState(isKill));
- if (ImplicitOp.getReg() != 0)
- MIB.add(ImplicitOp);
+ CASE_NF(SHL16ri) {
+ assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
+ unsigned ShAmt = getTruncatedShiftCount(MI, 2);
+ if (!isTruncatedShiftCountForLEA(ShAmt))
+ return nullptr;
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
+ }
+ CASE_NF(INC64r)
+ CASE_NF(INC32r) {
+ assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = (MIOpc == X86::INC64r || MIOpc == X86::INC64r_NF)
+ ? X86::LEA64r
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ bool isKill;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg,
+ isKill, ImplicitOp, LV, LIS))
+ return nullptr;
- NewMI = addOffset(MIB, 1);
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(SrcReg, getKillRegState(isKill));
+ if (ImplicitOp.getReg() != 0)
+ MIB.add(ImplicitOp);
- // Add kills if classifyLEAReg created a new register.
- if (LV && SrcReg != Src.getReg())
- LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
- break;
- }
- CASE_NF(DEC64r)
- CASE_NF(DEC32r) {
- assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- unsigned Opc = (MIOpc == X86::DEC64r || MIOpc == X86::DEC64r_NF)
- ? X86::LEA64r
- : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ NewMI = addOffset(MIB, 1);
- bool isKill;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg,
- isKill, ImplicitOp, LV, LIS))
- return nullptr;
+ // Add kills if classifyLEAReg created a new register.
+ if (LV && SrcReg != Src.getReg())
+ LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
+ break;
+ }
+ CASE_NF(DEC64r)
+ CASE_NF(DEC32r) {
+ assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = (MIOpc == X86::DEC64r || MIOpc == X86::DEC64r_NF)
+ ? X86::LEA64r
+ : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+ bool isKill;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, SrcSubReg,
+ isKill, ImplicitOp, LV, LIS))
+ return nullptr;
- MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .addReg(SrcReg, getKillRegState(isKill));
- if (ImplicitOp.getReg() != 0)
- MIB.add(ImplicitOp);
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(SrcReg, getKillRegState(isKill));
+ if (ImplicitOp.getReg() != 0)
+ MIB.add(ImplicitOp);
- NewMI = addOffset(MIB, -1);
+ NewMI = addOffset(MIB, -1);
- // Add kills if classifyLEAReg created a new register.
- if (LV && SrcReg != Src.getReg())
- LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
- break;
- }
- CASE_NF(DEC8r)
- CASE_NF(INC8r)
+ // Add kills if classifyLEAReg created a new register.
+ if (LV && SrcReg != Src.getReg())
+ LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
+ break;
+ }
+ CASE_NF(DEC8r)
+ CASE_NF(INC8r)
Is8BitOp = true;
[[fallthrough]];
- CASE_NF(DEC16r)
- CASE_NF(INC16r)
+ CASE_NF(DEC16r)
+ CASE_NF(INC16r)
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- CASE_NF(ADD64rr)
- CASE_NF(ADD32rr)
+ CASE_NF(ADD64rr)
+ CASE_NF(ADD32rr)
case X86::ADD64rr_DB:
case X86::ADD32rr_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
@@ -2158,21 +2164,21 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
NumRegOperands = 3;
break;
}
- CASE_NF(ADD8rr)
+ CASE_NF(ADD8rr)
case X86::ADD8rr_DB:
Is8BitOp = true;
[[fallthrough]];
- CASE_NF(ADD16rr)
+ CASE_NF(ADD16rr)
case X86::ADD16rr_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- CASE_NF(ADD64ri32)
+ CASE_NF(ADD64ri32)
case X86::ADD64ri32_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(
BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
MI.getOperand(2));
break;
- CASE_NF(ADD32ri)
+ CASE_NF(ADD32ri)
case X86::ADD32ri_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
@@ -2197,62 +2203,62 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
- CASE_NF(ADD8ri)
+ CASE_NF(ADD8ri)
case X86::ADD8ri_DB:
Is8BitOp = true;
[[fallthrough]];
- CASE_NF(ADD16ri)
+ CASE_NF(ADD16ri)
case X86::ADD16ri_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- CASE_NF(SUB8ri)
- CASE_NF(SUB16ri)
+ CASE_NF(SUB8ri)
+ CASE_NF(SUB16ri)
/// FIXME: Support these similar to ADD8ri/ADD16ri*.
return nullptr;
- CASE_NF(SUB32ri) {
- if (!MI.getOperand(2).isImm())
- return nullptr;
- int64_t Imm = MI.getOperand(2).getImm();
- if (!isInt<32>(-Imm))
- return nullptr;
+ CASE_NF(SUB32ri) {
+ if (!MI.getOperand(2).isImm())
+ return nullptr;
+ int64_t Imm = MI.getOperand(2).getImm();
+ if (!isInt<32>(-Imm))
+ return nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, SrcSubReg,
- isKill, ImplicitOp, LV, LIS))
- return nullptr;
+ bool isKill;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, SrcSubReg,
+ isKill, ImplicitOp, LV, LIS))
+ return nullptr;
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .add(Dest)
- .addReg(SrcReg, getKillRegState(isKill), SrcSubReg);
- if (ImplicitOp.getReg() != 0)
- MIB.add(ImplicitOp);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(SrcReg, getKillRegState(isKill), SrcSubReg);
+ if (ImplicitOp.getReg() != 0)
+ MIB.add(ImplicitOp);
- NewMI = addOffset(MIB, -Imm);
+ NewMI = addOffset(MIB, -Imm);
- // Add kills if classifyLEAReg created a new register.
- if (LV && SrcReg != Src.getReg())
- LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
- break;
- }
+ // Add kills if classifyLEAReg created a new register.
+ if (LV && SrcReg != Src.getReg())
+ LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
+ break;
+ }
- CASE_NF(SUB64ri32) {
- if (!MI.getOperand(2).isImm())
- return nullptr;
- int64_t Imm = MI.getOperand(2).getImm();
- if (!isInt<32>(-Imm))
- return nullptr;
+ CASE_NF(SUB64ri32) {
+ if (!MI.getOperand(2).isImm())
+ return nullptr;
+ int64_t Imm = MI.getOperand(2).getImm();
+ if (!isInt<32>(-Imm))
+ return nullptr;
- assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
+ assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src);
- NewMI = addOffset(MIB, -Imm);
- break;
- }
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src);
+ NewMI = addOffset(MIB, -Imm);
+ break;
+ }
case X86::VMOVDQU8Z128rmk:
case X86::VMOVDQU8Z256rmk:
@@ -2852,17 +2858,17 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::OP##_ND:
switch (Opc) {
- // SHLD B, C, I <-> SHRD C, B, (BitWidth - I)
- CASE_ND(SHRD16rri8)
- CASE_ND(SHLD16rri8)
- CASE_ND(SHRD32rri8)
- CASE_ND(SHLD32rri8)
- CASE_ND(SHRD64rri8)
- CASE_ND(SHLD64rri8) {
- unsigned Size;
- switch (Opc) {
- default:
- llvm_unreachable("Unreachable!");
+ // SHLD B, C, I <-> SHRD C, B, (BitWidth - I)
+ CASE_ND(SHRD16rri8)
+ CASE_ND(SHLD16rri8)
+ CASE_ND(SHRD32rri8)
+ CASE_ND(SHLD32rri8)
+ CASE_ND(SHRD64rri8)
+ CASE_ND(SHLD64rri8) {
+ unsigned Size;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unreachable!");
#define FROM_TO_SIZE(A, B, S) \
case X86::A: \
Opc = X86::B; \
@@ -2881,16 +2887,16 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
Size = S; \
break;
- FROM_TO_SIZE(SHRD16rri8, SHLD16rri8, 16)
- FROM_TO_SIZE(SHRD32rri8, SHLD32rri8, 32)
- FROM_TO_SIZE(SHRD64rri8, SHLD64rri8, 64)
+ FROM_TO_SIZE(SHRD16rri8, SHLD16rri8, 16)
+ FROM_TO_SIZE(SHRD32rri8, SHLD32rri8, 32)
+ FROM_TO_SIZE(SHRD64rri8, SHLD64rri8, 64)
#undef FROM_TO_SIZE
+ }
+ WorkingMI = CloneIfNew(MI);
+ WorkingMI->setDesc(get(Opc));
+ WorkingMI->getOperand(3).setImm(Size - MI.getOperand(3).getImm());
+ break;
}
- WorkingMI = CloneIfNew(MI);
- WorkingMI->setDesc(get(Opc));
- WorkingMI->getOperand(3).setImm(Size - MI.getOperand(3).getImm());
- break;
- }
case X86::PFSUBrr:
case X86::PFSUBRrr:
// PFSUB x, y: x = x - y
@@ -3174,15 +3180,16 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
WorkingMI = CloneIfNew(MI);
WorkingMI->setDesc(get(Opc));
break;
- CASE_ND(CMOV16rr)
- CASE_ND(CMOV32rr)
- CASE_ND(CMOV64rr) {
- WorkingMI = CloneIfNew(MI);
- unsigned OpNo = MI.getDesc().getNumOperands() - 1;
- X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
- WorkingMI->getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
- break;
- }
+ CASE_ND(CMOV16rr)
+ CASE_ND(CMOV32rr)
+ CASE_ND(CMOV64rr) {
+ WorkingMI = CloneIfNew(MI);
+ unsigned OpNo = MI.getDesc().getNumOperands() - 1;
+ X86::CondCode CC =
+ static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
+ WorkingMI->getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
+ break;
+ }
case X86::VPTERNLOGDZrri:
case X86::VPTERNLOGDZrmi:
case X86::VPTERNLOGDZ128rri:
@@ -5391,29 +5398,29 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
CmpMask = CmpValue = 0;
}
return true;
- // A SUB can be used to perform comparison.
- CASE_ND(SUB64rm)
- CASE_ND(SUB32rm)
- CASE_ND(SUB16rm)
- CASE_ND(SUB8rm)
+ // A SUB can be used to perform comparison.
+ CASE_ND(SUB64rm)
+ CASE_ND(SUB32rm)
+ CASE_ND(SUB16rm)
+ CASE_ND(SUB8rm)
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = 0;
CmpValue = 0;
return true;
- CASE_ND(SUB64rr)
- CASE_ND(SUB32rr)
- CASE_ND(SUB16rr)
- CASE_ND(SUB8rr)
+ CASE_ND(SUB64rr)
+ CASE_ND(SUB32rr)
+ CASE_ND(SUB16rr)
+ CASE_ND(SUB8rr)
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
CmpMask = 0;
CmpValue = 0;
return true;
- CASE_ND(SUB64ri32)
- CASE_ND(SUB32ri)
- CASE_ND(SUB16ri)
- CASE_ND(SUB8ri)
+ CASE_ND(SUB64ri32)
+ CASE_ND(SUB32ri)
+ CASE_ND(SUB16ri)
+ CASE_ND(SUB8ri)
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
if (MI.getOperand(2).isImm()) {
@@ -5468,27 +5475,27 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
case X86::CMP32rr:
case X86::CMP16rr:
case X86::CMP8rr:
- CASE_ND(SUB64rr)
- CASE_ND(SUB32rr)
- CASE_ND(SUB16rr)
- CASE_ND(SUB8rr) {
- Register OISrcReg;
- Register OISrcReg2;
- int64_t OIMask;
- int64_t OIValue;
- if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
- OIMask != ImmMask || OIValue != ImmValue)
+ CASE_ND(SUB64rr)
+ CASE_ND(SUB32rr)
+ CASE_ND(SUB16rr)
+ CASE_ND(SUB8rr) {
+ Register OISrcReg;
+ Register OISrcReg2;
+ int64_t OIMask;
+ int64_t OIValue;
+ if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
+ OIMask != ImmMask || OIValue != ImmValue)
+ return false;
+ if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
+ *IsSwapped = false;
+ return true;
+ }
+ if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
+ *IsSwapped = true;
+ return true;
+ }
return false;
- if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
- *IsSwapped = false;
- return true;
}
- if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
- *IsSwapped = true;
- return true;
- }
- return false;
- }
case X86::CMP64ri32:
case X86::CMP32ri:
case X86::CMP16ri:
@@ -5497,10 +5504,10 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
case X86::TEST32ri:
case X86::TEST16ri:
case X86::TEST8ri:
- CASE_ND(SUB64ri32)
- CASE_ND(SUB32ri)
- CASE_ND(SUB16ri)
- CASE_ND(SUB8ri)
+ CASE_ND(SUB64ri32)
+ CASE_ND(SUB32ri)
+ CASE_ND(SUB16ri)
+ CASE_ND(SUB8ri)
case X86::TEST64rr:
case X86::TEST32rr:
case X86::TEST16rr:
@@ -5557,98 +5564,98 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
default:
return false;
- // The shift instructions only modify ZF if their shift count is non-zero.
- // N.B.: The processor truncates the shift count depending on the encoding.
- CASE_ND(SAR8ri)
- CASE_ND(SAR16ri)
- CASE_ND(SAR32ri)
- CASE_ND(SAR64ri)
- CASE_ND(SHR8ri)
- CASE_ND(SHR16ri)
- CASE_ND(SHR32ri)
- CASE_ND(SHR64ri)
+ // The shift instructions only modify ZF if their shift count is non-zero.
+ // N.B.: The processor truncates the shift count depending on the encoding.
+ CASE_ND(SAR8ri)
+ CASE_ND(SAR16ri)
+ CASE_ND(SAR32ri)
+ CASE_ND(SAR64ri)
+ CASE_ND(SHR8ri)
+ CASE_ND(SHR16ri)
+ CASE_ND(SHR32ri)
+ CASE_ND(SHR64ri)
return getTruncatedShiftCount(MI, 2) != 0;
- // Some left shift instructions can be turned into LEA instructions but only
- // if their flags aren't used. Avoid transforming such instructions.
- CASE_ND(SHL8ri)
- CASE_ND(SHL16ri)
- CASE_ND(SHL32ri)
- CASE_ND(SHL64ri) {
- unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (isTruncatedShiftCountForLEA(ShAmt))
- return false;
- return ShAmt != 0;
- }
+ // Some left shift instructions can be turned into LEA instructions but only
+ // if their flags aren't used. Avoid transforming such instructions.
+ CASE_ND(SHL8ri)
+ CASE_ND(SHL16ri)
+ CASE_ND(SHL32ri)
+ CASE_ND(SHL64ri) {
+ unsigned ShAmt = getTruncatedShiftCount(MI, 2);
+ if (isTruncatedShiftCountForLEA(ShAmt))
+ return false;
+ return ShAmt != 0;
+ }
- CASE_ND(SHRD16rri8)
- CASE_ND(SHRD32rri8)
- CASE_ND(SHRD64rri8)
- CASE_ND(SHLD16rri8)
- CASE_ND(SHLD32rri8)
- CASE_ND(SHLD64rri8)
+ CASE_ND(SHRD16rri8)
+ CASE_ND(SHRD32rri8)
+ CASE_ND(SHRD64rri8)
+ CASE_ND(SHLD16rri8)
+ CASE_ND(SHLD32rri8)
+ CASE_ND(SHLD64rri8)
return getTruncatedShiftCount(MI, 3) != 0;
- CASE_ND(SUB64ri32)
- CASE_ND(SUB32ri)
- CASE_ND(SUB16ri)
- CASE_ND(SUB8ri)
- CASE_ND(SUB64rr)
- CASE_ND(SUB32rr)
- CASE_ND(SUB16rr)
- CASE_ND(SUB8rr)
- CASE_ND(SUB64rm)
- CASE_ND(SUB32rm)
- CASE_ND(SUB16rm)
- CASE_ND(SUB8rm)
- CASE_ND(DEC64r)
- CASE_ND(DEC32r)
- CASE_ND(DEC16r)
- CASE_ND(DEC8r)
- CASE_ND(ADD64ri32)
- CASE_ND(ADD32ri)
- CASE_ND(ADD16ri)
- CASE_ND(ADD8ri)
- CASE_ND(ADD64rr)
- CASE_ND(ADD32rr)
- CASE_ND(ADD16rr)
- CASE_ND(ADD8rr)
- CASE_ND(ADD64rm)
- CASE_ND(ADD32rm)
- CASE_ND(ADD16rm)
- CASE_ND(ADD8rm)
- CASE_ND(INC64r)
- CASE_ND(INC32r)
- CASE_ND(INC16r)
- CASE_ND(INC8r)
- CASE_ND(ADC64ri32)
- CASE_ND(ADC32ri)
- CASE_ND(ADC16ri)
- CASE_ND(ADC8ri)
- CASE_ND(ADC64rr)
- CASE_ND(ADC32rr)
- CASE_ND(ADC16rr)
- CASE_ND(ADC8rr)
- CASE_ND(ADC64rm)
- CASE_ND(ADC32rm)
- CASE_ND(ADC16rm)
- CASE_ND(ADC8rm)
- CASE_ND(SBB64ri32)
- CASE_ND(SBB32ri)
- CASE_ND(SBB16ri)
- CASE_ND(SBB8ri)
- CASE_ND(SBB64rr)
- CASE_ND(SBB32rr)
- CASE_ND(SBB16rr)
- CASE_ND(SBB8rr)
- CASE_ND(SBB64rm)
- CASE_ND(SBB32rm)
- CASE_ND(SBB16rm)
- CASE_ND(SBB8rm)
- CASE_ND(NEG8r)
- CASE_ND(NEG16r)
- CASE_ND(NEG32r)
- CASE_ND(NEG64r)
+ CASE_ND(SUB64ri32)
+ CASE_ND(SUB32ri)
+ CASE_ND(SUB16ri)
+ CASE_ND(SUB8ri)
+ CASE_ND(SUB64rr)
+ CASE_ND(SUB32rr)
+ CASE_ND(SUB16rr)
+ CASE_ND(SUB8rr)
+ CASE_ND(SUB64rm)
+ CASE_ND(SUB32rm)
+ CASE_ND(SUB16rm)
+ CASE_ND(SUB8rm)
+ CASE_ND(DEC64r)
+ CASE_ND(DEC32r)
+ CASE_ND(DEC16r)
+ CASE_ND(DEC8r)
+ CASE_ND(ADD64ri32)
+ CASE_ND(ADD32ri)
+ CASE_ND(ADD16ri)
+ CASE_ND(ADD8ri)
+ CASE_ND(ADD64rr)
+ CASE_ND(ADD32rr)
+ CASE_ND(ADD16rr)
+ CASE_ND(ADD8rr)
+ CASE_ND(ADD64rm)
+ CASE_ND(ADD32rm)
+ CASE_ND(ADD16rm)
+ CASE_ND(ADD8rm)
+ CASE_ND(INC64r)
+ CASE_ND(INC32r)
+ CASE_ND(INC16r)
+ CASE_ND(INC8r)
+ CASE_ND(ADC64ri32)
+ CASE_ND(ADC32ri)
+ CASE_ND(ADC16ri)
+ CASE_ND(ADC8ri)
+ CASE_ND(ADC64rr)
+ CASE_ND(ADC32rr)
+ CASE_ND(ADC16rr)
+ CASE_ND(ADC8rr)
+ CASE_ND(ADC64rm)
+ CASE_ND(ADC32rm)
+ CASE_ND(ADC16rm)
+ CASE_ND(ADC8rm)
+ CASE_ND(SBB64ri32)
+ CASE_ND(SBB32ri)
+ CASE_ND(SBB16ri)
+ CASE_ND(SBB8ri)
+ CASE_ND(SBB64rr)
+ CASE_ND(SBB32rr)
+ CASE_ND(SBB16rr)
+ CASE_ND(SBB8rr)
+ CASE_ND(SBB64rm)
+ CASE_ND(SBB32rm)
+ CASE_ND(SBB16rm)
+ CASE_ND(SBB8rm)
+ CASE_ND(NEG8r)
+ CASE_ND(NEG16r)
+ CASE_ND(NEG32r)
+ CASE_ND(NEG64r)
case X86::LZCNT16rr:
case X86::LZCNT16rm:
case X86::LZCNT32rr:
@@ -5668,42 +5675,42 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
case X86::TZCNT64rr:
case X86::TZCNT64rm:
return true;
- CASE_ND(AND64ri32)
- CASE_ND(AND32ri)
- CASE_ND(AND16ri)
- CASE_ND(AND8ri)
- CASE_ND(AND64rr)
- CASE_ND(AND32rr)
- CASE_ND(AND16rr)
- CASE_ND(AND8rr)
- CASE_ND(AND64rm)
- CASE_ND(AND32rm)
- CASE_ND(AND16rm)
- CASE_ND(AND8rm)
- CASE_ND(XOR64ri32)
- CASE_ND(XOR32ri)
- CASE_ND(XOR16ri)
- CASE_ND(XOR8ri)
- CASE_ND(XOR64rr)
- CASE_ND(XOR32rr)
- CASE_ND(XOR16rr)
- CASE_ND(XOR8rr)
- CASE_ND(XOR64rm)
- CASE_ND(XOR32rm)
- CASE_ND(XOR16rm)
- CASE_ND(XOR8rm)
- CASE_ND(OR64ri32)
- CASE_ND(OR32ri)
- CASE_ND(OR16ri)
- CASE_ND(OR8ri)
- CASE_ND(OR64rr)
- CASE_ND(OR32rr)
- CASE_ND(OR16rr)
- CASE_ND(OR8rr)
- CASE_ND(OR64rm)
- CASE_ND(OR32rm)
- CASE_ND(OR16rm)
- CASE_ND(OR8rm)
+ CASE_ND(AND64ri32)
+ CASE_ND(AND32ri)
+ CASE_ND(AND16ri)
+ CASE_ND(AND8ri)
+ CASE_ND(AND64rr)
+ CASE_ND(AND32rr)
+ CASE_ND(AND16rr)
+ CASE_ND(AND8rr)
+ CASE_ND(AND64rm)
+ CASE_ND(AND32rm)
+ CASE_ND(AND16rm)
+ CASE_ND(AND8rm)
+ CASE_ND(XOR64ri32)
+ CASE_ND(XOR32ri)
+ CASE_ND(XOR16ri)
+ CASE_ND(XOR8ri)
+ CASE_ND(XOR64rr)
+ CASE_ND(XOR32rr)
+ CASE_ND(XOR16rr)
+ CASE_ND(XOR8rr)
+ CASE_ND(XOR64rm)
+ CASE_ND(XOR32rm)
+ CASE_ND(XOR16rm)
+ CASE_ND(XOR8rm)
+ CASE_ND(OR64ri32)
+ CASE_ND(OR32ri)
+ CASE_ND(OR16ri)
+ CASE_ND(OR8ri)
+ CASE_ND(OR64rr)
+ CASE_ND(OR32rr)
+ CASE_ND(OR16rr)
+ CASE_ND(OR8rr)
+ CASE_ND(OR64rm)
+ CASE_ND(OR32rm)
+ CASE_ND(OR16rm)
+ CASE_ND(OR8rm)
case X86::ANDN32rr:
case X86::ANDN32rm:
case X86::ANDN64rr:
@@ -5781,15 +5788,17 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
}
/// Check whether the use can be converted to remove a comparison against zero.
-/// Returns the EFLAGS condition and the operand that we are comparing against zero.
-static std::pair<X86::CondCode, unsigned> isUseDefConvertible(const MachineInstr &MI) {
+/// Returns the EFLAGS condition and the operand that we are comparing against
+/// zero.
+static std::pair<X86::CondCode, unsigned>
+isUseDefConvertible(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return std::make_pair(X86::COND_INVALID, ~0U);
- CASE_ND(NEG8r)
- CASE_ND(NEG16r)
- CASE_ND(NEG32r)
- CASE_ND(NEG64r)
+ CASE_ND(NEG8r)
+ CASE_ND(NEG16r)
+ CASE_ND(NEG32r)
+ CASE_ND(NEG64r)
return std::make_pair(X86::COND_AE, 1U);
case X86::LZCNT16rr:
case X86::LZCNT32rr:
@@ -5833,51 +5842,53 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
switch (CmpInstr.getOpcode()) {
default:
break;
- CASE_ND(SUB64ri32)
- CASE_ND(SUB32ri)
- CASE_ND(SUB16ri)
- CASE_ND(SUB8ri)
- CASE_ND(SUB64rm)
- CASE_ND(SUB32rm)
- CASE_ND(SUB16rm)
- CASE_ND(SUB8rm)
- CASE_ND(SUB64rr)
- CASE_ND(SUB32rr)
- CASE_ND(SUB16rr)
- CASE_ND(SUB8rr) {
- if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
- return false;
- // There is no use of the destination register, we can replace SUB with CMP.
- unsigned NewOpcode = 0;
+ CASE_ND(SUB64ri32)
+ CASE_ND(SUB32ri)
+ CASE_ND(SUB16ri)
+ CASE_ND(SUB8ri)
+ CASE_ND(SUB64rm)
+ CASE_ND(SUB32rm)
+ CASE_ND(SUB16rm)
+ CASE_ND(SUB8rm)
+ CASE_ND(SUB64rr)
+ CASE_ND(SUB32rr)
+ CASE_ND(SUB16rr)
+ CASE_ND(SUB8rr) {
+ if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
+ return false;
+ // There is no use of the destination register, we can replace SUB with
+ // CMP.
+ unsigned NewOpcode = 0;
#define FROM_TO(A, B) \
CASE_ND(A) NewOpcode = X86::B; \
break;
- switch (CmpInstr.getOpcode()) {
- default:
- llvm_unreachable("Unreachable!");
- FROM_TO(SUB64rm, CMP64rm)
- FROM_TO(SUB32rm, CMP32rm)
- FROM_TO(SUB16rm, CMP16rm)
- FROM_TO(SUB8rm, CMP8rm)
- FROM_TO(SUB64rr, CMP64rr)
- FROM_TO(SUB32rr, CMP32rr)
- FROM_TO(SUB16rr, CMP16rr)
- FROM_TO(SUB8rr, CMP8rr)
- FROM_TO(SUB64ri32, CMP64ri32)
- FROM_TO(SUB32ri, CMP32ri)
- FROM_TO(SUB16ri, CMP16ri)
- FROM_TO(SUB8ri, CMP8ri)
- }
+ switch (CmpInstr.getOpcode()) {
+ default:
+ llvm_unreachable("Unreachable!");
+ FROM_TO(SUB64rm, CMP64rm)
+ FROM_TO(SUB32rm, CMP32rm)
+ FROM_TO(SUB16rm, CMP16rm)
+ FROM_TO(SUB8rm, CMP8rm)
+ FROM_TO(SUB64rr, CMP64rr)
+ FROM_TO(SUB32rr, CMP32rr)
+ FROM_TO(SUB16rr, CMP16rr)
+ FROM_TO(SUB8rr, CMP8rr)
+ FROM_TO(SUB64ri32, CMP64ri32)
+ FROM_TO(SUB32ri, CMP32ri)
+ FROM_TO(SUB16ri, CMP16ri)
+ FROM_TO(SUB8ri, CMP8ri)
+ }
#undef FROM_TO
- CmpInstr.setDesc(get(NewOpcode));
- CmpInstr.removeOperand(0);
- // Mutating this instruction invalidates any debug data associated with it.
- CmpInstr.dropDebugNumber();
- // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
- if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
- NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
- return false;
- }
+ CmpInstr.setDesc(get(NewOpcode));
+ CmpInstr.removeOperand(0);
+ // Mutating this instruction invalidates any debug data associated with
+ // it.
+ CmpInstr.dropDebugNumber();
+ // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
+ if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
+ NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
+ return false;
+ }
}
// The following code tries to remove the comparison by re-using EFLAGS
@@ -6234,14 +6245,14 @@ static bool canConvert2Copy(unsigned Opc) {
switch (Opc) {
default:
return false;
- CASE_ND(ADD64ri32)
- CASE_ND(SUB64ri32)
- CASE_ND(OR64ri32)
- CASE_ND(XOR64ri32)
- CASE_ND(ADD32ri)
- CASE_ND(SUB32ri)
- CASE_ND(OR32ri)
- CASE_ND(XOR32ri)
+ CASE_ND(ADD64ri32)
+ CASE_ND(SUB64ri32)
+ CASE_ND(OR64ri32)
+ CASE_ND(XOR64ri32)
+ CASE_ND(ADD32ri)
+ CASE_ND(SUB32ri)
+ CASE_ND(OR32ri)
+ CASE_ND(XOR32ri)
return true;
}
}
@@ -9656,7 +9667,7 @@ Register X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
static const uint16_t *lookup(unsigned opcode, unsigned domain,
ArrayRef<uint16_t[3]> Table) {
- for (const uint16_t(&Row)[3] : Table)
+ for (const uint16_t (&Row)[3] : Table)
if (Row[domain - 1] == opcode)
return Row;
return nullptr;
@@ -9665,7 +9676,7 @@ static const uint16_t *lookup(unsigned opcode, unsigned domain,
static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
ArrayRef<uint16_t[4]> Table) {
// If this is the integer domain make sure to check both integer columns.
- for (const uint16_t(&Row)[4] : Table)
+ for (const uint16_t (&Row)[4] : Table)
if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
return Row;
return nullptr;
@@ -10421,25 +10432,25 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
if (Invert)
return false;
switch (Inst.getOpcode()) {
- CASE_ND(ADD8rr)
- CASE_ND(ADD16rr)
- CASE_ND(ADD32rr)
- CASE_ND(ADD64rr)
- CASE_ND(AND8rr)
- CASE_ND(AND16rr)
- CASE_ND(AND32rr)
- CASE_ND(AND64rr)
- CASE_ND(OR8rr)
- CASE_ND(OR16rr)
- CASE_ND(OR32rr)
- CASE_ND(OR64rr)
- CASE_ND(XOR8rr)
- CASE_ND(XOR16rr)
- CASE_ND(XOR32rr)
- CASE_ND(XOR64rr)
- CASE_ND(IMUL16rr)
- CASE_ND(IMUL32rr)
- CASE_ND(IMUL64rr)
+ CASE_ND(ADD8rr)
+ CASE_ND(ADD16rr)
+ CASE_ND(ADD32rr)
+ CASE_ND(ADD64rr)
+ CASE_ND(AND8rr)
+ CASE_ND(AND16rr)
+ CASE_ND(AND32rr)
+ CASE_ND(AND64rr)
+ CASE_ND(OR8rr)
+ CASE_ND(OR16rr)
+ CASE_ND(OR32rr)
+ CASE_ND(OR64rr)
+ CASE_ND(XOR8rr)
+ CASE_ND(XOR16rr)
+ CASE_ND(XOR32rr)
+ CASE_ND(XOR64rr)
+ CASE_ND(IMUL16rr)
+ CASE_ND(IMUL32rr)
+ CASE_ND(IMUL64rr)
case X86::PANDrr:
case X86::PORrr:
case X86::PXORrr:
@@ -11263,8 +11274,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(
break;
}
}
- return TargetInstrInfo::getMachineCombinerPatterns(Root,
- Patterns, DoRegPressureReduce);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
static void
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 76f18803c2e3..846bcc85b7ad 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -319,8 +319,7 @@ public:
Register isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- Register isLoadFromStackSlot(const MachineInstr &MI,
- int &FrameIndex,
+ Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex,
TypeSize &MemBytes) const override;
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
@@ -330,8 +329,7 @@ public:
Register isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- Register isStoreToStackSlot(const MachineInstr &MI,
- int &FrameIndex,
+ Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
TypeSize &MemBytes) const override;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
@@ -491,12 +489,12 @@ public:
/// is likely that the referenced instruction has been changed.
///
/// \returns true on success.
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr,
- VirtRegMap *VRM = nullptr) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
+ int FrameIndex,
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
/// Same as the previous version except it allows folding of any load and
/// store from / to any address, not just from a specific stack slot.
@@ -745,8 +743,7 @@ private:
///
/// If IsIntrinsic is set, operand 1 will be ignored for commuting.
bool findThreeSrcCommutedOpIndices(const MachineInstr &MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2,
+ unsigned &SrcOpIdx1, unsigned &SrcOpIdx2,
bool IsIntrinsic = false) const;
/// Returns true when instruction \p FlagI produces the same flags as \p OI.
diff --git a/llvm/test/CodeGen/X86/ctselect-i386-fp.ll b/llvm/test/CodeGen/X86/ctselect-i386-fp.ll
index b88ec72a3792..4b5f31bad831 100644
--- a/llvm/test/CodeGen/X86/ctselect-i386-fp.ll
+++ b/llvm/test/CodeGen/X86/ctselect-i386-fp.ll
@@ -209,94 +209,84 @@ define double @test_ctselect_f64_basic(i1 %cond, double %a, double %b) nounwind
define x86_fp80 @test_ctselect_f80_basic(i1 %cond, x86_fp80 %a, x86_fp80 %b) nounwind {
; I386-NOCMOV-LABEL: test_ctselect_f80_basic:
; I386-NOCMOV: # %bb.0:
+; I386-NOCMOV-NEXT: pushl %ebp
+; I386-NOCMOV-NEXT: pushl %ebx
; I386-NOCMOV-NEXT: pushl %edi
; I386-NOCMOV-NEXT: pushl %esi
-; I386-NOCMOV-NEXT: subl $12, %esp
-; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
-; I386-NOCMOV-NEXT: sete %al
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: subl $40, %esp
+; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: testb $1, %cl
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I386-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-NOCMOV-NEXT: movb %al, %ah
-; I386-NOCMOV-NEXT: movzbl %ah, %edi
-; I386-NOCMOV-NEXT: negl %edi
-; I386-NOCMOV-NEXT: movl %edx, %esi
-; I386-NOCMOV-NEXT: andl %edi, %esi
-; I386-NOCMOV-NEXT: notl %edi
-; I386-NOCMOV-NEXT: andl %ecx, %edi
-; I386-NOCMOV-NEXT: orl %edi, %esi
-; I386-NOCMOV-NEXT: movl %esi, (%esp)
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
+; I386-NOCMOV-NEXT: sete %ch
+; I386-NOCMOV-NEXT: movb %ch, %al
+; I386-NOCMOV-NEXT: movzbl %al, %ebp
+; I386-NOCMOV-NEXT: negl %ebp
+; I386-NOCMOV-NEXT: movl %edi, %ebx
+; I386-NOCMOV-NEXT: andl %ebp, %ebx
+; I386-NOCMOV-NEXT: notl %ebp
+; I386-NOCMOV-NEXT: andl %edx, %ebp
+; I386-NOCMOV-NEXT: orl %ebp, %ebx
+; I386-NOCMOV-NEXT: testb $1, %cl
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-NOCMOV-NEXT: movb %al, %ah
-; I386-NOCMOV-NEXT: movzbl %ah, %edi
-; I386-NOCMOV-NEXT: negl %edi
-; I386-NOCMOV-NEXT: movl %edx, %esi
-; I386-NOCMOV-NEXT: andl %edi, %esi
-; I386-NOCMOV-NEXT: notl %edi
-; I386-NOCMOV-NEXT: andl %ecx, %edi
-; I386-NOCMOV-NEXT: orl %edi, %esi
-; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-NOCMOV-NEXT: movb %al, %ah
-; I386-NOCMOV-NEXT: movzbl %ah, %edi
+; I386-NOCMOV-NEXT: sete %ch
+; I386-NOCMOV-NEXT: movb %ch, %cl
+; I386-NOCMOV-NEXT: movzbl %cl, %ebp
+; I386-NOCMOV-NEXT: negl %ebp
+; I386-NOCMOV-NEXT: movl %edx, %edi
+; I386-NOCMOV-NEXT: andl %ebp, %edi
+; I386-NOCMOV-NEXT: notl %ebp
+; I386-NOCMOV-NEXT: andl %eax, %ebp
+; I386-NOCMOV-NEXT: orl %ebp, %edi
+; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: sete %al
+; I386-NOCMOV-NEXT: movl (%esp), %ebx # 4-byte Reload
+; I386-NOCMOV-NEXT: movb %al, %dl
+; I386-NOCMOV-NEXT: movzbl %dl, %edi
; I386-NOCMOV-NEXT: negl %edi
-; I386-NOCMOV-NEXT: movl %edx, %esi
-; I386-NOCMOV-NEXT: andl %edi, %esi
+; I386-NOCMOV-NEXT: movl %esi, %ecx
+; I386-NOCMOV-NEXT: andl %edi, %ecx
; I386-NOCMOV-NEXT: notl %edi
-; I386-NOCMOV-NEXT: andl %ecx, %edi
-; I386-NOCMOV-NEXT: orl %edi, %esi
-; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I386-NOCMOV-NEXT: fldt (%esp)
-; I386-NOCMOV-NEXT: addl $12, %esp
+; I386-NOCMOV-NEXT: andl %ebx, %edi
+; I386-NOCMOV-NEXT: orl %edi, %ecx
+; I386-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: addl $40, %esp
; I386-NOCMOV-NEXT: popl %esi
; I386-NOCMOV-NEXT: popl %edi
+; I386-NOCMOV-NEXT: popl %ebx
+; I386-NOCMOV-NEXT: popl %ebp
; I386-NOCMOV-NEXT: retl
;
; I386-CMOV-LABEL: test_ctselect_f80_basic:
; I386-CMOV: # %bb.0:
-; I386-CMOV-NEXT: pushl %edi
-; I386-CMOV-NEXT: pushl %esi
-; I386-CMOV-NEXT: subl $12, %esp
+; I386-CMOV-NEXT: subl $36, %esp
+; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
-; I386-CMOV-NEXT: sete %al
+; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax
+; I386-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-CMOV-NEXT: movb %al, %ah
-; I386-CMOV-NEXT: movzbl %ah, %edi
-; I386-CMOV-NEXT: negl %edi
-; I386-CMOV-NEXT: movl %edx, %esi
-; I386-CMOV-NEXT: andl %edi, %esi
-; I386-CMOV-NEXT: notl %edi
-; I386-CMOV-NEXT: andl %ecx, %edi
-; I386-CMOV-NEXT: orl %edi, %esi
-; I386-CMOV-NEXT: movl %esi, (%esp)
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-CMOV-NEXT: movb %al, %ah
-; I386-CMOV-NEXT: movzbl %ah, %edi
-; I386-CMOV-NEXT: negl %edi
-; I386-CMOV-NEXT: movl %edx, %esi
-; I386-CMOV-NEXT: andl %edi, %esi
-; I386-CMOV-NEXT: notl %edi
-; I386-CMOV-NEXT: andl %ecx, %edi
-; I386-CMOV-NEXT: orl %edi, %esi
-; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-CMOV-NEXT: movb %al, %ah
-; I386-CMOV-NEXT: movzbl %ah, %edi
-; I386-CMOV-NEXT: negl %edi
-; I386-CMOV-NEXT: movl %edx, %esi
-; I386-CMOV-NEXT: andl %edi, %esi
-; I386-CMOV-NEXT: notl %edi
-; I386-CMOV-NEXT: andl %ecx, %edi
-; I386-CMOV-NEXT: orl %edi, %esi
-; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %ecx
+; I386-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax
+; I386-CMOV-NEXT: movl %eax, (%esp)
; I386-CMOV-NEXT: fldt (%esp)
-; I386-CMOV-NEXT: addl $12, %esp
-; I386-CMOV-NEXT: popl %esi
-; I386-CMOV-NEXT: popl %edi
+; I386-CMOV-NEXT: addl $36, %esp
; I386-CMOV-NEXT: retl
%result = call x86_fp80 @llvm.ct.select.f80(i1 %cond, x86_fp80 %a, x86_fp80 %b)
ret x86_fp80 %result
@@ -543,94 +533,84 @@ define float @test_ctselect_f32_nan(i1 %cond) nounwind {
define x86_fp80 @test_ctselect_f80_alignment(i1 %cond, x86_fp80 %a, x86_fp80 %b) nounwind {
; I386-NOCMOV-LABEL: test_ctselect_f80_alignment:
; I386-NOCMOV: # %bb.0:
+; I386-NOCMOV-NEXT: pushl %ebp
+; I386-NOCMOV-NEXT: pushl %ebx
; I386-NOCMOV-NEXT: pushl %edi
; I386-NOCMOV-NEXT: pushl %esi
-; I386-NOCMOV-NEXT: subl $12, %esp
-; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
-; I386-NOCMOV-NEXT: sete %al
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: subl $40, %esp
+; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: testb $1, %cl
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; I386-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-NOCMOV-NEXT: movb %al, %ah
-; I386-NOCMOV-NEXT: movzbl %ah, %edi
-; I386-NOCMOV-NEXT: negl %edi
-; I386-NOCMOV-NEXT: movl %edx, %esi
-; I386-NOCMOV-NEXT: andl %edi, %esi
-; I386-NOCMOV-NEXT: notl %edi
-; I386-NOCMOV-NEXT: andl %ecx, %edi
-; I386-NOCMOV-NEXT: orl %edi, %esi
-; I386-NOCMOV-NEXT: movl %esi, (%esp)
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
+; I386-NOCMOV-NEXT: sete %ch
+; I386-NOCMOV-NEXT: movb %ch, %al
+; I386-NOCMOV-NEXT: movzbl %al, %ebp
+; I386-NOCMOV-NEXT: negl %ebp
+; I386-NOCMOV-NEXT: movl %edi, %ebx
+; I386-NOCMOV-NEXT: andl %ebp, %ebx
+; I386-NOCMOV-NEXT: notl %ebp
+; I386-NOCMOV-NEXT: andl %edx, %ebp
+; I386-NOCMOV-NEXT: orl %ebp, %ebx
+; I386-NOCMOV-NEXT: testb $1, %cl
+; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-NOCMOV-NEXT: movb %al, %ah
-; I386-NOCMOV-NEXT: movzbl %ah, %edi
-; I386-NOCMOV-NEXT: negl %edi
-; I386-NOCMOV-NEXT: movl %edx, %esi
-; I386-NOCMOV-NEXT: andl %edi, %esi
-; I386-NOCMOV-NEXT: notl %edi
-; I386-NOCMOV-NEXT: andl %ecx, %edi
-; I386-NOCMOV-NEXT: orl %edi, %esi
-; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-NOCMOV-NEXT: movb %al, %ah
-; I386-NOCMOV-NEXT: movzbl %ah, %edi
+; I386-NOCMOV-NEXT: sete %ch
+; I386-NOCMOV-NEXT: movb %ch, %cl
+; I386-NOCMOV-NEXT: movzbl %cl, %ebp
+; I386-NOCMOV-NEXT: negl %ebp
+; I386-NOCMOV-NEXT: movl %edx, %edi
+; I386-NOCMOV-NEXT: andl %ebp, %edi
+; I386-NOCMOV-NEXT: notl %ebp
+; I386-NOCMOV-NEXT: andl %eax, %ebp
+; I386-NOCMOV-NEXT: orl %ebp, %edi
+; I386-NOCMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: sete %al
+; I386-NOCMOV-NEXT: movl (%esp), %ebx # 4-byte Reload
+; I386-NOCMOV-NEXT: movb %al, %dl
+; I386-NOCMOV-NEXT: movzbl %dl, %edi
; I386-NOCMOV-NEXT: negl %edi
-; I386-NOCMOV-NEXT: movl %edx, %esi
-; I386-NOCMOV-NEXT: andl %edi, %esi
+; I386-NOCMOV-NEXT: movl %esi, %ecx
+; I386-NOCMOV-NEXT: andl %edi, %ecx
; I386-NOCMOV-NEXT: notl %edi
-; I386-NOCMOV-NEXT: andl %ecx, %edi
-; I386-NOCMOV-NEXT: orl %edi, %esi
-; I386-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I386-NOCMOV-NEXT: fldt (%esp)
-; I386-NOCMOV-NEXT: addl $12, %esp
+; I386-NOCMOV-NEXT: andl %ebx, %edi
+; I386-NOCMOV-NEXT: orl %edi, %ecx
+; I386-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-NOCMOV-NEXT: addl $40, %esp
; I386-NOCMOV-NEXT: popl %esi
; I386-NOCMOV-NEXT: popl %edi
+; I386-NOCMOV-NEXT: popl %ebx
+; I386-NOCMOV-NEXT: popl %ebp
; I386-NOCMOV-NEXT: retl
;
; I386-CMOV-LABEL: test_ctselect_f80_alignment:
; I386-CMOV: # %bb.0:
-; I386-CMOV-NEXT: pushl %edi
-; I386-CMOV-NEXT: pushl %esi
-; I386-CMOV-NEXT: subl $12, %esp
+; I386-CMOV-NEXT: subl $36, %esp
+; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: fldt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: fstpt {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
-; I386-CMOV-NEXT: sete %al
+; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax
+; I386-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-CMOV-NEXT: movb %al, %ah
-; I386-CMOV-NEXT: movzbl %ah, %edi
-; I386-CMOV-NEXT: negl %edi
-; I386-CMOV-NEXT: movl %edx, %esi
-; I386-CMOV-NEXT: andl %edi, %esi
-; I386-CMOV-NEXT: notl %edi
-; I386-CMOV-NEXT: andl %ecx, %edi
-; I386-CMOV-NEXT: orl %edi, %esi
-; I386-CMOV-NEXT: movl %esi, (%esp)
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-CMOV-NEXT: movb %al, %ah
-; I386-CMOV-NEXT: movzbl %ah, %edi
-; I386-CMOV-NEXT: negl %edi
-; I386-CMOV-NEXT: movl %edx, %esi
-; I386-CMOV-NEXT: andl %edi, %esi
-; I386-CMOV-NEXT: notl %edi
-; I386-CMOV-NEXT: andl %ecx, %edi
-; I386-CMOV-NEXT: orl %edi, %esi
-; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; I386-CMOV-NEXT: movb %al, %ah
-; I386-CMOV-NEXT: movzbl %ah, %edi
-; I386-CMOV-NEXT: negl %edi
-; I386-CMOV-NEXT: movl %edx, %esi
-; I386-CMOV-NEXT: andl %edi, %esi
-; I386-CMOV-NEXT: notl %edi
-; I386-CMOV-NEXT: andl %ecx, %edi
-; I386-CMOV-NEXT: orl %edi, %esi
-; I386-CMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %ecx
+; I386-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; I386-CMOV-NEXT: cmovnel {{[0-9]+}}(%esp), %eax
+; I386-CMOV-NEXT: movl %eax, (%esp)
; I386-CMOV-NEXT: fldt (%esp)
-; I386-CMOV-NEXT: addl $12, %esp
-; I386-CMOV-NEXT: popl %esi
-; I386-CMOV-NEXT: popl %edi
+; I386-CMOV-NEXT: addl $36, %esp
; I386-CMOV-NEXT: retl
%result = call x86_fp80 @llvm.ct.select.f80(i1 %cond, x86_fp80 %a, x86_fp80 %b)
ret x86_fp80 %result