Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r-- llvm/lib/Target/ARM/ARM.td | 8
-rw-r--r-- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 97
-rw-r--r-- llvm/lib/Target/ARM/ARMAsmPrinter.h | 9
-rw-r--r-- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 117
-rw-r--r-- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 20
-rw-r--r-- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 5
-rw-r--r-- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 1
-rw-r--r-- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 1
-rw-r--r-- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 11
-rw-r--r-- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 36
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 393
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.h | 318
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrCDE.td | 3
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrFormats.td | 6
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.cpp | 3
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.h | 2
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.td | 142
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrMVE.td | 122
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrNEON.td | 17
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrThumb.td | 11
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrThumb2.td | 61
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrVFP.td | 24
-rw-r--r-- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 8
-rw-r--r-- llvm/lib/Target/ARM/ARMMCInstLower.cpp | 4
-rw-r--r-- llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 1
-rw-r--r-- llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 83
-rw-r--r-- llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 66
-rw-r--r-- llvm/lib/Target/ARM/ARMSubtarget.cpp | 87
-rw-r--r-- llvm/lib/Target/ARM/ARMSubtarget.h | 1
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetMachine.h | 14
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 54
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 31
-rw-r--r-- llvm/lib/Target/ARM/CMakeLists.txt | 4
-rw-r--r-- llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 188
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 4
-rw-r--r-- llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 2
-rw-r--r-- llvm/lib/Target/ARM/MLxExpansionPass.cpp | 2
-rw-r--r-- llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 13
-rw-r--r-- llvm/lib/Target/ARM/Thumb1InstrInfo.h | 7
-rw-r--r-- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 28
-rw-r--r-- llvm/lib/Target/ARM/Thumb2InstrInfo.h | 7
-rw-r--r-- llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 8
42 files changed, 1046 insertions, 973 deletions
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 570aae9..1f71d81 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -38,6 +38,14 @@ include "ARMSchedule.td"
//===----------------------------------------------------------------------===//
include "ARMInstrInfo.td"
+
+def Thumb1OnlyMode : HwMode<[IsThumb1Only]>;
+def arm_ptr_rc : RegClassByHwMode<
+ [DefaultMode, Thumb1OnlyMode],
+ [GPR, tGPR]>;
+
+defm : RemapAllTargetPseudoPointerOperands<arm_ptr_rc>;
+
def ARMInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
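The ARM.td hunk above picks the pointer register class per hardware mode. Below is a standalone C++ model of that idea (illustrative names only, not LLVM's API or the real TableGen semantics): Thumb1-only subtargets resolve pointer operands to the restricted tGPR class, every other mode keeps the full GPR class.

// Standalone model of the HwMode / RegClassByHwMode idea above.
#include <cassert>
#include <string_view>

enum class HwMode { Default, Thumb1Only };

// Stand-in for the per-mode register-class remapping of pointer operands.
constexpr std::string_view pointerRegClass(HwMode M) {
  return M == HwMode::Thumb1Only ? "tGPR" : "GPR";
}

int main() {
  assert(pointerRegClass(HwMode::Default) == "GPR");
  assert(pointerRegClass(HwMode::Thumb1Only) == "tGPR");
}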
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 36b9908..458a3f0 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -51,8 +51,8 @@ using namespace llvm;
ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer), ID), Subtarget(nullptr), AFI(nullptr),
- MCP(nullptr), InConstantPool(false), OptimizationGoals(-1) {}
+ : AsmPrinter(TM, std::move(Streamer), ID), AFI(nullptr), MCP(nullptr),
+ InConstantPool(false), OptimizationGoals(-1) {}
const ARMBaseTargetMachine &ARMAsmPrinter::getTM() const {
return static_cast<const ARMBaseTargetMachine &>(TM);
@@ -97,11 +97,41 @@ void ARMAsmPrinter::emitXXStructor(const DataLayout &DL, const Constant *CV) {
const MCExpr *E = MCSymbolRefExpr::create(
GetARMGVSymbol(GV, ARMII::MO_NO_FLAG),
- (Subtarget->isTargetELF() ? ARM::S_TARGET1 : ARM::S_None), OutContext);
+ (TM.getTargetTriple().isOSBinFormatELF() ? ARM::S_TARGET1 : ARM::S_None),
+ OutContext);
OutStreamer->emitValue(E, Size);
}
+// An alias to a cmse entry function should also emit a `__acle_se_` symbol.
+void ARMAsmPrinter::emitCMSEVeneerAlias(const GlobalAlias &GA) {
+ const Function *BaseFn = dyn_cast_or_null<Function>(GA.getAliaseeObject());
+ if (!BaseFn || !BaseFn->hasFnAttribute("cmse_nonsecure_entry"))
+ return;
+
+ MCSymbol *AliasSym = getSymbol(&GA);
+ MCSymbol *FnSym = getSymbol(BaseFn);
+
+ MCSymbol *SEAliasSym =
+ OutContext.getOrCreateSymbol(Twine("__acle_se_") + AliasSym->getName());
+ MCSymbol *SEBaseSym =
+ OutContext.getOrCreateSymbol(Twine("__acle_se_") + FnSym->getName());
+
+ // Mirror alias linkage/visibility onto the veneer-alias symbol.
+ emitLinkage(&GA, SEAliasSym);
+ OutStreamer->emitSymbolAttribute(SEAliasSym, MCSA_ELF_TypeFunction);
+ emitVisibility(SEAliasSym, GA.getVisibility());
+
+ // emit "__acle_se_<alias> = __acle_se_<aliasee>"
+ const MCExpr *SEExpr = MCSymbolRefExpr::create(SEBaseSym, OutContext);
+ OutStreamer->emitAssignment(SEAliasSym, SEExpr);
+}
+
+void ARMAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
+ AsmPrinter::emitGlobalAlias(M, GA);
+ emitCMSEVeneerAlias(GA);
+}
+
void ARMAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (PromotedGlobals.count(GV))
// The global was promoted into a constant pool. It should not be emitted.
@@ -115,7 +145,6 @@ void ARMAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
AFI = MF.getInfo<ARMFunctionInfo>();
MCP = MF.getConstantPool();
- Subtarget = &MF.getSubtarget<ARMSubtarget>();
SetupMachineFunction(MF);
const Function &F = MF.getFunction();
@@ -153,7 +182,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals
OptimizationGoals = 0;
- if (Subtarget->isTargetCOFF()) {
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
bool Local = F.hasLocalLinkage();
COFF::SymbolStorageClass Scl =
Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL;
@@ -259,8 +288,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
break;
}
case MachineOperand::MO_ConstantPoolIndex:
- if (Subtarget->genExecuteOnly())
- llvm_unreachable("execute-only should not generate constant pools");
+ assert(!MF->getSubtarget<ARMSubtarget>().genExecuteOnly() &&
+ "execute-only should not generate constant pools");
GetCPISymbol(MO.getIndex())->print(O, MAI);
break;
}
@@ -595,8 +624,7 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) {
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
if (OptimizationGoals > 0 &&
- (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
- Subtarget->isTargetMuslAEABI()))
+ (TT.isTargetAEABI() || TT.isTargetGNUAEABI() || TT.isTargetMuslAEABI()))
ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
OptimizationGoals = -1;
@@ -884,9 +912,10 @@ static uint8_t getModifierSpecifier(ARMCP::ARMCPModifier Modifier) {
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
unsigned char TargetFlags) {
- if (Subtarget->isTargetMachO()) {
+ const Triple &TT = TM.getTargetTriple();
+ if (TT.isOSBinFormatMachO()) {
bool IsIndirect =
- (TargetFlags & ARMII::MO_NONLAZY) && Subtarget->isGVIndirectSymbol(GV);
+ (TargetFlags & ARMII::MO_NONLAZY) && getTM().isGVIndirectSymbol(GV);
if (!IsIndirect)
return getSymbol(GV);
@@ -903,9 +932,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
!GV->hasInternalLinkage());
return MCSym;
- } else if (Subtarget->isTargetCOFF()) {
- assert(Subtarget->isTargetWindows() &&
- "Windows is the only supported COFF target");
+ } else if (TT.isOSBinFormatCOFF()) {
+ assert(TT.isOSWindows() && "Windows is the only supported COFF target");
bool IsIndirect =
(TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB));
@@ -932,7 +960,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
}
return MCSym;
- } else if (Subtarget->isTargetELF()) {
+ } else if (TT.isOSBinFormatELF()) {
return getSymbolPreferLocal(*GV);
}
llvm_unreachable("unexpected target");
@@ -978,7 +1006,8 @@ void ARMAsmPrinter::emitMachineConstantPoolValue(
// On Darwin, const-pool entries may get the "FOO$non_lazy_ptr" mangling, so
// flag the global as MO_NONLAZY.
- unsigned char TF = Subtarget->isTargetMachO() ? ARMII::MO_NONLAZY : 0;
+ unsigned char TF =
+ TM.getTargetTriple().isOSBinFormatMachO() ? ARMII::MO_NONLAZY : 0;
MCSym = GetARMGVSymbol(GV, TF);
} else if (ACPV->isMachineBasicBlock()) {
const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
@@ -1047,7 +1076,8 @@ void ARMAsmPrinter::emitJumpTableAddrs(const MachineInstr *MI) {
// .word (LBB1 - LJTI_0_0)
const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- if (isPositionIndependent() || Subtarget->isROPI())
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
+ if (isPositionIndependent() || STI.isROPI())
Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
OutContext),
OutContext);
@@ -1096,7 +1126,8 @@ void ARMAsmPrinter::emitJumpTableTBInst(const MachineInstr *MI,
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
- if (Subtarget->isThumb1Only())
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
+ if (STI.isThumb1Only())
emitAlignment(Align(4));
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -1904,6 +1935,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
ARM_MC::verifyInstructionPredicates(MI->getOpcode(),
getSubtargetInfo().getFeatureBits());
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
const DataLayout &DL = getDataLayout();
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -1915,8 +1947,8 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
// Emit unwinding stuff for frame-related instructions
- if (Subtarget->isTargetEHABICompatible() &&
- MI->getFlag(MachineInstr::FrameSetup))
+ if (TM.getTargetTriple().isTargetEHABICompatible() &&
+ MI->getFlag(MachineInstr::FrameSetup))
EmitUnwindingInstruction(MI);
// Do any auto-generated pseudo lowerings.
@@ -1982,14 +2014,13 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Add 's' bit operand (always reg0 for this)
.addReg(0));
- assert(Subtarget->hasV4TOps());
- EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
- .addReg(MI->getOperand(0).getReg()));
+ assert(STI.hasV4TOps() && "Expected V4TOps for BX call");
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(ARM::BX).addReg(MI->getOperand(0).getReg()));
return;
}
case ARM::tBX_CALL: {
- if (Subtarget->hasV5TOps())
- llvm_unreachable("Expected BLX to be selected for v5t+");
+ assert(!STI.hasV5TOps() && "Expected BLX to be selected for v5t+");
// On ARM v4t, when doing a call from thumb mode, we need to ensure
// that the saved lr has its LSB set correctly (the arch doesn't
@@ -2278,8 +2309,8 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
case ARM::CONSTPOOL_ENTRY: {
- if (Subtarget->genExecuteOnly())
- llvm_unreachable("execute-only should not generate constant pools");
+ assert(!STI.genExecuteOnly() &&
+ "execute-only should not generate constant pools");
/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
/// in the function. The first operand is the ID# for this instruction, the
@@ -2485,7 +2516,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::TRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
- if (!Subtarget->isTargetMachO()) {
+ if (!TM.getTargetTriple().isOSBinFormatMachO()) {
uint32_t Val = 0xe7ffdefeUL;
OutStreamer->AddComment("trap");
ATS.emitInst(Val);
@@ -2496,7 +2527,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::tTRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
- if (!Subtarget->isTargetMachO()) {
+ if (!TM.getTargetTriple().isOSBinFormatMachO()) {
uint16_t Val = 0xdefe;
OutStreamer->AddComment("trap");
ATS.emitInst(Val, 'n');
@@ -2656,9 +2687,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- const MachineFunction &MF = *MI->getParent()->getParent();
- const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-
if (STI.isTargetDarwin() || STI.isTargetWindows()) {
// These platforms always use the same frame register
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
@@ -2687,7 +2715,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(0));
}
- assert(Subtarget->hasV4TOps());
+ assert(STI.hasV4TOps());
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
.addReg(ScratchReg)
// Predicate.
@@ -2704,9 +2732,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
Register SrcReg = MI->getOperand(0).getReg();
Register ScratchReg = MI->getOperand(1).getReg();
- const MachineFunction &MF = *MI->getParent()->getParent();
- const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
.addReg(SrcReg)
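A minimal standalone sketch (assumed semantics, not the real MC streamer API) of the veneer-alias rule added in emitCMSEVeneerAlias above: for an alias A whose aliasee F carries the cmse_nonsecure_entry attribute, the printer additionally emits the assignment "__acle_se_A = __acle_se_F", so the secure gateway veneer symbol resolves for the alias as well.

// Standalone model of the CMSE veneer-alias assignment.
#include <iostream>
#include <string>

void emitVeneerAliasAssignment(const std::string &Alias,
                               const std::string &Aliasee) {
  // Mirrors the "__acle_se_<alias> = __acle_se_<aliasee>" assignment above.
  std::cout << "__acle_se_" << Alias << " = __acle_se_" << Aliasee << "\n";
}

int main() {
  emitVeneerAliasAssignment("bar", "foo"); // __acle_se_bar = __acle_se_foo
}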
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 9e92b5a..12e20d7 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -9,13 +9,13 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
-#include "ARMSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class ARMFunctionInfo;
+class ARMBaseTargetMachine;
class MCOperand;
class MachineConstantPool;
class MachineOperand;
@@ -33,10 +33,6 @@ public:
static char ID;
private:
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const ARMSubtarget *Subtarget;
-
/// AFI - Keep a pointer to ARMFunctionInfo for the current
/// MachineFunction.
ARMFunctionInfo *AFI;
@@ -108,6 +104,7 @@ public:
void emitEndOfAsmFile(Module &M) override;
void emitXXStructor(const DataLayout &DL, const Constant *CV) override;
void emitGlobalVariable(const GlobalVariable *GV) override;
+ void emitGlobalAlias(const Module &M, const GlobalAlias &GA) override;
MCSymbol *GetCPISymbol(unsigned CPID) const override;
@@ -163,6 +160,8 @@ private:
MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags);
+ void emitCMSEVeneerAlias(const GlobalAlias &GA);
+
public:
/// EmitMachineConstantPoolValue - Print a machine constantpool value to
/// the .s file.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 22769db..02887ce 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -107,8 +107,9 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
{ ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
};
-ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
- : ARMGenInstrInfo(STI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI,
+ const ARMBaseRegisterInfo &TRI)
+ : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
Subtarget(STI) {
for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
@@ -928,15 +929,15 @@ ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
-const MachineInstrBuilder &
-ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
- unsigned SubIdx, unsigned State,
- const TargetRegisterInfo *TRI) const {
+const MachineInstrBuilder &ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg,
+ unsigned SubIdx,
+ unsigned State) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
if (Register::isPhysicalRegister(Reg))
- return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
@@ -944,18 +945,18 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
Register VReg,
MachineInstr::MIFlag Flags) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
Align Alignment = MFI.getObjectAlign(FI);
+ const ARMBaseRegisterInfo &TRI = getRegisterInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), Alignment);
- switch (TRI->getSpillSize(*RC)) {
+ switch (TRI.getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
@@ -1010,8 +1011,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
- AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
@@ -1021,8 +1022,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
- AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -1072,9 +1073,9 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -1104,10 +1105,10 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -1124,14 +1125,14 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
- AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1207,10 +1208,12 @@ Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
return false;
}
-void ARMBaseInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
- int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg, MachineInstr::MIFlag Flags) const {
+void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, int FI,
+ const TargetRegisterClass *RC,
+ Register VReg,
+ MachineInstr::MIFlag Flags) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -1220,7 +1223,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Alignment);
- switch (TRI->getSpillSize(*RC)) {
+ const ARMBaseRegisterInfo &TRI = getRegisterInfo();
+ switch (TRI.getSpillSize(*RC)) {
case 2:
if (ARM::HPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
@@ -1271,8 +1275,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
if (Subtarget.hasV5TEOps()) {
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
- AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
.add(predOps(ARMCC::AL));
} else {
@@ -1282,8 +1286,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
- MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
}
if (DestReg.isPhysical())
@@ -1329,9 +1333,9 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
.addFrameIndex(FI)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
if (DestReg.isPhysical())
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
@@ -1358,10 +1362,10 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
if (DestReg.isPhysical())
MIB.addReg(DestReg, RegState::ImplicitDefine);
}
@@ -1379,14 +1383,14 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
if (DestReg.isPhysical())
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
@@ -1652,8 +1656,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig,
- const TargetRegisterInfo &TRI) const {
+ const MachineInstr &Orig) const {
unsigned Opcode = Orig.getOpcode();
switch (Opcode) {
default: {
@@ -6548,7 +6551,7 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
static int constexpr LAST_IS_USE = MAX_STAGES;
static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
typedef std::bitset<MAX_STAGES + 2> IterNeed;
- typedef std::map<unsigned, IterNeed> IterNeeds;
+ typedef std::map<Register, IterNeed> IterNeeds;
void bumpCrossIterationPressure(RegPressureTracker &RPT,
const IterNeeds &CIN);
@@ -6622,14 +6625,14 @@ void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
for (const auto &N : CIN) {
int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
for (int I = 0; I < Cnt; ++I)
- RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
+ RPT.increaseRegPressure(VirtRegOrUnit(N.first), LaneBitmask::getNone(),
LaneBitmask::getAll());
}
// Decrease pressure by the amounts in CrossIterationNeeds
for (const auto &N : CIN) {
int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
for (int I = 0; I < Cnt; ++I)
- RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
+ RPT.decreaseRegPressure(VirtRegOrUnit(N.first), LaneBitmask::getAll(),
LaneBitmask::getNone());
}
}
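The AddDReg churn above is one refactor: the register info is no longer threaded through every call, because the instruction info can reach it directly. A standalone sketch of that accessor pattern (mock types, not LLVM's API):

// Standalone model of dropping the per-call TargetRegisterInfo parameter.
struct MockRegInfo {
  unsigned getSubReg(unsigned Reg, unsigned SubIdx) const {
    return Reg * 100 + SubIdx; // stand-in for real sub-register lookup
  }
};

class MockInstrInfo {
  const MockRegInfo &RI; // stored once, like the TRI reference passed to
                         // the new ARMBaseInstrInfo constructor
public:
  explicit MockInstrInfo(const MockRegInfo &RI) : RI(RI) {}
  // The old signature took an extra `const MockRegInfo *` argument here.
  unsigned addDReg(unsigned Reg, unsigned SubIdx) const {
    return SubIdx ? RI.getSubReg(Reg, SubIdx) : Reg;
  }
};

int main() {
  MockRegInfo RI;
  MockInstrInfo TII(RI);
  return TII.addDReg(3, 2) == 302 ? 0 : 1;
}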
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2869e7f..04e2ab0 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -44,7 +44,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
protected:
// Can be only subclassed.
- explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI,
+ const ARMBaseRegisterInfo &TRI);
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc, unsigned LoadOpc) const;
@@ -125,7 +126,11 @@ public:
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
- virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
+ const ARMBaseRegisterInfo &getRegisterInfo() const {
+ return static_cast<const ARMBaseRegisterInfo &>(
+ TargetInstrInfo::getRegisterInfo());
+ }
+
const ARMSubtarget &getSubtarget() const { return Subtarget; }
ScheduleHazardRecognizer *
@@ -211,14 +216,13 @@ public:
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
- bool isKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
void loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
bool expandPostRAPseudo(MachineInstr &MI) const override;
@@ -227,16 +231,14 @@ public:
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig,
- const TargetRegisterInfo &TRI) const override;
+ const MachineInstr &Orig) const override;
MachineInstr &
duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
const MachineInstr &Orig) const override;
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
- unsigned SubIdx, unsigned State,
- const TargetRegisterInfo *TRI) const;
+ unsigned SubIdx, unsigned State) const;
bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1,
const MachineRegisterInfo *MRI) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ce1cdb3..80921ce 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -708,7 +708,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
- MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0));
MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
@@ -881,8 +881,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg();
const MCInstrDesc &MCID = MI.getDesc();
- const TargetRegisterClass *RegClass =
- TII.getRegClass(MCID, FIOperandNum, this);
+ const TargetRegisterClass *RegClass = TII.getRegClass(MCID, FIOperandNum);
if (Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(FrameReg)))
// Must be addrmode4/6.
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f43ec73..80494d9 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -51,7 +51,6 @@
#include <cassert>
#include <cstdint>
#include <iterator>
-#include <utility>
#include <vector>
using namespace llvm;
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fffb6373..d69c09f 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -932,6 +932,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
return true;
case MachineOperand::MO_RegisterMask:
case MachineOperand::MO_RegisterLiveOut:
+ case MachineOperand::MO_LaneMask:
return false;
case MachineOperand::MO_Metadata:
case MachineOperand::MO_MCSymbol:
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 138981a..c19eed1 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2342,7 +2342,6 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned Limit = (1 << 12) - 1;
for (auto &MBB : MF) {
for (auto &MI : MBB) {
@@ -2364,7 +2363,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
break;
const MCInstrDesc &MCID = MI.getDesc();
- const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI);
+ const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i);
if (RegClass && !RegClass->contains(ARM::SP))
HasNonSPFrameIndex = true;
@@ -2537,7 +2536,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
(void)TRI; // Silence unused warning in non-assert builds.
- Register FramePtr = RegInfo->getFrameRegister(MF);
+ Register FramePtr = STI.getFramePointerReg();
ARMSubtarget::PushPopSplitVariation PushPopSplit =
STI.getPushPopSplitVariation(MF);
@@ -2784,7 +2783,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
!CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
- if (HasFP) {
+ // Save the FP if:
+ // 1. We currently need it (HasFP), OR
+ // 2. We might need it later due to stack realignment from aligned DPRCS2
+ // saves (which will make hasFP() become true in emitPrologue).
+ if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
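The new spill condition in determineCalleeSaves, restated as a standalone predicate (assumed semantics): save the frame pointer when it is already required, or when a reserved-FP target has aligned DPRCS2 saves that may force stack realignment and flip hasFP() to true only during emitPrologue.

// Standalone restatement of the frame-pointer spill condition above.
bool shouldSpillFramePointer(bool HasFP, bool FPReserved,
                             unsigned NumAlignedDPRCS2Regs) {
  return HasFP || (FPReserved && NumAlignedDPRCS2Regs > 0);
}

int main() { return shouldSpillFramePointer(false, true, 2) ? 0 : 1; }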
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 847b7af..26b5e5a 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
// Other cases are autogenerated.
break;
- case ARMISD::WLSSETUP: {
- SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
- N->getOperand(0));
- ReplaceUses(N, New);
- CurDAG->RemoveDeadNode(N);
- return;
- }
- case ARMISD::WLS: {
- SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
- N->getOperand(1), N->getOperand(2),
- N->getOperand(0));
- ReplaceUses(N, New);
- CurDAG->RemoveDeadNode(N);
- return;
- }
- case ARMISD::LE: {
- SDValue Ops[] = { N->getOperand(1),
- N->getOperand(2),
- N->getOperand(0) };
- unsigned Opc = ARM::t2LoopEnd;
- SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
- ReplaceUses(N, New);
- CurDAG->RemoveDeadNode(N);
- return;
- }
case ARMISD::LDRD: {
if (Subtarget->isThumb2())
break; // TableGen handles isel in this case.
@@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
- case ARMISD::LOOP_DEC: {
- SDValue Ops[] = { N->getOperand(1),
- N->getOperand(2),
- N->getOperand(0) };
- SDNode *Dec =
- CurDAG->getMachineNode(ARM::t2LoopDec, dl,
- CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
- ReplaceUses(N, Dec);
- CurDAG->RemoveDeadNode(N);
- return;
- }
case ARMISD::BRCOND: {
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
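The deleted Select() cases above each mapped one DAG node to exactly one machine opcode; such 1:1 mappings fit a declarative description better than hand-written C++ (in LLVM proper, presumably TableGen patterns now cover them). A purely illustrative table of the removed mappings:

// Standalone, illustrative table of the node-to-opcode mappings that the
// removed hand-written Select() cases implemented.
#include <cstdio>
#include <map>
#include <string>

const std::map<std::string, std::string> LoopNodeToOpcode = {
    {"ARMISD::WLSSETUP", "ARM::t2WhileLoopSetup"},
    {"ARMISD::WLS", "ARM::t2WhileLoopStart"},
    {"ARMISD::LE", "ARM::t2LoopEnd"},
    {"ARMISD::LOOP_DEC", "ARM::t2LoopDec"},
};

int main() { std::printf("%zu mappings\n", LoopNodeToOpcode.size()); }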
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 92fae71..2d26c67 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -508,7 +508,7 @@ const ARMBaseTargetMachine &ARMTargetLowering::getTM() const {
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
const ARMSubtarget &STI)
- : TargetLowering(TM_), Subtarget(&STI),
+ : TargetLowering(TM_, STI), Subtarget(&STI),
RegInfo(Subtarget->getRegisterInfo()),
Itins(Subtarget->getInstrItineraryData()) {
const auto &TM = static_cast<const ARMBaseTargetMachine &>(TM_);
@@ -518,74 +518,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
const Triple &TT = TM.getTargetTriple();
- if (TT.isOSBinFormatMachO()) {
- // Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
- Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
- // clang-format off
- static const struct {
- const RTLIB::Libcall Op;
- const RTLIB::LibcallImpl Impl;
- } LibraryCalls[] = {
- // Single-precision floating-point arithmetic.
- { RTLIB::ADD_F32, RTLIB::impl___addsf3vfp },
- { RTLIB::SUB_F32, RTLIB::impl___subsf3vfp },
- { RTLIB::MUL_F32, RTLIB::impl___mulsf3vfp },
- { RTLIB::DIV_F32, RTLIB::impl___divsf3vfp },
-
- // Double-precision floating-point arithmetic.
- { RTLIB::ADD_F64, RTLIB::impl___adddf3vfp },
- { RTLIB::SUB_F64, RTLIB::impl___subdf3vfp },
- { RTLIB::MUL_F64, RTLIB::impl___muldf3vfp },
- { RTLIB::DIV_F64, RTLIB::impl___divdf3vfp },
-
- // Single-precision comparisons.
- { RTLIB::OEQ_F32, RTLIB::impl___eqsf2vfp },
- { RTLIB::UNE_F32, RTLIB::impl___nesf2vfp },
- { RTLIB::OLT_F32, RTLIB::impl___ltsf2vfp },
- { RTLIB::OLE_F32, RTLIB::impl___lesf2vfp },
- { RTLIB::OGE_F32, RTLIB::impl___gesf2vfp },
- { RTLIB::OGT_F32, RTLIB::impl___gtsf2vfp },
- { RTLIB::UO_F32, RTLIB::impl___unordsf2vfp },
-
- // Double-precision comparisons.
- { RTLIB::OEQ_F64, RTLIB::impl___eqdf2vfp },
- { RTLIB::UNE_F64, RTLIB::impl___nedf2vfp },
- { RTLIB::OLT_F64, RTLIB::impl___ltdf2vfp },
- { RTLIB::OLE_F64, RTLIB::impl___ledf2vfp },
- { RTLIB::OGE_F64, RTLIB::impl___gedf2vfp },
- { RTLIB::OGT_F64, RTLIB::impl___gtdf2vfp },
- { RTLIB::UO_F64, RTLIB::impl___unorddf2vfp },
-
- // Floating-point to integer conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- { RTLIB::FPTOSINT_F64_I32, RTLIB::impl___fixdfsivfp },
- { RTLIB::FPTOUINT_F64_I32, RTLIB::impl___fixunsdfsivfp },
- { RTLIB::FPTOSINT_F32_I32, RTLIB::impl___fixsfsivfp },
- { RTLIB::FPTOUINT_F32_I32, RTLIB::impl___fixunssfsivfp },
-
- // Conversions between floating types.
- { RTLIB::FPROUND_F64_F32, RTLIB::impl___truncdfsf2vfp },
- { RTLIB::FPEXT_F32_F64, RTLIB::impl___extendsfdf2vfp },
-
- // Integer to floating-point conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- // FIXME: There appears to be some naming inconsistency in ARM libgcc:
- // e.g., __floatunsidf vs. __floatunssidfvfp.
- { RTLIB::SINTTOFP_I32_F64, RTLIB::impl___floatsidfvfp },
- { RTLIB::UINTTOFP_I32_F64, RTLIB::impl___floatunssidfvfp },
- { RTLIB::SINTTOFP_I32_F32, RTLIB::impl___floatsisfvfp },
- { RTLIB::UINTTOFP_I32_F32, RTLIB::impl___floatunssisfvfp },
- };
- // clang-format on
-
- for (const auto &LC : LibraryCalls)
- setLibcallImpl(LC.Op, LC.Impl);
- }
- }
-
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
@@ -614,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
setOperationAction(Op, MVT::f64, Legal);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
}
}
if (Subtarget->hasFullFP16()) {
+ for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+ setOperationAction(Op, MVT::f16, Legal);
+
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
}
if (Subtarget->hasBF16()) {
@@ -933,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@@ -947,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
}
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
}
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
}
computeRegisterProperties(Subtarget->getRegisterInfo());
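The constructor hunks above register the strict FP opcodes' legality alongside their non-strict counterparts. A standalone model of the bookkeeping involved (illustrative, not TargetLowering's real tables): legality is recorded per (opcode, value type) pair.

// Standalone model of per-(opcode, type) operation-action bookkeeping.
#include <cstdio>
#include <map>
#include <utility>

enum Action { Legal, Custom, Expand };
enum Opcode { FADD, STRICT_FADD, FP_EXTEND, STRICT_FP_EXTEND };
enum ValueType { f16, f32, f64 };

std::map<std::pair<Opcode, ValueType>, Action> OpActions;

void setOperationAction(Opcode Op, ValueType VT, Action A) {
  OpActions[{Op, VT}] = A;
}

int main() {
  setOperationAction(FADD, f64, Legal);
  setOperationAction(STRICT_FADD, f64, Legal);       // mirrors the change
  setOperationAction(STRICT_FP_EXTEND, f64, Custom); // no-FP64 fallback
  std::printf("%zu actions recorded\n", OpActions.size());
}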
@@ -1291,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
- setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
- setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
- setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
- setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
}
// Strict floating-point comparisons need custom lowering.
@@ -1316,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// FP-ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasFPARMv8Base()) {
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ if (Subtarget->hasFPARMv8Base()) {
+ for (auto Op :
+ {ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
+ ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
+ ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+ ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
+ setOperationAction(Op, MVT::f32, Legal);
+
+ if (Subtarget->hasFP64())
+ setOperationAction(Op, MVT::f64, Legal);
+ }
+
if (Subtarget->hasNEON()) {
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
}
-
- if (Subtarget->hasFP64()) {
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FROUND, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- }
}
// FP16 often need to be promoted to call lib functions
@@ -1498,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
+
+ IsStrictFPEnabled = true;
}
bool ARMTargetLowering::useSoftFloat() const {
@@ -1556,220 +1496,6 @@ ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
return std::make_pair(RRC, Cost);
}
-const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define MAKE_CASE(V) \
- case V: \
- return #V;
- switch ((ARMISD::NodeType)Opcode) {
- case ARMISD::FIRST_NUMBER:
- break;
- MAKE_CASE(ARMISD::Wrapper)
- MAKE_CASE(ARMISD::WrapperPIC)
- MAKE_CASE(ARMISD::WrapperJT)
- MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
- MAKE_CASE(ARMISD::CALL)
- MAKE_CASE(ARMISD::CALL_PRED)
- MAKE_CASE(ARMISD::CALL_NOLINK)
- MAKE_CASE(ARMISD::tSECALL)
- MAKE_CASE(ARMISD::t2CALL_BTI)
- MAKE_CASE(ARMISD::BRCOND)
- MAKE_CASE(ARMISD::BR_JT)
- MAKE_CASE(ARMISD::BR2_JT)
- MAKE_CASE(ARMISD::RET_GLUE)
- MAKE_CASE(ARMISD::SERET_GLUE)
- MAKE_CASE(ARMISD::INTRET_GLUE)
- MAKE_CASE(ARMISD::PIC_ADD)
- MAKE_CASE(ARMISD::CMP)
- MAKE_CASE(ARMISD::CMN)
- MAKE_CASE(ARMISD::CMPZ)
- MAKE_CASE(ARMISD::CMPFP)
- MAKE_CASE(ARMISD::CMPFPE)
- MAKE_CASE(ARMISD::CMPFPw0)
- MAKE_CASE(ARMISD::CMPFPEw0)
- MAKE_CASE(ARMISD::BCC_i64)
- MAKE_CASE(ARMISD::FMSTAT)
- MAKE_CASE(ARMISD::CMOV)
- MAKE_CASE(ARMISD::SSAT)
- MAKE_CASE(ARMISD::USAT)
- MAKE_CASE(ARMISD::ASRL)
- MAKE_CASE(ARMISD::LSRL)
- MAKE_CASE(ARMISD::LSLL)
- MAKE_CASE(ARMISD::LSLS)
- MAKE_CASE(ARMISD::LSRS1)
- MAKE_CASE(ARMISD::ASRS1)
- MAKE_CASE(ARMISD::RRX)
- MAKE_CASE(ARMISD::ADDC)
- MAKE_CASE(ARMISD::ADDE)
- MAKE_CASE(ARMISD::SUBC)
- MAKE_CASE(ARMISD::SUBE)
- MAKE_CASE(ARMISD::VMOVRRD)
- MAKE_CASE(ARMISD::VMOVDRR)
- MAKE_CASE(ARMISD::VMOVhr)
- MAKE_CASE(ARMISD::VMOVrh)
- MAKE_CASE(ARMISD::VMOVSR)
- MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
- MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
- MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
- MAKE_CASE(ARMISD::TC_RETURN)
- MAKE_CASE(ARMISD::THREAD_POINTER)
- MAKE_CASE(ARMISD::DYN_ALLOC)
- MAKE_CASE(ARMISD::MEMBARRIER_MCR)
- MAKE_CASE(ARMISD::PRELOAD)
- MAKE_CASE(ARMISD::LDRD)
- MAKE_CASE(ARMISD::STRD)
- MAKE_CASE(ARMISD::WIN__CHKSTK)
- MAKE_CASE(ARMISD::WIN__DBZCHK)
- MAKE_CASE(ARMISD::PREDICATE_CAST)
- MAKE_CASE(ARMISD::VECTOR_REG_CAST)
- MAKE_CASE(ARMISD::MVESEXT)
- MAKE_CASE(ARMISD::MVEZEXT)
- MAKE_CASE(ARMISD::MVETRUNC)
- MAKE_CASE(ARMISD::VCMP)
- MAKE_CASE(ARMISD::VCMPZ)
- MAKE_CASE(ARMISD::VTST)
- MAKE_CASE(ARMISD::VSHLs)
- MAKE_CASE(ARMISD::VSHLu)
- MAKE_CASE(ARMISD::VSHLIMM)
- MAKE_CASE(ARMISD::VSHRsIMM)
- MAKE_CASE(ARMISD::VSHRuIMM)
- MAKE_CASE(ARMISD::VRSHRsIMM)
- MAKE_CASE(ARMISD::VRSHRuIMM)
- MAKE_CASE(ARMISD::VRSHRNIMM)
- MAKE_CASE(ARMISD::VQSHLsIMM)
- MAKE_CASE(ARMISD::VQSHLuIMM)
- MAKE_CASE(ARMISD::VQSHLsuIMM)
- MAKE_CASE(ARMISD::VQSHRNsIMM)
- MAKE_CASE(ARMISD::VQSHRNuIMM)
- MAKE_CASE(ARMISD::VQSHRNsuIMM)
- MAKE_CASE(ARMISD::VQRSHRNsIMM)
- MAKE_CASE(ARMISD::VQRSHRNuIMM)
- MAKE_CASE(ARMISD::VQRSHRNsuIMM)
- MAKE_CASE(ARMISD::VSLIIMM)
- MAKE_CASE(ARMISD::VSRIIMM)
- MAKE_CASE(ARMISD::VGETLANEu)
- MAKE_CASE(ARMISD::VGETLANEs)
- MAKE_CASE(ARMISD::VMOVIMM)
- MAKE_CASE(ARMISD::VMVNIMM)
- MAKE_CASE(ARMISD::VMOVFPIMM)
- MAKE_CASE(ARMISD::VDUP)
- MAKE_CASE(ARMISD::VDUPLANE)
- MAKE_CASE(ARMISD::VEXT)
- MAKE_CASE(ARMISD::VREV64)
- MAKE_CASE(ARMISD::VREV32)
- MAKE_CASE(ARMISD::VREV16)
- MAKE_CASE(ARMISD::VZIP)
- MAKE_CASE(ARMISD::VUZP)
- MAKE_CASE(ARMISD::VTRN)
- MAKE_CASE(ARMISD::VTBL1)
- MAKE_CASE(ARMISD::VTBL2)
- MAKE_CASE(ARMISD::VMOVN)
- MAKE_CASE(ARMISD::VQMOVNs)
- MAKE_CASE(ARMISD::VQMOVNu)
- MAKE_CASE(ARMISD::VCVTN)
- MAKE_CASE(ARMISD::VCVTL)
- MAKE_CASE(ARMISD::VIDUP)
- MAKE_CASE(ARMISD::VMULLs)
- MAKE_CASE(ARMISD::VMULLu)
- MAKE_CASE(ARMISD::VQDMULH)
- MAKE_CASE(ARMISD::VADDVs)
- MAKE_CASE(ARMISD::VADDVu)
- MAKE_CASE(ARMISD::VADDVps)
- MAKE_CASE(ARMISD::VADDVpu)
- MAKE_CASE(ARMISD::VADDLVs)
- MAKE_CASE(ARMISD::VADDLVu)
- MAKE_CASE(ARMISD::VADDLVAs)
- MAKE_CASE(ARMISD::VADDLVAu)
- MAKE_CASE(ARMISD::VADDLVps)
- MAKE_CASE(ARMISD::VADDLVpu)
- MAKE_CASE(ARMISD::VADDLVAps)
- MAKE_CASE(ARMISD::VADDLVApu)
- MAKE_CASE(ARMISD::VMLAVs)
- MAKE_CASE(ARMISD::VMLAVu)
- MAKE_CASE(ARMISD::VMLAVps)
- MAKE_CASE(ARMISD::VMLAVpu)
- MAKE_CASE(ARMISD::VMLALVs)
- MAKE_CASE(ARMISD::VMLALVu)
- MAKE_CASE(ARMISD::VMLALVps)
- MAKE_CASE(ARMISD::VMLALVpu)
- MAKE_CASE(ARMISD::VMLALVAs)
- MAKE_CASE(ARMISD::VMLALVAu)
- MAKE_CASE(ARMISD::VMLALVAps)
- MAKE_CASE(ARMISD::VMLALVApu)
- MAKE_CASE(ARMISD::VMINVu)
- MAKE_CASE(ARMISD::VMINVs)
- MAKE_CASE(ARMISD::VMAXVu)
- MAKE_CASE(ARMISD::VMAXVs)
- MAKE_CASE(ARMISD::UMAAL)
- MAKE_CASE(ARMISD::UMLAL)
- MAKE_CASE(ARMISD::SMLAL)
- MAKE_CASE(ARMISD::SMLALBB)
- MAKE_CASE(ARMISD::SMLALBT)
- MAKE_CASE(ARMISD::SMLALTB)
- MAKE_CASE(ARMISD::SMLALTT)
- MAKE_CASE(ARMISD::SMULWB)
- MAKE_CASE(ARMISD::SMULWT)
- MAKE_CASE(ARMISD::SMLALD)
- MAKE_CASE(ARMISD::SMLALDX)
- MAKE_CASE(ARMISD::SMLSLD)
- MAKE_CASE(ARMISD::SMLSLDX)
- MAKE_CASE(ARMISD::SMMLAR)
- MAKE_CASE(ARMISD::SMMLSR)
- MAKE_CASE(ARMISD::QADD16b)
- MAKE_CASE(ARMISD::QSUB16b)
- MAKE_CASE(ARMISD::QADD8b)
- MAKE_CASE(ARMISD::QSUB8b)
- MAKE_CASE(ARMISD::UQADD16b)
- MAKE_CASE(ARMISD::UQSUB16b)
- MAKE_CASE(ARMISD::UQADD8b)
- MAKE_CASE(ARMISD::UQSUB8b)
- MAKE_CASE(ARMISD::BUILD_VECTOR)
- MAKE_CASE(ARMISD::BFI)
- MAKE_CASE(ARMISD::VORRIMM)
- MAKE_CASE(ARMISD::VBICIMM)
- MAKE_CASE(ARMISD::VBSP)
- MAKE_CASE(ARMISD::MEMCPY)
- MAKE_CASE(ARMISD::VLD1DUP)
- MAKE_CASE(ARMISD::VLD2DUP)
- MAKE_CASE(ARMISD::VLD3DUP)
- MAKE_CASE(ARMISD::VLD4DUP)
- MAKE_CASE(ARMISD::VLD1_UPD)
- MAKE_CASE(ARMISD::VLD2_UPD)
- MAKE_CASE(ARMISD::VLD3_UPD)
- MAKE_CASE(ARMISD::VLD4_UPD)
- MAKE_CASE(ARMISD::VLD1x2_UPD)
- MAKE_CASE(ARMISD::VLD1x3_UPD)
- MAKE_CASE(ARMISD::VLD1x4_UPD)
- MAKE_CASE(ARMISD::VLD2LN_UPD)
- MAKE_CASE(ARMISD::VLD3LN_UPD)
- MAKE_CASE(ARMISD::VLD4LN_UPD)
- MAKE_CASE(ARMISD::VLD1DUP_UPD)
- MAKE_CASE(ARMISD::VLD2DUP_UPD)
- MAKE_CASE(ARMISD::VLD3DUP_UPD)
- MAKE_CASE(ARMISD::VLD4DUP_UPD)
- MAKE_CASE(ARMISD::VST1_UPD)
- MAKE_CASE(ARMISD::VST2_UPD)
- MAKE_CASE(ARMISD::VST3_UPD)
- MAKE_CASE(ARMISD::VST4_UPD)
- MAKE_CASE(ARMISD::VST1x2_UPD)
- MAKE_CASE(ARMISD::VST1x3_UPD)
- MAKE_CASE(ARMISD::VST1x4_UPD)
- MAKE_CASE(ARMISD::VST2LN_UPD)
- MAKE_CASE(ARMISD::VST3LN_UPD)
- MAKE_CASE(ARMISD::VST4LN_UPD)
- MAKE_CASE(ARMISD::WLS)
- MAKE_CASE(ARMISD::WLSSETUP)
- MAKE_CASE(ARMISD::LE)
- MAKE_CASE(ARMISD::LOOP_DEC)
- MAKE_CASE(ARMISD::CSINV)
- MAKE_CASE(ARMISD::CSNEG)
- MAKE_CASE(ARMISD::CSINC)
- MAKE_CASE(ARMISD::MEMCPYLOOP)
- MAKE_CASE(ARMISD::MEMSETLOOP)
-#undef MAKE_CASE
- }
- return nullptr;
-}
-
EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
@@ -2510,9 +2236,44 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
Chain = DAG.getStackArgumentTokenFactor(Chain);
- if (ByValTempChain)
+ if (ByValTempChain) {
+ // In case of large byval copies, reusing the stack frame for tail calls
+ // can lead to overwriting incoming arguments on the stack. Force
+ // loading these stack arguments before the copy to avoid that.
+ SmallVector<SDValue, 8> IncomingLoad;
+ for (unsigned I = 0; I < OutVals.size(); ++I) {
+ if (Outs[I].Flags.isByVal())
+ continue;
+
+ SDValue OutVal = OutVals[I];
+ LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);
+ if (!OutLN)
+ continue;
+
+ FrameIndexSDNode *FIN =
+ dyn_cast_or_null<FrameIndexSDNode>(OutLN->getBasePtr());
+ if (!FIN)
+ continue;
+
+ if (!MFI.isFixedObjectIndex(FIN->getIndex()))
+ continue;
+
+ for (const CCValAssign &VA : ArgLocs) {
+ if (VA.isMemLoc())
+ IncomingLoad.push_back(OutVal.getValue(1));
+ }
+ }
+
+ // Update the chain to force loads for potentially clobbered argument
+ // loads to happen before the byval copy.
+ if (!IncomingLoad.empty()) {
+ IncomingLoad.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);
+ }
+
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
ByValTempChain);
+ }
AfterFormalArgLoads = true;
}
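A standalone model (assumed semantics) of the ordering fix above: a token factor simply says "everything in this set completes before its users", so joining the incoming-argument loads into the chain ahead of the byval copy guarantees they read their stack slots before the copy can clobber them.

// Standalone model of "run all argument loads, then the byval copy".
#include <functional>
#include <vector>

void runLoadsThenCopy(const std::vector<std::function<void()>> &ArgLoads,
                      const std::function<void()> &ByValCopy) {
  for (const auto &Load : ArgLoads)
    Load(); // ordered before the copy, like the TokenFactor chain above
  ByValCopy();
}

int main() {
  std::vector<std::function<void()>> Loads = {[] { /* load incoming arg */ }};
  runLoadsThenCopy(Loads, [] { /* byval stack copy */ });
}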
@@ -3309,8 +3070,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
- ARMISD::RET_GLUE;
+ unsigned RetNode =
+ AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE;
return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
@@ -4826,7 +4587,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
- ARMISD::NodeType CompareType;
+ unsigned CompareType;
switch (CondCode) {
default:
CompareType = ARMISD::CMP;
@@ -20904,7 +20665,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
+ const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index bc2fec3..d0fb58c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -51,319 +51,6 @@ class TargetMachine;
class TargetRegisterInfo;
class VectorType;
- namespace ARMISD {
-
- // ARM Specific DAG Nodes
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
- // TargetExternalSymbol, and TargetGlobalAddress.
- WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
- // PIC mode.
- WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
-
- // Add pseudo op to model memcpy for struct byval.
- COPY_STRUCT_BYVAL,
-
- CALL, // Function call.
- CALL_PRED, // Function call that's predicable.
- CALL_NOLINK, // Function call with branch not branch-and-link.
- tSECALL, // CMSE non-secure function call.
- t2CALL_BTI, // Thumb function call followed by BTI instruction.
- BRCOND, // Conditional branch.
- BR_JT, // Jumptable branch.
- BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
- RET_GLUE, // Return with a flag operand.
- SERET_GLUE, // CMSE Entry function return with a flag operand.
- INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.
-
- PIC_ADD, // Add with a PC operand and a PIC label.
-
- ASRL, // MVE long arithmetic shift right.
- LSRL, // MVE long shift right.
- LSLL, // MVE long shift left.
-
- CMP, // ARM compare instructions.
- CMN, // ARM CMN instructions.
- CMPZ, // ARM compare that sets only Z flag.
- CMPFP, // ARM VFP compare instruction, sets FPSCR.
- CMPFPE, // ARM VFP signalling compare instruction, sets FPSCR.
- CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
- CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
- // FPSCR.
- FMSTAT, // ARM fmstat instruction.
-
- CMOV, // ARM conditional move instructions.
-
- SSAT, // Signed saturation
- USAT, // Unsigned saturation
-
- BCC_i64,
-
- LSLS, // Flag-setting shift left.
- LSRS1, // Flag-setting logical shift right by one bit.
- ASRS1, // Flag-setting arithmetic shift right by one bit.
- RRX, // Shift right one bit with carry in.
-
- ADDC, // Add with carry
- ADDE, // Add using carry
- SUBC, // Sub with carry
- SUBE, // Sub using carry
-
- VMOVRRD, // double to two gprs.
- VMOVDRR, // Two gprs to double.
- VMOVSR, // move gpr to single, used for f32 literal constructed in a gpr
-
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
- EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
-
- TC_RETURN, // Tail call return pseudo.
-
- THREAD_POINTER,
-
- DYN_ALLOC, // Dynamic allocation on the stack.
-
- MEMBARRIER_MCR, // Memory barrier (MCR)
-
- PRELOAD, // Preload
-
- WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
- WIN__DBZCHK, // Windows' divide by zero check
-
- WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
- WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
- LOOP_DEC, // Really a part of LE, performs the sub
- LE, // Low-overhead loops, Loop End
-
- PREDICATE_CAST, // Predicate cast for MVE i1 types
- VECTOR_REG_CAST, // Reinterpret the current contents of a vector register
-
- MVESEXT, // Legalization aids for extending a vector into two/four vectors.
- MVEZEXT, // or truncating two/four vectors into one. Eventually becomes
- MVETRUNC, // stack store/load sequence, if not optimized to anything else.
-
- VCMP, // Vector compare.
- VCMPZ, // Vector compare to zero.
- VTST, // Vector test bits.
-
- // Vector shift by vector
- VSHLs, // ...left/right by signed
- VSHLu, // ...left/right by unsigned
-
- // Vector shift by immediate:
- VSHLIMM, // ...left
- VSHRsIMM, // ...right (signed)
- VSHRuIMM, // ...right (unsigned)
-
- // Vector rounding shift by immediate:
- VRSHRsIMM, // ...right (signed)
- VRSHRuIMM, // ...right (unsigned)
- VRSHRNIMM, // ...right narrow
-
- // Vector saturating shift by immediate:
- VQSHLsIMM, // ...left (signed)
- VQSHLuIMM, // ...left (unsigned)
- VQSHLsuIMM, // ...left (signed to unsigned)
- VQSHRNsIMM, // ...right narrow (signed)
- VQSHRNuIMM, // ...right narrow (unsigned)
- VQSHRNsuIMM, // ...right narrow (signed to unsigned)
-
- // Vector saturating rounding shift by immediate:
- VQRSHRNsIMM, // ...right narrow (signed)
- VQRSHRNuIMM, // ...right narrow (unsigned)
- VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
-
- // Vector shift and insert:
- VSLIIMM, // ...left
- VSRIIMM, // ...right
-
- // Vector get lane (VMOV scalar to ARM core register)
- // (These are used for 8- and 16-bit element types only.)
- VGETLANEu, // zero-extend vector extract element
- VGETLANEs, // sign-extend vector extract element
-
- // Vector move immediate and move negated immediate:
- VMOVIMM,
- VMVNIMM,
-
- // Vector move f32 immediate:
- VMOVFPIMM,
-
- // Move H <-> R, clearing top 16 bits
- VMOVrh,
- VMOVhr,
-
- // Vector duplicate:
- VDUP,
- VDUPLANE,
-
- // Vector shuffles:
- VEXT, // extract
- VREV64, // reverse elements within 64-bit doublewords
- VREV32, // reverse elements within 32-bit words
- VREV16, // reverse elements within 16-bit halfwords
- VZIP, // zip (interleave)
- VUZP, // unzip (deinterleave)
- VTRN, // transpose
- VTBL1, // 1-register shuffle with mask
- VTBL2, // 2-register shuffle with mask
- VMOVN, // MVE vmovn
-
- // MVE Saturating truncates
- VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
- VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
-
- // MVE float <> half converts
- VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
- // lanes
- VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
-
- // MVE VIDUP instruction, taking a start value and increment.
- VIDUP,
-
- // Vector multiply long:
- VMULLs, // ...signed
- VMULLu, // ...unsigned
-
- VQDMULH, // MVE vqdmulh instruction
-
- // MVE reductions
- VADDVs, // sign- or zero-extend the elements of a vector to i32,
- VADDVu, // add them all together, and return an i32 of their sum
- VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
- VADDVpu,
- VADDLVs, // sign- or zero-extend elements to i64 and sum, returning
- VADDLVu, // the low and high 32-bit halves of the sum
- VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
- VADDLVAu, // provided as low and high halves
- VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
- VADDLVpu,
- VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
- VADDLVApu,
- VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply
- VMLAVu, // them and add the results together, returning an i32 of the sum
- VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
- VMLAVpu,
- VMLALVs, // Same as VMLAV but with i64, returning the low and
- VMLALVu, // high 32-bit halves of the sum
- VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
- VMLALVpu,
- VMLALVAs, // Same as VMLALV but also add an input accumulator
- VMLALVAu, // provided as low and high halves
- VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
- VMLALVApu,
- VMINVu, // Find minimum unsigned value of a vector and register
- VMINVs, // Find minimum signed value of a vector and register
- VMAXVu, // Find maximum unsigned value of a vector and register
- VMAXVs, // Find maximum signed value of a vector and register
-
- SMULWB, // Signed multiply word by half word, bottom
- SMULWT, // Signed multiply word by half word, top
- UMLAL, // 64bit Unsigned Accumulate Multiply
- SMLAL, // 64bit Signed Accumulate Multiply
- UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
- SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
- SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
- SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
- SMLALTT, // 64-bit signed accumulate multiply top, top 16
- SMLALD, // Signed multiply accumulate long dual
- SMLALDX, // Signed multiply accumulate long dual exchange
- SMLSLD, // Signed multiply subtract long dual
- SMLSLDX, // Signed multiply subtract long dual exchange
- SMMLAR, // Signed multiply long, round and add
- SMMLSR, // Signed multiply long, subtract and round
-
- // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
- // stands for.
- QADD8b,
- QSUB8b,
- QADD16b,
- QSUB16b,
- UQADD8b,
- UQSUB8b,
- UQADD16b,
- UQSUB16b,
-
- // Operands of the standard BUILD_VECTOR node are not legalized, which
- // is fine if BUILD_VECTORs are always lowered to shuffles or other
- // operations, but for ARM some BUILD_VECTORs are legal as-is and their
- // operands need to be legalized. Define an ARM-specific version of
- // BUILD_VECTOR for this purpose.
- BUILD_VECTOR,
-
- // Bit-field insert
- BFI,
-
- // Vector OR with immediate
- VORRIMM,
- // Vector AND with NOT of immediate
- VBICIMM,
-
- // Pseudo vector bitwise select
- VBSP,
-
- // Pseudo-instruction representing a memory copy using ldm/stm
- // instructions.
- MEMCPY,
-
- // Pseudo-instruction representing a memory copy using a tail predicated
- // loop
- MEMCPYLOOP,
- // Pseudo-instruction representing a memset using a tail predicated
- // loop
- MEMSETLOOP,
-
- // V8.1MMainline condition select
- CSINV, // Conditional select invert.
- CSNEG, // Conditional select negate.
- CSINC, // Conditional select increment.
-
- // Vector load N-element structure to all lanes:
- FIRST_MEMORY_OPCODE,
- VLD1DUP = FIRST_MEMORY_OPCODE,
- VLD2DUP,
- VLD3DUP,
- VLD4DUP,
-
- // NEON loads with post-increment base updates:
- VLD1_UPD,
- VLD2_UPD,
- VLD3_UPD,
- VLD4_UPD,
- VLD2LN_UPD,
- VLD3LN_UPD,
- VLD4LN_UPD,
- VLD1DUP_UPD,
- VLD2DUP_UPD,
- VLD3DUP_UPD,
- VLD4DUP_UPD,
- VLD1x2_UPD,
- VLD1x3_UPD,
- VLD1x4_UPD,
-
- // NEON stores with post-increment base updates:
- VST1_UPD,
- VST2_UPD,
- VST3_UPD,
- VST4_UPD,
- VST2LN_UPD,
- VST3LN_UPD,
- VST4LN_UPD,
- VST1x2_UPD,
- VST1x3_UPD,
- VST1x4_UPD,
-
- // Load/Store of dual registers
- LDRD,
- STRD,
- LAST_MEMORY_OPCODE = STRD,
- };
-
- } // end namespace ARMISD
-
namespace ARM {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPSCR.
@@ -427,8 +114,6 @@ class VectorType;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
- const char *getTargetNodeName(unsigned Opcode) const override;
-
bool isSelectSupported(SelectSupportKind Kind) const override {
// ARM does not support scalar condition selects on vectors.
return (Kind != ScalarCondVectorVal);
@@ -630,8 +315,7 @@ class VectorType;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize = false) const override;
- bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
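
The getTgtMemIntrinsic change above widens the call-site parameter from CallInst to CallBase; in LLVM's IR hierarchy both plain calls and invokes derive from CallBase, so the one signature now covers both. A minimal sketch of that relationship, using local stand-in types rather than the real LLVM classes:

    #include <iostream>

    // Local stand-ins mirroring the LLVM hierarchy: CallBase is the common
    // base of CallInst (a plain call) and InvokeInst (a call with an
    // exceptional edge).
    struct CallBase { virtual ~CallBase() = default; };
    struct CallInst : CallBase {};
    struct InvokeInst : CallBase {};

    // Accepting CallBase lets one hook inspect either call-site kind.
    bool isMemIntrinsicLike(const CallBase &I) { (void)I; return true; }

    int main() {
      CallInst Call;
      InvokeInst Invoke;
      std::cout << isMemIntrinsicLike(Call) << isMemIntrinsicLike(Invoke) << "\n";
    }
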
diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td
index f4326de..5d4e3ac 100644
--- a/llvm/lib/Target/ARM/ARMInstrCDE.td
+++ b/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -115,6 +115,7 @@ class CDE_CX1_Instr<string iname, CX_Params params>
!con(params.Iops1, (ins imm_13b:$imm), params.PredOp),
!strconcat(iname, params.PAsm, "\t$coproc, $Rd, $imm"),
params.Cstr> {
+ bits<0> p;
bits<13> imm;
bits<4> Rd;
@@ -131,6 +132,7 @@ class CDE_CX2_Instr<string iname, CX_Params params>
!con(params.Iops2, (ins imm_9b:$imm), params.PredOp),
!strconcat(iname, params.PAsm, "\t$coproc, $Rd, $Rn, $imm"),
params.Cstr> {
+ bits<0> p;
bits<9> imm;
bits<4> Rd;
bits<4> Rn;
@@ -149,6 +151,7 @@ class CDE_CX3_Instr<string iname, CX_Params params>
!con(params.Iops3, (ins imm_6b:$imm), params.PredOp),
!strconcat(iname, params.PAsm, "\t$coproc, $Rd, $Rn, $Rm, $imm"),
params.Cstr> {
+ bits<0> p;
bits<6> imm;
bits<4> Rd;
bits<4> Rn;
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 1ad2485..1cd1a9a 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1220,6 +1220,7 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
string opc, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
bits<0> s;
+ bits<0> p;
let OutOperandList = !con(oops, (outs s_cc_out:$s));
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${s}${p}", asm);
@@ -1244,6 +1245,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<0> p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", asm);
@@ -1343,6 +1345,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<0> p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", asm);
@@ -1361,6 +1364,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<0> p;
bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
let Inst{20} = s;
@@ -2221,6 +2225,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
InstrItinClass itin, string opc, string dt, string asm, string cstr,
list<dag> pattern>
: InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ bits<0> p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
@@ -2234,6 +2239,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
+ bits<0> p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", "\t", asm);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index c684de7..f370547 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -25,7 +25,8 @@
#include "llvm/MC/MCInst.h"
using namespace llvm;
-ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {}
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : ARMBaseInstrInfo(STI, RI) {}
/// Return the noop instruction to use for a noop.
MCInst ARMInstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.h b/llvm/lib/Target/ARM/ARMInstrInfo.h
index 178d7a2..9feaf14 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -35,7 +35,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ARMRegisterInfo &getRegisterInfo() const { return RI; }
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f7176a6..ddc8941 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -40,7 +40,7 @@ def SDT_ARMCMov : SDTypeProfile<1, 4, [
SDTCisVT<4, FlagsVT>, // in flags
]>;
-def SDT_ARMBrcond : SDTypeProfile<0, 2, [
+def SDT_ARMBrcond : SDTypeProfile<0, 3, [
SDTCisVT<0, OtherVT>, // target basic block
SDTCisVT<1, CondCodeVT>, // condition code
SDTCisVT<2, FlagsVT>, // in flags
@@ -133,9 +133,16 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
SDTCisInt<0>,
SDTCisInt<4>]>;
+// Signed multiply accumulate long dual
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
+
+// Signed multiply accumulate long dual exchange
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
+
+// Signed multiply subtract long dual
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
+
+// Signed multiply subtract long dual exchange
def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
def SDT_ARMCSel : SDTypeProfile<1, 4, [
@@ -146,8 +153,13 @@ def SDT_ARMCSel : SDTypeProfile<1, 4, [
SDTCisVT<3, FlagsVT> // in flags
]>;
+// Conditional select invert.
def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel>;
+
+// Conditional select negate.
def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>;
+
+// Conditional select increment.
def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>;
def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
@@ -155,110 +167,197 @@ def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
+// Signed multiply long, round and add
def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>;
+
+// Signed multiply long, subtract and round
def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>;
-// Node definitions.
+
+// Wrapper - A wrapper node for TargetConstantPool,
+// TargetExternalSymbol, and TargetGlobalAddress.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+
+// WrapperPIC - A wrapper node for TargetGlobalAddress in
+// PIC mode.
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+
+// WrapperJT - A wrapper node for TargetJumpTable
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Add pseudo op to model memcpy for struct byval.
def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
SDT_ARMStructByVal,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
SDNPMayStore, SDNPMayLoad]>;
+// Function call.
def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+
+// Function call that's predicable.
def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+
+// Function call with branch not branch-and-link.
def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Return with a flag operand.
def ARMretglue : SDNode<"ARMISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// CMSE Entry function return with a flag operand.
def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// Interrupt return with an LR-offset and a flag operand.
def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// ARM conditional move instructions.
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>;
+// Signed saturation
def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
+// Unsigned saturation
def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
+// Conditional branch.
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>;
+// Jumptable branch.
def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
[SDNPHasChain]>;
+
+// Jumptable branch (2 level - jumptable entry is a jump).
def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
[SDNPHasChain]>;
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
+// ARM compare instructions.
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp>;
+// ARM CMN instructions.
def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp>;
+// ARM compare that sets only Z flag.
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>;
+// Add with a PC operand and a PIC label.
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+// MVE long arithmetic shift right.
def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
+
+// MVE long shift right.
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
+
+// MVE long shift left.
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
+// Flag-setting logical shift right by one bit.
def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>;
+
+// Flag-setting arithmetic shift right by one bit.
def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>;
+
+// Shift right one bit with carry in.
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
+// Add with carry
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
+
+// Sub with carry
def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+
+// Flag-setting shift left.
def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>;
+
+// Add using carry
def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+
+// Sub using carry
def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+
+// SjLj exception handling setjmp.
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp,
[SDNPHasChain, SDNPSideEffect]>;
+
+// SjLj exception handling longjmp.
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
+
+// SjLj exception handling setup_dispatch.
def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
SDT_ARMEH_SJLJ_SetupDispatch,
[SDNPHasChain, SDNPSideEffect]>;
+// Memory barrier (MCR)
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
+
+// Preload
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+// Tail call return pseudo.
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+// Bit-field insert
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+// Pseudo-instruction representing a memory copy using ldm/stm instructions.
def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
SDNPMayStore, SDNPMayLoad]>;
+// Signed multiply word by half word, bottom
def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
+
+// Signed multiply word by half word, top
def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
+
+// 64-bit Unsigned Accumulate Multiply
+def ARMumlal : SDNode<"ARMISD::UMLAL", SDT_LongMac>;
+
+// 64-bit Signed Accumulate Multiply
+def ARMsmlal : SDNode<"ARMISD::SMLAL", SDT_LongMac>;
+
+// 64-bit Unsigned Accumulate Accumulate Multiply
+def ARMumaal : SDNode<"ARMISD::UMAAL", SDT_LongMac>;
+
+// 64-bit signed accumulate multiply bottom, bottom 16
def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
+
+// 64-bit signed accumulate multiply bottom, top 16
def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
+
+// 64-bit signed accumulate multiply top, bottom 16
def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
+
+// 64-bit signed accumulate multiply top, top 16
def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
+// Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
+// stands for.
def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>;
def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
@@ -270,13 +369,15 @@ def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>;
def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>;
def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+
+// Load/Store of dual registers
+def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
// Vector operations shared between NEON and MVE
+// Vector duplicate
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
@@ -287,40 +388,65 @@ def ARMvduplane : SDNode<"ARMISD::VDUPLANE",
def SDTARMVIDUP : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+// MVE VIDUP instruction, taking a start value and increment.
def ARMvidup : SDNode<"ARMISD::VIDUP", SDTARMVIDUP>;
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+
+// reverse elements within 64-bit doublewords
def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+
+// reverse elements within 32-bit words
def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+
+// reverse elements within 16-bit halfwords
def ARMvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>,
SDTCisVT<2, i32>]>;
+
+// Vector get lane (VMOV scalar to ARM core register)
+// (These are used for 8- and 16-bit element types only.)
def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+
+// Vector move immediate and move negated immediate
def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
+// Vector move f32 immediate
def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
+
+// Vector OR with immediate
def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+
+// Vector AND with NOT of immediate
def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,]>;
+
+// Vector shift by immediate
def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+
+// Vector shift by vector
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
+
+// Vector multiply long
def ARMvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def ARMvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
@@ -328,9 +454,13 @@ def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisInt<3>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>;
+// Vector compare.
def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>;
+
+// Vector compare to zero.
def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>;
+// Reinterpret the current contents of a vector register
// 'VECTOR_REG_CAST' is an operation that reinterprets the contents of a
// vector register as a different vector type, without changing the contents of
// the register. It differs from 'bitconvert' in that bitconvert reinterprets
@@ -5894,13 +6024,17 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn),
// The main point of having separate instructions is the extra unmodelled effects
// (compared to ordinary calls) like stack pointer change.
+// Windows' __chkstk call to do stack probing.
def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
+
let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+// Windows' divide by zero check
def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in
def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary,
[(win__dbzchk tGPR:$divisor)]>;
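
win__dbzchk models the divide-by-zero check emitted before integer division on Windows targets. A rough standalone sketch of the behaviour being modelled, with __builtin_trap as a local stand-in for the Windows zero-divide handling:

    // Rough sketch of the check WIN__DBZCHK represents: test the divisor
    // and trap before the division executes. __builtin_trap is only a
    // stand-in here for the real Windows divide-by-zero path.
    int checked_sdiv(int a, int b) {
      if (b == 0)
        __builtin_trap();
      return a / b;
    }
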
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e244134..0973187 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -384,6 +384,22 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
(VTI.Vec MQPR:$inactive)))>;
}
+def vadd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fadd node:$lhs, node:$rhs),
+ (int_arm_mve_vadd node:$lhs, node:$rhs)]>;
+def vsub : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fsub node:$lhs, node:$rhs),
+ (int_arm_mve_vsub node:$lhs, node:$rhs)]>;
+def vmul : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fmul node:$lhs, node:$rhs),
+ (int_arm_mve_vmul node:$lhs, node:$rhs)]>;
+def vminnm : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fminnum node:$lhs, node:$rhs),
+ (int_arm_mve_vminnm node:$lhs, node:$rhs)]>;
+def vmaxnm : PatFrags<(ops node:$lhs, node:$rhs),
+ [(fmaxnum node:$lhs, node:$rhs),
+ (int_arm_mve_vmaxnm node:$lhs, node:$rhs)]>;
+
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -683,8 +699,13 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp
SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
]>;
+
+// sign- or zero-extend the elements of a vector to i32,
+// add them all together, and return an i32 of their sum
def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;
def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>;
+
+// Same as VADDV[su] but with a v4i1 predicate mask
def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>;
def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>;
@@ -806,9 +827,19 @@ multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> {
defvar InstN = !cast<Instruction>(NAME # "no_acc");
defvar letter = VTI.SuffixLetter;
+
+ // sign- or zero-extend elements to i64 and sum, returning
+ // the low and high 32-bit halves of the sum
defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>;
+
+ // Same as VADDLV[su] but also add an input accumulator
+ // provided as low and high halves
defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>;
+
+ // Same as VADDLV[su] but with a v4i1 predicate mask
defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>;
+
+  // Same as VADDLVA[su] but with a v4i1 predicate mask
defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>;
let Predicates = [HasMVEInt] in {
@@ -943,9 +974,17 @@ multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
]>;
+
+// Find minimum unsigned value of a vector and register
def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
+
+// Find minimum signed value of a vector and register
def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
+
+// Find maximum unsigned value of a vector and register
def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
+
+// Find maximum signed value of a vector and register
def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;
defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
@@ -1146,16 +1185,31 @@ def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA
SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6>
]>;
+
+// sign- or zero-extend the elements of two vectors to i32, multiply
+// them and add the results together, returning an i32 of the sum
def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>;
def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
+
+// Same as VMLAV but with i64, returning the low and
+// high 32-bit halves of the sum
def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>;
def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
+
+// Same as VMLALV but also add an input accumulator
+// provided as low and high halves
def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
+
+// Same as VMLAV[su] with a v4i1 predicate mask
def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>;
def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>;
+
+// Same as VMLALV[su] with a v4i1 predicate mask
def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>;
def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>;
+
+// Same as VMLALVA[su] with a v4i1 predicate mask
def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>;
def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>;
@@ -1441,7 +1495,7 @@ class MVE_VMINMAXNM<string iname, string suffix, bits<2> sz, bit bit_21,
let validForTailPredication = 1;
}
-multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
+multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt> {
def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size, bit_4>;
let Predicates = [HasMVEFloat] in {
@@ -1449,10 +1503,10 @@ multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode
}
}
-defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>;
-defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>;
-defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>;
-defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>;
+defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, vmaxnm, int_arm_mve_max_predicated>;
+defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, vmaxnm, int_arm_mve_max_predicated>;
+defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, vminnm, int_arm_mve_min_predicated>;
+defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, vminnm, int_arm_mve_min_predicated>;
class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
@@ -1997,6 +2051,7 @@ class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
let validForTailPredication = 1;
}
+// MVE vqdmulh instruction
def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>;
multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
@@ -3566,7 +3621,7 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[
let validForTailPredication = 1;
}
-multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
+multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDPatternOperator Op,
Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
@@ -3577,7 +3632,7 @@ multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
}
multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
- : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>;
+ : MVE_VMULT_fp_m<"vmul", VTI, vmul, int_arm_mve_mul_predicated, IdentityVec>;
def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float
def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half
@@ -3674,6 +3729,10 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
if fms then {
def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
(Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (int_arm_mve_fma (fneg m1), m2, add)),
+ (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (int_arm_mve_fma m1, (fneg m2), add)),
+ (Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma (fneg m1), m2, add)),
add)),
@@ -3685,6 +3744,8 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
} else {
def : Pat<(VTI.Vec (fma m1, m2, add)),
(Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (int_arm_mve_fma m1, m2, add)),
+ (Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma m1, m2, add)),
add)),
@@ -3701,7 +3762,7 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
+ SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
let validForTailPredication = 1;
}
@@ -3713,9 +3774,9 @@ multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
}
multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
- : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>;
+ : MVE_VADDSUB_fp_m<"vadd", 0, VTI, vadd, int_arm_mve_add_predicated, IdentityVec>;
multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
- : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>;
+ : MVE_VADDSUB_fp_m<"vsub", 1, VTI, vsub, int_arm_mve_sub_predicated, IdentityVec>;
def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float
def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half
@@ -4093,7 +4154,7 @@ class MVE_VMAXMINNMA<string iname, string suffix, bits<2> size, bit bit_12,
}
multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int,
+ SDPatternOperator unpred_op, Intrinsic pred_int,
bit bit_12> {
def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size, bit_12>;
defvar Inst = !cast<Instruction>(NAME);
@@ -4113,13 +4174,13 @@ multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
}
multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12>
- : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>;
+ : MVE_VMAXMINNMA_m<"vmaxnma", VTI, vmaxnm, int_arm_mve_vmaxnma_predicated, bit_12>;
defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>;
defm MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>;
multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12>
- : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>;
+ : MVE_VMAXMINNMA_m<"vminnma", VTI, vminnm, int_arm_mve_vminnma_predicated, bit_12>;
defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>;
defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
@@ -4414,6 +4475,7 @@ let Predicates = [HasMVEInt] in {
defm PEOR : two_predops<xor, t2EORrr>;
}
+// Predicate cast for MVE i1 types
// Occasionally we need to cast between a i32 and a boolean vector, for
// example when moving between rGPR and VPR.P0 as part of predicate vector
// shuffles. We also sometimes need to cast between different predicate
@@ -4810,6 +4872,7 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>;
defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
+// MVE vmovn
def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
multiclass MVE_VMOVN_p<Instruction Inst, bit top,
@@ -4880,7 +4943,11 @@ defm : MVE_VQMOVN_p<MVE_VQMOVUNs16th, 1, 0, 1, MVE_v16i8, MVE_v8i16>;
def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVec<2>, SDTCisVT<3, i32>]>;
+
+// Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>;
+
+// Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>;
let Predicates = [HasMVEInt] in {
@@ -4938,7 +5005,11 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
def SDTARMVCVTL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisVT<2, i32>]>;
+
+// MVE vcvt f32 -> f16, truncating into either the bottom or top lanes
def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>;
+
+// MVE vcvt f16 -> f32, extending from either the bottom or top lanes
def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>;
multiclass MVE_VCVT_f2h_m<string iname, int half> {
@@ -5342,21 +5413,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16, subnuw, ARMvshruImm>;
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32, subnuw, ARMvshruImm>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
+ SDPatternOperator Op, Intrinsic PredInt,
+ SDPatternOperator IdentityVec> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
!cast<Instruction>(NAME), IdentityVec>;
}
let Predicates = [HasMVEFloat] in {
- defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
+ defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, vadd,
int_arm_mve_add_predicated, ARMimmMinusZeroF>;
- defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
+ defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, vadd,
int_arm_mve_add_predicated, ARMimmMinusZeroH>;
- defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
+ defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, vsub,
int_arm_mve_sub_predicated, ARMimmAllZerosV>;
- defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
+ defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, vsub,
int_arm_mve_sub_predicated, ARMimmAllZerosV>;
}
@@ -5539,7 +5611,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> {
let validForTailPredication = 1 in
def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
- defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
+ defm : MVE_TwoOpPatternDup<VTI, vmul, int_arm_mve_mul_predicated, (? ),
!cast<Instruction>(NAME), IdentityVec>;
}
@@ -5612,6 +5684,8 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
if scalar_addend then {
def : Pat<(VTI.Vec (fma v1, v2, vs)),
(VTI.Vec (Inst v1, v2, is))>;
+ def : Pat<(VTI.Vec (int_arm_mve_fma v1, v2, vs)),
+ (VTI.Vec (Inst v1, v2, is))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma v1, v2, vs)),
v1)),
@@ -5621,6 +5695,10 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
(VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (fma vs, v1, v2)),
(VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (int_arm_mve_fma v1, vs, v2)),
+ (VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (int_arm_mve_fma vs, v1, v2)),
+ (VTI.Vec (Inst v2, v1, is))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma vs, v2, v1)),
v1)),
@@ -6865,6 +6943,9 @@ class MVE_WLSTP<string asm, bits<2> size>
def SDT_MVEMEMCPYLOOPNODE
: SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+// Pseudo-instruction representing a memory copy using a tail predicated
+// loop
def MVE_MEMCPYLOOPNODE : SDNode<"ARMISD::MEMCPYLOOP", SDT_MVEMEMCPYLOOPNODE,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
@@ -6877,6 +6958,9 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CPSR] in {
def SDT_MVEMEMSETLOOPNODE
: SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, v16i8>, SDTCisVT<2, i32>]>;
+
+// Pseudo-instruction representing a memset using a tail predicated
+// loop
def MVE_MEMSETLOOPNODE : SDNode<"ARMISD::MEMSETLOOP", SDT_MVEMEMSETLOOPNODE,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
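
MEMCPYLOOP and MEMSETLOOP model plain counted byte loops that the MVE lowering turns into tail-predicated vector loops (a full vector per iteration, with the final partial iteration masked). A scalar sketch of the loop shape being represented:

    #include <cstddef>

    // Scalar shape of the loops MEMCPYLOOP/MEMSETLOOP stand for; the MVE
    // form processes a vector of bytes per iteration and predicates the
    // leftover tail instead of running a scalar epilogue.
    void memcpy_loop(char *dst, const char *src, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i)
        dst[i] = src[i];
    }

    void memset_loop(char *dst, char value, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i)
        dst[i] = value;
    }
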
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 37f0103..90e74a5 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -475,6 +475,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
//===----------------------------------------------------------------------===//
def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+
+// Vector test bits.
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;
// Types for vector shift by immediates. The "SHX" version is for long and
@@ -487,10 +489,12 @@ def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
+// Vector rounding shift by immediate
def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
+// Vector saturating shift by immediate
def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
@@ -498,13 +502,16 @@ def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
+// Vector saturating rounding shift by immediate
def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
+// Vector shift and insert
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
+// Pseudo vector bitwise select
def NEONvbsp : SDNode<"ARMISD::VBSP",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -518,15 +525,25 @@ def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
+
+// zip (interleave)
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+
+// unzip (deinterleave)
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+
+// transpose
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
+
+// 1-register shuffle with mask
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
+
+// 2-register shuffle with mask
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
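
VTBL1 and VTBL2 are byte-wise table lookups: each mask byte indexes into the table, and indices past the end of the table produce zero. A scalar sketch of the VTBL1 semantics:

    #include <cstdint>

    // Scalar semantics of VTBL1: a per-lane lookup into an 8-byte table,
    // with out-of-range mask bytes yielding zero. VTBL2 is the same idea
    // over a 16-byte (two-register) table.
    void vtbl1(const std::uint8_t tbl[8], const std::uint8_t mask[8],
               std::uint8_t out[8]) {
      for (int i = 0; i < 8; ++i)
        out[i] = mask[i] < 8 ? tbl[mask[i]] : 0;
    }
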
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index 0c5ea3e..0ee98e6 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -14,6 +14,7 @@
// Thumb specific DAG Nodes.
//
+// CMSE non-secure function call.
def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
@@ -483,6 +484,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
T1Special<{1,1,0,?}>, Sched<[WriteBr]> {
// A6.2.3 & A8.6.25
+ bits<0> p;
bits<4> Rm;
let Inst{6-3} = Rm;
let Inst{2-0} = 0b000;
@@ -491,6 +493,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBXNS : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bxns${p}\t$Rm", []>,
Requires<[IsThumb, Has8MSecExt]>,
T1Special<{1,1,0,?}>, Sched<[WriteBr]> {
+ bits<0> p;
bits<4> Rm;
let Inst{6-3} = Rm;
let Inst{2-0} = 0b100;
@@ -523,6 +526,7 @@ let isCall = 1,
"bl${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb]>, Sched<[WriteBrL]> {
+ bits<0> p;
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -536,6 +540,7 @@ let isCall = 1,
(outs), (ins pred:$p, thumb_blx_target:$func), IIC_Br,
"blx${p}\t$func", []>,
Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> {
+ bits<0> p;
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -550,6 +555,7 @@ let isCall = 1,
"blx${p}\t$func", []>,
Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
+ bits<0> p;
bits<4> func;
let Inst{6-3} = func;
let Inst{2-0} = 0b000;
@@ -565,6 +571,7 @@ let isCall = 1,
"blxns${p}\t$func", []>,
Requires<[IsThumb, Has8MSecExt]>,
T1Special<{1,1,1,?}>, Sched<[WriteBrL]> {
+ bits<0> p;
bits<4> func;
let Inst{6-3} = func;
let Inst{2-0} = 0b100;
@@ -824,6 +831,7 @@ let hasSideEffects = 0 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in
def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> {
+ bits<0> p;
bits<3> Rn;
bits<8> regs;
let Inst{10-8} = Rn;
@@ -854,6 +862,7 @@ def tSTMIA_UPD : Thumb1I<(outs tGPR:$wb),
AddrModeNone, 2, IIC_iStore_mu,
"stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
T1Encoding<{1,1,0,0,0,?}> {
+ bits<0> p;
bits<3> Rn;
bits<8> regs;
let Inst{10-8} = Rn;
@@ -872,6 +881,7 @@ def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
"pop${p}\t$regs", []>,
T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> {
+ bits<0> p;
bits<16> regs;
let Inst{8} = regs{15};
let Inst{7-0} = regs{7-0};
@@ -882,6 +892,7 @@ def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iStore_m,
"push${p}\t$regs", []>,
T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> {
+ bits<0> p;
bits<16> regs;
let Inst{8} = regs{14};
let Inst{7-0} = regs{7-0};
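
The ARMtsecall node at the top of this hunk models a CMSE non-secure function call. At the source level such calls are typically made through a function pointer carrying the ACLE cmse_nonsecure_call attribute; a sketch assuming an ARMv8-M target compiled with -mcmse (the typedef and parameter names are illustrative):

    // Sketch of the call kind tSECALL models, assuming -mcmse on an
    // ARMv8-M target; ns_handler_t is an illustrative name.
    typedef void (*ns_handler_t)(int) __attribute__((cmse_nonsecure_call));

    void call_nonsecure(ns_handler_t handler) {
      handler(1); // compiler emits the non-secure call sequence here
    }
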
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c229c8e..596196c 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2059,6 +2059,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
def IA :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2074,6 +2075,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
def IA_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2089,6 +2091,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
def DB :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2104,6 +2107,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin,
def DB_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2128,6 +2132,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin,
def IA :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2146,6 +2151,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin,
def IA_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2164,6 +2170,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin,
def DB :
T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -2182,6 +2189,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin,
def DB_UPD :
T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<0> p;
bits<4> Rn;
bits<16> regs;
@@ -4030,9 +4038,11 @@ def t2TBH : T2I<(outs), (ins (addrmode_tbh $Rn, $Rm):$addr), IIC_Br,
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
-def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
- "b", ".w\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> {
+def t2Bcc : Thumb2XI<(outs), (ins brtarget:$target, pred:$p),
+ AddrModeNone, 4, IIC_Br,
+ "b${p}.w\t$target", "",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>,
+ Sched<[WriteBr]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
@@ -5481,6 +5491,7 @@ class V8_1MI<dag oops, dag iops, AddrMode am, InstrItinClass itin, string asm,
def t2CLRM : V8_1MI<(outs),
(ins pred:$p, reglist_with_apsr:$regs, variable_ops),
AddrModeNone, NoItinerary, "clrm${p}", "$regs", "", []> {
+ bits<0> p;
bits<16> regs;
let Inst{31-16} = 0b1110100010011111;
@@ -5509,6 +5520,7 @@ def t2BF_LabelPseudo
def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p),
!strconcat("bf", "${p}"), "$b_label, $label"> {
+ bits<0> p;
bits<4> b_label;
bits<16> label;
@@ -5540,6 +5552,7 @@ def t2BFic : t2BF<(ins bflabel_u4:$b_label, bflabel_s12:$label,
def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
!strconcat("bfx", "${p}"), "$b_label, $Rn"> {
+ bits<0> p;
bits<4> b_label;
bits<4> Rn;
@@ -5551,6 +5564,7 @@ def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p),
!strconcat("bfl", "${p}"), "$b_label, $label"> {
+ bits<0> p;
bits<4> b_label;
bits<18> label;
@@ -5563,6 +5577,7 @@ def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p),
def t2BFLr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
!strconcat("bflx", "${p}"), "$b_label, $Rn"> {
+ bits<0> p;
bits<4> b_label;
bits<4> Rn;
@@ -5581,6 +5596,25 @@ class t2LOL<dag oops, dag iops, string asm, string ops>
let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
}
+// Setup for the iteration count of a WLS. See t2WhileLoopSetup.
+def arm_wlssetup
+ : SDNode<"ARMISD::WLSSETUP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>,
+ [SDNPSideEffect]>;
+
+// Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
+def arm_wls : SDNode<"ARMISD::WLS",
+ SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+ [SDNPHasChain]>;
+
+// Really a part of LE, performs the sub
+def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+
+// Low-overhead loops, Loop End
+def arm_le : SDNode<"ARMISD::LE",
+ SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+ [SDNPHasChain]>;
+
let isNotDuplicable = 1 in {
def t2WLS : t2LOL<(outs GPRlr:$LR),
(ins rGPR:$Rn, wlslabel_u11:$label),
@@ -5650,16 +5684,19 @@ def t2DoLoopStartTP :
// t2WhileLoopSetup to setup LR and t2WhileLoopStart to perform the branch. Not
// valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations
// into a t2WhileLoopStartLR (or expanded).
+let hasSideEffects = 1 in
def t2WhileLoopSetup :
- t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>;
+ t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br,
+ [(set i32:$lr, (arm_wlssetup i32:$tc))]>;
// A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and
// t2LoopEnd together represent a LE instruction. Ideally these are converted
// to a t2LoopEndDec which is lowered as a single instruction.
let hasSideEffects = 0 in
def t2LoopDec :
- t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
- 4, IIC_Br, []>, Sched<[WriteBr]>;
+ t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br,
+ [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>,
+ Sched<[WriteBr]>;
let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
// The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned
@@ -5667,8 +5704,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
def t2WhileLoopStart :
t2PseudoInst<(outs),
(ins GPRlr:$tc, brtarget:$target),
- 4, IIC_Br, []>,
- Sched<[WriteBr]>;
+ 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>,
+ Sched<[WriteBr]>;
// WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It
// is lowered in the ARMLowOverheadLoops pass providing the branches are within
@@ -5690,8 +5727,9 @@ def t2WhileLoopStartTP :
// t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair.
def t2LoopEnd :
- t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
- 8, IIC_Br, []>, Sched<[WriteBr]>;
+ t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
+ 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>,
+ Sched<[WriteBr]>;
// The combination of a t2LoopDec and t2LoopEnd, performing both the LR
// decrement and branch as a single instruction. Is lowered to a LE or
@@ -5803,6 +5841,7 @@ let Predicates = [IsThumb2, HasV8_1MMainline, HasPACBTI] in {
def t2PACG : V8_1MI<(outs rGPR:$Rd),
(ins pred:$p, GPRnopc:$Rn, GPRnopc:$Rm),
AddrModeNone, NoItinerary, "pacg${p}", "$Rd, $Rn, $Rm", "", []> {
+ bits<0> p;
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -5818,6 +5857,7 @@ let hasSideEffects = 1 in {
class PACBTIAut<dag iops, string asm, bit b>
: V8_1MI<(outs), iops,
AddrModeNone, NoItinerary, asm, "$Ra, $Rn, $Rm", "", []> {
+ bits<0> p;
bits<4> Ra;
bits<4> Rn;
bits<4> Rm;
@@ -5873,6 +5913,7 @@ def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> {
let hasSideEffects = 1;
}
+// Thumb function call followed by BTI instruction.
def ARMt2CallBTI : SDNode<"ARMISD::t2CALL_BTI", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
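
The arm_wls, arm_loop_dec, and arm_le nodes defined in this hunk encode the control flow of ARM's low-overhead loops: WLS branches past the loop when the trip count is zero, and LE decrements the count and branches back while it remains non-zero. The equivalent scalar control flow, sketched in C++:

    // Control flow modelled by t2WhileLoopStart / t2LoopDec / t2LoopEnd:
    // skip the loop on a zero trip count, otherwise decrement and branch
    // back until the count reaches zero.
    void clear_bytes(char *p, unsigned n) {
      if (n == 0)       // t2WhileLoopStart: WLS branches over the loop
        return;
      do {
        *p++ = 0;       // loop body
        n -= 1;         // t2LoopDec: the subtract half of LE
      } while (n != 0); // t2LoopEnd: the branch half of LE
    }
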
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index e2cc97b..5f5f703 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -28,11 +28,20 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
+// ARM VFP compare instruction, sets FPSCR.
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>;
+
+// ARM VFP compare against zero instruction, sets FPSCR.
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>;
+
+// ARM VFP signalling compare instruction, sets FPSCR.
def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>;
+
+// ARM VFP signalling compare against zero instruction, sets FPSCR.
def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;
+// ARM fmstat instruction.
def arm_fmstat : SDNode<"ARMISD::FMSTAT",
SDTypeProfile<1, 1, [
SDTCisVT<0, FlagsVT>, // out flags
@@ -40,12 +49,19 @@ def arm_fmstat : SDNode<"ARMISD::FMSTAT",
]>
>;
+// Two gprs to double.
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+// Double to two gprs.
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
+
+// Move gpr to single, used for f32 literal constructed in a gpr
def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>] >;
def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>] >;
+
+// Move H <-> R, clearing top 16 bits
def arm_vmovhr : SDNode<"ARMISD::VMOVhr", SDT_VMOVhr>;
def arm_vmovrh : SDNode<"ARMISD::VMOVrh", SDT_VMOVrh>;
@@ -798,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
-def : FP16Pat<(f16_to_fp GPR:$a),
+def : FP16Pat<(any_f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in
@@ -810,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,
def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
(COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
-def : FP16Pat<(fp_to_f16 SPR:$a),
+def : FP16Pat<(any_fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
@@ -875,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
+def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)),
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
@@ -901,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
(COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
+def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)),
(i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index cd4299b..db37b76 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2424,7 +2424,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(
Ops.pop_back();
const MCInstrDesc &MCID = TII->get(NewOpc);
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
MRI->constrainRegClass(FirstReg, TRC);
MRI->constrainRegClass(SecondReg, TRC);
@@ -3014,7 +3014,7 @@ static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
MachineFunction *MF = MI->getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
const MCInstrDesc &MCID = TII->get(MI->getOpcode());
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp);
MRI.constrainRegClass(NewBaseReg, TRC);
int OldOffset = MI->getOperand(BaseOp + 1).getImm();
@@ -3071,10 +3071,10 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
const MCInstrDesc &MCID = TII->get(NewOpcode);
// Constrain the def register class
- const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0);
MRI.constrainRegClass(NewReg, TRC);
// And do the same for the base operand
- TRC = TII->getRegClass(MCID, 2, TRI);
+ TRC = TII->getRegClass(MCID, 2);
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index f5d6597..c040904 100644
--- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -112,8 +112,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
break;
case MachineOperand::MO_ConstantPoolIndex:
- if (Subtarget->genExecuteOnly())
- llvm_unreachable("execute-only should not generate constant pools");
+ assert(!MF->getSubtarget<ARMSubtarget>().genExecuteOnly() &&
+ "execute-only should not generate constant pools");
MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
break;
case MachineOperand::MO_BlockAddress:
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 72eb3d0..b689760 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/ErrorHandling.h"
-#include <utility>
namespace llvm {
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index bf7c962f..501dce9 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -10,9 +10,14 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMSelectionDAGInfo.h"
#include "ARMTargetTransformInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/CommandLine.h"
+
+#define GET_SDNODE_DESC
+#include "ARMGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "arm-selectiondag-info"
@@ -30,9 +35,83 @@ static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
"Allow (may be subject to certain conditions) "
"conversion of memcpy to TP loop.")));
+ARMSelectionDAGInfo::ARMSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {}
+
+const char *ARMSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
+
+ // These nodes don't have corresponding entries in *.td files yet.
+ switch (static_cast<ARMISD::NodeType>(Opcode)) {
+ MAKE_CASE(ARMISD::DYN_ALLOC)
+ MAKE_CASE(ARMISD::MVESEXT)
+ MAKE_CASE(ARMISD::MVEZEXT)
+ MAKE_CASE(ARMISD::MVETRUNC)
+ MAKE_CASE(ARMISD::BUILD_VECTOR)
+ MAKE_CASE(ARMISD::VLD1DUP)
+ MAKE_CASE(ARMISD::VLD2DUP)
+ MAKE_CASE(ARMISD::VLD3DUP)
+ MAKE_CASE(ARMISD::VLD4DUP)
+ MAKE_CASE(ARMISD::VLD1_UPD)
+ MAKE_CASE(ARMISD::VLD2_UPD)
+ MAKE_CASE(ARMISD::VLD3_UPD)
+ MAKE_CASE(ARMISD::VLD4_UPD)
+ MAKE_CASE(ARMISD::VLD1x2_UPD)
+ MAKE_CASE(ARMISD::VLD1x3_UPD)
+ MAKE_CASE(ARMISD::VLD1x4_UPD)
+ MAKE_CASE(ARMISD::VLD2LN_UPD)
+ MAKE_CASE(ARMISD::VLD3LN_UPD)
+ MAKE_CASE(ARMISD::VLD4LN_UPD)
+ MAKE_CASE(ARMISD::VLD1DUP_UPD)
+ MAKE_CASE(ARMISD::VLD2DUP_UPD)
+ MAKE_CASE(ARMISD::VLD3DUP_UPD)
+ MAKE_CASE(ARMISD::VLD4DUP_UPD)
+ MAKE_CASE(ARMISD::VST1_UPD)
+ MAKE_CASE(ARMISD::VST3_UPD)
+ MAKE_CASE(ARMISD::VST1x2_UPD)
+ MAKE_CASE(ARMISD::VST1x3_UPD)
+ MAKE_CASE(ARMISD::VST1x4_UPD)
+ MAKE_CASE(ARMISD::VST2LN_UPD)
+ MAKE_CASE(ARMISD::VST3LN_UPD)
+ MAKE_CASE(ARMISD::VST4LN_UPD)
+ }
+#undef MAKE_CASE
+
+ return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
- return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
- Opcode <= ARMISD::LAST_MEMORY_OPCODE;
+ // These nodes don't have corresponding entries in *.td files yet.
+ if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
+ Opcode <= ARMISD::LAST_MEMORY_OPCODE)
+ return true;
+
+ return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void ARMSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ARMISD::WIN__DBZCHK:
+ // invalid number of results; expected 2, got 1
+ case ARMISD::WIN__CHKSTK:
+ // invalid number of results; expected 1, got 2
+ case ARMISD::COPY_STRUCT_BYVAL:
+ // invalid number of operands; expected 6, got 5
+ case ARMISD::MEMCPY:
+ // invalid number of operands; expected 5, got 4
+ case ARMISD::VMOVRRD:
+ // operand #0 must have type f64, but has type v1i64/v4f16/v8i8
+ case ARMISD::VMOVIMM:
+ // operand #0 must have type i32, but has type i16
+ return;
+ }
+
+ SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
}
// Emit, if possible, a specialized version of the given Libcall. Typically this
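As background for getTargetNodeName above: the MAKE_CASE macro relies on preprocessor stringification. A minimal, self-contained sketch of the same idiom, with placeholder opcodes rather than the real ARMISD values:

    #include <cstdio>

    enum NodeType : unsigned { DYN_ALLOC = 1000, MVESEXT };

    static const char *nodeName(unsigned Opcode) {
    #define MAKE_CASE(V)                                                       \
      case V:                                                                  \
        return #V; // #V expands to the spelling at the use site, e.g. "MVESEXT"
      switch (Opcode) {
        MAKE_CASE(DYN_ALLOC)
        MAKE_CASE(MVESEXT)
      }
    #undef MAKE_CASE
      return nullptr; // unknown opcode; the real code defers to the base class
    }

    int main() { std::printf("%s\n", nodeName(MVESEXT)); } // prints "MVESEXT"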
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index d68150e..38d2a65 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -17,7 +17,62 @@
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "ARMGenSDNodeInfo.inc"
+
namespace llvm {
+namespace ARMISD {
+
+enum NodeType : unsigned {
+ DYN_ALLOC = GENERATED_OPCODE_END, // Dynamic allocation on the stack.
+
+  MVESEXT, // Legalization aids for extending a vector into two/four vectors,
+  MVEZEXT, // or truncating two/four vectors into one. Eventually becomes a
+  MVETRUNC, // stack store/load sequence, if not optimized to anything else.
+
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
+ // Vector load N-element structure to all lanes:
+ FIRST_MEMORY_OPCODE,
+ VLD1DUP = FIRST_MEMORY_OPCODE,
+ VLD2DUP,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD1DUP_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+ VLD1x2_UPD,
+ VLD1x3_UPD,
+ VLD1x4_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST3_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD,
+ VST1x2_UPD,
+ VST1x3_UPD,
+ VST1x4_UPD,
+ LAST_MEMORY_OPCODE = VST1x4_UPD,
+};
+
+} // namespace ARMISD
namespace ARM_AM {
static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
@@ -35,10 +90,17 @@ namespace ARM_AM {
}
} // end namespace ARM_AM
-class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
+class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ ARMSelectionDAGInfo();
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
bool isTargetMemoryOpcode(unsigned Opcode) const override;
+ void verifyTargetNode(const SelectionDAG &DAG,
+ const SDNode *N) const override;
+
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
@@ -66,6 +128,6 @@ public:
RTLIB::Libcall LC) const;
};
-}
+} // namespace llvm
#endif
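The enum above continues numbering from GENERATED_OPCODE_END and keeps every memory node contiguous, so isTargetMemoryOpcode stays a two-comparison range test. A minimal sketch of that layout, with placeholder values standing in for the TableGen-emitted enum:

    enum NodeType : unsigned {
      GENERATED_OPCODE_END = 1000, // stand-in; normally emitted into *GenSDNodeInfo.inc
      DYN_ALLOC = GENERATED_OPCODE_END,

      FIRST_MEMORY_OPCODE,
      VLD1DUP = FIRST_MEMORY_OPCODE,
      VST1_UPD,
      LAST_MEMORY_OPCODE = VST1_UPD, // must be kept in sync with the last memory node
    };

    static bool isTargetMemoryOpcode(unsigned Opcode) {
      return Opcode >= FIRST_MEMORY_OPCODE && Opcode <= LAST_MEMORY_OPCODE;
    }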
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 58bc338..e6af32d 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -129,6 +129,76 @@ const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const {
return RegBankInfo.get();
}
+void ARMSubtarget::initLibcallLoweringInfo(LibcallLoweringInfo &Info) const {
+ const Triple &TT = getTargetTriple();
+ if (TT.isOSBinFormatMachO()) {
+ // Uses VFP for Thumb libfuncs if available.
+ if (isThumb() && hasVFP2Base() && hasARMOps() && !useSoftFloat()) {
+ // clang-format off
+ static const struct {
+ const RTLIB::Libcall Op;
+ const RTLIB::LibcallImpl Impl;
+ } LibraryCalls[] = {
+ // Single-precision floating-point arithmetic.
+ { RTLIB::ADD_F32, RTLIB::impl___addsf3vfp },
+ { RTLIB::SUB_F32, RTLIB::impl___subsf3vfp },
+ { RTLIB::MUL_F32, RTLIB::impl___mulsf3vfp },
+ { RTLIB::DIV_F32, RTLIB::impl___divsf3vfp },
+
+ // Double-precision floating-point arithmetic.
+ { RTLIB::ADD_F64, RTLIB::impl___adddf3vfp },
+ { RTLIB::SUB_F64, RTLIB::impl___subdf3vfp },
+ { RTLIB::MUL_F64, RTLIB::impl___muldf3vfp },
+ { RTLIB::DIV_F64, RTLIB::impl___divdf3vfp },
+
+ // Single-precision comparisons.
+ { RTLIB::OEQ_F32, RTLIB::impl___eqsf2vfp },
+ { RTLIB::UNE_F32, RTLIB::impl___nesf2vfp },
+ { RTLIB::OLT_F32, RTLIB::impl___ltsf2vfp },
+ { RTLIB::OLE_F32, RTLIB::impl___lesf2vfp },
+ { RTLIB::OGE_F32, RTLIB::impl___gesf2vfp },
+ { RTLIB::OGT_F32, RTLIB::impl___gtsf2vfp },
+ { RTLIB::UO_F32, RTLIB::impl___unordsf2vfp },
+
+ // Double-precision comparisons.
+ { RTLIB::OEQ_F64, RTLIB::impl___eqdf2vfp },
+ { RTLIB::UNE_F64, RTLIB::impl___nedf2vfp },
+ { RTLIB::OLT_F64, RTLIB::impl___ltdf2vfp },
+ { RTLIB::OLE_F64, RTLIB::impl___ledf2vfp },
+ { RTLIB::OGE_F64, RTLIB::impl___gedf2vfp },
+ { RTLIB::OGT_F64, RTLIB::impl___gtdf2vfp },
+ { RTLIB::UO_F64, RTLIB::impl___unorddf2vfp },
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ { RTLIB::FPTOSINT_F64_I32, RTLIB::impl___fixdfsivfp },
+ { RTLIB::FPTOUINT_F64_I32, RTLIB::impl___fixunsdfsivfp },
+ { RTLIB::FPTOSINT_F32_I32, RTLIB::impl___fixsfsivfp },
+ { RTLIB::FPTOUINT_F32_I32, RTLIB::impl___fixunssfsivfp },
+
+ // Conversions between floating types.
+ { RTLIB::FPROUND_F64_F32, RTLIB::impl___truncdfsf2vfp },
+ { RTLIB::FPEXT_F32_F64, RTLIB::impl___extendsfdf2vfp },
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ { RTLIB::SINTTOFP_I32_F64, RTLIB::impl___floatsidfvfp },
+ { RTLIB::UINTTOFP_I32_F64, RTLIB::impl___floatunssidfvfp },
+ { RTLIB::SINTTOFP_I32_F32, RTLIB::impl___floatsisfvfp },
+ { RTLIB::UINTTOFP_I32_F32, RTLIB::impl___floatunssisfvfp },
+ };
+ // clang-format on
+
+ for (const auto &LC : LibraryCalls)
+ Info.setLibcallImpl(LC.Op, LC.Impl);
+ }
+ }
+}
+
bool ARMSubtarget::isXRaySupported() const {
  // We don't currently support Thumb, but Windows requires Thumb.
return hasV6Ops() && hasARMOps() && !isTargetWindows();
@@ -309,26 +379,21 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
bool ARMSubtarget::isROPI() const {
+ // FIXME: This should ideally come from a function attribute, to work
+ // correctly with LTO.
return TM.getRelocationModel() == Reloc::ROPI ||
TM.getRelocationModel() == Reloc::ROPI_RWPI;
}
+
bool ARMSubtarget::isRWPI() const {
+ // FIXME: This should ideally come from a function attribute, to work
+ // correctly with LTO.
return TM.getRelocationModel() == Reloc::RWPI ||
TM.getRelocationModel() == Reloc::ROPI_RWPI;
}
bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
- if (!TM.shouldAssumeDSOLocal(GV))
- return true;
-
- // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
- // the section that is being relocated. This means we have to use o load even
- // for GVs that are known to be local to the dso.
- if (isTargetMachO() && TM.isPositionIndependent() &&
- (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
- return true;
-
- return false;
+ return TM.isGVIndirectSymbol(GV);
}
bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
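The libcall setup in initLibcallLoweringInfo above follows the usual table-plus-loop shape. A freestanding sketch of that pattern, with invented enum values in place of the RTLIB ones:

    #include <cstdio>

    enum Libcall { ADD_F32, DIV_F64 };
    enum LibcallImpl { impl___addsf3vfp, impl___divdf3vfp };

    struct LoweringInfo {
      void setLibcallImpl(Libcall Op, LibcallImpl Impl) {
        std::printf("libcall %d -> impl %d\n", Op, Impl); // stand-in for the real bookkeeping
      }
    };

    static void initLibcalls(LoweringInfo &Info) {
      static const struct {
        Libcall Op;
        LibcallImpl Impl;
      } LibraryCalls[] = {
          {ADD_F32, impl___addsf3vfp},
          {DIV_F64, impl___divdf3vfp},
      };
      for (const auto &LC : LibraryCalls)
        Info.setLibcallImpl(LC.Op, LC.Impl);
    }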
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 4a0883c..2a90f42 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -258,6 +258,7 @@ public:
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
+ void initLibcallLoweringInfo(LibcallLoweringInfo &Info) const override;
private:
ARMSelectionDAGInfo TSInfo;
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index c417c4c..1f74e9f 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -98,6 +98,20 @@ public:
return true;
}
+ bool isGVIndirectSymbol(const GlobalValue *GV) const {
+ if (!shouldAssumeDSOLocal(GV))
+ return true;
+
+    // 32-bit Mach-O has no relocation for a-b if a is undefined, even if b is
+    // in the section being relocated. This means we have to use a load even
+    // for GVs that are known to be local to the DSO.
+ if (getTargetTriple().isOSBinFormatMachO() && isPositionIndependent() &&
+ (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
+ return true;
+
+ return false;
+ }
+
yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
yaml::MachineFunctionInfo *
convertFuncInfoToYAML(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 24f58a6..88a7fb1 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -66,6 +66,11 @@ extern cl::opt<bool> EnableMaskedGatherScatters;
extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;
+static cl::opt<int> ArmForceUnrollThreshold(
+ "arm-force-unroll-threshold", cl::init(12), cl::Hidden,
+ cl::desc(
+ "Threshold for forced unrolling of small loops in Arm architecture"));
+
/// Convert a vector load intrinsic into a simple llvm load instruction.
/// This is beneficial when the underlying object being addressed comes
/// from a constant, since we get constant-folding for free.
@@ -1125,7 +1130,8 @@ bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) const {
}
bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment,
- unsigned /*AddressSpace*/) const {
+ unsigned /*AddressSpace*/,
+ TTI::MaskKind /*MaskKind*/) const {
if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
return false;
@@ -1631,20 +1637,36 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
InstructionCost
-ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
+ARMTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+ switch (MICA.getID()) {
+ case Intrinsic::masked_scatter:
+ case Intrinsic::masked_gather:
+ return getGatherScatterOpCost(MICA, CostKind);
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store:
+ return getMaskedMemoryOpCost(MICA, CostKind);
+ }
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
+}
+
+InstructionCost
+ARMTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
TTI::TargetCostKind CostKind) const {
+ unsigned IID = MICA.getID();
+ Type *Src = MICA.getDataType();
+ Align Alignment = MICA.getAlignment();
+ unsigned AddressSpace = MICA.getAddressSpace();
if (ST->hasMVEIntegerOps()) {
- if (Opcode == Instruction::Load &&
+ if (IID == Intrinsic::masked_load &&
isLegalMaskedLoad(Src, Alignment, AddressSpace))
return ST->getMVEVectorCostFactor(CostKind);
- if (Opcode == Instruction::Store &&
+ if (IID == Intrinsic::masked_store &&
isLegalMaskedStore(Src, Alignment, AddressSpace))
return ST->getMVEVectorCostFactor(CostKind);
}
if (!isa<FixedVectorType>(Src))
- return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
- CostKind);
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
// Scalar cost, which is currently very high due to the efficiency of the
// generated code.
return cast<FixedVectorType>(Src)->getNumElements() * 8;
@@ -1691,13 +1713,19 @@ InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
UseMaskForCond, UseMaskForGaps);
}
-InstructionCost ARMTTIImpl::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
+InstructionCost
+ARMTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const {
+
+ Type *DataTy = MICA.getDataType();
+ const Value *Ptr = MICA.getPointer();
+ bool VariableMask = MICA.getVariableMask();
+ Align Alignment = MICA.getAlignment();
+ const Instruction *I = MICA.getInst();
+
using namespace PatternMatch;
if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
+ return BaseT::getMemIntrinsicInstrCost(MICA, CostKind);
assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
auto *VTy = cast<FixedVectorType>(DataTy);
@@ -2728,7 +2756,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  // Force-unrolling small loops can be very useful because of the
  // branch-taken cost of the backedge.
- if (Cost < 12)
+ if (Cost < ArmForceUnrollThreshold)
UP.Force = true;
}
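Since arm-force-unroll-threshold is an ordinary cl::opt, it can be exercised directly from the command line. A hypothetical invocation (the flag name is the one defined above; the triple and input file are placeholders):

    opt -passes=loop-unroll -mtriple=thumbv7em-none-eabi \
        -arm-force-unroll-threshold=16 -S small-loop.ll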
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 0810c55..a232563 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -186,12 +186,16 @@ public:
bool isProfitableLSRChainElement(Instruction *I) const override;
- bool isLegalMaskedLoad(Type *DataTy, Align Alignment,
- unsigned AddressSpace) const override;
-
- bool isLegalMaskedStore(Type *DataTy, Align Alignment,
- unsigned AddressSpace) const override {
- return isLegalMaskedLoad(DataTy, Alignment, AddressSpace);
+ bool
+ isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace,
+ TTI::MaskKind MaskKind =
+ TTI::MaskKind::VariableOrConstantMask) const override;
+
+ bool
+ isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace,
+ TTI::MaskKind MaskKind =
+ TTI::MaskKind::VariableOrConstantMask) const override {
+ return isLegalMaskedLoad(DataTy, Alignment, AddressSpace, MaskKind);
}
bool forceScalarizeMaskedGather(VectorType *VTy,
@@ -275,20 +279,19 @@ public:
const Instruction *I = nullptr) const override;
InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) const override;
+ getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const override;
+
+ InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
- InstructionCost
- getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
- bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) const override;
+ InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
+ TTI::TargetCostKind CostKind) const;
InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index fa778ca..d99368e 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -6,8 +6,7 @@ tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler
- -ignore-non-decodable-operands)
+tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARMGenGlobalISel.inc -gen-global-isel)
tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
@@ -15,6 +14,7 @@ tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables)
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index b119146..44f50dd 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -149,7 +149,7 @@ private:
raw_ostream &CStream) const;
bool isVectorPredicable(const MCInst &MI) const;
- DecodeStatus AddThumbPredicate(MCInst&) const;
+ DecodeStatus checkThumbPredicate(MCInst &) const;
void UpdateThumbPredicate(DecodeStatus &S, MCInst &MI) const;
llvm::endianness InstructionEndianness;
@@ -618,6 +618,23 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
return S;
}
+// This overload is used to decode a `pred` operand that is not encoded in the
+// instruction. This is the case for almost all predicable Thumb instructions
+// (the exceptions are tBcc and t2Bcc). Some predicable Thumb instructions have
+// ARM equivalents that are not predicable (always executed); this function is
+// used to decode the `pred` operand of those ARM instructions, too.
+static DecodeStatus DecodePredicateOperand(MCInst &Inst,
+ const MCDisassembler *Decoder) {
+ const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+ unsigned CC = ARMCC::AL;
+ if (D->getSubtargetInfo().hasFeature(ARM::ModeThumb))
+ CC = D->ITBlock.getITCC();
+ MCRegister CondReg = CC == ARMCC::AL ? ARM::NoRegister : ARM::CPSR;
+ Inst.addOperand(MCOperand::createImm(CC));
+ Inst.addOperand(MCOperand::createReg(CondReg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -1050,6 +1067,40 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
break;
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2L_OPTION:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2_OPTION:
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_PRE:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2L_OPTION:
+ case ARM::t2STC2L_POST:
+ case ARM::t2STC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2_OPTION:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_OPTION:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_PRE:
+ DecodePredicateOperand(Inst, Decoder);
+ break;
default:
break;
}
@@ -1217,6 +1268,8 @@ static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn,
// the only available operand), but LLVM expects the instruction to have one
// operand, so we need to add the csync when decoding.
Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC));
+ if (Inst.getOpcode() == ARM::t2TSB)
+ DecodePredicateOperand(Inst, Decoder);
return MCDisassembler::Success;
}
@@ -1650,6 +1703,7 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
if(imm > 4) return MCDisassembler::Fail;
Inst.setOpcode(ARM::t2HINT);
Inst.addOperand(MCOperand::createImm(imm));
+ DecodePredicateOperand(Inst, Decoder);
}
return S;
@@ -1675,6 +1729,7 @@ DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
Inst.setOpcode(Opcode);
if (Opcode == ARM::t2HINT) {
Inst.addOperand(MCOperand::createImm(imm));
+ DecodePredicateOperand(Inst, Decoder);
}
return MCDisassembler::Success;
@@ -1702,6 +1757,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(imm));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -1906,6 +1962,7 @@ static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(imm32));
+ DecodePredicateOperand(Inst, Decoder);
return Status;
}
@@ -2231,6 +2288,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
break;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2502,6 +2560,7 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
break;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2605,6 +2664,7 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2654,6 +2714,7 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2690,6 +2751,7 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2743,6 +2805,7 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2789,6 +2852,7 @@ static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
break;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2861,6 +2925,7 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(8 << size));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2926,6 +2991,7 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -2951,6 +3017,7 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
}
Inst.addOperand(MCOperand::createImm(imm));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3113,6 +3180,7 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
}
Inst.addOperand(MCOperand::createImm(imm));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3197,6 +3265,7 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3341,6 +3410,7 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3449,6 +3519,7 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3488,6 +3559,7 @@ static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3678,6 +3750,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3690,6 +3763,7 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
Inst.addOperand(MCOperand::createReg(ARM::SP));
Inst.addOperand(MCOperand::createImm(imm));
+ DecodePredicateOperand(Inst, Decoder);
return MCDisassembler::Success;
}
@@ -3716,6 +3790,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
return MCDisassembler::Fail;
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -3840,6 +3915,7 @@ static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4305,6 +4381,7 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4370,6 +4447,7 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4437,6 +4515,7 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4500,6 +4579,7 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4570,6 +4650,7 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4633,6 +4714,7 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4714,6 +4796,7 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4786,6 +4869,7 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(index));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4904,6 +4988,7 @@ static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4939,6 +5024,7 @@ static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -4965,6 +5051,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address,
Val = -Val;
}
Inst.addOperand(MCOperand::createImm(Val));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -5062,6 +5149,7 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(64 - imm));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -5121,6 +5209,7 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(64 - imm));
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -5326,8 +5415,10 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (Inst.getOpcode() == ARM::MVE_LCTP)
+ if (Inst.getOpcode() == ARM::MVE_LCTP) {
+ DecodePredicateOperand(Inst, Decoder);
return S;
+ }
unsigned Imm = fieldFromInstruction(Insn, 11, 1) |
fieldFromInstruction(Insn, 1, 10) << 1;
@@ -5372,6 +5463,7 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
Check(S, MCDisassembler::SoftFail); // an SBZ bit is wrong: soft fail
Inst.setOpcode(ARM::MVE_LCTP);
+ DecodePredicateOperand(Inst, Decoder);
} else {
Inst.addOperand(MCOperand::createReg(ARM::LR));
if (!Check(S, DecoderGPRRegisterClass(Inst,
@@ -5762,6 +5854,7 @@ static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -5788,6 +5881,7 @@ static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -5833,6 +5927,8 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
+
if (fieldFromInstruction (Insn, 6, 3) != 4)
return MCDisassembler::SoftFail;
@@ -5868,6 +5964,7 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
Inst.addOperand(MCOperand::createImm(Saturate));
}
+ DecodePredicateOperand(Inst, Decoder);
return S;
}
@@ -5971,10 +6068,12 @@ static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
if (TypeT3) {
Inst.setOpcode(sign1 ? ARM::t2SUBspImm12 : ARM::t2ADDspImm12);
Inst.addOperand(MCOperand::createImm(Imm12)); // zext imm12
+ DecodePredicateOperand(Inst, Decoder);
} else {
Inst.setOpcode(sign1 ? ARM::t2SUBspImm : ARM::t2ADDspImm);
if (!Check(DS, DecodeT2SOImm(Inst, Imm12, Address, Decoder))) // imm12
return MCDisassembler::Fail;
+ DecodePredicateOperand(Inst, Decoder);
if (!Check(DS, DecodeCCOutOperand(Inst, S, Address, Decoder))) // cc_out
return MCDisassembler::Fail;
}
@@ -5994,7 +6093,7 @@ static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
// An optional predicate, '$p' in the assembly.
- DecodePredicateOperand(Inst, ARMCC::AL, Address, Decoder);
+ DecodePredicateOperand(Inst, Decoder);
// An immediate that represents a floating point registers list. '$regs' in
// the assembly.
Inst.addOperand(MCOperand::createImm(0)); // Arbitrary value, has no effect.
@@ -6115,28 +6214,17 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
return checkDecodedInstruction(MI, Size, Address, CS, Insn, Result);
}
- struct DecodeTable {
- const uint8_t *P;
- bool DecodePred;
- };
-
- const DecodeTable Tables[] = {
- {DecoderTableVFP32, false}, {DecoderTableVFPV832, false},
- {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true},
- {DecoderTableNEONDup32, false}, {DecoderTablev8NEON32, false},
- {DecoderTablev8Crypto32, false},
+ const uint8_t *Tables[] = {
+ DecoderTableVFP32, DecoderTableVFPV832,
+ DecoderTableNEONData32, DecoderTableNEONLoadStore32,
+ DecoderTableNEONDup32, DecoderTablev8NEON32,
+ DecoderTablev8Crypto32,
};
- for (auto Table : Tables) {
- Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI);
+ for (const uint8_t *Table : Tables) {
+ Result = decodeInstruction(Table, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (Table.DecodePred && MCII->get(MI.getOpcode()).isPredicable()) {
- MI.addOperand(MCOperand::createImm(ARMCC::AL));
- MI.addOperand(MCOperand::createReg(ARM::NoRegister));
- }
return Result;
}
}
@@ -6161,18 +6249,16 @@ bool ARMDisassembler::isVectorPredicable(const MCInst &MI) const {
return false;
}
-// Most Thumb instructions don't have explicit predicates in the
-// encoding, but rather get their predicates from IT context. We need
-// to fix up the predicate operands using this context information as a
-// post-pass.
+// Most Thumb instructions don't have explicit predicates in the encoding,
+// but rather get their predicates from the IT context. Here, we check that
+// the decoded instruction is allowed to carry the decoded predicate and
+// advance the IT/VPT block state.
MCDisassembler::DecodeStatus
-ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
+ARMDisassembler::checkThumbPredicate(MCInst &MI) const {
MCDisassembler::DecodeStatus S = Success;
const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
- // A few instructions actually have predicates encoded in them. Don't
- // try to overwrite it if we're seeing one of those.
switch (MI.getOpcode()) {
case ARM::tBcc:
case ARM::t2Bcc:
@@ -6218,34 +6304,10 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
(isVectorPredicable(MI) && ITBlock.instrInITBlock()))
S = SoftFail;
- // If we're in an IT block, base the predicate on that. Otherwise,
- // assume a predicate of AL.
- unsigned CC = ARMCC::AL;
- if (ITBlock.instrInITBlock()) {
- CC = ITBlock.getITCC();
+ if (ITBlock.instrInITBlock())
ITBlock.advanceITState();
- } else if (VPTBlock.instrInVPTBlock()) {
+ else if (VPTBlock.instrInVPTBlock())
VPTBlock.advanceVPTState();
- }
-
- const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
-
- MCInst::iterator CCI = MI.begin();
- for (unsigned i = 0; i < MCID.NumOperands; ++i, ++CCI) {
- if (MCID.operands()[i].isPredicate() || CCI == MI.end())
- break;
- }
-
- if (MCID.isPredicable()) {
- CCI = MI.insert(CCI, MCOperand::createImm(CC));
- ++CCI;
- if (CC == ARMCC::AL)
- MI.insert(CCI, MCOperand::createReg(ARM::NoRegister));
- else
- MI.insert(CCI, MCOperand::createReg(ARM::CPSR));
- } else if (CC != ARMCC::AL) {
- Check(S, SoftFail);
- }
return S;
}
@@ -6307,7 +6369,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 2;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return Result;
}
@@ -6315,7 +6377,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
STI);
if (Result) {
Size = 2;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return Result;
}
@@ -6329,7 +6391,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
Result = MCDisassembler::SoftFail;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
// If we find an IT instruction, we need to parse its condition
// code and mask operands so that we can apply them correctly
@@ -6367,7 +6429,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
if (isVPTOpcode(MI.getOpcode()) && VPTBlock.instrInVPTBlock())
Result = MCDisassembler::SoftFail;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
if (isVPTOpcode(MI.getOpcode())) {
unsigned Mask = MI.getOperand(0).getImm();
@@ -6381,7 +6443,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return Result;
}
@@ -6389,7 +6451,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return checkDecodedInstruction(MI, Size, Address, CS, Insn32, Result);
}
@@ -6428,7 +6490,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return Result;
}
}
@@ -6442,7 +6504,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return Result;
}
@@ -6475,7 +6537,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTable, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(Result, AddThumbPredicate(MI));
+ Check(Result, checkThumbPredicate(MI));
return Result;
}
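For readers unfamiliar with the IT-block bookkeeping these decoders consult: an IT instruction supplies condition codes for up to four following instructions, getITCC() returns the code for the next one, and advanceITState() consumes it. A simplified model of that interface (not LLVM's actual ITStatus class, just an illustration):

    #include <deque>

    struct ITBlockModel {
      // Condition codes queued by a decoded IT instruction; e.g. "ITTE NE"
      // queues {NE, NE, EQ}: the implicit then-slot, one T slot, one E slot.
      std::deque<unsigned> Conds;

      bool instrInITBlock() const { return !Conds.empty(); }
      unsigned getITCC() const { return Conds.front(); } // predicate for the next instr
      void advanceITState() { Conds.pop_front(); }       // consume one slot
    };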
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 01fe13b..cc21844 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -428,7 +428,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// signed 16bit range.
if ((Kind == ARM::fixup_arm_movw_lo16 || Kind == ARM::fixup_arm_movt_hi16 ||
Kind == ARM::fixup_t2_movw_lo16 || Kind == ARM::fixup_t2_movt_hi16) &&
- (Addend < minIntN(16) || Addend > maxIntN(16))) {
+ !IsResolved && (Addend < minIntN(16) || Addend > maxIntN(16))) {
Ctx.reportError(Fixup.getLoc(), "Relocation Not In Range");
return 0;
}
@@ -1238,7 +1238,7 @@ uint64_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
// Verify standard frame (lr/r7) was used.
if (CFARegister != ARM::R7) {
DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is "
- << CFARegister
+ << CFARegister.id()
<< " instead of r7\n");
return CU::UNWIND_ARM_MODE_DWARF;
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index ca366ed..060d1f8 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -54,7 +54,7 @@ void ARMWinCOFFStreamer::emitWindowsUnwindTables() {
}
void ARMWinCOFFStreamer::finishImpl() {
- emitFrames(nullptr);
+ emitFrames();
emitWindowsUnwindTables();
MCWinCOFFStreamer::finishImpl();
diff --git a/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 8e1bf1d..eb237b4 100644
--- a/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -283,7 +283,7 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID1 = TII->get(MulOpc);
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
- Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
+ Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0));
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b8c2fd..01f588f 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {}
+ : ARMBaseInstrInfo(STI, RI), RI(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb1InstrInfo::getNop() const {
@@ -116,7 +116,6 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
Register VReg,
MachineInstr::MIFlag Flags) const {
assert((RC == &ARM::tGPRRegClass ||
@@ -142,10 +141,12 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
}
}
-void Thumb1InstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
- int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg, MachineInstr::MIFlag Flags) const {
+void Thumb1InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, int FI,
+ const TargetRegisterClass *RC,
+ Register VReg,
+ MachineInstr::MIFlag Flags) const {
assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
(DestReg.isPhysical() && isARMLowRegister(DestReg))) &&
"Unknown regclass!");
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 68b326c..289a30a 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -35,7 +35,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const { return RI; }
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DestReg, Register SrcReg,
@@ -43,14 +43,13 @@ public:
bool RenamableSrc = false) const override;
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
- bool isKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
void loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
bool canCopyGluedNodeDuringSchedule(SDNode *N) const override;
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index f5653d4..efb92c9 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {}
+ : ARMBaseInstrInfo(STI, RI), RI(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb2InstrInfo::getNop() const {
@@ -165,7 +165,6 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
Register VReg,
MachineInstr::MIFlag Flags) const {
DebugLoc DL;
@@ -197,20 +196,22 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
}
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
- AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
return;
}
- ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI,
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC,
Register());
}
-void Thumb2InstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
- int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg, MachineInstr::MIFlag Flags) const {
+void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, int FI,
+ const TargetRegisterClass *RC,
+ Register VReg,
+ MachineInstr::MIFlag Flags) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -238,8 +239,8 @@ void Thumb2InstrInfo::loadRegFromStackSlot(
}
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
- AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
if (DestReg.isPhysical())
@@ -247,8 +248,7 @@ void Thumb2InstrInfo::loadRegFromStackSlot(
return;
}
- ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI,
- Register());
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, Register());
}
void Thumb2InstrInfo::expandLoadStackGuard(
@@ -564,7 +564,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
bool isSub = false;
MachineFunction &MF = *MI.getParent()->getParent();
- const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx, TRI);
+ const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx);
// Memory operands in inline assembly always use AddrModeT2_i12.
if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 1b0bf2d..1e11cb3 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -44,21 +44,20 @@ public:
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
- bool isKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
void loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const { return RI; }
MachineInstr *optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 12875c2..85e705d 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -66,8 +66,8 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
+ const Constant *C = ConstantInt::getSigned(
+ Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
@@ -85,8 +85,8 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
+ const Constant *C = ConstantInt::getSigned(
+ Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
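ConstantInt::getSigned, used in both helpers above, sign-extends its argument rather than treating it as an unsigned bit pattern. A minimal sketch of the difference, assuming nothing beyond a plain LLVMContext:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    void demo() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      // getSigned sign-extends its int64_t argument to the destination width,
      // so -4 becomes the i32 constant -4 (0xFFFFFFFC).
      Constant *Neg = ConstantInt::getSigned(I32, -4);
      (void)Neg;
      // By contrast, ConstantInt::get(Ty, uint64_t) treats its argument as
      // unsigned; a negative value implicitly converted to uint64_t does not
      // fit in 32 bits and can trip the value-fits assertion in asserts builds.
    }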