Diffstat (limited to 'llvm/lib/Target/ARM')
42 files changed, 1046 insertions, 973 deletions
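A recurring pattern in the ARMAsmPrinter.cpp hunks below is replacing queries on a cached per-function Subtarget with queries on the module-level Triple, which the TargetMachine can answer even when no function is being printed. A minimal sketch of that distinction, assuming an LLVM development install to compile and link against (the usesTarget1Reloc helper is hypothetical; Triple::isOSBinFormatELF() is the real query the emitXXStructor hunk uses):

    #include "llvm/TargetParser/Triple.h"
    #include <cassert>

    // Object-format facts are a property of the module's target triple, not
    // of any particular function's subtarget, so they can be answered before
    // runOnMachineFunction() has ever set a subtarget up.
    static bool usesTarget1Reloc(const llvm::Triple &TT) {
      return TT.isOSBinFormatELF(); // ELF structor entries use the target1 specifier
    }

    int main() {
      assert(usesTarget1Reloc(llvm::Triple("armv7-unknown-linux-gnueabihf")));
      assert(!usesTarget1Reloc(llvm::Triple("thumbv7-apple-ios")));
    }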
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 570aae9..1f71d81 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -38,6 +38,14 @@ include "ARMSchedule.td"
 //===----------------------------------------------------------------------===//
 
 include "ARMInstrInfo.td"
+
+def Thumb1OnlyMode : HwMode<[IsThumb1Only]>;
+def arm_ptr_rc : RegClassByHwMode<
+    [DefaultMode, Thumb1OnlyMode],
+    [GPR, tGPR]>;
+
+defm : RemapAllTargetPseudoPointerOperands<arm_ptr_rc>;
+
 def ARMInstrInfo : InstrInfo;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 36b9908..458a3f0 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -51,8 +51,8 @@ using namespace llvm;
 
 ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
                              std::unique_ptr<MCStreamer> Streamer)
-    : AsmPrinter(TM, std::move(Streamer), ID), Subtarget(nullptr), AFI(nullptr),
-      MCP(nullptr), InConstantPool(false), OptimizationGoals(-1) {}
+    : AsmPrinter(TM, std::move(Streamer), ID), AFI(nullptr), MCP(nullptr),
+      InConstantPool(false), OptimizationGoals(-1) {}
 
 const ARMBaseTargetMachine &ARMAsmPrinter::getTM() const {
   return static_cast<const ARMBaseTargetMachine &>(TM);
@@ -97,11 +97,41 @@ void ARMAsmPrinter::emitXXStructor(const DataLayout &DL, const Constant *CV) {
 
   const MCExpr *E = MCSymbolRefExpr::create(
       GetARMGVSymbol(GV, ARMII::MO_NO_FLAG),
-      (Subtarget->isTargetELF() ? ARM::S_TARGET1 : ARM::S_None), OutContext);
+      (TM.getTargetTriple().isOSBinFormatELF() ? ARM::S_TARGET1 : ARM::S_None),
+      OutContext);
 
   OutStreamer->emitValue(E, Size);
 }
 
+// An alias to a CMSE entry function should also emit a `__acle_se_` symbol.
+void ARMAsmPrinter::emitCMSEVeneerAlias(const GlobalAlias &GA) {
+  const Function *BaseFn = dyn_cast_or_null<Function>(GA.getAliaseeObject());
+  if (!BaseFn || !BaseFn->hasFnAttribute("cmse_nonsecure_entry"))
+    return;
+
+  MCSymbol *AliasSym = getSymbol(&GA);
+  MCSymbol *FnSym = getSymbol(BaseFn);
+
+  MCSymbol *SEAliasSym =
+      OutContext.getOrCreateSymbol(Twine("__acle_se_") + AliasSym->getName());
+  MCSymbol *SEBaseSym =
+      OutContext.getOrCreateSymbol(Twine("__acle_se_") + FnSym->getName());
+
+  // Mirror alias linkage/visibility onto the veneer-alias symbol.
+  emitLinkage(&GA, SEAliasSym);
+  OutStreamer->emitSymbolAttribute(SEAliasSym, MCSA_ELF_TypeFunction);
+  emitVisibility(SEAliasSym, GA.getVisibility());
+
+  // Emit "__acle_se_<alias> = __acle_se_<aliasee>".
+  const MCExpr *SEExpr = MCSymbolRefExpr::create(SEBaseSym, OutContext);
+  OutStreamer->emitAssignment(SEAliasSym, SEExpr);
+}
+
+void ARMAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
+  AsmPrinter::emitGlobalAlias(M, GA);
+  emitCMSEVeneerAlias(GA);
+}
+
 void ARMAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
   if (PromotedGlobals.count(GV))
     // The global was promoted into a constant pool. It should not be emitted.
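A rough illustration of what the new emitCMSEVeneerAlias() hook reacts to (the names are made up, and the resulting assembly is a sketch rather than verified compiler output): compiling something like the following with -mcmse for an ARMv8-M target now gives the alias its own __acle_se_-prefixed symbol, mirroring the one a CMSE entry function already receives.

    // Hypothetical translation unit; requires an ARMv8-M target and -mcmse.
    extern "C" __attribute__((cmse_nonsecure_entry)) int secure_read(int idx) {
      return idx + 1;
    }

    // An alias to the entry function. With this patch the asm printer also
    // emits
    //   __acle_se_secure_read_v1 = __acle_se_secure_read
    // so secure-gateway veneer generation treats the alias like its aliasee.
    extern "C" int secure_read_v1(int idx) __attribute__((alias("secure_read")));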
@@ -115,7 +145,6 @@ void ARMAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
 bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   AFI = MF.getInfo<ARMFunctionInfo>();
   MCP = MF.getConstantPool();
-  Subtarget = &MF.getSubtarget<ARMSubtarget>();
   SetupMachineFunction(MF);
   const Function &F = MF.getFunction();
@@ -153,7 +182,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals
     OptimizationGoals = 0;
 
-  if (Subtarget->isTargetCOFF()) {
+  if (TM.getTargetTriple().isOSBinFormatCOFF()) {
     bool Local = F.hasLocalLinkage();
     COFF::SymbolStorageClass Scl =
         Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL;
@@ -259,8 +288,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     break;
   }
   case MachineOperand::MO_ConstantPoolIndex:
-    if (Subtarget->genExecuteOnly())
-      llvm_unreachable("execute-only should not generate constant pools");
+    assert(!MF->getSubtarget<ARMSubtarget>().genExecuteOnly() &&
+           "execute-only should not generate constant pools");
     GetCPISymbol(MO.getIndex())->print(O, MAI);
     break;
   }
@@ -595,8 +624,7 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) {
   ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
 
   if (OptimizationGoals > 0 &&
-      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
-       Subtarget->isTargetMuslAEABI()))
+      (TT.isTargetAEABI() || TT.isTargetGNUAEABI() || TT.isTargetMuslAEABI()))
     ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
   OptimizationGoals = -1;
@@ -884,9 +912,10 @@ static uint8_t getModifierSpecifier(ARMCP::ARMCPModifier Modifier) {
 
 MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
                                         unsigned char TargetFlags) {
-  if (Subtarget->isTargetMachO()) {
+  const Triple &TT = TM.getTargetTriple();
+  if (TT.isOSBinFormatMachO()) {
     bool IsIndirect =
-        (TargetFlags & ARMII::MO_NONLAZY) && Subtarget->isGVIndirectSymbol(GV);
+        (TargetFlags & ARMII::MO_NONLAZY) && getTM().isGVIndirectSymbol(GV);
 
     if (!IsIndirect)
       return getSymbol(GV);
@@ -903,9 +932,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
       StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
                                                    !GV->hasInternalLinkage());
     return MCSym;
-  } else if (Subtarget->isTargetCOFF()) {
-    assert(Subtarget->isTargetWindows() &&
-           "Windows is the only supported COFF target");
+  } else if (TT.isOSBinFormatCOFF()) {
+    assert(TT.isOSWindows() && "Windows is the only supported COFF target");
 
     bool IsIndirect =
         (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB));
@@ -932,7 +960,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
     }
 
     return MCSym;
-  } else if (Subtarget->isTargetELF()) {
+  } else if (TT.isOSBinFormatELF()) {
    return getSymbolPreferLocal(*GV);
   }
   llvm_unreachable("unexpected target");
@@ -978,7 +1006,8 @@ void ARMAsmPrinter::emitMachineConstantPoolValue(
 
     // On Darwin, const-pool entries may get the "FOO$non_lazy_ptr" mangling, so
    // flag the global as MO_NONLAZY.
-    unsigned char TF = Subtarget->isTargetMachO() ? ARMII::MO_NONLAZY : 0;
+    unsigned char TF =
+        TM.getTargetTriple().isOSBinFormatMachO() ? ARMII::MO_NONLAZY : 0;
     MCSym = GetARMGVSymbol(GV, TF);
   } else if (ACPV->isMachineBasicBlock()) {
     const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB();
@@ -1047,7 +1076,8 @@ void ARMAsmPrinter::emitJumpTableAddrs(const MachineInstr *MI) {
     // .word (LBB1 - LJTI_0_0)
     const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
 
-    if (isPositionIndependent() || Subtarget->isROPI())
+    const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
+    if (isPositionIndependent() || STI.isROPI())
       Expr = MCBinaryExpr::createSub(Expr,
                                      MCSymbolRefExpr::create(JTISymbol, OutContext),
                                      OutContext);
@@ -1096,7 +1126,8 @@ void ARMAsmPrinter::emitJumpTableTBInst(const MachineInstr *MI,
   const MachineOperand &MO1 = MI->getOperand(1);
   unsigned JTI = MO1.getIndex();
 
-  if (Subtarget->isThumb1Only())
+  const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
+  if (STI.isThumb1Only())
     emitAlignment(Align(4));
 
   MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -1904,6 +1935,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   ARM_MC::verifyInstructionPredicates(MI->getOpcode(),
                                       getSubtargetInfo().getFeatureBits());
 
+  const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
   const DataLayout &DL = getDataLayout();
   MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
   ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -1915,8 +1947,8 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
 
   // Emit unwinding stuff for frame-related instructions
-  if (Subtarget->isTargetEHABICompatible() &&
-      MI->getFlag(MachineInstr::FrameSetup))
+  if (TM.getTargetTriple().isTargetEHABICompatible() &&
+      MI->getFlag(MachineInstr::FrameSetup))
     EmitUnwindingInstruction(MI);
 
   // Do any auto-generated pseudo lowerings.
@@ -1982,14 +2014,13 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
                                      // Add 's' bit operand (always reg0 for this)
                                      .addReg(0));
 
-    assert(Subtarget->hasV4TOps());
-    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
-                                     .addReg(MI->getOperand(0).getReg()));
+    assert(STI.hasV4TOps() && "Expected V4TOps for BX call");
+    EmitToStreamer(*OutStreamer,
+                   MCInstBuilder(ARM::BX).addReg(MI->getOperand(0).getReg()));
     return;
   }
   case ARM::tBX_CALL: {
-    if (Subtarget->hasV5TOps())
-      llvm_unreachable("Expected BLX to be selected for v5t+");
+    assert(!STI.hasV5TOps() && "Expected BLX to be selected for v5t+");
 
     // On ARM v4t, when doing a call from thumb mode, we need to ensure
     // that the saved lr has its LSB set correctly (the arch doesn't
@@ -2278,8 +2309,8 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
     return;
   }
   case ARM::CONSTPOOL_ENTRY: {
-    if (Subtarget->genExecuteOnly())
-      llvm_unreachable("execute-only should not generate constant pools");
+    assert(!STI.genExecuteOnly() &&
+           "execute-only should not generate constant pools");
 
     /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
    /// in the function. The first operand is the ID# for this instruction, the
@@ -2485,7 +2516,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case ARM::TRAP: {
     // Non-Darwin binutils don't yet support the "trap" mnemonic.
     // FIXME: Remove this special case when they do.
-    if (!Subtarget->isTargetMachO()) {
+    if (!TM.getTargetTriple().isOSBinFormatMachO()) {
       uint32_t Val = 0xe7ffdefeUL;
       OutStreamer->AddComment("trap");
       ATS.emitInst(Val);
@@ -2496,7 +2527,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case ARM::tTRAP: {
     // Non-Darwin binutils don't yet support the "trap" mnemonic.
     // FIXME: Remove this special case when they do.
-    if (!Subtarget->isTargetMachO()) {
+    if (!TM.getTargetTriple().isOSBinFormatMachO()) {
       uint16_t Val = 0xdefe;
       OutStreamer->AddComment("trap");
       ATS.emitInst(Val, 'n');
@@ -2656,9 +2687,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
                                        .addImm(ARMCC::AL)
                                        .addReg(0));
 
-    const MachineFunction &MF = *MI->getParent()->getParent();
-    const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-
     if (STI.isTargetDarwin() || STI.isTargetWindows()) {
       // These platforms always use the same frame register
       EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
@@ -2687,7 +2715,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
                                        .addReg(0));
     }
 
-    assert(Subtarget->hasV4TOps());
+    assert(STI.hasV4TOps());
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX)
                                      .addReg(ScratchReg)
                                      // Predicate.
@@ -2704,9 +2732,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
     Register SrcReg = MI->getOperand(0).getReg();
     Register ScratchReg = MI->getOperand(1).getReg();
 
-    const MachineFunction &MF = *MI->getParent()->getParent();
-    const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
                                      .addReg(ScratchReg)
                                      .addReg(SrcReg)
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 9e92b5a..12e20d7 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -9,13 +9,13 @@
 #ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
 #define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
 
-#include "ARMSubtarget.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
 class ARMFunctionInfo;
+class ARMBaseTargetMachine;
 class MCOperand;
 class MachineConstantPool;
 class MachineOperand;
@@ -33,10 +33,6 @@ public:
   static char ID;
 
 private:
-  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
-  /// make the right decision when printing asm code for different targets.
-  const ARMSubtarget *Subtarget;
-
   /// AFI - Keep a pointer to ARMFunctionInfo for the current
   /// MachineFunction.
   ARMFunctionInfo *AFI;
@@ -108,6 +104,7 @@ public:
   void emitEndOfAsmFile(Module &M) override;
   void emitXXStructor(const DataLayout &DL, const Constant *CV) override;
   void emitGlobalVariable(const GlobalVariable *GV) override;
+  void emitGlobalAlias(const Module &M, const GlobalAlias &GA) override;
 
   MCSymbol *GetCPISymbol(unsigned CPID) const override;
@@ -163,6 +160,8 @@ private:
 
   MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags);
 
+  void emitCMSEVeneerAlias(const GlobalAlias &GA);
+
 public:
   /// EmitMachineConstantPoolValue - Print a machine constantpool value to
   /// the .s file.
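The ARMBaseInstrInfo changes that follow thread the register info into the generated ARMGenInstrInfo base, turning getRegisterInfo() into a plain accessor instead of a pure-virtual hook, which in turn lets helpers such as AddDReg drop the TargetRegisterInfo parameter at every call site. A minimal sketch of the pattern with simplified stand-in types (none of these are the actual LLVM classes):

    #include <cassert>

    struct RegInfo { // stand-in for ARMBaseRegisterInfo
      unsigned getSubReg(unsigned Reg, unsigned Idx) const { return Reg * 8 + Idx; }
    };

    class InstrInfoBase { // stand-in for the TargetInstrInfo base
      const RegInfo &RI;
    public:
      explicit InstrInfoBase(const RegInfo &RI) : RI(RI) {}
      // Stored once at construction, so derived helpers no longer need a
      // register-info argument threaded through every call.
      const RegInfo &getRegisterInfo() const { return RI; }
    };

    class ARMishInstrInfo : public InstrInfoBase {
    public:
      using InstrInfoBase::InstrInfoBase;
      unsigned addDReg(unsigned Reg, unsigned SubIdx) const {
        return getRegisterInfo().getSubReg(Reg, SubIdx); // was: TRI->getSubReg
      }
    };

    int main() {
      RegInfo RI;
      ARMishInstrInfo TII(RI);
      assert(TII.addDReg(2, 1) == 17);
    }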
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 22769db..02887ce 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -107,8 +107,9 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
   { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
 };
 
-ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
-    : ARMGenInstrInfo(STI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI,
+                                   const ARMBaseRegisterInfo &TRI)
+    : ARMGenInstrInfo(STI, TRI, ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
       Subtarget(STI) {
   for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
     if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
@@ -928,15 +929,15 @@ ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
   return TargetInstrInfo::describeLoadedValue(MI, Reg);
 }
 
-const MachineInstrBuilder &
-ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
-                          unsigned SubIdx, unsigned State,
-                          const TargetRegisterInfo *TRI) const {
+const MachineInstrBuilder &ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB,
+                                                     unsigned Reg,
+                                                     unsigned SubIdx,
+                                                     unsigned State) const {
   if (!SubIdx)
     return MIB.addReg(Reg, State);
 
   if (Register::isPhysicalRegister(Reg))
-    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+    return MIB.addReg(getRegisterInfo().getSubReg(Reg, SubIdx), State);
   return MIB.addReg(Reg, State, SubIdx);
 }
 
@@ -944,18 +945,18 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I,
                                            Register SrcReg, bool isKill, int FI,
                                            const TargetRegisterClass *RC,
-                                           const TargetRegisterInfo *TRI,
                                            Register VReg,
                                            MachineInstr::MIFlag Flags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   Align Alignment = MFI.getObjectAlign(FI);
+  const ARMBaseRegisterInfo &TRI = getRegisterInfo();
 
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
       MFI.getObjectSize(FI), Alignment);
 
-  switch (TRI->getSpillSize(*RC)) {
+  switch (TRI.getSpillSize(*RC)) {
  case 2:
     if (ARM::HPRRegClass.hasSubClassEq(RC)) {
       BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
@@ -1010,8 +1011,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
       if (Subtarget.hasV5TEOps()) {
         MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
-        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
-        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
+        AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
         MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
       } else {
@@ -1021,8 +1022,8 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
             .addFrameIndex(FI)
             .addMemOperand(MMO)
             .add(predOps(ARMCC::AL));
-        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
-        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill));
+        AddDReg(MIB, SrcReg, ARM::gsub_1, 0);
       }
     } else
       llvm_unreachable("Unknown reg class!");
@@ -1072,9 +1073,9 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
               .addFrameIndex(FI)
               .add(predOps(ARMCC::AL))
               .addMemOperand(MMO);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
-      AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
+      AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
     }
   } else
     llvm_unreachable("Unknown reg class!");
@@ -1104,10 +1105,10 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
               .addFrameIndex(FI)
               .add(predOps(ARMCC::AL))
               .addMemOperand(MMO);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
-      AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
+      AddDReg(MIB, SrcReg, ARM::dsub_3, 0);
     }
   } else
     llvm_unreachable("Unknown reg class!");
@@ -1124,14 +1125,14 @@ void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
           .addFrameIndex(FI)
           .add(predOps(ARMCC::AL))
           .addMemOperand(MMO);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
-      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
-      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill));
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0);
+      AddDReg(MIB, SrcReg, ARM::dsub_7, 0);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1207,10 +1208,12 @@ Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
   return false;
 }
 
-void ARMBaseInstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
-    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
-    Register VReg, MachineInstr::MIFlag Flags) const {
+void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator I,
+                                            Register DestReg, int FI,
+                                            const TargetRegisterClass *RC,
+                                            Register VReg,
+                                            MachineInstr::MIFlag Flags) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -1220,7 +1223,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
       MFI.getObjectSize(FI), Alignment);
 
-  switch (TRI->getSpillSize(*RC)) {
+  const ARMBaseRegisterInfo &TRI = getRegisterInfo();
+  switch (TRI.getSpillSize(*RC)) {
   case 2:
     if (ARM::HPRRegClass.hasSubClassEq(RC)) {
       BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
@@ -1271,8 +1275,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
 
     if (Subtarget.hasV5TEOps()) {
       MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
-      AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
-      AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+      AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
       MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
     } else {
@@ -1282,8 +1286,8 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
-      MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead);
     }
 
     if (DestReg.isPhysical())
@@ -1329,9 +1333,9 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
       if (DestReg.isPhysical())
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     }
@@ -1358,10 +1362,10 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .add(predOps(ARMCC::AL))
           .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
       if (DestReg.isPhysical())
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     }
@@ -1379,14 +1383,14 @@ void ARMBaseInstrInfo::loadRegFromStackSlot(
           .addFrameIndex(FI)
           .add(predOps(ARMCC::AL))
           .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead);
       if (DestReg.isPhysical())
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     } else
@@ -1652,8 +1656,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
 void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I,
                                      Register DestReg, unsigned SubIdx,
-                                     const MachineInstr &Orig,
-                                     const TargetRegisterInfo &TRI) const {
+                                     const MachineInstr &Orig) const {
   unsigned Opcode = Orig.getOpcode();
   switch (Opcode) {
   default: {
@@ -6548,7 +6551,7 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
   static int constexpr LAST_IS_USE = MAX_STAGES;
   static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
   typedef std::bitset<MAX_STAGES + 2> IterNeed;
-  typedef std::map<unsigned, IterNeed> IterNeeds;
+  typedef std::map<Register, IterNeed> IterNeeds;
 
   void bumpCrossIterationPressure(RegPressureTracker &RPT,
                                   const IterNeeds &CIN);
@@ -6622,14 +6625,14 @@ void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
   for (const auto &N : CIN) {
     int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
     for (int I = 0; I < Cnt; ++I)
-      RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
+      RPT.increaseRegPressure(VirtRegOrUnit(N.first), LaneBitmask::getNone(),
                               LaneBitmask::getAll());
   }
   // Decrease pressure by the amounts in CrossIterationNeeds
   for (const auto &N : CIN) {
     int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
     for (int I = 0; I < Cnt; ++I)
-      RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
+      RPT.decreaseRegPressure(VirtRegOrUnit(N.first), LaneBitmask::getAll(),
                               LaneBitmask::getNone());
   }
 }
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2869e7f..04e2ab0 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -44,7 +44,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
 
 protected:
   // Can be only subclassed.
-  explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+  explicit ARMBaseInstrInfo(const ARMSubtarget &STI,
+                            const ARMBaseRegisterInfo &TRI);
 
   void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
                                 unsigned LoadImmOpc, unsigned LoadOpc) const;
@@ -125,7 +126,11 @@ public:
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
 
-  virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
+  const ARMBaseRegisterInfo &getRegisterInfo() const {
+    return static_cast<const ARMBaseRegisterInfo &>(
+        TargetInstrInfo::getRegisterInfo());
+  }
+
   const ARMSubtarget &getSubtarget() const { return Subtarget; }
 
   ScheduleHazardRecognizer *
@@ -211,14 +216,13 @@ public:
 
   void storeRegToStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
-      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   void loadRegFromStackSlot(
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
       int FrameIndex, const TargetRegisterClass *RC,
-      const TargetRegisterInfo *TRI, Register VReg,
+      Register VReg,
       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
@@ -227,16 +231,14 @@ public:
 
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      Register DestReg, unsigned SubIdx,
-                     const MachineInstr &Orig,
-                     const TargetRegisterInfo &TRI) const override;
+                     const MachineInstr &Orig) const override;
 
   MachineInstr &
   duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
             const MachineInstr &Orig) const override;
 
   const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
-                                     unsigned SubIdx, unsigned State,
-                                     const TargetRegisterInfo *TRI) const;
+                                     unsigned SubIdx, unsigned State) const;
 
   bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1,
                         const MachineRegisterInfo *MRI) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ce1cdb3..80921ce 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -708,7 +708,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const MCInstrDesc &MCID = TII.get(ADDriOpc);
   Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
-  MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
+  MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0));
 
   MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
     .addFrameIndex(FrameIdx).addImm(Offset);
@@ -881,8 +881,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg();
 
   const MCInstrDesc &MCID = MI.getDesc();
-  const TargetRegisterClass *RegClass =
-      TII.getRegClass(MCID, FIOperandNum, this);
+  const TargetRegisterClass *RegClass = TII.getRegClass(MCID, FIOperandNum);
 
   if (Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(FrameReg)))
     // Must be addrmode4/6.
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f43ec73..80494d9 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -51,7 +51,6 @@
 #include <cassert>
 #include <cstdint>
 #include <iterator>
-#include <utility>
 #include <vector>
 
 using namespace llvm;
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fffb6373..d69c09f 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -932,6 +932,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
     return true;
   case MachineOperand::MO_RegisterMask:
   case MachineOperand::MO_RegisterLiveOut:
+  case MachineOperand::MO_LaneMask:
     return false;
   case MachineOperand::MO_Metadata:
   case MachineOperand::MO_MCSymbol:
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 138981a..c19eed1 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2342,7 +2342,6 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const ARMBaseInstrInfo &TII =
       *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   unsigned Limit = (1 << 12) - 1;
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
@@ -2364,7 +2363,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
           break;
 
         const MCInstrDesc &MCID = MI.getDesc();
-        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI);
+        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i);
         if (RegClass && !RegClass->contains(ARM::SP))
           HasNonSPFrameIndex = true;
 
@@ -2537,7 +2536,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   (void)TRI; // Silence unused warning in non-assert builds.
-  Register FramePtr = RegInfo->getFrameRegister(MF);
+  Register FramePtr = STI.getFramePointerReg();
 
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
       STI.getPushPopSplitVariation(MF);
@@ -2784,7 +2783,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
       !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
     AFI->setHasStackFrame(true);
 
-    if (HasFP) {
+    // Save the FP if:
+    // 1. We currently need it (HasFP), OR
+    // 2. We might need it later due to stack realignment from aligned DPRCS2
+    //    saves (which will make hasFP() become true in emitPrologue).
+    if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
       SavedRegs.set(FramePtr);
       // If the frame pointer is required by the ABI, also spill LR so that we
       // emit a complete frame record.
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 847b7af..26b5e5a 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
       return;
     // Other cases are autogenerated.
     break;
-  case ARMISD::WLSSETUP: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::WLS: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
-                                         N->getOperand(1), N->getOperand(2),
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::LE: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    unsigned Opc = ARM::t2LoopEnd;
-    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::LDRD: {
     if (Subtarget->isThumb2())
       break; // TableGen handles isel in this case.
@@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     CurDAG->RemoveDeadNode(N);
     return;
   }
-  case ARMISD::LOOP_DEC: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    SDNode *Dec =
-        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
-                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
-    ReplaceUses(N, Dec);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::BRCOND: {
     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 92fae71..2d26c67 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -508,7 +508,7 @@ const ARMBaseTargetMachine &ARMTargetLowering::getTM() const {
 
 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
                                      const ARMSubtarget &STI)
-    : TargetLowering(TM_), Subtarget(&STI),
+    : TargetLowering(TM_, STI), Subtarget(&STI),
      RegInfo(Subtarget->getRegisterInfo()),
      Itins(Subtarget->getInstrItineraryData()) {
   const auto &TM = static_cast<const ARMBaseTargetMachine &>(TM_);
@@ -518,74 +518,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
 
   const Triple &TT = TM.getTargetTriple();
 
-  if (TT.isOSBinFormatMachO()) {
-    // Uses VFP for Thumb libfuncs if available.
-    if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
-        Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
-      // clang-format off
-      static const struct {
-        const RTLIB::Libcall Op;
-        const RTLIB::LibcallImpl Impl;
-      } LibraryCalls[] = {
-        // Single-precision floating-point arithmetic.
-        { RTLIB::ADD_F32, RTLIB::impl___addsf3vfp },
-        { RTLIB::SUB_F32, RTLIB::impl___subsf3vfp },
-        { RTLIB::MUL_F32, RTLIB::impl___mulsf3vfp },
-        { RTLIB::DIV_F32, RTLIB::impl___divsf3vfp },
-
-        // Double-precision floating-point arithmetic.
-        { RTLIB::ADD_F64, RTLIB::impl___adddf3vfp },
-        { RTLIB::SUB_F64, RTLIB::impl___subdf3vfp },
-        { RTLIB::MUL_F64, RTLIB::impl___muldf3vfp },
-        { RTLIB::DIV_F64, RTLIB::impl___divdf3vfp },
-
-        // Single-precision comparisons.
-        { RTLIB::OEQ_F32, RTLIB::impl___eqsf2vfp },
-        { RTLIB::UNE_F32, RTLIB::impl___nesf2vfp },
-        { RTLIB::OLT_F32, RTLIB::impl___ltsf2vfp },
-        { RTLIB::OLE_F32, RTLIB::impl___lesf2vfp },
-        { RTLIB::OGE_F32, RTLIB::impl___gesf2vfp },
-        { RTLIB::OGT_F32, RTLIB::impl___gtsf2vfp },
-        { RTLIB::UO_F32, RTLIB::impl___unordsf2vfp },
-
-        // Double-precision comparisons.
-        { RTLIB::OEQ_F64, RTLIB::impl___eqdf2vfp },
-        { RTLIB::UNE_F64, RTLIB::impl___nedf2vfp },
-        { RTLIB::OLT_F64, RTLIB::impl___ltdf2vfp },
-        { RTLIB::OLE_F64, RTLIB::impl___ledf2vfp },
-        { RTLIB::OGE_F64, RTLIB::impl___gedf2vfp },
-        { RTLIB::OGT_F64, RTLIB::impl___gtdf2vfp },
-        { RTLIB::UO_F64, RTLIB::impl___unorddf2vfp },
-
-        // Floating-point to integer conversions.
-        // i64 conversions are done via library routines even when generating VFP
-        // instructions, so use the same ones.
-        { RTLIB::FPTOSINT_F64_I32, RTLIB::impl___fixdfsivfp },
-        { RTLIB::FPTOUINT_F64_I32, RTLIB::impl___fixunsdfsivfp },
-        { RTLIB::FPTOSINT_F32_I32, RTLIB::impl___fixsfsivfp },
-        { RTLIB::FPTOUINT_F32_I32, RTLIB::impl___fixunssfsivfp },
-
-        // Conversions between floating types.
-        { RTLIB::FPROUND_F64_F32, RTLIB::impl___truncdfsf2vfp },
-        { RTLIB::FPEXT_F32_F64, RTLIB::impl___extendsfdf2vfp },
-
-        // Integer to floating-point conversions.
-        // i64 conversions are done via library routines even when generating VFP
-        // instructions, so use the same ones.
-        // FIXME: There appears to be some naming inconsistency in ARM libgcc:
-        // e.g., __floatunsidf vs. __floatunssidfvfp.
-        { RTLIB::SINTTOFP_I32_F64, RTLIB::impl___floatsidfvfp },
-        { RTLIB::UINTTOFP_I32_F64, RTLIB::impl___floatunssidfvfp },
-        { RTLIB::SINTTOFP_I32_F32, RTLIB::impl___floatsisfvfp },
-        { RTLIB::UINTTOFP_I32_F32, RTLIB::impl___floatunssisfvfp },
-      };
-      // clang-format on
-
-      for (const auto &LC : LibraryCalls)
-        setLibcallImpl(LC.Op, LC.Impl);
-    }
-  }
-
   if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
   else
@@ -614,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
       for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                       ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
         setOperationAction(Op, MVT::f64, Legal);
+
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
     }
   }
 
   if (Subtarget->hasFullFP16()) {
+    for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+                    ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+      setOperationAction(Op, MVT::f16, Legal);
+
     addRegisterClass(MVT::f16, &ARM::HPRRegClass);
     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
     setOperationAction(ISD::BITCAST, MVT::f16, Custom);
 
     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+    setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
+    setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
   }
 
   if (Subtarget->hasBF16()) {
@@ -933,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
     setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
     setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
-    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
-    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
   }
 
+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+
   if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
     setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@@ -947,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
       setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
     }
+  } else {
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
   }
 
   if (!Subtarget->hasFP16()) {
     setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+  } else {
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
   }
 
   computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -1291,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
-    setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
-    setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
+    setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
+    setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
   }
 
   // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
   if (!Subtarget->hasFP16()) {
     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
-    setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
-    setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
+    setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
+    setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
   }
 
   // Strict floating-point comparisons need custom lowering.
@@ -1316,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
 
   // FP-ARMv8 implements a lot of rounding-like FP operations.
-  if (Subtarget->hasFPARMv8Base()) {
-    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
-    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
-    setOperationAction(ISD::FROUND, MVT::f32, Legal);
-    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-    setOperationAction(ISD::FRINT, MVT::f32, Legal);
-    setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+  if (Subtarget->hasFPARMv8Base()) {
+    for (auto Op :
+         {ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
+          ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
+          ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
+          ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
+          ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
+      setOperationAction(Op, MVT::f32, Legal);
+
+      if (Subtarget->hasFP64())
+        setOperationAction(Op, MVT::f64, Legal);
+    }
+
     if (Subtarget->hasNEON()) {
       setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
       setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
       setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
       setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
     }
-
-    if (Subtarget->hasFP64()) {
-      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
-      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
-      setOperationAction(ISD::FROUND, MVT::f64, Legal);
-      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
-      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-      setOperationAction(ISD::FRINT, MVT::f64, Legal);
-      setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
-      setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
-      setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
-    }
   }
 
   // FP16 often need to be promoted to call lib functions
@@ -1498,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
         Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
 
   setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
+
+  IsStrictFPEnabled = true;
 }
 
 bool ARMTargetLowering::useSoftFloat() const {
@@ -1556,220 +1496,6 @@ ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   return std::make_pair(RRC, Cost);
 }
 
-const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define MAKE_CASE(V) \
-  case V: \
-    return #V;
-  switch ((ARMISD::NodeType)Opcode) {
-  case ARMISD::FIRST_NUMBER:
-    break;
-    MAKE_CASE(ARMISD::Wrapper)
-    MAKE_CASE(ARMISD::WrapperPIC)
-    MAKE_CASE(ARMISD::WrapperJT)
-    MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
-    MAKE_CASE(ARMISD::CALL)
-    MAKE_CASE(ARMISD::CALL_PRED)
-    MAKE_CASE(ARMISD::CALL_NOLINK)
-    MAKE_CASE(ARMISD::tSECALL)
-    MAKE_CASE(ARMISD::t2CALL_BTI)
-    MAKE_CASE(ARMISD::BRCOND)
-    MAKE_CASE(ARMISD::BR_JT)
-    MAKE_CASE(ARMISD::BR2_JT)
-    MAKE_CASE(ARMISD::RET_GLUE)
-    MAKE_CASE(ARMISD::SERET_GLUE)
-    MAKE_CASE(ARMISD::INTRET_GLUE)
-    MAKE_CASE(ARMISD::PIC_ADD)
-    MAKE_CASE(ARMISD::CMP)
-    MAKE_CASE(ARMISD::CMN)
-    MAKE_CASE(ARMISD::CMPZ)
-    MAKE_CASE(ARMISD::CMPFP)
-    MAKE_CASE(ARMISD::CMPFPE)
-    MAKE_CASE(ARMISD::CMPFPw0)
-    MAKE_CASE(ARMISD::CMPFPEw0)
-    MAKE_CASE(ARMISD::BCC_i64)
-    MAKE_CASE(ARMISD::FMSTAT)
-    MAKE_CASE(ARMISD::CMOV)
-    MAKE_CASE(ARMISD::SSAT)
-    MAKE_CASE(ARMISD::USAT)
-    MAKE_CASE(ARMISD::ASRL)
-    MAKE_CASE(ARMISD::LSRL)
-    MAKE_CASE(ARMISD::LSLL)
-    MAKE_CASE(ARMISD::LSLS)
-    MAKE_CASE(ARMISD::LSRS1)
-    MAKE_CASE(ARMISD::ASRS1)
-    MAKE_CASE(ARMISD::RRX)
-    MAKE_CASE(ARMISD::ADDC)
-    MAKE_CASE(ARMISD::ADDE)
-    MAKE_CASE(ARMISD::SUBC)
-    MAKE_CASE(ARMISD::SUBE)
-    MAKE_CASE(ARMISD::VMOVRRD)
-    MAKE_CASE(ARMISD::VMOVDRR)
-    MAKE_CASE(ARMISD::VMOVhr)
-    MAKE_CASE(ARMISD::VMOVrh)
-    MAKE_CASE(ARMISD::VMOVSR)
-    MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
-    MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
-    MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
-    MAKE_CASE(ARMISD::TC_RETURN)
-    MAKE_CASE(ARMISD::THREAD_POINTER)
-    MAKE_CASE(ARMISD::DYN_ALLOC)
-    MAKE_CASE(ARMISD::MEMBARRIER_MCR)
-    MAKE_CASE(ARMISD::PRELOAD)
-    MAKE_CASE(ARMISD::LDRD)
-    MAKE_CASE(ARMISD::STRD)
-    MAKE_CASE(ARMISD::WIN__CHKSTK)
-    MAKE_CASE(ARMISD::WIN__DBZCHK)
-    MAKE_CASE(ARMISD::PREDICATE_CAST)
-    MAKE_CASE(ARMISD::VECTOR_REG_CAST)
-    MAKE_CASE(ARMISD::MVESEXT)
-    MAKE_CASE(ARMISD::MVEZEXT)
-    MAKE_CASE(ARMISD::MVETRUNC)
-    MAKE_CASE(ARMISD::VCMP)
-    MAKE_CASE(ARMISD::VCMPZ)
-    MAKE_CASE(ARMISD::VTST)
-    MAKE_CASE(ARMISD::VSHLs)
-    MAKE_CASE(ARMISD::VSHLu)
-    MAKE_CASE(ARMISD::VSHLIMM)
-    MAKE_CASE(ARMISD::VSHRsIMM)
-    MAKE_CASE(ARMISD::VSHRuIMM)
-    MAKE_CASE(ARMISD::VRSHRsIMM)
-    MAKE_CASE(ARMISD::VRSHRuIMM)
-    MAKE_CASE(ARMISD::VRSHRNIMM)
-    MAKE_CASE(ARMISD::VQSHLsIMM)
-    MAKE_CASE(ARMISD::VQSHLuIMM)
-    MAKE_CASE(ARMISD::VQSHLsuIMM)
-    MAKE_CASE(ARMISD::VQSHRNsIMM)
-    MAKE_CASE(ARMISD::VQSHRNuIMM)
-    MAKE_CASE(ARMISD::VQSHRNsuIMM)
-    MAKE_CASE(ARMISD::VQRSHRNsIMM)
-    MAKE_CASE(ARMISD::VQRSHRNuIMM)
-    MAKE_CASE(ARMISD::VQRSHRNsuIMM)
-    MAKE_CASE(ARMISD::VSLIIMM)
-    MAKE_CASE(ARMISD::VSRIIMM)
-    MAKE_CASE(ARMISD::VGETLANEu)
-    MAKE_CASE(ARMISD::VGETLANEs)
-    MAKE_CASE(ARMISD::VMOVIMM)
-    MAKE_CASE(ARMISD::VMVNIMM)
-    MAKE_CASE(ARMISD::VMOVFPIMM)
-    MAKE_CASE(ARMISD::VDUP)
-    MAKE_CASE(ARMISD::VDUPLANE)
-    MAKE_CASE(ARMISD::VEXT)
-    MAKE_CASE(ARMISD::VREV64)
-    MAKE_CASE(ARMISD::VREV32)
-    MAKE_CASE(ARMISD::VREV16)
-    MAKE_CASE(ARMISD::VZIP)
-    MAKE_CASE(ARMISD::VUZP)
-    MAKE_CASE(ARMISD::VTRN)
-    MAKE_CASE(ARMISD::VTBL1)
-    MAKE_CASE(ARMISD::VTBL2)
-    MAKE_CASE(ARMISD::VMOVN)
-    MAKE_CASE(ARMISD::VQMOVNs)
-    MAKE_CASE(ARMISD::VQMOVNu)
-    MAKE_CASE(ARMISD::VCVTN)
-    MAKE_CASE(ARMISD::VCVTL)
-    MAKE_CASE(ARMISD::VIDUP)
-    MAKE_CASE(ARMISD::VMULLs)
-    MAKE_CASE(ARMISD::VMULLu)
-    MAKE_CASE(ARMISD::VQDMULH)
-    MAKE_CASE(ARMISD::VADDVs)
-    MAKE_CASE(ARMISD::VADDVu)
-    MAKE_CASE(ARMISD::VADDVps)
-    MAKE_CASE(ARMISD::VADDVpu)
-    MAKE_CASE(ARMISD::VADDLVs)
-    MAKE_CASE(ARMISD::VADDLVu)
-    MAKE_CASE(ARMISD::VADDLVAs)
-    MAKE_CASE(ARMISD::VADDLVAu)
-    MAKE_CASE(ARMISD::VADDLVps)
-    MAKE_CASE(ARMISD::VADDLVpu)
-    MAKE_CASE(ARMISD::VADDLVAps)
-    MAKE_CASE(ARMISD::VADDLVApu)
-    MAKE_CASE(ARMISD::VMLAVs)
-    MAKE_CASE(ARMISD::VMLAVu)
-    MAKE_CASE(ARMISD::VMLAVps)
-    MAKE_CASE(ARMISD::VMLAVpu)
-    MAKE_CASE(ARMISD::VMLALVs)
-    MAKE_CASE(ARMISD::VMLALVu)
-    MAKE_CASE(ARMISD::VMLALVps)
-    MAKE_CASE(ARMISD::VMLALVpu)
-    MAKE_CASE(ARMISD::VMLALVAs)
-    MAKE_CASE(ARMISD::VMLALVAu)
-    MAKE_CASE(ARMISD::VMLALVAps)
-    MAKE_CASE(ARMISD::VMLALVApu)
-    MAKE_CASE(ARMISD::VMINVu)
-    MAKE_CASE(ARMISD::VMINVs)
-    MAKE_CASE(ARMISD::VMAXVu)
-    MAKE_CASE(ARMISD::VMAXVs)
-    MAKE_CASE(ARMISD::UMAAL)
-    MAKE_CASE(ARMISD::UMLAL)
-    MAKE_CASE(ARMISD::SMLAL)
-    MAKE_CASE(ARMISD::SMLALBB)
-    MAKE_CASE(ARMISD::SMLALBT)
-    MAKE_CASE(ARMISD::SMLALTB)
-    MAKE_CASE(ARMISD::SMLALTT)
-    MAKE_CASE(ARMISD::SMULWB)
-    MAKE_CASE(ARMISD::SMULWT)
-    MAKE_CASE(ARMISD::SMLALD)
-    MAKE_CASE(ARMISD::SMLALDX)
-    MAKE_CASE(ARMISD::SMLSLD)
-    MAKE_CASE(ARMISD::SMLSLDX)
-    MAKE_CASE(ARMISD::SMMLAR)
-    MAKE_CASE(ARMISD::SMMLSR)
-    MAKE_CASE(ARMISD::QADD16b)
-    MAKE_CASE(ARMISD::QSUB16b)
-    MAKE_CASE(ARMISD::QADD8b)
-    MAKE_CASE(ARMISD::QSUB8b)
-    MAKE_CASE(ARMISD::UQADD16b)
-    MAKE_CASE(ARMISD::UQSUB16b)
-    MAKE_CASE(ARMISD::UQADD8b)
-    MAKE_CASE(ARMISD::UQSUB8b)
-    MAKE_CASE(ARMISD::BUILD_VECTOR)
-    MAKE_CASE(ARMISD::BFI)
-    MAKE_CASE(ARMISD::VORRIMM)
-    MAKE_CASE(ARMISD::VBICIMM)
-    MAKE_CASE(ARMISD::VBSP)
-    MAKE_CASE(ARMISD::MEMCPY)
-    MAKE_CASE(ARMISD::VLD1DUP)
-    MAKE_CASE(ARMISD::VLD2DUP)
-    MAKE_CASE(ARMISD::VLD3DUP)
-    MAKE_CASE(ARMISD::VLD4DUP)
-    MAKE_CASE(ARMISD::VLD1_UPD)
-    MAKE_CASE(ARMISD::VLD2_UPD)
-    MAKE_CASE(ARMISD::VLD3_UPD)
-    MAKE_CASE(ARMISD::VLD4_UPD)
-    MAKE_CASE(ARMISD::VLD1x2_UPD)
-    MAKE_CASE(ARMISD::VLD1x3_UPD)
-    MAKE_CASE(ARMISD::VLD1x4_UPD)
-    MAKE_CASE(ARMISD::VLD2LN_UPD)
-    MAKE_CASE(ARMISD::VLD3LN_UPD)
-    MAKE_CASE(ARMISD::VLD4LN_UPD)
-    MAKE_CASE(ARMISD::VLD1DUP_UPD)
-    MAKE_CASE(ARMISD::VLD2DUP_UPD)
-    MAKE_CASE(ARMISD::VLD3DUP_UPD)
-    MAKE_CASE(ARMISD::VLD4DUP_UPD)
-    MAKE_CASE(ARMISD::VST1_UPD)
-    MAKE_CASE(ARMISD::VST2_UPD)
-    MAKE_CASE(ARMISD::VST3_UPD)
-    MAKE_CASE(ARMISD::VST4_UPD)
-    MAKE_CASE(ARMISD::VST1x2_UPD)
-    MAKE_CASE(ARMISD::VST1x3_UPD)
-    MAKE_CASE(ARMISD::VST1x4_UPD)
-    MAKE_CASE(ARMISD::VST2LN_UPD)
-    MAKE_CASE(ARMISD::VST3LN_UPD)
-    MAKE_CASE(ARMISD::VST4LN_UPD)
-    MAKE_CASE(ARMISD::WLS)
-    MAKE_CASE(ARMISD::WLSSETUP)
-    MAKE_CASE(ARMISD::LE)
-    MAKE_CASE(ARMISD::LOOP_DEC)
-    MAKE_CASE(ARMISD::CSINV)
-    MAKE_CASE(ARMISD::CSNEG)
-    MAKE_CASE(ARMISD::CSINC)
-    MAKE_CASE(ARMISD::MEMCPYLOOP)
-    MAKE_CASE(ARMISD::MEMSETLOOP)
-#undef MAKE_CASE
-  }
-  return nullptr;
-}
-
 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
   if (!VT.isVector())
@@ -2510,9 +2236,44 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
       Chain = DAG.getStackArgumentTokenFactor(Chain);
-      if (ByValTempChain)
+      if (ByValTempChain) {
+        // In case of large byval copies, reusing the stack frame for tail
+        // calls can lead to overwriting incoming arguments on the stack.
+        // Force loading these stack arguments before the copy to avoid that.
+        SmallVector<SDValue, 8> IncomingLoad;
+        for (unsigned I = 0; I < OutVals.size(); ++I) {
+          if (Outs[I].Flags.isByVal())
+            continue;
+
+          SDValue OutVal = OutVals[I];
+          LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);
+          if (!OutLN)
+            continue;
+
+          FrameIndexSDNode *FIN =
+              dyn_cast_or_null<FrameIndexSDNode>(OutLN->getBasePtr());
+          if (!FIN)
+            continue;
+
+          if (!MFI.isFixedObjectIndex(FIN->getIndex()))
+            continue;
+
+          for (const CCValAssign &VA : ArgLocs) {
+            if (VA.isMemLoc())
+              IncomingLoad.push_back(OutVal.getValue(1));
+          }
+        }
+
+        // Update the chain to force loads for potentially clobbered argument
+        // loads to happen before the byval copy.
+        if (!IncomingLoad.empty()) {
+          IncomingLoad.push_back(Chain);
+          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);
+        }
+
         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
                             ByValTempChain);
+      }
 
       AfterFormalArgLoads = true;
     }
@@ -3309,8 +3070,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     return LowerInterruptReturn(RetOps, dl, DAG);
   }
 
-  ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
-                                                            ARMISD::RET_GLUE;
+  unsigned RetNode =
+      AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE;
   return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
 }
 
@@ -4826,7 +4587,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     }
   }
 
-  ARMISD::NodeType CompareType;
+  unsigned CompareType;
   switch (CondCode) {
   default:
     CompareType = ARMISD::CMP;
@@ -20904,7 +20665,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallInst &I,
+                                           const CallBase &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index bc2fec3..d0fb58c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -51,319 +51,6 @@ class TargetMachine;
 class TargetRegisterInfo;
 class VectorType;
 
-  namespace ARMISD {
-
-  // ARM Specific DAG Nodes
-  enum NodeType : unsigned {
-    // Start the numbering where the builtin ops and target ops leave off.
-    FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-    Wrapper,    // Wrapper - A wrapper node for TargetConstantPool,
-                // TargetExternalSymbol, and TargetGlobalAddress.
-    WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
-                // PIC mode.
-    WrapperJT,  // WrapperJT - A wrapper node for TargetJumpTable
-
-    // Add pseudo op to model memcpy for struct byval.
-    COPY_STRUCT_BYVAL,
-
-    CALL,        // Function call.
-    CALL_PRED,   // Function call that's predicable.
-    CALL_NOLINK, // Function call with branch not branch-and-link.
-    tSECALL,     // CMSE non-secure function call.
-    t2CALL_BTI,  // Thumb function call followed by BTI instruction.
-    BRCOND,      // Conditional branch.
-    BR_JT,       // Jumptable branch.
-    BR2_JT,      // Jumptable branch (2 level - jumptable entry is a jump).
-    RET_GLUE,    // Return with a flag operand.
-    SERET_GLUE,  // CMSE Entry function return with a flag operand.
-    INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.
-
-    PIC_ADD, // Add with a PC operand and a PIC label.
-
-    ASRL, // MVE long arithmetic shift right.
-    LSRL, // MVE long shift right.
-    LSLL, // MVE long shift left.
-
-    CMP, // ARM compare instructions.
-    CMN, // ARM CMN instructions.
-    CMPZ,     // ARM compare that sets only Z flag.
-    CMPFP,    // ARM VFP compare instruction, sets FPSCR.
-    CMPFPE,   // ARM VFP signalling compare instruction, sets FPSCR.
-    CMPFPw0,  // ARM VFP compare against zero instruction, sets FPSCR.
-    CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
-              // FPSCR.
-    FMSTAT,   // ARM fmstat instruction.
-
-    CMOV, // ARM conditional move instructions.
-
-    SSAT, // Signed saturation
-    USAT, // Unsigned saturation
-
-    BCC_i64,
-
-    LSLS,  // Flag-setting shift left.
-    LSRS1, // Flag-setting logical shift right by one bit.
-    ASRS1, // Flag-setting arithmetic shift right by one bit.
-    RRX,   // Shift right one bit with carry in.
-
-    ADDC, // Add with carry
-    ADDE, // Add using carry
-    SUBC, // Sub with carry
-    SUBE, // Sub using carry
-
-    VMOVRRD, // double to two gprs.
-    VMOVDRR, // Two gprs to double.
-    VMOVSR,  // move gpr to single, used for f32 literal constructed in a gpr
-
-    EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
-    EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
-    EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
-
-    TC_RETURN, // Tail call return pseudo.
-
-    THREAD_POINTER,
-
-    DYN_ALLOC, // Dynamic allocation on the stack.
-
-    MEMBARRIER_MCR, // Memory barrier (MCR)
-
-    PRELOAD, // Preload
-
-    WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
-    WIN__DBZCHK, // Windows' divide by zero check
-
-    WLS,      // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
-    WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
-    LOOP_DEC, // Really a part of LE, performs the sub
-    LE,       // Low-overhead loops, Loop End
-
-    PREDICATE_CAST,  // Predicate cast for MVE i1 types
-    VECTOR_REG_CAST, // Reinterpret the current contents of a vector register
-
-    MVESEXT,  // Legalization aids for extending a vector into two/four vectors.
-    MVEZEXT,  // or truncating two/four vectors into one. Eventually becomes
-    MVETRUNC, // stack store/load sequence, if not optimized to anything else.
-
-    VCMP,  // Vector compare.
-    VCMPZ, // Vector compare to zero.
-    VTST,  // Vector test bits.
-
-    // Vector shift by vector
-    VSHLs, // ...left/right by signed
-    VSHLu, // ...left/right by unsigned
-
-    // Vector shift by immediate:
-    VSHLIMM,  // ...left
-    VSHRsIMM, // ...right (signed)
-    VSHRuIMM, // ...right (unsigned)
-
-    // Vector rounding shift by immediate:
-    VRSHRsIMM, // ...right (signed)
-    VRSHRuIMM, // ...right (unsigned)
-    VRSHRNIMM, // ...right narrow
-
-    // Vector saturating shift by immediate:
-    VQSHLsIMM,   // ...left (signed)
-    VQSHLuIMM,   // ...left (unsigned)
-    VQSHLsuIMM,  // ...left (signed to unsigned)
-    VQSHRNsIMM,  // ...right narrow (signed)
-    VQSHRNuIMM,  // ...right narrow (unsigned)
-    VQSHRNsuIMM, // ...right narrow (signed to unsigned)
-
-    // Vector saturating rounding shift by immediate:
-    VQRSHRNsIMM,  // ...right narrow (signed)
-    VQRSHRNuIMM,  // ...right narrow (unsigned)
-    VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
-
-    // Vector shift and insert:
-    VSLIIMM, // ...left
-    VSRIIMM, // ...right
-
-    // Vector get lane (VMOV scalar to ARM core register)
-    // (These are used for 8- and 16-bit element types only.)
-    VGETLANEu, // zero-extend vector extract element
-    VGETLANEs, // sign-extend vector extract element
-
-    // Vector move immediate and move negated immediate:
-    VMOVIMM,
-    VMVNIMM,
-
-    // Vector move f32 immediate:
-    VMOVFPIMM,
-
-    // Move H <-> R, clearing top 16 bits
-    VMOVrh,
-    VMOVhr,
-
-    // Vector duplicate:
-    VDUP,
-    VDUPLANE,
-
-    // Vector shuffles:
-    VEXT,   // extract
-    VREV64, // reverse elements within 64-bit doublewords
-    VREV32, // reverse elements within 32-bit words
-    VREV16, // reverse elements within 16-bit halfwords
-    VZIP,   // zip (interleave)
-    VUZP,   // unzip (deinterleave)
-    VTRN,   // transpose
-    VTBL1,  // 1-register shuffle with mask
-    VTBL2,  // 2-register shuffle with mask
-    VMOVN,  // MVE vmovn
-
-    // MVE Saturating truncates
-    VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
-    VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
-
-    // MVE float <> half converts
-    VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
-           // lanes
-    VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
-
-    // MVE VIDUP instruction, taking a start value and increment.
-    VIDUP,
-
-    // Vector multiply long:
-    VMULLs, // ...signed
-    VMULLu, // ...unsigned
-
-    VQDMULH, // MVE vqdmulh instruction
-
-    // MVE reductions
-    VADDVs,    // sign- or zero-extend the elements of a vector to i32,
-    VADDVu,    // add them all together, and return an i32 of their sum
-    VADDVps,   // Same as VADDV[su] but with a v4i1 predicate mask
-    VADDVpu,
-    VADDLVs,   // sign- or zero-extend elements to i64 and sum, returning
-    VADDLVu,   // the low and high 32-bit halves of the sum
-    VADDLVAs,  // Same as VADDLV[su] but also add an input accumulator
-    VADDLVAu,  // provided as low and high halves
-    VADDLVps,  // Same as VADDLV[su] but with a v4i1 predicate mask
-    VADDLVpu,
-    VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
-    VADDLVApu,
-    VMLAVs,    // sign- or zero-extend the elements of two vectors to i32, multiply
-    VMLAVu,    // them and add the results together, returning an i32 of the sum
-    VMLAVps,   // Same as VMLAV[su] with a v4i1 predicate mask
-    VMLAVpu,
-    VMLALVs,   // Same as VMLAV but with i64, returning the low and
-    VMLALVu,   // high 32-bit halves of the sum
-    VMLALVps,  // Same as VMLALV[su] with a v4i1 predicate mask
-    VMLALVpu,
-    VMLALVAs,  // Same as VMLALV but also add an input accumulator
-    VMLALVAu,  // provided as low and high halves
-    VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
-    VMLALVApu,
-    VMINVu,    // Find minimum unsigned value of a vector and register
-    VMINVs,    // Find minimum signed value of a vector and register
-    VMAXVu,    // Find maximum unsigned value of a vector and register
-    VMAXVs,    // Find maximum signed value of a vector and register
-
-    SMULWB,  // Signed multiply word by half word, bottom
-    SMULWT,  // Signed multiply word by half word, top
-    UMLAL,   // 64bit Unsigned Accumulate Multiply
-    SMLAL,   // 64bit Signed Accumulate Multiply
-    UMAAL,   // 64-bit Unsigned Accumulate Accumulate Multiply
-    SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
-    SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
-    SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
-    SMLALTT, // 64-bit signed accumulate multiply top, top 16
-    SMLALD,  // Signed multiply accumulate long dual
-    SMLALDX, // Signed multiply accumulate long dual exchange
-    SMLSLD,  // Signed multiply subtract long dual
-    SMLSLDX, // Signed multiply subtract long dual exchange
-    SMMLAR,  // Signed multiply long, round and add
-    SMMLSR,  // Signed multiply long, subtract and round
-
-    // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
-    // stands for.
-    QADD8b,
-    QSUB8b,
-    QADD16b,
-    QSUB16b,
-    UQADD8b,
-    UQSUB8b,
-    UQADD16b,
-    UQSUB16b,
-
-    // Operands of the standard BUILD_VECTOR node are not legalized, which
-    // is fine if BUILD_VECTORs are always lowered to shuffles or other
-    // operations, but for ARM some BUILD_VECTORs are legal as-is and their
-    // operands need to be legalized. Define an ARM-specific version of
-    // BUILD_VECTOR for this purpose.
-    BUILD_VECTOR,
-
-    // Bit-field insert
-    BFI,
-
-    // Vector OR with immediate
-    VORRIMM,
-    // Vector AND with NOT of immediate
-    VBICIMM,
-
-    // Pseudo vector bitwise select
-    VBSP,
-
-    // Pseudo-instruction representing a memory copy using ldm/stm
-    // instructions.
-    MEMCPY,
-
-    // Pseudo-instruction representing a memory copy using a tail predicated
-    // loop
-    MEMCPYLOOP,
-    // Pseudo-instruction representing a memset using a tail predicated
-    // loop
-    MEMSETLOOP,
-
-    // V8.1MMainline condition select
-    CSINV, // Conditional select invert.
-    CSNEG, // Conditional select negate.
-    CSINC, // Conditional select increment.
-
-    // Vector load N-element structure to all lanes:
-    FIRST_MEMORY_OPCODE,
-    VLD1DUP = FIRST_MEMORY_OPCODE,
-    VLD2DUP,
-    VLD3DUP,
-    VLD4DUP,
-
-    // NEON loads with post-increment base updates:
-    VLD1_UPD,
-    VLD2_UPD,
-    VLD3_UPD,
-    VLD4_UPD,
-    VLD2LN_UPD,
-    VLD3LN_UPD,
-    VLD4LN_UPD,
-    VLD1DUP_UPD,
-    VLD2DUP_UPD,
-    VLD3DUP_UPD,
-    VLD4DUP_UPD,
-    VLD1x2_UPD,
-    VLD1x3_UPD,
-    VLD1x4_UPD,
-
-    // NEON stores with post-increment base updates:
-    VST1_UPD,
-    VST2_UPD,
-    VST3_UPD,
-    VST4_UPD,
-    VST2LN_UPD,
-    VST3LN_UPD,
-    VST4LN_UPD,
-    VST1x2_UPD,
-    VST1x3_UPD,
-    VST1x4_UPD,
-
-    // Load/Store of dual registers
-    LDRD,
-    STRD,
-    LAST_MEMORY_OPCODE = STRD,
-  };
-
-  } // end namespace ARMISD
-
   namespace ARM {
   /// Possible values of current rounding mode, which is specified in bits
   /// 23:22 of FPSCR.
@@ -427,8 +114,6 @@ class VectorType;
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
-  const char *getTargetNodeName(unsigned Opcode) const override;
-
   bool isSelectSupported(SelectSupportKind Kind) const override {
     // ARM does not support scalar condition selects on vectors.
return (Kind != ScalarCondVectorVal); @@ -630,8 +315,7 @@ class VectorType; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize = false) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td index f4326de..5d4e3ac 100644 --- a/llvm/lib/Target/ARM/ARMInstrCDE.td +++ b/llvm/lib/Target/ARM/ARMInstrCDE.td @@ -115,6 +115,7 @@ class CDE_CX1_Instr<string iname, CX_Params params> !con(params.Iops1, (ins imm_13b:$imm), params.PredOp), !strconcat(iname, params.PAsm, "\t$coproc, $Rd, $imm"), params.Cstr> { + bits<0> p; bits<13> imm; bits<4> Rd; @@ -131,6 +132,7 @@ class CDE_CX2_Instr<string iname, CX_Params params> !con(params.Iops2, (ins imm_9b:$imm), params.PredOp), !strconcat(iname, params.PAsm, "\t$coproc, $Rd, $Rn, $imm"), params.Cstr> { + bits<0> p; bits<9> imm; bits<4> Rd; bits<4> Rn; @@ -149,6 +151,7 @@ class CDE_CX3_Instr<string iname, CX_Params params> !con(params.Iops3, (ins imm_6b:$imm), params.PredOp), !strconcat(iname, params.PAsm, "\t$coproc, $Rd, $Rn, $Rm, $imm"), params.Cstr> { + bits<0> p; bits<6> imm; bits<4> Rd; bits<4> Rn; diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index 1ad2485..1cd1a9a 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -1220,6 +1220,7 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { bits<0> s; + bits<0> p; let OutOperandList = !con(oops, (outs s_cc_out:$s)); let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${s}${p}", asm); @@ -1244,6 +1245,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { + bits<0> p; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", asm); @@ -1343,6 +1345,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { + bits<0> p; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", asm); @@ -1361,6 +1364,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { + bits<0> p; bits<1> s; // condition-code set flag ('1' if the insn should set the flags) let Inst{20} = s; @@ -2221,6 +2225,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : InstARM<am, 4, im, f, NeonDomain, cstr, itin> { + bits<0> p; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); @@ -2234,6 +2239,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, 4, im, f, NeonDomain, cstr, itin> { + bits<0> 
p; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", "\t", asm); diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp index c684de7..f370547 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -25,7 +25,8 @@ #include "llvm/MC/MCInst.h" using namespace llvm; -ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {} +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI, RI) {} /// Return the noop instruction to use for a noop. MCInst ARMInstrInfo::getNop() const { diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.h b/llvm/lib/Target/ARM/ARMInstrInfo.h index 178d7a2..9feaf14 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -35,7 +35,7 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - const ARMRegisterInfo &getRegisterInfo() const override { return RI; } + const ARMRegisterInfo &getRegisterInfo() const { return RI; } private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f7176a6..ddc8941 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -40,7 +40,7 @@ def SDT_ARMCMov : SDTypeProfile<1, 4, [ SDTCisVT<4, FlagsVT>, // in flags ]>; -def SDT_ARMBrcond : SDTypeProfile<0, 2, [ +def SDT_ARMBrcond : SDTypeProfile<0, 3, [ SDTCisVT<0, OtherVT>, // target basic block SDTCisVT<1, CondCodeVT>, // condition code SDTCisVT<2, FlagsVT>, // in flags @@ -133,9 +133,16 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<4>]>; +// Signed multiply accumulate long dual def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; + +// Signed multiply accumulate long dual exchange def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; + +// Signed multiply subtract long dual def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; + +// Signed multiply subtract long dual exchange def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; def SDT_ARMCSel : SDTypeProfile<1, 4, [ @@ -146,8 +153,13 @@ def SDT_ARMCSel : SDTypeProfile<1, 4, [ SDTCisVT<3, FlagsVT> // in flags ]>; +// Conditional select invert. def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel>; + +// Conditional select negate. def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>; + +// Conditional select increment. def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>; def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, @@ -155,110 +167,197 @@ def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +// Signed multiply long, round and add def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>; + +// Signed multiply long, subtract and round def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>; -// Node definitions. + +// Wrapper - A wrapper node for TargetConstantPool, +// TargetExternalSymbol, and TargetGlobalAddress. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; + +// WrapperPIC - A wrapper node for TargetGlobalAddress in +// PIC mode. 
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; + +// WrapperJT - A wrapper node for TargetJumpTable def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +// Add pseudo op to model memcpy for struct byval. def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , SDT_ARMStructByVal, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +// Function call. def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// Function call that's predicable. def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// Function call with branch not branch-and-link. def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Return with a flag operand. def ARMretglue : SDNode<"ARMISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// CMSE Entry function return with a flag operand. def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// Interrupt return with an LR-offset and a flag operand. def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// ARM conditional move instructions. def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>; +// Signed saturation def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +// Unsigned saturation def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; +// Conditional branch. def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>; +// Jumptable branch. def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>; + +// Jumptable branch (2 level - jumptable entry is a jump). def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, [SDNPHasChain]>; def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>; +// ARM compare instructions. def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp>; +// ARM CMN instructions. def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp>; +// ARM compare that sets only Z flag. def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>; +// Add with a PC operand and a PIC label. def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; +// MVE long arithmetic shift right. def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>; + +// MVE long shift right. def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>; + +// MVE long shift left. def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>; +// Flag-setting logical shift right by one bit. def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>; + +// Flag-setting arithmetic shift right by one bit. def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>; + +// Shift right one bit with carry in. def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>; +// Add with carry def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, [SDNPCommutative]>; + +// Sub with carry def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; + +// Flag-setting shift left. 
def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>; + +// Add using carry def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; + +// Sub using carry def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; + +// SjLj exception handling setjmp. def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain, SDNPSideEffect]>; + +// SjLj exception handling longjmp. def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain, SDNPSideEffect]>; + +// SjLj exception handling setup_dispatch. def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH", SDT_ARMEH_SJLJ_SetupDispatch, [SDNPHasChain, SDNPSideEffect]>; +// Memory barrier (MCR) def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain, SDNPSideEffect]>; + +// Preload def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; +// Tail call return pseudo. def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// Bit-field insert def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; +// Pseudo-instruction representing a memory copy using ldm/stm instructions. def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +// Signed multiply word by half word, bottom def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>; + +// Signed multiply word by half word, top def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>; + +// 64bit Unsigned Accumulate Multiply +def ARMumlal : SDNode<"ARMISD::UMLAL", SDT_LongMac>; + +// 64bit Signed Accumulate Multiply +def ARMsmlal : SDNode<"ARMISD::SMLAL", SDT_LongMac>; + +// 64-bit Unsigned Accumulate Accumulate Multiply +def ARMumaal : SDNode<"ARMISD::UMAAL", SDT_LongMac>; + +// 64-bit signed accumulate multiply bottom, bottom 16 def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>; + +// 64-bit signed accumulate multiply bottom, top 16 def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; + +// 64-bit signed accumulate multiply top, bottom 16 def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; + +// 64-bit signed accumulate multiply top, top 16 def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; +// Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b +// stands for. 
def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>; def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; @@ -270,13 +369,15 @@ def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>; def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>; def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; - def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; + +// Load/Store of dual registers +def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; // Vector operations shared between NEON and MVE +// Vector duplicate def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -287,40 +388,65 @@ def ARMvduplane : SDNode<"ARMISD::VDUPLANE", def SDTARMVIDUP : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + +// MVE VIDUP instruction, taking a start value and increment. def ARMvidup : SDNode<"ARMISD::VIDUP", SDTARMVIDUP>; def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; + +// reverse elements within 64-bit doublewords def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; + +// reverse elements within 32-bit words def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; + +// reverse elements within 16-bit halfwords def ARMvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; + +// Vector get lane (VMOV scalar to ARM core register) +// (These are used for 8- and 16-bit element types only.) 
def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; + +// Vector move immediate and move negated immediate def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; + +// Vector move f32 immediate def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; + +// Vector OR with immediate def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; + +// Vector AND with NOT of immediate def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,]>; + +// Vector shift by immediate def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>; def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>; def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>; + +// Vector shift by vector def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>; def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>; + +// Vector multiply long def ARMvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def ARMvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; @@ -328,9 +454,13 @@ def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisInt<3>]>; def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>; +// Vector compare. def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>; + +// Vector compare to zero. def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>; +// Reinterpret the current contents of a vector register // 'VECTOR_REG_CAST' is an operation that reinterprets the contents of a // vector register as a different vector type, without changing the contents of // the register. It differs from 'bitconvert' in that bitconvert reinterprets @@ -5894,13 +6024,17 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), // The main point of having separate instructions is the extra unmodelled effects // (compared to ordinary calls), like the stack pointer change. +// Windows' __chkstk call to do stack probing.
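+// (A node with effects an ordinary call would not capture: the pseudo below
+// reads its argument from R4 and clobbers both R4 and SP.)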
def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, [SDNPHasChain, SDNPSideEffect]>; + let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; +// Windows' divide by zero check def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; + let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, [(win__dbzchk tGPR:$divisor)]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index e244134..0973187 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -384,6 +384,22 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin (VTI.Vec MQPR:$inactive)))>; } +def vadd : PatFrags<(ops node:$lhs, node:$rhs), + [(fadd node:$lhs, node:$rhs), + (int_arm_mve_vadd node:$lhs, node:$rhs)]>; +def vsub : PatFrags<(ops node:$lhs, node:$rhs), + [(fsub node:$lhs, node:$rhs), + (int_arm_mve_vsub node:$lhs, node:$rhs)]>; +def vmul : PatFrags<(ops node:$lhs, node:$rhs), + [(fmul node:$lhs, node:$rhs), + (int_arm_mve_vmul node:$lhs, node:$rhs)]>; +def vminnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fminnum node:$lhs, node:$rhs), + (int_arm_mve_vminnm node:$lhs, node:$rhs)]>; +def vmaxnm : PatFrags<(ops node:$lhs, node:$rhs), + [(fmaxnum node:$lhs, node:$rhs), + (int_arm_mve_vmaxnm node:$lhs, node:$rhs)]>; + // --------- Start of base classes for the instructions themselves class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm, @@ -683,8 +699,13 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr, def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2> ]>; + +// sign- or zero-extend the elements of a vector to i32, +// add them all together, and return an i32 of their sum def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>; def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>; + +// Same as VADDV[su] but with a v4i1 predicate mask def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>; def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>; @@ -806,9 +827,19 @@ multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> { defvar InstN = !cast<Instruction>(NAME # "no_acc"); defvar letter = VTI.SuffixLetter; + + // sign- or zero-extend elements to i64 and sum, returning + // the low and high 32-bit halves of the sum defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>; + + // Same as VADDLV[su] but also add an input accumulator + // provided as low and high halves defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>; + + // Same as VADDLV[su] but with a v4i1 predicate mask defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>; + + // Same as VADDLVp[su] but with a v4i1 predicate mask defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>; let Predicates = [HasMVEInt] in { @@ -943,9 +974,17 @@ multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> { def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> ]>; + +// Find minimum unsigned value of a vector and register def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; + +// Find minimum signed value of a vector and register def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; + +// Find maximum 
unsigned value of a vector and register def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; + +// Find maximum signed value of a vector and register def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; @@ -1146,16 +1185,31 @@ def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>, SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6> ]>; + +// sign- or zero-extend the elements of two vectors to i32, multiply +// them and add the results together, returning an i32 of the sum def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>; def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>; + +// Same as VMLAV but with i64, returning the low and +// high 32-bit halves of the sum def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>; def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>; + +// Same as VMLALV but also add an input accumulator +// provided as low and high halves def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>; def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>; + +// Same as VMLAV[su] with a v4i1 predicate mask def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>; def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>; + +// Same as VMLALV[su] with a v4i1 predicate mask def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>; def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>; + +// Same as VMLALVA[su] with a v4i1 predicate mask def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>; def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>; @@ -1441,7 +1495,7 @@ class MVE_VMINMAXNM<string iname, string suffix, bits<2> sz, bit bit_21, let validForTailPredication = 1; } -multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> { +multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt> { def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size, bit_4>; let Predicates = [HasMVEFloat] in { @@ -1449,10 +1503,10 @@ multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode } } -defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>; -defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>; +defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, vmaxnm, int_arm_mve_max_predicated>; +defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, vminnm, int_arm_mve_min_predicated>; +defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, vminnm, int_arm_mve_min_predicated>; class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size, @@ -1997,6 +2051,7 @@ class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding, let validForTailPredication = 1; } +// MVE vqdmulh instruction def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>; multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI, @@ -3566,7 +3621,7 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[ let validForTailPredication = 1; } -multiclass 
MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op, +multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); @@ -3577,7 +3632,7 @@ multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op, } multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> - : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>; + : MVE_VMULT_fp_m<"vmul", VTI, vmul, int_arm_mve_mul_predicated, IdentityVec>; def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half @@ -3674,6 +3729,10 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> { if fms then { def : Pat<(VTI.Vec (fma (fneg m1), m2, add)), (Inst $add, $m1, $m2)>; + def : Pat<(VTI.Vec (int_arm_mve_fma (fneg m1), m2, add)), + (Inst $add, $m1, $m2)>; + def : Pat<(VTI.Vec (int_arm_mve_fma m1, (fneg m2), add)), + (Inst $add, $m1, $m2)>; def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), (VTI.Vec (fma (fneg m1), m2, add)), add)), @@ -3685,6 +3744,8 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> { } else { def : Pat<(VTI.Vec (fma m1, m2, add)), (Inst $add, $m1, $m2)>; + def : Pat<(VTI.Vec (int_arm_mve_fma m1, m2, add)), + (Inst $add, $m1, $m2)>; def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), (VTI.Vec (fma m1, m2, add)), add)), @@ -3701,7 +3762,7 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>; defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>; multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { + SDPatternOperator Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> { let validForTailPredication = 1; } @@ -3713,9 +3774,9 @@ multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, } multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> - : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>; + : MVE_VADDSUB_fp_m<"vadd", 0, VTI, vadd, int_arm_mve_add_predicated, IdentityVec>; multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> - : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>; + : MVE_VADDSUB_fp_m<"vsub", 1, VTI, vsub, int_arm_mve_sub_predicated, IdentityVec>; def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half @@ -4093,7 +4154,7 @@ class MVE_VMAXMINNMA<string iname, string suffix, bits<2> size, bit bit_12, } multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI, - SDNode unpred_op, Intrinsic pred_int, + SDPatternOperator unpred_op, Intrinsic pred_int, bit bit_12> { def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size, bit_12>; defvar Inst = !cast<Instruction>(NAME); @@ -4113,13 +4174,13 @@ multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI, } multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12> - : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vmaxnma", VTI, vmaxnm, int_arm_mve_vmaxnma_predicated, bit_12>; defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>; defm 
MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>; multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12> - : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>; + : MVE_VMAXMINNMA_m<"vminnma", VTI, vminnm, int_arm_mve_vminnma_predicated, bit_12>; defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>; defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>; @@ -4414,6 +4475,7 @@ let Predicates = [HasMVEInt] in { defm PEOR : two_predops<xor, t2EORrr>; } +// Predicate cast for MVE i1 types // Occasionally we need to cast between a i32 and a boolean vector, for // example when moving between rGPR and VPR.P0 as part of predicate vector // shuffles. We also sometimes need to cast between different predicate @@ -4810,6 +4872,7 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>; defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; +// MVE vmovn def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; multiclass MVE_VMOVN_p<Instruction Inst, bit top, @@ -4880,7 +4943,11 @@ defm : MVE_VQMOVN_p<MVE_VQMOVUNs16th, 1, 0, 1, MVE_v16i8, MVE_v8i16>; def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisVT<3, i32>]>; + +// Vector (V) Saturating (Q) Move and Narrow (N), signed (s) def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>; + +// Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u) def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>; let Predicates = [HasMVEInt] in { @@ -4938,7 +5005,11 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T, def SDTARMVCVTL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i32>]>; + +// MVE vcvt f32 -> f16, truncating into either the bottom or top lanes def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>; + +// MVE vcvt f16 -> f32, extending from either the bottom or top lanes def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>; multiclass MVE_VCVT_f2h_m<string iname, int half> { @@ -5342,21 +5413,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16, subnuw, ARMvshruImm>; defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32, subnuw, ARMvshruImm>; multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { + SDPatternOperator Op, Intrinsic PredInt, + SDPatternOperator IdentityVec> { def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>; defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? 
), !cast<Instruction>(NAME), IdentityVec>; } let Predicates = [HasMVEFloat] in { - defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd, + defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, vadd, int_arm_mve_add_predicated, ARMimmMinusZeroF>; - defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd, + defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, vadd, int_arm_mve_add_predicated, ARMimmMinusZeroH>; - defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub, + defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, vsub, int_arm_mve_sub_predicated, ARMimmAllZerosV>; - defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub, + defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, vsub, int_arm_mve_sub_predicated, ARMimmAllZerosV>; } @@ -5539,7 +5611,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>; multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> { let validForTailPredication = 1 in def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>; - defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ), + defm : MVE_TwoOpPatternDup<VTI, vmul, int_arm_mve_mul_predicated, (? ), !cast<Instruction>(NAME), IdentityVec>; } @@ -5612,6 +5684,8 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI, if scalar_addend then { def : Pat<(VTI.Vec (fma v1, v2, vs)), (VTI.Vec (Inst v1, v2, is))>; + def : Pat<(VTI.Vec (int_arm_mve_fma v1, v2, vs)), + (VTI.Vec (Inst v1, v2, is))>; def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), (VTI.Vec (fma v1, v2, vs)), v1)), @@ -5621,6 +5695,10 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI, (VTI.Vec (Inst v2, v1, is))>; def : Pat<(VTI.Vec (fma vs, v1, v2)), (VTI.Vec (Inst v2, v1, is))>; + def : Pat<(VTI.Vec (int_arm_mve_fma v1, vs, v2)), + (VTI.Vec (Inst v2, v1, is))>; + def : Pat<(VTI.Vec (int_arm_mve_fma vs, v1, v2)), + (VTI.Vec (Inst v2, v1, is))>; def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), (VTI.Vec (fma vs, v2, v1)), v1)), @@ -6865,6 +6943,9 @@ class MVE_WLSTP<string asm, bits<2> size> def SDT_MVEMEMCPYLOOPNODE : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +// Pseudo-instruction representing a memory copy using a tail predicated +// loop def MVE_MEMCPYLOOPNODE : SDNode<"ARMISD::MEMCPYLOOP", SDT_MVEMEMCPYLOOPNODE, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; @@ -6877,6 +6958,9 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CPSR] in { def SDT_MVEMEMSETLOOPNODE : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, v16i8>, SDTCisVT<2, i32>]>; + +// Pseudo-instruction representing a memset using a tail predicated +// loop def MVE_MEMSETLOOPNODE : SDNode<"ARMISD::MEMSETLOOP", SDT_MVEMEMSETLOOPNODE, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 37f0103..90e74a5 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -475,6 +475,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), //===----------------------------------------------------------------------===// def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; + +// Vector test bits. def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. 
The "SHX" version is for long and @@ -487,10 +489,12 @@ def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>; +// Vector rounding shift by immediate def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>; def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>; def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>; +// Vector saturating shift by immediate def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>; def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>; def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>; @@ -498,13 +502,16 @@ def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>; def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>; def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>; +// Vector saturating rounding shift by immediate def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>; def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>; def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; +// Vector shift and insert def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; +// Pseudo vector bitwise select def NEONvbsp : SDNode<"ARMISD::VBSP", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -518,15 +525,25 @@ def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; + +// zip (interleave) def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; + +// unzip (deinterleave) def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; + +// transpose def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, SDTCisVT<2, v8i8>]>; def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; + +// 1-register shuffle with mask def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; + +// 2-register shuffle with mask def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 0c5ea3e..0ee98e6 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -14,6 +14,7 @@ // Thumb specific DAG Nodes. // +// CMSE non-secure function call. 
def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -483,6 +484,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>, T1Special<{1,1,0,?}>, Sched<[WriteBr]> { // A6.2.3 & A8.6.25 + bits<0> p; bits<4> Rm; let Inst{6-3} = Rm; let Inst{2-0} = 0b000; @@ -491,6 +493,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBXNS : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bxns${p}\t$Rm", []>, Requires<[IsThumb, Has8MSecExt]>, T1Special<{1,1,0,?}>, Sched<[WriteBr]> { + bits<0> p; bits<4> Rm; let Inst{6-3} = Rm; let Inst{2-0} = 0b100; @@ -523,6 +526,7 @@ let isCall = 1, "bl${p}\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb]>, Sched<[WriteBrL]> { + bits<0> p; bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -536,6 +540,7 @@ let isCall = 1, (outs), (ins pred:$p, thumb_blx_target:$func), IIC_Br, "blx${p}\t$func", []>, Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> { + bits<0> p; bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -550,6 +555,7 @@ let isCall = 1, "blx${p}\t$func", []>, Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24; + bits<0> p; bits<4> func; let Inst{6-3} = func; let Inst{2-0} = 0b000; @@ -565,6 +571,7 @@ let isCall = 1, "blxns${p}\t$func", []>, Requires<[IsThumb, Has8MSecExt]>, T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { + bits<0> p; bits<4> func; let Inst{6-3} = func; let Inst{2-0} = 0b100; @@ -824,6 +831,7 @@ let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1, variadicOpsAreDefs = 1 in def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> { + bits<0> p; bits<3> Rn; bits<8> regs; let Inst{10-8} = Rn; @@ -854,6 +862,7 @@ def tSTMIA_UPD : Thumb1I<(outs tGPR:$wb), AddrModeNone, 2, IIC_iStore_mu, "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>, T1Encoding<{1,1,0,0,0,?}> { + bits<0> p; bits<3> Rn; bits<8> regs; let Inst{10-8} = Rn; @@ -872,6 +881,7 @@ def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iPop, "pop${p}\t$regs", []>, T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> { + bits<0> p; bits<16> regs; let Inst{8} = regs{15}; let Inst{7-0} = regs{7-0}; @@ -882,6 +892,7 @@ def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iStore_m, "push${p}\t$regs", []>, T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> { + bits<0> p; bits<16> regs; let Inst{8} = regs{14}; let Inst{7-0} = regs{7-0}; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c229c8e..596196c 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2059,6 +2059,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin, def IA : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2074,6 +2075,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin, def IA_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2089,6 +2091,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin, def DB : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, 
variable_ops), itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2104,6 +2107,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin, def DB_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2128,6 +2132,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin, def IA : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2146,6 +2151,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin, def IA_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2164,6 +2170,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin, def DB : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -2182,6 +2189,7 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin, def DB_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<0> p; bits<4> Rn; bits<16> regs; @@ -4030,9 +4038,11 @@ def t2TBH : T2I<(outs), (ins (addrmode_tbh $Rn, $Rm):$addr), IIC_Br, // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in -def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, - "b", ".w\t$target", - [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> { +def t2Bcc : Thumb2XI<(outs), (ins brtarget:$target, pred:$p), + AddrModeNone, 4, IIC_Br, + "b${p}.w\t$target", "", + [/*(ARMbrcond bb:$target, imm:$cc)*/]>, + Sched<[WriteBr]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 0; @@ -5481,6 +5491,7 @@ class V8_1MI<dag oops, dag iops, AddrMode am, InstrItinClass itin, string asm, def t2CLRM : V8_1MI<(outs), (ins pred:$p, reglist_with_apsr:$regs, variable_ops), AddrModeNone, NoItinerary, "clrm${p}", "$regs", "", []> { + bits<0> p; bits<16> regs; let Inst{31-16} = 0b1110100010011111; @@ -5509,6 +5520,7 @@ def t2BF_LabelPseudo def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p), !strconcat("bf", "${p}"), "$b_label, $label"> { + bits<0> p; bits<4> b_label; bits<16> label; @@ -5540,6 +5552,7 @@ def t2BFic : t2BF<(ins bflabel_u4:$b_label, bflabel_s12:$label, def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p), !strconcat("bfx", "${p}"), "$b_label, $Rn"> { + bits<0> p; bits<4> b_label; bits<4> Rn; @@ -5551,6 +5564,7 @@ def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p), def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p), !strconcat("bfl", "${p}"), "$b_label, $label"> { + bits<0> p; bits<4> b_label; bits<18> label; @@ -5563,6 +5577,7 @@ def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p), def t2BFLr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p), !strconcat("bflx", "${p}"), "$b_label, $Rn"> { + bits<0> p; bits<4> b_label; bits<4> Rn; @@ -5581,6 +5596,25 @@ class t2LOL<dag oops, dag iops, string asm, string ops> let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } +// Setup for the iteration count of a WLS. See t2WhileLoopSetup.
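+// A rough sketch of how the four nodes below pair up in a lowered
+// low-overhead loop (illustrative only):
+//   lr = WLSSETUP(tripcount)
+//   WLS lr, <exit>      ; skip the loop entirely if the count is zero
+// <loop>:
+//   ...
+//   lr = LOOP_DEC(lr, 1)
+//   LE lr, <loop>       ; branch back while iterations remain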
+def arm_wlssetup + : SDNode<"ARMISD::WLSSETUP", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>, + [SDNPSideEffect]>; + +// Low-overhead loops, While Loop Start branch. See t2WhileLoopStart +def arm_wls : SDNode<"ARMISD::WLS", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + +// Really a part of LE, performs the sub +def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; + +// Low-overhead loops, Loop End +def arm_le : SDNode<"ARMISD::LE", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + let isNotDuplicable = 1 in { def t2WLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn, wlslabel_u11:$label), @@ -5650,16 +5684,19 @@ def t2DoLoopStartTP : // t2WhileLoopSetup to setup LR and t2WhileLoopStart to perform the branch. Not // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations // into a t2WhileLoopStartLR (or expanded). +let hasSideEffects = 1 in def t2WhileLoopSetup : - t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>; + t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, + [(set i32:$lr, (arm_wlssetup i32:$tc))]>; // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and // t2LoopEnd together represent a LE instruction. Ideally these are converted // to a t2LoopEndDec which is lowered as a single instruction. let hasSideEffects = 0 in def t2LoopDec : - t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), - 4, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, + [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>, + Sched<[WriteBr]>; let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned @@ -5667,8 +5704,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { def t2WhileLoopStart : t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 4, IIC_Br, []>, - Sched<[WriteBr]>; + 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It // is lowered in the ARMLowOverheadLoops pass providing the branches are within @@ -5690,8 +5727,9 @@ def t2WhileLoopStartTP : // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair. def t2LoopEnd : - t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 8, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), + 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // The combination of a t2LoopDec and t2LoopEnd, performing both the LR // decrement and branch as a single instruction. Is lowered to a LE or @@ -5803,6 +5841,7 @@ let Predicates = [IsThumb2, HasV8_1MMainline, HasPACBTI] in { def t2PACG : V8_1MI<(outs rGPR:$Rd), (ins pred:$p, GPRnopc:$Rn, GPRnopc:$Rm), AddrModeNone, NoItinerary, "pacg${p}", "$Rd, $Rn, $Rm", "", []> { + bits<0> p; bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -5818,6 +5857,7 @@ let hasSideEffects = 1 in { class PACBTIAut<dag iops, string asm, bit b> : V8_1MI<(outs), iops, AddrModeNone, NoItinerary, asm, "$Ra, $Rn, $Rm", "", []> { + bits<0> p; bits<4> Ra; bits<4> Rn; bits<4> Rm; @@ -5873,6 +5913,7 @@ def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> { let hasSideEffects = 1; } +// Thumb function call followed by BTI instruction. 
def ARMt2CallBTI : SDNode<"ARMISD::t2CALL_BTI", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index e2cc97b..5f5f703 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -28,11 +28,20 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; +// ARM VFP compare instruction, sets FPSCR. def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>; + +// ARM VFP compare against zero instruction, sets FPSCR. def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>; + +// ARM VFP signalling compare instruction, sets FPSCR. def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>; + +// ARM VFP signalling compare against zero instruction, sets +// FPSCR. def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>; +// ARM fmstat instruction. def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTypeProfile<1, 1, [ SDTCisVT<0, FlagsVT>, // out flags @@ -40,12 +49,19 @@ def arm_fmstat : SDNode<"ARMISD::FMSTAT", ]> >; +// Two gprs to double. def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; + +// double to two gprs. def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; + +// move gpr to single, used for f32 literal constructed in a gpr def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>; def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>] >; def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>] >; + +// Move H <-> R, clearing top 16 bits def arm_vmovhr : SDNode<"ARMISD::VMOVhr", SDT_VMOVhr>; def arm_vmovrh : SDNode<"ARMISD::VMOVrh", SDT_VMOVrh>; @@ -798,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))), (VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>; -def : FP16Pat<(f16_to_fp GPR:$a), +def : FP16Pat<(any_f16_to_fp GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in @@ -810,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, def : FP16Pat<(f16 (any_fpround SPR:$Sm)), (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>; -def : FP16Pat<(fp_to_f16 SPR:$a), +def : FP16Pat<(any_fp_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>; def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane), (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), @@ -875,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))), (VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>, Requires<[HasFPARMv8, HasDPVFP]>; -def : FP16Pat<(f64 (f16_to_fp GPR:$a)), +def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)), (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>, Requires<[HasFPARMv8, HasDPVFP]>; @@ -901,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)), (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>, Requires<[HasFPARMv8, HasDPVFP]>; -def : FP16Pat<(fp_to_f16 (f64 DPR:$a)), +def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)), (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>, Requires<[HasFPARMv8, HasDPVFP]>; diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cd4299b..db37b76 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp 
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2424,7 +2424,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps( Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0); MRI->constrainRegClass(FirstReg, TRC); MRI->constrainRegClass(SecondReg, TRC); @@ -3014,7 +3014,7 @@ static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, MachineFunction *MF = MI->getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); const MCInstrDesc &MCID = TII->get(MI->getOpcode()); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp); MRI.constrainRegClass(NewBaseReg, TRC); int OldOffset = MI->getOperand(BaseOp + 1).getImm(); @@ -3071,10 +3071,10 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset, const MCInstrDesc &MCID = TII->get(NewOpcode); // Constrain the def register class - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0); MRI.constrainRegClass(NewReg, TRC); // And do the same for the base operand - TRC = TII->getRegClass(MCID, 2, TRI); + TRC = TII->getRegClass(MCID, 2); MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC); unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask); diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp index f5d6597..c040904 100644 --- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -112,8 +112,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex())); break; case MachineOperand::MO_ConstantPoolIndex: - if (Subtarget->genExecuteOnly()) - llvm_unreachable("execute-only should not generate constant pools"); + assert(!MF->getSubtarget<ARMSubtarget>().genExecuteOnly() && + "execute-only should not generate constant pools"); MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex())); break; case MachineOperand::MO_BlockAddress: diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 72eb3d0..b689760 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/Support/ErrorHandling.h" -#include <utility> namespace llvm { diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index bf7c962f..501dce9 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -10,9 +10,14 @@ // //===----------------------------------------------------------------------===// +#include "ARMSelectionDAGInfo.h" #include "ARMTargetTransformInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/CommandLine.h" + +#define GET_SDNODE_DESC +#include "ARMGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" @@ -30,9 +35,83 @@ static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop( "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop."))); +ARMSelectionDAGInfo::ARMSelectionDAGInfo() + : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {} + +const char *ARMSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define MAKE_CASE(V) \ + case V: \ + return #V; + + 
// These nodes don't have corresponding entries in *.td files yet. + switch (static_cast<ARMISD::NodeType>(Opcode)) { + MAKE_CASE(ARMISD::DYN_ALLOC) + MAKE_CASE(ARMISD::MVESEXT) + MAKE_CASE(ARMISD::MVEZEXT) + MAKE_CASE(ARMISD::MVETRUNC) + MAKE_CASE(ARMISD::BUILD_VECTOR) + MAKE_CASE(ARMISD::VLD1DUP) + MAKE_CASE(ARMISD::VLD2DUP) + MAKE_CASE(ARMISD::VLD3DUP) + MAKE_CASE(ARMISD::VLD4DUP) + MAKE_CASE(ARMISD::VLD1_UPD) + MAKE_CASE(ARMISD::VLD2_UPD) + MAKE_CASE(ARMISD::VLD3_UPD) + MAKE_CASE(ARMISD::VLD4_UPD) + MAKE_CASE(ARMISD::VLD1x2_UPD) + MAKE_CASE(ARMISD::VLD1x3_UPD) + MAKE_CASE(ARMISD::VLD1x4_UPD) + MAKE_CASE(ARMISD::VLD2LN_UPD) + MAKE_CASE(ARMISD::VLD3LN_UPD) + MAKE_CASE(ARMISD::VLD4LN_UPD) + MAKE_CASE(ARMISD::VLD1DUP_UPD) + MAKE_CASE(ARMISD::VLD2DUP_UPD) + MAKE_CASE(ARMISD::VLD3DUP_UPD) + MAKE_CASE(ARMISD::VLD4DUP_UPD) + MAKE_CASE(ARMISD::VST1_UPD) + MAKE_CASE(ARMISD::VST3_UPD) + MAKE_CASE(ARMISD::VST1x2_UPD) + MAKE_CASE(ARMISD::VST1x3_UPD) + MAKE_CASE(ARMISD::VST1x4_UPD) + MAKE_CASE(ARMISD::VST2LN_UPD) + MAKE_CASE(ARMISD::VST3LN_UPD) + MAKE_CASE(ARMISD::VST4LN_UPD) + } +#undef MAKE_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= ARMISD::FIRST_MEMORY_OPCODE && - Opcode <= ARMISD::LAST_MEMORY_OPCODE; + // These nodes don't have corresponding entries in *.td files yet. + if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE && + Opcode <= ARMISD::LAST_MEMORY_OPCODE) + return true; + + return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode); +} + +void ARMSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case ARMISD::WIN__DBZCHK: + // invalid number of results; expected 2, got 1 + case ARMISD::WIN__CHKSTK: + // invalid number of results; expected 1, got 2 + case ARMISD::COPY_STRUCT_BYVAL: + // invalid number of operands; expected 6, got 5 + case ARMISD::MEMCPY: + // invalid number of operands; expected 5, got 4 + case ARMISD::VMOVRRD: + // operand #0 must have type f64, but has type v1i64/v4f16/v8i8 + case ARMISD::VMOVIMM: + // operand #0 must have type i32, but has type i16 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } // Emit, if possible, a specialized version of the given Libcall. Typically this diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index d68150e..38d2a65 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -17,7 +17,62 @@ #include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "ARMGenSDNodeInfo.inc" + namespace llvm { +namespace ARMISD { + +enum NodeType : unsigned { + DYN_ALLOC = GENERATED_OPCODE_END, // Dynamic allocation on the stack. + + MVESEXT, // Legalization aids for extending a vector into two/four vectors. + MVEZEXT, // or truncating two/four vectors into one. Eventually becomes + MVETRUNC, // stack store/load sequence, if not optimized to anything else. + + // Operands of the standard BUILD_VECTOR node are not legalized, which + // is fine if BUILD_VECTORs are always lowered to shuffles or other + // operations, but for ARM some BUILD_VECTORs are legal as-is and their + // operands need to be legalized. Define an ARM-specific version of + // BUILD_VECTOR for this purpose. 
+ BUILD_VECTOR, + + // Vector load N-element structure to all lanes: + FIRST_MEMORY_OPCODE, + VLD1DUP = FIRST_MEMORY_OPCODE, + VLD2DUP, + VLD3DUP, + VLD4DUP, + + // NEON loads with post-increment base updates: + VLD1_UPD, + VLD2_UPD, + VLD3_UPD, + VLD4_UPD, + VLD2LN_UPD, + VLD3LN_UPD, + VLD4LN_UPD, + VLD1DUP_UPD, + VLD2DUP_UPD, + VLD3DUP_UPD, + VLD4DUP_UPD, + VLD1x2_UPD, + VLD1x3_UPD, + VLD1x4_UPD, + + // NEON stores with post-increment base updates: + VST1_UPD, + VST3_UPD, + VST2LN_UPD, + VST3LN_UPD, + VST4LN_UPD, + VST1x2_UPD, + VST1x3_UPD, + VST1x4_UPD, + LAST_MEMORY_OPCODE = VST1x4_UPD, +}; + +} // namespace ARMISD namespace ARM_AM { static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) { @@ -35,10 +90,17 @@ namespace ARM_AM { } } // end namespace ARM_AM -class ARMSelectionDAGInfo : public SelectionDAGTargetInfo { +class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + ARMSelectionDAGInfo(); + + const char *getTargetNodeName(unsigned Opcode) const override; + bool isTargetMemoryOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, @@ -66,6 +128,6 @@ public: RTLIB::Libcall LC) const; }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 58bc338..e6af32d 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -129,6 +129,76 @@ const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const { return RegBankInfo.get(); } +void ARMSubtarget::initLibcallLoweringInfo(LibcallLoweringInfo &Info) const { + const Triple &TT = getTargetTriple(); + if (TT.isOSBinFormatMachO()) { + // Uses VFP for Thumb libfuncs if available. + if (isThumb() && hasVFP2Base() && hasARMOps() && !useSoftFloat()) { + // clang-format off + static const struct { + const RTLIB::Libcall Op; + const RTLIB::LibcallImpl Impl; + } LibraryCalls[] = { + // Single-precision floating-point arithmetic. + { RTLIB::ADD_F32, RTLIB::impl___addsf3vfp }, + { RTLIB::SUB_F32, RTLIB::impl___subsf3vfp }, + { RTLIB::MUL_F32, RTLIB::impl___mulsf3vfp }, + { RTLIB::DIV_F32, RTLIB::impl___divsf3vfp }, + + // Double-precision floating-point arithmetic. + { RTLIB::ADD_F64, RTLIB::impl___adddf3vfp }, + { RTLIB::SUB_F64, RTLIB::impl___subdf3vfp }, + { RTLIB::MUL_F64, RTLIB::impl___muldf3vfp }, + { RTLIB::DIV_F64, RTLIB::impl___divdf3vfp }, + + // Single-precision comparisons. + { RTLIB::OEQ_F32, RTLIB::impl___eqsf2vfp }, + { RTLIB::UNE_F32, RTLIB::impl___nesf2vfp }, + { RTLIB::OLT_F32, RTLIB::impl___ltsf2vfp }, + { RTLIB::OLE_F32, RTLIB::impl___lesf2vfp }, + { RTLIB::OGE_F32, RTLIB::impl___gesf2vfp }, + { RTLIB::OGT_F32, RTLIB::impl___gtsf2vfp }, + { RTLIB::UO_F32, RTLIB::impl___unordsf2vfp }, + + // Double-precision comparisons. + { RTLIB::OEQ_F64, RTLIB::impl___eqdf2vfp }, + { RTLIB::UNE_F64, RTLIB::impl___nedf2vfp }, + { RTLIB::OLT_F64, RTLIB::impl___ltdf2vfp }, + { RTLIB::OLE_F64, RTLIB::impl___ledf2vfp }, + { RTLIB::OGE_F64, RTLIB::impl___gedf2vfp }, + { RTLIB::OGT_F64, RTLIB::impl___gtdf2vfp }, + { RTLIB::UO_F64, RTLIB::impl___unorddf2vfp }, + + // Floating-point to integer conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. 
+ { RTLIB::FPTOSINT_F64_I32, RTLIB::impl___fixdfsivfp },
+ { RTLIB::FPTOUINT_F64_I32, RTLIB::impl___fixunsdfsivfp },
+ { RTLIB::FPTOSINT_F32_I32, RTLIB::impl___fixsfsivfp },
+ { RTLIB::FPTOUINT_F32_I32, RTLIB::impl___fixunssfsivfp },
+
+ // Conversions between floating types.
+ { RTLIB::FPROUND_F64_F32, RTLIB::impl___truncdfsf2vfp },
+ { RTLIB::FPEXT_F32_F64, RTLIB::impl___extendsfdf2vfp },
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ { RTLIB::SINTTOFP_I32_F64, RTLIB::impl___floatsidfvfp },
+ { RTLIB::UINTTOFP_I32_F64, RTLIB::impl___floatunssidfvfp },
+ { RTLIB::SINTTOFP_I32_F32, RTLIB::impl___floatsisfvfp },
+ { RTLIB::UINTTOFP_I32_F32, RTLIB::impl___floatunssisfvfp },
+ };
+ // clang-format on
+
+ for (const auto &LC : LibraryCalls)
+ Info.setLibcallImpl(LC.Op, LC.Impl);
+ }
+ }
+}
+
 bool ARMSubtarget::isXRaySupported() const {
 // We don't currently support Thumb, but Windows requires Thumb.
 return hasV6Ops() && hasARMOps() && !isTargetWindows();
@@ -309,26 +379,21 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { }
 bool ARMSubtarget::isROPI() const {
+ // FIXME: This should ideally come from a function attribute, to work
+ // correctly with LTO.
 return TM.getRelocationModel() == Reloc::ROPI ||
 TM.getRelocationModel() == Reloc::ROPI_RWPI;
 }
+
 bool ARMSubtarget::isRWPI() const {
+ // FIXME: This should ideally come from a function attribute, to work
+ // correctly with LTO.
 return TM.getRelocationModel() == Reloc::RWPI ||
 TM.getRelocationModel() == Reloc::ROPI_RWPI;
 }
 bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
- if (!TM.shouldAssumeDSOLocal(GV))
- return true;
-
- // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
- // the section that is being relocated. This means we have to use o load even
- // for GVs that are known to be local to the dso.
- if (isTargetMachO() && TM.isPositionIndependent() &&
- (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
- return true;
-
- return false;
+ return TM.isGVIndirectSymbol(GV);
 }
 bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const {
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 4a0883c..2a90f42 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -258,6 +258,7 @@ public:
 InstructionSelector *getInstructionSelector() const override;
 const LegalizerInfo *getLegalizerInfo() const override;
 const RegisterBankInfo *getRegBankInfo() const override;
+ void initLibcallLoweringInfo(LibcallLoweringInfo &Info) const override;
 private:
 ARMSelectionDAGInfo TSInfo;
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index c417c4c..1f74e9f 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -98,6 +98,20 @@ public:
 return true;
 }
+ bool isGVIndirectSymbol(const GlobalValue *GV) const {
+ if (!shouldAssumeDSOLocal(GV))
+ return true;
+
+ // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
+ // the section that is being relocated. This means we have to use a load
+ // even for GVs that are known to be local to the dso.
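The initLibcallLoweringInfo hook earlier in this hunk follows a common table-driven pattern: the Op-to-Impl mapping is kept declarative in a local array and applied with a single loop, which keeps the VFP-specific overrides easy to audit. A stripped-down model of that pattern (Libcall, LibcallImpl and LoweringInfo here are stand-ins, not LLVM's types):

#include <cstdio>
#include <map>

enum class Libcall { ADD_F32, SUB_F32 };
enum class LibcallImpl { addsf3vfp, subsf3vfp };

struct LoweringInfo {
  std::map<Libcall, LibcallImpl> Impls;
  void setLibcallImpl(Libcall Op, LibcallImpl Impl) { Impls[Op] = Impl; }
};

int main() {
  // Declarative table, mirroring the shape of the LibraryCalls array above.
  static const struct {
    Libcall Op;
    LibcallImpl Impl;
  } LibraryCalls[] = {
      {Libcall::ADD_F32, LibcallImpl::addsf3vfp},
      {Libcall::SUB_F32, LibcallImpl::subsf3vfp},
  };

  LoweringInfo Info;
  for (const auto &LC : LibraryCalls) // one loop applies the whole table
    Info.setLibcallImpl(LC.Op, LC.Impl);
  std::printf("registered %zu libcall overrides\n", Info.Impls.size());
  return 0;
}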
+ if (getTargetTriple().isOSBinFormatMachO() && isPositionIndependent() && + (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) + return true; + + return false; + } + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 24f58a6..88a7fb1 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -66,6 +66,11 @@ extern cl::opt<bool> EnableMaskedGatherScatters; extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor; +static cl::opt<int> ArmForceUnrollThreshold( + "arm-force-unroll-threshold", cl::init(12), cl::Hidden, + cl::desc( + "Threshold for forced unrolling of small loops in Arm architecture")); + /// Convert a vector load intrinsic into a simple llvm load instruction. /// This is beneficial when the underlying object being addressed comes /// from a constant, since we get constant-folding for free. @@ -1125,7 +1130,8 @@ bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) const { } bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment, - unsigned /*AddressSpace*/) const { + unsigned /*AddressSpace*/, + TTI::MaskKind /*MaskKind*/) const { if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps()) return false; @@ -1631,20 +1637,36 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } InstructionCost -ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, +ARMTTIImpl::getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, + TTI::TargetCostKind CostKind) const { + switch (MICA.getID()) { + case Intrinsic::masked_scatter: + case Intrinsic::masked_gather: + return getGatherScatterOpCost(MICA, CostKind); + case Intrinsic::masked_load: + case Intrinsic::masked_store: + return getMaskedMemoryOpCost(MICA, CostKind); + } + return BaseT::getMemIntrinsicInstrCost(MICA, CostKind); +} + +InstructionCost +ARMTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const { + unsigned IID = MICA.getID(); + Type *Src = MICA.getDataType(); + Align Alignment = MICA.getAlignment(); + unsigned AddressSpace = MICA.getAddressSpace(); if (ST->hasMVEIntegerOps()) { - if (Opcode == Instruction::Load && + if (IID == Intrinsic::masked_load && isLegalMaskedLoad(Src, Alignment, AddressSpace)) return ST->getMVEVectorCostFactor(CostKind); - if (Opcode == Instruction::Store && + if (IID == Intrinsic::masked_store && isLegalMaskedStore(Src, Alignment, AddressSpace)) return ST->getMVEVectorCostFactor(CostKind); } if (!isa<FixedVectorType>(Src)) - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); + return BaseT::getMemIntrinsicInstrCost(MICA, CostKind); // Scalar cost, which is currently very high due to the efficiency of the // generated code. 
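The new getMemIntrinsicInstrCost entry point above is a thin dispatcher: known memory intrinsics are routed to the specialized helpers, and everything else falls through to the base implementation. A compact model of that dispatch shape (the IID enum and the cost numbers are invented):

#include <cstdio>

enum class IID { masked_load, masked_store, masked_gather, masked_scatter, other };

static int maskedMemoryOpCost() { return 2; }  // cheap when the target handles it
static int gatherScatterOpCost() { return 8; } // modelled as lane-by-lane work
static int baseCost() { return 1; }

static int memIntrinsicCost(IID ID) {
  switch (ID) {
  case IID::masked_gather:
  case IID::masked_scatter:
    return gatherScatterOpCost();
  case IID::masked_load:
  case IID::masked_store:
    return maskedMemoryOpCost();
  default:
    break; // unknown IDs defer to the base implementation
  }
  return baseCost();
}

int main() {
  std::printf("%d\n", memIntrinsicCost(IID::masked_gather)); // prints 8
  return 0;
}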
return cast<FixedVectorType>(Src)->getNumElements() * 8; @@ -1691,13 +1713,19 @@ InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost( UseMaskForCond, UseMaskForGaps); } -InstructionCost ARMTTIImpl::getGatherScatterOpCost( - unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, - Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const { +InstructionCost +ARMTTIImpl::getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, + TTI::TargetCostKind CostKind) const { + + Type *DataTy = MICA.getDataType(); + const Value *Ptr = MICA.getPointer(); + bool VariableMask = MICA.getVariableMask(); + Align Alignment = MICA.getAlignment(); + const Instruction *I = MICA.getInst(); + using namespace PatternMatch; if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters) - return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, CostKind, I); + return BaseT::getMemIntrinsicInstrCost(MICA, CostKind); assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!"); auto *VTy = cast<FixedVectorType>(DataTy); @@ -2728,7 +2756,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // Force unrolling small loops can be very useful because of the branch // taken cost of the backedge. - if (Cost < 12) + if (Cost < ArmForceUnrollThreshold) UP.Force = true; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 0810c55..a232563 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -186,12 +186,16 @@ public: bool isProfitableLSRChainElement(Instruction *I) const override; - bool isLegalMaskedLoad(Type *DataTy, Align Alignment, - unsigned AddressSpace) const override; - - bool isLegalMaskedStore(Type *DataTy, Align Alignment, - unsigned AddressSpace) const override { - return isLegalMaskedLoad(DataTy, Alignment, AddressSpace); + bool + isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace, + TTI::MaskKind MaskKind = + TTI::MaskKind::VariableOrConstantMask) const override; + + bool + isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace, + TTI::MaskKind MaskKind = + TTI::MaskKind::VariableOrConstantMask) const override { + return isLegalMaskedLoad(DataTy, Alignment, AddressSpace, MaskKind); } bool forceScalarizeMaskedGather(VectorType *VTy, @@ -275,20 +279,19 @@ public: const Instruction *I = nullptr) const override; InstructionCost - getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) const override; + getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, + TTI::TargetCostKind CostKind) const override; + + InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, + TTI::TargetCostKind CostKind) const; InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) const override; - InstructionCost - getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, - bool VariableMask, Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) const override; + InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, + TTI::TargetCostKind CostKind) const; InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, diff --git 
a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index fa778ca..d99368e 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -6,8 +6,7 @@ tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler
- -ignore-non-decodable-operands)
+tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
 tablegen(LLVM ARMGenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
@@ -15,6 +14,7 @@ tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
 tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
 tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenSDNodeInfo.inc -gen-sd-node-info)
 tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
 tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables)
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index b119146..44f50dd 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -149,7 +149,7 @@ private:
 raw_ostream &CStream) const;
 bool isVectorPredicable(const MCInst &MI) const;
- DecodeStatus AddThumbPredicate(MCInst&) const;
+ DecodeStatus checkThumbPredicate(MCInst &) const;
 void UpdateThumbPredicate(DecodeStatus &S, MCInst &MI) const;
 llvm::endianness InstructionEndianness;
@@ -618,6 +618,23 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
 return S;
 }
+// This overload is used to decode a `pred` operand that is not encoded in the
+// instruction. This is the case for almost all predicable Thumb instructions
+// (exceptions are tBcc and t2Bcc). Some predicable Thumb instructions have ARM
+// equivalents that are not predicable (always executed). This function is
+// used to decode the `pred` operand of those ARM instructions, too.
+static DecodeStatus DecodePredicateOperand(MCInst &Inst,
+ const MCDisassembler *Decoder) {
+ const auto *D = static_cast<const ARMDisassembler *>(Decoder);
+ unsigned CC = ARMCC::AL;
+ if (D->getSubtargetInfo().hasFeature(ARM::ModeThumb))
+ CC = D->ITBlock.getITCC();
+ MCRegister CondReg = CC == ARMCC::AL ?
ARM::NoRegister : ARM::CPSR; + Inst.addOperand(MCOperand::createImm(CC)); + Inst.addOperand(MCOperand::createReg(CondReg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const MCDisassembler *Decoder) { @@ -1050,6 +1067,40 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) return MCDisassembler::Fail; break; + case ARM::t2LDC2L_OFFSET: + case ARM::t2LDC2L_OPTION: + case ARM::t2LDC2L_POST: + case ARM::t2LDC2L_PRE: + case ARM::t2LDC2_OFFSET: + case ARM::t2LDC2_OPTION: + case ARM::t2LDC2_POST: + case ARM::t2LDC2_PRE: + case ARM::t2LDCL_OFFSET: + case ARM::t2LDCL_OPTION: + case ARM::t2LDCL_POST: + case ARM::t2LDCL_PRE: + case ARM::t2LDC_OFFSET: + case ARM::t2LDC_OPTION: + case ARM::t2LDC_POST: + case ARM::t2LDC_PRE: + case ARM::t2STC2L_OFFSET: + case ARM::t2STC2L_OPTION: + case ARM::t2STC2L_POST: + case ARM::t2STC2L_PRE: + case ARM::t2STC2_OFFSET: + case ARM::t2STC2_OPTION: + case ARM::t2STC2_POST: + case ARM::t2STC2_PRE: + case ARM::t2STCL_OFFSET: + case ARM::t2STCL_OPTION: + case ARM::t2STCL_POST: + case ARM::t2STCL_PRE: + case ARM::t2STC_OFFSET: + case ARM::t2STC_OPTION: + case ARM::t2STC_POST: + case ARM::t2STC_PRE: + DecodePredicateOperand(Inst, Decoder); + break; default: break; } @@ -1217,6 +1268,8 @@ static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn, // the only available operand), but LLVM expects the instruction to have one // operand, so we need to add the csync when decoding. Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC)); + if (Inst.getOpcode() == ARM::t2TSB) + DecodePredicateOperand(Inst, Decoder); return MCDisassembler::Success; } @@ -1650,6 +1703,7 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, if(imm > 4) return MCDisassembler::Fail; Inst.setOpcode(ARM::t2HINT); Inst.addOperand(MCOperand::createImm(imm)); + DecodePredicateOperand(Inst, Decoder); } return S; @@ -1675,6 +1729,7 @@ DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, Inst.setOpcode(Opcode); if (Opcode == ARM::t2HINT) { Inst.addOperand(MCOperand::createImm(imm)); + DecodePredicateOperand(Inst, Decoder); } return MCDisassembler::Success; @@ -1702,6 +1757,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(imm)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -1906,6 +1962,7 @@ static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(imm32)); + DecodePredicateOperand(Inst, Decoder); return Status; } @@ -2231,6 +2288,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, break; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2502,6 +2560,7 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, break; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2605,6 +2664,7 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2654,6 +2714,7 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2690,6 +2751,7 @@ static DecodeStatus 
DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2743,6 +2805,7 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2789,6 +2852,7 @@ static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, break; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2861,6 +2925,7 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(8 << size)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2926,6 +2991,7 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -2951,6 +3017,7 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, } Inst.addOperand(MCOperand::createImm(imm)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3113,6 +3180,7 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, } Inst.addOperand(MCOperand::createImm(imm)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3197,6 +3265,7 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3341,6 +3410,7 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3449,6 +3519,7 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3488,6 +3559,7 @@ static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3678,6 +3750,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3690,6 +3763,7 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, Inst.addOperand(MCOperand::createReg(ARM::SP)); Inst.addOperand(MCOperand::createImm(imm)); + DecodePredicateOperand(Inst, Decoder); return MCDisassembler::Success; } @@ -3716,6 +3790,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, return MCDisassembler::Fail; } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -3840,6 +3915,7 @@ static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4305,6 +4381,7 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4370,6 +4447,7 @@ static DecodeStatus 
DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4437,6 +4515,7 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4500,6 +4579,7 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4570,6 +4650,7 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4633,6 +4714,7 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4714,6 +4796,7 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4786,6 +4869,7 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(index)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4904,6 +4988,7 @@ static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4939,6 +5024,7 @@ static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -4965,6 +5051,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, Val = -Val; } Inst.addOperand(MCOperand::createImm(Val)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -5062,6 +5149,7 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(64 - imm)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -5121,6 +5209,7 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; Inst.addOperand(MCOperand::createImm(64 - imm)); + DecodePredicateOperand(Inst, Decoder); return S; } @@ -5326,8 +5415,10 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { DecodeStatus S = MCDisassembler::Success; - if (Inst.getOpcode() == ARM::MVE_LCTP) + if (Inst.getOpcode() == ARM::MVE_LCTP) { + DecodePredicateOperand(Inst, Decoder); return S; + } unsigned Imm = fieldFromInstruction(Insn, 11, 1) | fieldFromInstruction(Insn, 1, 10) << 1; @@ -5372,6 +5463,7 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address, Check(S, MCDisassembler::SoftFail); // an SBZ bit is wrong: soft fail Inst.setOpcode(ARM::MVE_LCTP); + DecodePredicateOperand(Inst, Decoder); } else { Inst.addOperand(MCOperand::createReg(ARM::LR)); if (!Check(S, DecoderGPRRegisterClass(Inst, @@ -5762,6 +5854,7 @@ static DecodeStatus 
DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -5788,6 +5881,7 @@ static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); return S; } @@ -5833,6 +5927,8 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); + if (fieldFromInstruction (Insn, 6, 3) != 4) return MCDisassembler::SoftFail; @@ -5868,6 +5964,7 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, Inst.addOperand(MCOperand::createImm(Saturate)); } + DecodePredicateOperand(Inst, Decoder); return S; } @@ -5971,10 +6068,12 @@ static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, if (TypeT3) { Inst.setOpcode(sign1 ? ARM::t2SUBspImm12 : ARM::t2ADDspImm12); Inst.addOperand(MCOperand::createImm(Imm12)); // zext imm12 + DecodePredicateOperand(Inst, Decoder); } else { Inst.setOpcode(sign1 ? ARM::t2SUBspImm : ARM::t2ADDspImm); if (!Check(DS, DecodeT2SOImm(Inst, Imm12, Address, Decoder))) // imm12 return MCDisassembler::Fail; + DecodePredicateOperand(Inst, Decoder); if (!Check(DS, DecodeCCOutOperand(Inst, S, Address, Decoder))) // cc_out return MCDisassembler::Fail; } @@ -5994,7 +6093,7 @@ static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; // An optional predicate, '$p' in the assembly. - DecodePredicateOperand(Inst, ARMCC::AL, Address, Decoder); + DecodePredicateOperand(Inst, Decoder); // An immediate that represents a floating point registers list. '$regs' in // the assembly. Inst.addOperand(MCOperand::createImm(0)); // Arbitrary value, has no effect. @@ -6115,28 +6214,17 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size, return checkDecodedInstruction(MI, Size, Address, CS, Insn, Result); } - struct DecodeTable { - const uint8_t *P; - bool DecodePred; - }; - - const DecodeTable Tables[] = { - {DecoderTableVFP32, false}, {DecoderTableVFPV832, false}, - {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true}, - {DecoderTableNEONDup32, false}, {DecoderTablev8NEON32, false}, - {DecoderTablev8Crypto32, false}, + const uint8_t *Tables[] = { + DecoderTableVFP32, DecoderTableVFPV832, + DecoderTableNEONData32, DecoderTableNEONLoadStore32, + DecoderTableNEONDup32, DecoderTablev8NEON32, + DecoderTablev8Crypto32, }; - for (auto Table : Tables) { - Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI); + for (const uint8_t *Table : Tables) { + Result = decodeInstruction(Table, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. 
- if (Table.DecodePred && MCII->get(MI.getOpcode()).isPredicable()) { - MI.addOperand(MCOperand::createImm(ARMCC::AL)); - MI.addOperand(MCOperand::createReg(ARM::NoRegister)); - } return Result; } } @@ -6161,18 +6249,16 @@ bool ARMDisassembler::isVectorPredicable(const MCInst &MI) const { return false; } -// Most Thumb instructions don't have explicit predicates in the -// encoding, but rather get their predicates from IT context. We need -// to fix up the predicate operands using this context information as a -// post-pass. +// Most Thumb instructions don't have explicit predicates in the encoding, +// but rather get their predicates from IT context. Here, we check that the +// decoded instruction is allowed to have the decoded predicate and advance +// IT/VPT block states. MCDisassembler::DecodeStatus -ARMDisassembler::AddThumbPredicate(MCInst &MI) const { +ARMDisassembler::checkThumbPredicate(MCInst &MI) const { MCDisassembler::DecodeStatus S = Success; const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits(); - // A few instructions actually have predicates encoded in them. Don't - // try to overwrite it if we're seeing one of those. switch (MI.getOpcode()) { case ARM::tBcc: case ARM::t2Bcc: @@ -6218,34 +6304,10 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const { (isVectorPredicable(MI) && ITBlock.instrInITBlock())) S = SoftFail; - // If we're in an IT block, base the predicate on that. Otherwise, - // assume a predicate of AL. - unsigned CC = ARMCC::AL; - if (ITBlock.instrInITBlock()) { - CC = ITBlock.getITCC(); + if (ITBlock.instrInITBlock()) ITBlock.advanceITState(); - } else if (VPTBlock.instrInVPTBlock()) { + else if (VPTBlock.instrInVPTBlock()) VPTBlock.advanceVPTState(); - } - - const MCInstrDesc &MCID = MCII->get(MI.getOpcode()); - - MCInst::iterator CCI = MI.begin(); - for (unsigned i = 0; i < MCID.NumOperands; ++i, ++CCI) { - if (MCID.operands()[i].isPredicate() || CCI == MI.end()) - break; - } - - if (MCID.isPredicable()) { - CCI = MI.insert(CCI, MCOperand::createImm(CC)); - ++CCI; - if (CC == ARMCC::AL) - MI.insert(CCI, MCOperand::createReg(ARM::NoRegister)); - else - MI.insert(CCI, MCOperand::createReg(ARM::CPSR)); - } else if (CC != ARMCC::AL) { - Check(S, SoftFail); - } return S; } @@ -6307,7 +6369,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 2; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return Result; } @@ -6315,7 +6377,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, STI); if (Result) { Size = 2; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return Result; } @@ -6329,7 +6391,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock()) Result = MCDisassembler::SoftFail; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); // If we find an IT instruction, we need to parse its condition // code and mask operands so that we can apply them correctly @@ -6367,7 +6429,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, if (isVPTOpcode(MI.getOpcode()) && VPTBlock.instrInVPTBlock()) Result = MCDisassembler::SoftFail; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); if (isVPTOpcode(MI.getOpcode())) { unsigned Mask = 
MI.getOperand(0).getImm(); @@ -6381,7 +6443,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return Result; } @@ -6389,7 +6451,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return checkDecodedInstruction(MI, Size, Address, CS, Insn32, Result); } @@ -6428,7 +6490,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return Result; } } @@ -6442,7 +6504,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return Result; } @@ -6475,7 +6537,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTable, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; - Check(Result, AddThumbPredicate(MI)); + Check(Result, checkThumbPredicate(MI)); return Result; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 01fe13b..cc21844 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -428,7 +428,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, // signed 16bit range. if ((Kind == ARM::fixup_arm_movw_lo16 || Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 || Kind == ARM::fixup_t2_movt_hi16) && - (Addend < minIntN(16) || Addend > maxIntN(16))) { + !IsResolved && (Addend < minIntN(16) || Addend > maxIntN(16))) { Ctx.reportError(Fixup.getLoc(), "Relocation Not In Range"); return 0; } @@ -1238,7 +1238,7 @@ uint64_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( // Verify standard frame (lr/r7) was used. 
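The repeated Check(Result, checkThumbPredicate(MI)) calls above fold a secondary status into the primary one without losing a soft failure: the status values are chosen so that a bitwise AND always keeps the weaker of the two. A small model of that accumulation (the values mirror the usual Fail < SoftFail < Success encoding; this is an illustration, not the LLVM helper itself):

#include <cstdio>

enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3 };

// Keep the weaker of the two statuses; report whether decoding may continue.
static bool check(DecodeStatus &Out, DecodeStatus In) {
  Out = static_cast<DecodeStatus>(Out & In);
  return Out != Fail;
}

int main() {
  DecodeStatus Result = Success;
  check(Result, SoftFail);                       // Success & SoftFail -> SoftFail
  std::printf("%d\n", static_cast<int>(Result)); // prints 1
  return 0;
}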
if (CFARegister != ARM::R7) { DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is " - << CFARegister + << CFARegister.id() << " instead of r7\n"); return CU::UNWIND_ARM_MODE_DWARF; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index ca366ed..060d1f8 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -54,7 +54,7 @@ void ARMWinCOFFStreamer::emitWindowsUnwindTables() { } void ARMWinCOFFStreamer::finishImpl() { - emitFrames(nullptr); + emitFrames(); emitWindowsUnwindTables(); MCWinCOFFStreamer::finishImpl(); diff --git a/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/llvm/lib/Target/ARM/MLxExpansionPass.cpp index 8e1bf1d..eb237b4 100644 --- a/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -283,7 +283,7 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID1 = TII->get(MulOpc); const MCInstrDesc &MCID2 = TII->get(AddSubOpc); - Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); + Register TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0)); MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 4b8c2fd..01f588f 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -24,7 +24,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(STI) {} + : ARMBaseInstrInfo(STI, RI), RI(STI) {} /// Return the noop instruction to use for a noop. MCInst Thumb1InstrInfo::getNop() const { @@ -116,7 +116,6 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { assert((RC == &ARM::tGPRRegClass || @@ -142,10 +141,12 @@ void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } } -void Thumb1InstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void Thumb1InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) || (DestReg.isPhysical() && isARMLowRegister(DestReg))) && "Unknown regclass!"); diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 68b326c..289a30a 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -35,7 +35,7 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
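The constructor change above (ARMBaseInstrInfo(STI, RI)) hands the base class a reference to a member that has not been constructed yet, because base subobjects are initialized before members. That is well-defined only as long as the base merely binds the reference and does not read through it during construction. A hypothetical mini version of the idiom (stub types, not the real classes):

#include <cstdio>

struct ThumbRegisterInfoStub {
  int NumRegs = 16;
};

struct BaseInstrInfo {
  const ThumbRegisterInfoStub &BaseRI;
  // Binding only: dereferencing RI here would read an unconstructed object.
  explicit BaseInstrInfo(const ThumbRegisterInfoStub &RI) : BaseRI(RI) {}
};

struct Thumb1InstrInfoStub : BaseInstrInfo {
  ThumbRegisterInfoStub RI;
  Thumb1InstrInfoStub() : BaseInstrInfo(RI) {} // pass the member to the base
};

int main() {
  Thumb1InstrInfoStub TII;
  std::printf("%d\n", TII.BaseRI.NumRegs); // safe once construction is done
  return 0;
}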
/// - const ThumbRegisterInfo &getRegisterInfo() const override { return RI; } + const ThumbRegisterInfo &getRegisterInfo() const { return RI; } void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, @@ -43,14 +43,13 @@ public: bool RenamableSrc = false) const override; void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool canCopyGluedNodeDuringSchedule(SDNode *N) const override; diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index f5653d4..efb92c9 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -46,7 +46,7 @@ PreferNoCSEL("prefer-no-csel", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(STI) {} + : ARMBaseInstrInfo(STI, RI), RI(STI) {} /// Return the noop instruction to use for a noop. MCInst Thumb2InstrInfo::getNop() const { @@ -165,7 +165,6 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL; @@ -197,20 +196,22 @@ void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); - AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill)); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0); MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); return; } - ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI, + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, Register()); } -void Thumb2InstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -238,8 +239,8 @@ void Thumb2InstrInfo::loadRegFromStackSlot( } MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); - AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); - AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead); MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); if 
(DestReg.isPhysical())
@@ -247,8 +248,7 @@
 return;
 }
- ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI,
- Register());
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, Register());
 }
 void Thumb2InstrInfo::expandLoadStackGuard(
@@ -564,7 +564,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 bool isSub = false;
 MachineFunction &MF = *MI.getParent()->getParent();
- const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx, TRI);
+ const TargetRegisterClass *RegClass = TII.getRegClass(Desc, FrameRegIdx);
 // Memory operands in inline assembly always use AddrModeT2_i12.
 if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 1b0bf2d..1e11cb3 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -44,21 +44,20 @@ public:
 void storeRegToStackSlot(
 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
- bool isKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 void loadRegFromStackSlot(
 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
 int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ Register VReg,
 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
 /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
 /// such, whenever a client has an instance of instruction info, it should
 /// always be able to get register info as well (through this method).
 ///
- const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const { return RI; }
 MachineInstr *optimizeSelect(MachineInstr &MI,
 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 12875c2..85e705d 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -66,8 +66,8 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
 const TargetInstrInfo &TII = *STI.getInstrInfo();
 MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
+ const Constant *C = ConstantInt::getSigned(
+ Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
@@ -85,8 +85,8 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 MachineFunction &MF = *MBB.getParent();
 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
+ const Constant *C = ConstantInt::getSigned(
+ Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
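The final hunks switch emitThumb1LoadConstPool and emitThumb2LoadConstPool from ConstantInt::get to ConstantInt::getSigned. The distinction matters when Val is negative: the constant must be built by sign-extending the value into i32, rather than by treating its 64-bit image as an out-of-range unsigned number. A plain-C++ illustration of the underlying fits-check (an interpretation of the change, not LLVM code):

#include <cstdint>
#include <cstdio>

// -4 is representable in a signed 32-bit constant, but its uint64_t image
// (0xFFFFFFFFFFFFFFFC) does not fit 32 unsigned bits, so the signed entry
// point is the correct way to build the constant.
static bool fitsUnsigned32(uint64_t V) { return V <= UINT32_MAX; }
static bool fitsSigned32(int64_t V) { return V >= INT32_MIN && V <= INT32_MAX; }

int main() {
  int64_t Val = -4;
  std::printf("unsigned fit: %d, signed fit: %d\n",
              (int)fitsUnsigned32(static_cast<uint64_t>(Val)),
              (int)fitsSigned32(Val));
  return 0;
}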
