author    Simon Tatham <simon.tatham@arm.com>    2019-05-28 16:13:20 +0000
committer Simon Tatham <simon.tatham@arm.com>    2019-05-28 16:13:20 +0000
commit    760df47b778a530e9368a4b8706940ba103d57ba (patch)
tree      9a13dc0ad9e5f288ea4c1f02bd4d02e3753de989 /llvm/lib/Target
parent    448a79d123f34ed873174102bdfb4ab936c7fe36 (diff)
[ARM] Replace fp-only-sp and d16 with fp64 and d32.
Those two subtarget features were awkward because their semantics are reversed: each one indicates the _lack_ of support for something in the architecture, rather than the presence. As a consequence, you don't get the behavior you want if you combine two sets of feature bits.

Each SubtargetFeature for an FP architecture version now comes in four versions, one for each combination of those options. So you can still say (for example) '+vfp2' in a feature string and it will mean what it's always meant, but there's a new string '+vfp2d16sp' meaning the version without those extra options.

A lot of this change is just mechanically replacing positive checks for the old features with negative checks for the new ones. But one more interesting change is that I've rearranged getFPUFeatures() so that the main FPU feature is appended to the output list *before* rather than after the features derived from the Restriction field, so that -fp64 and -d32 can override defaults added by the main feature.

Reviewers: dmgreen, samparker, SjoerdMeijer

Subscribers: srhines, javed.absar, eraman, kristof.beyls, hiraditya, zzheng, Petar.Avramovic, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D60691

llvm-svn: 361845
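For reference, here is a rough sketch of the four features the new VFPver multiclass (see the ARM.td hunk below) produces for the plain VFPv2 case, written out as standalone defs. This is an illustration read off the multiclass body, not verbatim TableGen output; the real records come from defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">, where prev and otherimplies are both empty.

  // Sketch only: approximate expansion of the vfp2 defm, for illustration.
  def FeatureVFP2_D16_SP : SubtargetFeature<"vfp2d16sp", "HasVFPv2D16SP", "true",
      "Enable VFP2 instructions with only 16 d-registers and no double precision">;
  def FeatureVFP2_SP     : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true",
      "Enable VFP2 instructions with no double precision",
      [FeatureD32, FeatureVFP2_D16_SP]>;
  def FeatureVFP2_D16    : SubtargetFeature<"vfp2d16", "HasVFPv2D16", "true",
      "Enable VFP2 instructions with only 16 d-registers",
      [FeatureFP64, FeatureVFP2_D16_SP]>;
  def FeatureVFP2        : SubtargetFeature<"vfp2", "HasVFPv2", "true",
      "Enable VFP2 instructions",
      [FeatureVFP2_D16, FeatureVFP2_SP]>;

So '+vfp2' keeps its historical meaning (double precision and all 32 d-registers, via the implied _D16 and _SP variants and hence FeatureFP64 and FeatureD32), while '+vfp2d16sp' selects only the fully restricted base variant.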
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/ARM/ARM.td                                86
-rw-r--r--  llvm/lib/Target/ARM/ARMAsmPrinter.cpp                      4
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp                   9
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp                4
-rw-r--r--  llvm/lib/Target/ARM/ARMFastISel.cpp                       40
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp                    6
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp                   70
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td                       24
-rw-r--r--  llvm/lib/Target/ARM/ARMInstructionSelector.cpp            11
-rw-r--r--  llvm/lib/Target/ARM/ARMLegalizerInfo.cpp                   4
-rw-r--r--  llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp                2
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.h                        35
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h               2
-rw-r--r--  llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp            31
-rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp       4
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp    46
16 files changed, 203 insertions, 175 deletions
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 48eba22..20a61d3 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -32,12 +32,40 @@ def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
//
// Floating Point, HW Division and Neon Support
-def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
- "Enable VFP2 instructions">;
+def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true",
+ "Floating point unit supports "
+ "double precision">;
+
+def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
+ "Extend FP to 32 double registers">;
+
+multiclass VFPver<string name, string query, string description,
+ list<SubtargetFeature> prev = [],
+ list<SubtargetFeature> otherimplies = []> {
+ def _D16_SP: SubtargetFeature<
+ name#"d16sp", query#"D16SP", "true",
+ description#" with only 16 d-registers and no double precision",
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>;
+ def _SP: SubtargetFeature<
+ name#"sp", query#"SP", "true",
+ description#" with no double precision",
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) #
+ otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+ def _D16: SubtargetFeature<
+ name#"d16", query#"D16", "true",
+ description#" with only 16 d-registers",
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) #
+ otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+ def "": SubtargetFeature<
+ name, query, "true", description,
+ prev # otherimplies # [
+ !cast<SubtargetFeature>(NAME # "_D16"),
+ !cast<SubtargetFeature>(NAME # "_SP")]>;
+}
-def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
- "Enable VFP3 instructions",
- [FeatureVFP2]>;
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">;
+defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
+ [FeatureVFP2]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
@@ -47,31 +75,22 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision "
"floating point">;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3, FeatureFP16]>;
+defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
+ [FeatureVFP3], [FeatureFP16]>;
-def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
- "true", "Enable ARMv8 FP",
- [FeatureVFP4]>;
+defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
+ [FeatureVFP4]>;
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Enable full half-precision "
"floating point",
- [FeatureFPARMv8]>;
+ [FeatureFPARMv8_D16_SP]>;
def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
"Enable full half-precision "
"floating point fml instructions",
[FeatureFullFP16]>;
-def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
- "Floating point unit supports "
- "single precision only">;
-
-def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict FP to 16 double registers">;
-
def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
"HasHardwareDivideInThumb", "true",
"Enable divide instructions in Thumb">;
@@ -943,14 +962,12 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureHasRetAddrStack,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
@@ -958,8 +975,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureHasRetAddrStack,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureFP16,
FeatureMP,
FeatureSlowFPBrcc,
@@ -969,8 +985,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasRetAddrStack,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureFP16,
FeatureMP,
FeatureSlowFPBrcc,
@@ -991,10 +1006,8 @@ def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m,
FeatureUseAA,
FeatureHasNoBranchPredictor]>;
-def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
- FeatureVFP4,
- FeatureVFPOnlySP,
- FeatureD16,
+def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
+ FeatureVFP4_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
FeatureUseMISched,
@@ -1002,17 +1015,14 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
- FeatureFPARMv8,
- FeatureD16]>;
+ FeatureFPARMv8_D16]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;
def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
- FeatureFPARMv8,
- FeatureD16,
- FeatureVFPOnlySP,
+ FeatureFPARMv8_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
FeatureUseMISched,
@@ -1021,9 +1031,7 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
- FeatureFPARMv8,
- FeatureD16,
- FeatureVFPOnlySP,
+ FeatureFPARMv8_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
FeatureUseMISched,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 239b95f..6bede80 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -657,7 +657,7 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::IEEEDenormals);
else {
- if (!STI.hasVFP2()) {
+ if (!STI.hasVFP2Base()) {
// When the target doesn't have an FPU (by design or
// intention), the assumptions made on the software support
// mirror that of the equivalent hardware support *if it
@@ -667,7 +667,7 @@ void ARMAsmPrinter::emitAttributes() {
if (STI.hasV7Ops())
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::PreserveFPSign);
- } else if (STI.hasVFP3()) {
+ } else if (STI.hasVFP3Base()) {
// In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
// the sign bit of the zero matches the sign bit of the input or
// result that is being flushed to zero.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 22c53d9..fbef5d7 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -133,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -830,7 +830,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
@@ -890,7 +890,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
- } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
+ !Subtarget.hasFP64()) {
Opc = ARM::VMOVS;
BeginIdx = ARM::ssub_0;
SubRegs = 2;
@@ -1481,7 +1482,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
+ if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
return false;
// Look for a copy between even S-registers. That is where we keep floats
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 1870e4c..96200a0 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -149,7 +149,7 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
const uint32_t *
ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+ if (!STI.useSoftFloat() && STI.hasVFP2Base() && !STI.isThumb1Only())
return CSR_NoRegs_RegMask;
else
return CSR_FPRegs_RegMask;
@@ -193,7 +193,7 @@ getReservedRegs(const MachineFunction &MF) const {
if (STI.isR9Reserved())
markSuperRegs(Reserved, ARM::R9);
// Reserve D16-D31 if the subtarget doesn't support them.
- if (!STI.hasVFP3() || STI.hasD16()) {
+ if (!STI.hasD32()) {
static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
for (unsigned R = 0; R < 16; ++R)
markSuperRegs(Reserved, ARM::D16 + R);
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index cd01b70..6e274d26 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -441,7 +441,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
}
// Require VFP2 for loading fp constants.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
// MachineConstantPool wants an explicit alignment.
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
@@ -969,7 +969,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::f32:
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
// Unaligned loads need special handling. Floats require word-alignment.
if (Alignment && Alignment < 4) {
needVMOV = true;
@@ -982,7 +982,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
}
break;
case MVT::f64:
- if (!Subtarget->hasVFP2()) return false;
+ // Can load and store double precision even without FeatureFP64
+ if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned loads need special handling. Doublewords require
// word-alignment.
if (Alignment && Alignment < 4)
@@ -1107,7 +1108,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::f32:
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
// Unaligned stores need special handling. Floats require word-alignment.
if (Alignment && Alignment < 4) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
@@ -1122,7 +1123,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::f64:
- if (!Subtarget->hasVFP2()) return false;
+ // Can load and store double precision even without FeatureFP64
+ if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned stores need special handling. Doublewords require
// word-alignment.
if (Alignment && Alignment < 4)
@@ -1353,10 +1355,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
return false;
- if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
return false;
// Check to see if the 2nd operand is a constant that we can encode directly
@@ -1506,7 +1508,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
bool ARMFastISel::SelectFPExt(const Instruction *I) {
// Make sure we have VFP and that we're extending float to double.
- if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+ if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() ||
@@ -1525,7 +1527,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
// Make sure we have VFP and that we're truncating double to float.
- if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+ if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
Value *V = I->getOperand(0);
if (!(I->getType()->isFloatTy() &&
@@ -1544,7 +1546,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
MVT DstVT;
Type *Ty = I->getType();
@@ -1576,7 +1578,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
- else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+ else if (Ty->isDoubleTy() && Subtarget->hasFP64())
Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
@@ -1589,7 +1591,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
MVT DstVT;
Type *RetTy = I->getType();
@@ -1602,7 +1604,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
- else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+ else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
@@ -1808,9 +1810,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
Type *Ty = I->getType();
- if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
return false;
- if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
return false;
unsigned Opc;
@@ -1852,7 +1854,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::Fast:
- if (Subtarget->hasVFP2() && !isVarArg) {
+ if (Subtarget->hasVFP2Base() && !isVarArg) {
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
// For AAPCS ABI targets, just use VFP variant of the calling convention.
@@ -1863,7 +1865,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() &&
+ if (Subtarget->hasVFP2Base() &&
TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
@@ -1932,11 +1934,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case MVT::i32:
break;
case MVT::f32:
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
break;
case MVT::f64:
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
break;
}
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index cb66d16..492c83c 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -4043,9 +4043,9 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
// If an opcode was found then we can lower the read to a VFP instruction.
if (Opcode) {
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
- if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+ if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
return false;
Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
@@ -4154,7 +4154,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
.Default(0);
if (Opcode) {
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
Ops = { N->getOperand(2), getAL(CurDAG, DL),
CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 88d318e..7dd2fef 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -241,7 +241,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
static const struct {
const RTLIB::Libcall Op;
@@ -510,7 +510,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
- if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
@@ -698,7 +698,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
// NEON only has FMA instructions as of VFP4.
- if (!Subtarget->hasVFP4()) {
+ if (!Subtarget->hasVFP4Base()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
@@ -732,7 +732,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->isFPOnlySP()) {
+ if (!Subtarget->hasFP64()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present However, no double-precision operations other than moves,
@@ -1030,7 +1030,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
!Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
@@ -1080,7 +1080,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
@@ -1088,7 +1088,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
- if (!Subtarget->hasVFP4()) {
+ if (!Subtarget->hasVFP4Base()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
}
@@ -1096,7 +1096,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Various VFP goodness
if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
- if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
+ if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
@@ -1116,7 +1116,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
// FP-ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasFPARMv8()) {
+ if (Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
@@ -1130,7 +1130,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
- if (!Subtarget->isFPOnlySP()) {
+ if (Subtarget->hasFP64()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
@@ -1202,7 +1202,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setStackPointerRegisterToSaveRestore(ARM::SP);
if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
- !Subtarget->hasVFP2() || Subtarget->hasMinSize())
+ !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
@@ -1637,7 +1637,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
- else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
+ else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
@@ -1646,10 +1646,11 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::Fast:
case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
- } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ } else if (Subtarget->hasVFP2Base() &&
+ !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
@@ -3912,7 +3913,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool InvalidOnQNaN) const {
- assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
+ assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
@@ -4225,7 +4226,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
- if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+ if (!Subtarget->hasFP64() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
@@ -4474,7 +4475,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
@@ -4497,9 +4498,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
- if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f16 ||
- TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
+ TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
@@ -4522,7 +4523,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
// must use VSEL (limited condition codes), due to not having conditional f16
// moves.
- if (Subtarget->hasFPARMv8() &&
+ if (Subtarget->hasFPARMv8Base() &&
!(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
(TrueVal.getValueType() == MVT::f16 ||
TrueVal.getValueType() == MVT::f32 ||
@@ -4715,7 +4716,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
- if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
@@ -4862,7 +4863,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+ if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -4926,7 +4927,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
- if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+ if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
@@ -5909,12 +5910,12 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
}
}
- if (!ST->hasVFP3())
+ if (!ST->hasVFP3Base())
return SDValue();
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU
- if (IsDouble && Subtarget->isFPOnlySP())
+ if (IsDouble && !Subtarget->hasFP64())
return SDValue();
// Try splatting with a VMOV.f32...
@@ -11356,7 +11357,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
@@ -13303,7 +13304,7 @@ static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U);
// VLDR and LDRD: 4 * imm8
- if ((VT.isFloatingPoint() && Subtarget->hasVFP2()) || NumBytes == 8)
+ if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
return isShiftedUInt<8, 2>(V);
if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
@@ -13347,7 +13348,7 @@ static bool isLegalAddressImmediate(int64_t V, EVT VT,
return isUInt<8>(V);
case MVT::f32:
case MVT::f64:
- if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
return false;
return isShiftedUInt<8, 2>(V);
}
@@ -13910,7 +13911,7 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
@@ -14392,7 +14393,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
}
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+ assert(Op.getValueType() == MVT::f64 && !Subtarget->hasFP64() &&
"Unexpected type for custom-lowering FP_EXTEND");
RTLIB::Libcall LC;
@@ -14404,8 +14405,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getOperand(0).getValueType() == MVT::f64 &&
- Subtarget->isFPOnlySP() &&
+ assert(Op.getOperand(0).getValueType() == MVT::f64 && !Subtarget->hasFP64() &&
"Unexpected type for custom-lowering FP_ROUND");
RTLIB::Libcall LC;
@@ -14468,13 +14468,13 @@ bool ARM::isBitFieldInvertedMask(unsigned v) {
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- if (!Subtarget->hasVFP3())
+ if (!Subtarget->hasVFP3Base())
return false;
if (VT == MVT::f16 && Subtarget->hasFullFP16())
return ARM_AM::getFP16Imm(Imm) != -1;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
- if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
+ if (VT == MVT::f64 && Subtarget->hasFP64())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f55e73a..d0821b9 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -258,18 +258,18 @@ def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
- AssemblerPredicate<"FeatureVFP2", "VFP2">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
- AssemblerPredicate<"FeatureVFP3", "VFP3">;
-def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
- AssemblerPredicate<"FeatureVFP4", "VFP4">;
-def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">,
- AssemblerPredicate<"!FeatureVFPOnlySP",
+def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
+ AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">,
+ AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">,
+ AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
+def HasDPVFP : Predicate<"Subtarget->hasFP64()">,
+ AssemblerPredicate<"FeatureFP64",
"double precision VFP">;
-def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
- AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8Base()">,
+ AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "NEON">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
@@ -371,7 +371,7 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Do not use them for Darwin platforms.
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast && "
- " Subtarget->hasVFP4()) && "
+ " Subtarget->hasVFP4Base()) && "
"!Subtarget->isTargetDarwin() &&"
"Subtarget->useFPVMLx()">;
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index b97924c..4485a47 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -232,7 +232,7 @@ static bool selectMergeValues(MachineInstrBuilder &MIB,
MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
+ assert(TII.getSubtarget().hasVFP2Base() && "Can't select merge without VFP");
// We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
// into one DPR.
@@ -263,7 +263,8 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
+ assert(TII.getSubtarget().hasVFP2Base() &&
+ "Can't select unmerge without VFP");
// We only support G_UNMERGE_VALUES as a way to break up one DPR into two
// GPRs.
@@ -1036,12 +1037,12 @@ bool ARMInstructionSelector::select(MachineInstr &I,
return selectCmp(Helper, MIB, MRI);
}
case G_FCMP: {
- assert(STI.hasVFP2() && "Can't select fcmp without VFP");
+ assert(STI.hasVFP2Base() && "Can't select fcmp without VFP");
unsigned OpReg = I.getOperand(2).getReg();
unsigned Size = MRI.getType(OpReg).getSizeInBits();
- if (Size == 64 && STI.isFPOnlySP()) {
+ if (Size == 64 && !STI.hasFP64()) {
LLVM_DEBUG(dbgs() << "Subtarget only supports single precision");
return false;
}
@@ -1087,7 +1088,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
LLT ValTy = MRI.getType(Reg);
const auto ValSize = ValTy.getSizeInBits();
- assert((ValSize != 64 || STI.hasVFP2()) &&
+ assert((ValSize != 64 || STI.hasVFP2Base()) &&
"Don't know how to load/store 64-bit value without VFP");
const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 8f20293..458cafd 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -157,7 +157,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
- if (!ST.useSoftFloat() && ST.hasVFP2()) {
+ if (!ST.useSoftFloat() && ST.hasVFP2Base()) {
getActionDefinitionsBuilder(
{G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG})
.legalFor({s32, s64});
@@ -208,7 +208,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
.libcallForCartesianProduct({s32, s64}, {s32});
}
- if (!ST.useSoftFloat() && ST.hasVFP4())
+ if (!ST.useSoftFloat() && ST.hasVFP4Base())
getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64});
else
getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64});
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index d03b482..4566ac2 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -453,7 +453,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
for (const auto &Mapping : OperandsMapping[i]) {
assert(
(Mapping.RegBank->getID() != ARM::FPRRegBankID ||
- MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+ MF.getSubtarget<ARMSubtarget>().hasVFP2Base()) &&
"Trying to use floating point register bank on target without vfp");
}
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 9500a9f..abedc6f 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -166,6 +166,21 @@ protected:
bool HasFPARMv8 = false;
bool HasNEON = false;
+ /// Versions of the VFP flags restricted to single precision, or to
+ /// 16 d-registers, or both.
+ bool HasVFPv2SP = false;
+ bool HasVFPv3SP = false;
+ bool HasVFPv4SP = false;
+ bool HasFPARMv8SP = false;
+ bool HasVFPv2D16 = false;
+ bool HasVFPv3D16 = false;
+ bool HasVFPv4D16 = false;
+ bool HasFPARMv8D16 = false;
+ bool HasVFPv2D16SP = false;
+ bool HasVFPv3D16SP = false;
+ bool HasVFPv4D16SP = false;
+ bool HasFPARMv8D16SP = false;
+
/// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
bool HasDotProd = false;
@@ -232,9 +247,9 @@ protected:
/// HasFP16FML - True if subtarget supports half-precision FP fml operations
bool HasFP16FML = false;
- /// HasD16 - True if subtarget is limited to 16 double precision
+ /// HasD32 - True if subtarget has the full 32 double precision
/// FP registers for VFPv3.
- bool HasD16 = false;
+ bool HasD32 = false;
/// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
bool HasHardwareDivideInThumb = false;
@@ -291,9 +306,9 @@ protected:
/// extension.
bool HasVirtualization = false;
- /// FPOnlySP - If true, the floating point unit only supports single
+ /// HasFP64 - If true, the floating point unit supports double
/// precision.
- bool FPOnlySP = false;
+ bool HasFP64 = false;
/// If true, the processor supports the Performance Monitor Extensions. These
/// include a generic cycle-counter as well as more fine-grained (often
@@ -569,10 +584,10 @@ public:
bool hasARMOps() const { return !NoARM; }
- bool hasVFP2() const { return HasVFPv2; }
- bool hasVFP3() const { return HasVFPv3; }
- bool hasVFP4() const { return HasVFPv4; }
- bool hasFPARMv8() const { return HasFPARMv8; }
+ bool hasVFP2Base() const { return HasVFPv2D16SP; }
+ bool hasVFP3Base() const { return HasVFPv3D16SP; }
+ bool hasVFP4Base() const { return HasVFPv4D16SP; }
+ bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
bool hasNEON() const { return HasNEON; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
@@ -601,7 +616,7 @@ public:
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
- bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasFP64() const { return HasFP64; }
bool hasPerfMon() const { return HasPerfMon; }
bool hasTrustZone() const { return HasTrustZone; }
bool has8MSecExt() const { return Has8MSecExt; }
@@ -638,7 +653,7 @@ public:
bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
- bool hasD16() const { return HasD16; }
+ bool hasD32() const { return HasD32; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasFP16FML() const { return HasFP16FML; }
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 2fbcd8b..882a63c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -48,7 +48,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *TLI;
// Currently the following features are excluded from InlineFeatureWhitelist.
- // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
+ // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
// Depending on whether they are set or unset, different
// instructions/registers are available. For example, inlining a callee with
// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f4af747..f8a00f7 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -492,8 +492,8 @@ class ARMAsmParser : public MCTargetAsmParser {
return getSTI().getFeatureBits()[ARM::FeatureDSP];
}
- bool hasD16() const {
- return getSTI().getFeatureBits()[ARM::FeatureD16];
+ bool hasD32() const {
+ return getSTI().getFeatureBits()[ARM::FeatureD32];
}
bool hasV8_1aOps() const {
@@ -3424,7 +3424,7 @@ int ARMAsmParser::tryParseRegister() {
}
// Some FPUs only have 16 D registers, so D16-D31 are invalid
- if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+ if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
return -1;
Parser.Lex(); // Eat identifier token.
@@ -10415,11 +10415,11 @@ ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) {
: "operand must be a register in range [r0, r12] or r14";
// DPR contains 16 registers for some FPUs, and 32 for others.
case Match_DPR:
- return hasD16() ? "operand must be a register in range [d0, d15]"
- : "operand must be a register in range [d0, d31]";
+ return hasD32() ? "operand must be a register in range [d0, d31]"
+ : "operand must be a register in range [d0, d15]";
case Match_DPR_RegList:
- return hasD16() ? "operand must be a list of registers in range [d0, d15]"
- : "operand must be a list of registers in range [d0, d31]";
+ return hasD32() ? "operand must be a list of registers in range [d0, d31]"
+ : "operand must be a list of registers in range [d0, d15]";
// For all other diags, use the static string from tablegen.
default:
@@ -10621,14 +10621,15 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
{ ARM::AEK_CRC, {Feature_HasV8Bit}, {ARM::FeatureCRC} },
{ ARM::AEK_CRYPTO, {Feature_HasV8Bit},
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- { ARM::AEK_FP, {Feature_HasV8Bit}, {ARM::FeatureFPARMv8} },
+ { ARM::AEK_FP, {Feature_HasV8Bit},
+ {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
{ (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM),
{Feature_HasV7Bit, Feature_IsNotMClassBit},
{ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit},
{ARM::FeatureMP} },
{ ARM::AEK_SIMD, {Feature_HasV8Bit},
- {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+ {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
{ ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} },
// FIXME: Only available in A-class, isel not predicated
{ ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} },
@@ -10678,12 +10679,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
"allowed for the current base architecture");
MCSubtargetInfo &STI = copySTI();
- FeatureBitset ToggleFeatures = EnableFeature
- ? (~STI.getFeatureBits() & Extension.Features)
- : ( STI.getFeatureBits() & Extension.Features);
-
- FeatureBitset Features =
- ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ if (EnableFeature) {
+ STI.SetFeatureBitsTransitively(Extension.Features);
+ } else {
+ STI.ClearFeatureBitsTransitively(Extension.Features);
+ }
+ FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
setAvailableFeatures(Features);
return false;
}
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index d4b2be7..6948f7a 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1043,9 +1043,9 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
const FeatureBitset &featureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
- bool hasD16 = featureBits[ARM::FeatureD16];
+ bool hasD32 = featureBits[ARM::FeatureD32];
- if (RegNo > 31 || (hasD16 && RegNo > 15))
+ if (RegNo > 31 || (!hasD32 && RegNo > 15))
return MCDisassembler::Fail;
unsigned Register = DPRDecoderTable[RegNo];
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 8f9c665..9502a5d 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -222,37 +222,37 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
? ARMBuildAttrs::AllowNeonARMv8_1a
: ARMBuildAttrs::AllowNeonARMv8);
} else {
- if (STI.hasFeature(ARM::FeatureFPARMv8))
+ if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP))
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
// FPU, but there are two different names for it depending on the CPU.
- emitFPU(STI.hasFeature(ARM::FeatureD16)
- ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
- : ARM::FK_FPV5_D16)
- : ARM::FK_FP_ARMV8);
- else if (STI.hasFeature(ARM::FeatureVFP4))
- emitFPU(STI.hasFeature(ARM::FeatureD16)
- ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
- : ARM::FK_VFPV4_D16)
- : ARM::FK_VFPV4);
- else if (STI.hasFeature(ARM::FeatureVFP3))
+ emitFPU(STI.hasFeature(ARM::FeatureD32)
+ ? ARM::FK_FP_ARMV8
+ : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
+ : ARM::FK_FPV5_SP_D16));
+ else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
+ emitFPU(STI.hasFeature(ARM::FeatureD32)
+ ? ARM::FK_VFPV4
+ : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16
+ : ARM::FK_FPV4_SP_D16));
+ else if (STI.hasFeature(ARM::FeatureVFP3_D16_SP))
emitFPU(
- STI.hasFeature(ARM::FeatureD16)
- // +d16
- ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
- ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
- : ARM::FK_VFPV3XD)
- : (STI.hasFeature(ARM::FeatureFP16)
+ STI.hasFeature(ARM::FeatureD32)
+ // +d32
+ ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+ : ARM::FK_VFPV3)
+ // -d32
+ : (STI.hasFeature(ARM::FeatureFP64)
+ ? (STI.hasFeature(ARM::FeatureFP16)
? ARM::FK_VFPV3_D16_FP16
- : ARM::FK_VFPV3_D16))
- // -d16
- : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
- : ARM::FK_VFPV3));
- else if (STI.hasFeature(ARM::FeatureVFP2))
+ : ARM::FK_VFPV3_D16)
+ : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+ : ARM::FK_VFPV3XD)));
+ else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP))
emitFPU(ARM::FK_VFPV2);
}
// ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+ if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64))
emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
ARMBuildAttrs::HardFPSinglePrecision);