Diffstat (limited to 'llvm/lib/Target/RISCV')
 llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp |  10
 llvm/lib/Target/RISCV/RISCVCallingConv.cpp        |  14
 llvm/lib/Target/RISCV/RISCVCallingConv.h          |   6
 llvm/lib/Target/RISCV/RISCVFeatures.td            |  12
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp       |  12
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td         |  37
 llvm/lib/Target/RISCV/RISCVMacroFusion.td         |  56
 llvm/lib/Target/RISCV/RISCVProcessors.td          |   7
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp      |  25
9 files changed, 138 insertions, 41 deletions
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index d2b75a6..34026ed 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -45,8 +45,8 @@ public:
                  CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info,
                  ISD::ArgFlagsTy Flags, CCState &State) override {
-    if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, Info.IsFixed,
-                      IsRet, Info.Ty))
+    if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, IsRet,
+                      Info.Ty))
       return true;
 
     StackSize = State.getStackSize();
@@ -196,8 +196,8 @@ public:
     if (LocVT.isScalableVector())
       MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
 
-    if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State,
-                      /*IsFixed=*/true, IsRet, Info.Ty))
+    if (RISCVAssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State, IsRet,
+                      Info.Ty))
       return true;
 
     StackSize = State.getStackSize();
@@ -454,7 +454,7 @@ bool RISCVCallLowering::canLowerReturn(MachineFunction &MF,
   for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
     MVT VT = MVT::getVT(Outs[I].Ty);
     if (CC_RISCV(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo,
-                 /*IsFixed=*/true, /*isRet=*/true, nullptr))
+                 /*isRet=*/true, nullptr))
       return false;
   }
   return true;
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index cb6117e..70127e3 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -324,7 +324,7 @@ static MCRegister allocateRVVReg(MVT ValVT, unsigned ValNo, CCState &State,
 // Implements the RISC-V calling convention. Returns true upon failure.
 bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                    CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
+                    CCState &State, bool IsRet, Type *OrigTy) {
   const MachineFunction &MF = State.getMachineFunction();
   const DataLayout &DL = MF.getDataLayout();
   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
@@ -379,12 +379,12 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
     break;
   case RISCVABI::ABI_ILP32F:
   case RISCVABI::ABI_LP64F:
-    UseGPRForF16_F32 = !IsFixed;
+    UseGPRForF16_F32 = ArgFlags.isVarArg();
     break;
   case RISCVABI::ABI_ILP32D:
   case RISCVABI::ABI_LP64D:
-    UseGPRForF16_F32 = !IsFixed;
-    UseGPRForF64 = !IsFixed;
+    UseGPRForF16_F32 = ArgFlags.isVarArg();
+    UseGPRForF64 = ArgFlags.isVarArg();
     break;
   }
@@ -465,7 +465,7 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
   // currently if we are using ILP32E calling convention. This behavior may be
   // changed when RV32E/ILP32E is ratified.
   unsigned TwoXLenInBytes = (2 * XLen) / 8;
-  if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
+  if (ArgFlags.isVarArg() && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
       ABI != RISCVABI::ABI_ILP32E) {
     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
@@ -620,8 +620,8 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
 // benchmark. But theoretically, it may have benefit for some cases.
 bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
-                           ISD::ArgFlagsTy ArgFlags, CCState &State,
-                           bool IsFixed, bool IsRet, Type *OrigTy) {
+                           ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet,
+                           Type *OrigTy) {
   const MachineFunction &MF = State.getMachineFunction();
   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
   const RISCVTargetLowering &TLI = *Subtarget.getTargetLowering();
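
For context on the signature change above: the separate IsFixed parameter is dropped and whether an argument belongs to the variadic part of a call is now read from ISD::ArgFlagsTy. Below is a minimal sketch of a caller written against the new RISCVCCAssignFn signature; CC_RISCV_Example is a hypothetical helper, not code from this change, and it assumes it lives inside the RISC-V backend where these headers and CC_RISCV are visible.

// Minimal sketch (assumption, not part of the patch): driving the new
// signature. The vararg-ness of each argument travels in ArgFlags, so no
// separate IsFixed flag is threaded through anymore.
#include "RISCVCallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

static bool CC_RISCV_Example(unsigned ValNo, MVT ValVT, MVT LocVT,
                             CCValAssign::LocInfo LocInfo,
                             ISD::ArgFlagsTy ArgFlags, CCState &State,
                             bool IsRet, Type *OrigTy) {
  if (ArgFlags.isVarArg()) {
    // Under the F/D ABIs the change above sends variadic FP arguments to
    // GPRs; any vararg-specific handling would hook in here.
  }
  return CC_RISCV(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, IsRet, OrigTy);
}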
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.h b/llvm/lib/Target/RISCV/RISCVCallingConv.h
index bf823b7..2030ce1 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.h
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.h
@@ -21,15 +21,15 @@ namespace llvm {
 typedef bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT,
                              CCValAssign::LocInfo LocInfo,
                              ISD::ArgFlagsTy ArgFlags, CCState &State,
-                             bool IsFixed, bool IsRet, Type *OrigTy);
+                             bool IsRet, Type *OrigTy);
 
 bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
               CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-              CCState &State, bool IsFixed, bool IsRet, Type *OrigTy);
+              CCState &State, bool IsRet, Type *OrigTy);
 
 bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy);
+                     CCState &State, bool IsRet, Type *OrigTy);
 
 bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 171940e..a7329d2 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1700,6 +1700,18 @@ def TuneNLogNVRGather
 def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
     "UsePostRAScheduler", "true", "Schedule again after register allocation">;
 
+def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering",
+    "EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">;
+
+def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering",
+    "EnableMISchedStoreClustering", "false", "Disable store clustering in the machine scheduler">;
+
+def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering",
+    "EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">;
+
+def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering",
+    "EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">;
+
 def TuneDisableLatencySchedHeuristic
     : SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
                        "Disable latency scheduling heuristic">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0077ecf..e4aa8b8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22282,8 +22282,8 @@ void RISCVTargetLowering::analyzeInputArgs(
     else if (In.isOrigArg())
       ArgTy = FType->getParamType(In.getOrigArgIndex());
 
-    if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
-           /*IsFixed=*/true, IsRet, ArgTy)) {
+    if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
+           ArgTy)) {
       LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
                         << ArgVT << '\n');
       llvm_unreachable(nullptr);
@@ -22300,8 +22300,8 @@ void RISCVTargetLowering::analyzeOutputArgs(
     ISD::ArgFlagsTy ArgFlags = Out.Flags;
     Type *OrigTy = CLI ? CLI->getArgs()[Out.OrigArgIndex].Ty : nullptr;
 
-    if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Out.IsFixed,
-           IsRet, OrigTy)) {
+    if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
+           OrigTy)) {
       LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
                         << ArgVT << "\n");
       llvm_unreachable(nullptr);
@@ -23083,7 +23083,7 @@ bool RISCVTargetLowering::CanLowerReturn(
     MVT VT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
-                 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
+                 /*IsRet=*/true, nullptr))
       return false;
   }
   return true;
@@ -24691,7 +24691,7 @@ SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
   SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
   if (Align)
     SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
-                     DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
+                     DAG.getSignedConstant(-Align->value(), dl, VT));
 
   // Set the real SP to the new value with a probing loop.
   Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
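
A side note on the lowerDYNAMIC_STACKALLOC hunk above: Align->value() already returns uint64_t, so the explicit (uint64_t) cast was redundant; negating the unsigned value wraps modulo 2^64 and hands getSignedConstant the same bit pattern either way. A small sanity check of that equivalence, independent of LLVM:

// Sketch: unsigned negation of a uint64_t alignment value produces the bit
// pattern of the corresponding negative mask, with or without an extra cast.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t AlignValue = 16; // e.g. a 16-byte alignment
  assert(-AlignValue == -static_cast<uint64_t>(AlignValue));
  assert(static_cast<int64_t>(-AlignValue) == -16);
  return 0;
}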
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 04ffb05..413ad8b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -629,9 +629,6 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
 def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
                    (zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
           (PACKH GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
-              (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
-          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
 
 def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
                                (zexti8 (XLenVT GPR:$rs1))),
@@ -642,11 +639,15 @@ let Predicates = [HasStdExtZbkb, IsRV32] in {
 def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
           (PACK GPR:$rs1, GPR:$rs2)>;
 
+def : Pat<(or (shl GPR:$rs2, (XLenVT 24)),
+              (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
+          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+
 // Match a pattern of 2 bytes being inserted into bits [31:16], with bits
 // bits [15:0] coming from a zero extended value. We can use pack with packh for
 // bits [31:16]. If bits [15:0] can also be a packh, it can be matched
 // separately.
-def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
+def : Pat<(or (or (shl GPR:$op1rs2, (XLenVT 24)),
                   (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
               (zexti16 (XLenVT GPR:$rs1))),
           (PACK (XLenVT GPR:$rs1),
@@ -657,12 +658,40 @@ let Predicates = [HasStdExtZbkb, IsRV64] in {
 def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),
           (PACK GPR:$rs1, GPR:$rs2)>;
 
+def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
+              (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
+          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (XLenVT 24)),
+                               (shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
+          (SLLI (XLenVT (PACKH GPR:$rs1, GPR:$rs2)), (XLenVT 16))>;
+
 def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
                                (zexti16 (i64 GPR:$rs1))),
           (PACKW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
                    (zexti16 (i64 GPR:$rs1)))),
           (PACKW GPR:$rs1, GPR:$rs2)>;
+
+// Match a pattern of 2 bytes being inserted into bits [31:16], with bits
+// bits [15:0] coming from a zero extended value, and bits [63:32] being
+// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can
+// also be a packh, it can be matched separately.
+def : Pat<(binop_allwusers<or>
+              (or (shl GPR:$op1rs2, (XLenVT 24)),
+                  (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+              (zexti16 (XLenVT GPR:$rs1))),
+          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+// We need to manually reassociate the patterns because of the binop_allwusers.
+def : Pat<(binop_allwusers<or>
+              (or (zexti16 (XLenVT GPR:$rs1)),
+                  (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
+              (shl GPR:$op1rs2, (XLenVT 24))),
+          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+def : Pat<(binop_allwusers<or>
+              (or (zexti16 (XLenVT GPR:$rs1)),
+                  (shl GPR:$op1rs1, (XLenVT 24))),
+              (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
+          (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
 } // Predicates = [HasStdExtZbkb, IsRV64]
 
 let Predicates = [HasStdExtZbb, IsRV32] in
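
The Zbkb patterns above target code that assembles a 32-bit word from individual bytes: the high halfword can be formed with packh and then merged with a zero-extended low halfword via pack (RV32) or packw (RV64). A hedged illustration of the kind of source this helps; the exact instruction sequence depends on the surrounding IR and is not guaranteed by the patterns alone:

// Sketch (assumption, not a test from the patch): building a 32-bit value
// from four bytes, the shift-or idiom these isel patterns are written for.
#include <cstdint>

uint32_t pack4(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) {
  return static_cast<uint32_t>(b0) | (static_cast<uint32_t>(b1) << 8) |
         (static_cast<uint32_t>(b2) << 16) | (static_cast<uint32_t>(b3) << 24);
}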
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
index 875a93d..39e099b 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td
@@ -91,3 +91,59 @@ def TuneLDADDFusion
                           CheckIsImmOperand<2>,
                           CheckImmOperand<2, 0>
                         ]>>;
+
+defvar Load = [LB, LH, LW, LD, LBU, LHU, LWU];
+
+// Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu):
+//   add(.uw) rd, rs1, rs2
+//   load rd, imm12(rd)
+def TuneADDLoadFusion
+    : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion",
+                   CheckOpcode<[ADD, ADD_UW]>,
+                   CheckOpcode<Load>>;
+
+// Fuse AUIPC followed by by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+//   auipc rd, imm20
+//   load rd, imm12(rd)
+def TuneAUIPCLoadFusion
+    : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion",
+                   "Enable AUIPC + load macrofusion",
+                   CheckOpcode<[AUIPC]>,
+                   CheckOpcode<Load>>;
+
+// Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+//   lui rd, imm[31:12]
+//   load rd, imm12(rd)
+def TuneLUILoadFusion
+    : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion",
+                   "Enable LUI + load macrofusion",
+                   CheckOpcode<[LUI]>,
+                   CheckOpcode<Load>>;
+
+// Bitfield extract fusion: similar to TuneShiftedZExtWFusion
+// but without the immediate restriction
+//   slli rd, rs1, imm12
+//   srli rd, rd, imm12
+def TuneBFExtFusion
+    : SimpleFusion<"bfext-fusion", "HasBFExtFusion",
+                   "Enable SLLI+SRLI (bitfield extract) macrofusion",
+                   CheckOpcode<[SLLI]>,
+                   CheckOpcode<[SRLI]>>;
+
+// Fuse ADDI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+//   addi rd, rs1, imm12
+//   load rd, imm12(rd)
+def TuneADDILoadFusion
+    : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion",
+                   "Enable ADDI + load macrofusion",
+                   CheckOpcode<[ADDI]>,
+                   CheckOpcode<Load>>;
+
+// Fuse shXadd(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu)
+//   shXadd(.uw) rd, rs1, rs2
+//   load rd, imm12(rd)
+def TuneSHXADDLoadFusion
+    : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion",
+                   "Enable SH(1|2|3)ADD(.UW) + load macrofusion",
+                   CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>,
+                   CheckOpcode<Load>>;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 838edf6..31d2b3a 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -590,12 +590,17 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
                                              FeatureStdExtZicboz,
                                              FeatureVendorXVentanaCondOps],
                                             [TuneVentanaVeyron,
+                                             TuneDisableMISchedLoadClustering,
+                                             TuneDisablePostMISchedLoadClustering,
+                                             TuneDisablePostMISchedStoreClustering,
                                              TuneLUIADDIFusion,
                                              TuneAUIPCADDIFusion,
                                              TuneZExtHFusion,
                                              TuneZExtWFusion,
                                              TuneShiftedZExtWFusion,
-                                             TuneLDADDFusion]> {
+                                             TuneADDLoadFusion,
+                                             TuneAUIPCLoadFusion,
+                                             TuneLUILoadFusion]> {
   let MVendorID = 0x61f;
   let MArchID = 0x8000000000010000;
   let MImpID = 0x111;
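
The new fusions above pair an address-forming instruction with the dependent load that consumes its result, and veyron-v1 opts into the ADD, AUIPC, and LUI variants while disabling most scheduler clustering. A rough illustration of the shape of code the ADD + load fusion is aimed at; the commented assembly is typical RV64 output, not a guaranteed sequence:

// Sketch (assumption): a byte-offset indexed load. On RV64 this commonly
// lowers to an address add immediately followed by the dependent load, e.g.
//   add a0, a0, a1
//   ld  a0, 0(a0)
// which is the back-to-back pair the add-load-fusion feature asks the
// scheduler to keep adjacent.
#include <cstdint>

int64_t load_at_byte_offset(const char *base, int64_t byte_offset) {
  return *reinterpret_cast<const int64_t *>(base + byte_offset);
}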
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 3f2a83f..66ce134 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -94,16 +94,6 @@ static cl::opt<bool>
                     cl::desc("Enable the loop data prefetch pass"),
                     cl::init(true));
 
-static cl::opt<bool> EnableMISchedLoadStoreClustering(
-    "riscv-misched-load-store-clustering", cl::Hidden,
-    cl::desc("Enable load and store clustering in the machine scheduler"),
-    cl::init(true));
-
-static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
-    "riscv-postmisched-load-store-clustering", cl::Hidden,
-    cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
-    cl::init(true));
-
 static cl::opt<bool> DisableVectorMaskMutation(
     "riscv-disable-vector-mask-mutation",
     cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
@@ -294,15 +284,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
 
 ScheduleDAGInstrs *
 RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
   ScheduleDAGMILive *DAG = createSchedLive(C);
-  if (EnableMISchedLoadStoreClustering) {
+
+  if (ST.enableMISchedLoadClustering())
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+  if (ST.enableMISchedStoreClustering())
     DAG->addMutation(createStoreClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-  }
 
-  const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
   if (!DisableVectorMaskMutation && ST.hasVInstructions())
     DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
 
@@ -311,13 +303,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
 
 ScheduleDAGInstrs *
 RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
   ScheduleDAGMI *DAG = createSchedPostRA(C);
-  if (EnablePostMISchedLoadStoreClustering) {
+
+  if (ST.enablePostMISchedLoadClustering())
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+  if (ST.enablePostMISchedStoreClustering())
     DAG->addMutation(createStoreClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-  }
 
   return DAG;
 }
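
With the cl::opt flags removed, clustering is now controlled per subtarget: the SubtargetFeature definitions added in RISCVFeatures.td are turned by TableGen into Enable*Clustering fields and matching accessors on RISCVSubtarget, a CPU opts out by listing the corresponding Tune feature, and the same feature strings should also be usable from the command line (for example -mattr=+disable-misched-load-clustering). A hedged sketch of roughly what one generated field/accessor pair amounts to; the real code lives in RISCVGenSubtargetInfo.inc and its exact form may differ:

// Sketch (assumption): approximate shape of the generated storage behind
// ST.enableMISchedLoadClustering(). It defaults to enabled, and the
// "disable-misched-load-clustering" feature string flips it to false.
class RISCVSubtargetSketch {
  bool EnableMISchedLoadClustering = true;

public:
  bool enableMISchedLoadClustering() const {
    return EnableMISchedLoadClustering;
  }
};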