Diffstat (limited to 'llvm/lib')
23 files changed, 360 insertions, 238 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 40464e9..fed5e72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7641,7 +7641,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue(GN0, 0).hasOneUse() &&
         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
-        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+        TLI.isVectorLoadExtDesirable(SDValue(N, 0))) {
       SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
                        GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
@@ -15745,7 +15745,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
     if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
-        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+        TLI.isVectorLoadExtDesirable(SDValue(N, 0))) {
       SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
                        GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
@@ -16772,12 +16772,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
     if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
                                              /*Depth*/ 1))
      continue;
-    bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
-    bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
-    if (IsNewMaybePoisonOperand)
+    if (MaybePoisonOperands.insert(Op).second)
       MaybePoisonOperandNumbers.push_back(OpNo);
-    if (!HadMaybePoisonOperands)
-      continue;
   }
   // NOTE: the whole op may be not guaranteed to not be undef or poison because
   // it could create undef or poison due to it's poison-generating flags.
@@ -18727,6 +18723,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
     return FTrunc;
 
+  // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
+  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
+      TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
+                               N0.getOperand(0).getValueType()))
+    return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
+
   return SDValue();
 }
@@ -18764,6 +18766,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
     return FTrunc;
 
+  // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
+  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
+      TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
+                               N0.getOperand(0).getValueType()))
+    return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
+
   return SDValue();
 }
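[Editor's note] The two int-to-fp folds above are sound only because the nsw/nuw flags promise that truncation did not change the numeric value. A minimal scalar illustration of that reasoning (plain C++ written for this note, not part of the patch):

    // Why (sint_to_fp (trunc nsw x)) == (sint_to_fp x): with no signed wrap,
    // the narrow and the wide integer denote the same number.
    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Wide = -100; // representable in int8_t, so the trunc is "nsw"
      int8_t Narrow = static_cast<int8_t>(Wide);
      assert(static_cast<float>(Narrow) == static_cast<float>(Wide));

      // Without the flag the fold would be wrong: 300 truncates to 44 in
      // 8 bits, so converting before and after truncation diverges.
      int32_t TooWide = 300;
      assert(static_cast<float>(static_cast<int8_t>(TooWide)) !=
             static_cast<float>(TooWide));
      return 0;
    }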
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5453828..2458115 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5544,6 +5544,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::USUBSAT:
   case ISD::MULHU:
   case ISD::MULHS:
+  case ISD::ABDU:
+  case ISD::ABDS:
   case ISD::SMIN:
   case ISD::SMAX:
   case ISD::UMIN:
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
index 559d808..222dc88 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
@@ -43,12 +43,6 @@ namespace llvm {
 using namespace dwarf_linker;
 using namespace dwarf_linker::classic;
 
-enum InvalidStmtSeqOffset {
-  MaxStmtSeqOffset = UINT64_MAX,
-  OrigOffsetMissing = MaxStmtSeqOffset - 1,
-  NewOffsetMissing = MaxStmtSeqOffset - 2,
-};
-
 /// Hold the input and output of the debug info size in bytes.
 struct DebugInfoSize {
   uint64_t Input;
@@ -2321,7 +2315,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
       // Some sequences are discarded by the DWARFLinker if they are invalid
       // (empty).
       if (OrigRowIter == SeqOffToOrigRow.end()) {
-        StmtSeq.set(OrigOffsetMissing);
+        StmtSeq.set(UINT64_MAX);
         continue;
       }
       size_t OrigRowIndex = OrigRowIter->second;
@@ -2331,7 +2325,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
       if (NewRowIter == OrigRowToNewRow.end()) {
         // If the original row index is not found in the map, update the
         // stmt_sequence attribute to the 'invalid offset' magic value.
-        StmtSeq.set(NewOffsetMissing);
+        StmtSeq.set(UINT64_MAX);
         continue;
       }
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 67433f2..d5b8f22 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -106,26 +106,12 @@ void MCObjectStreamer::emitFrames(MCAsmBackend *MAB) {
     MCDwarfFrameEmitter::Emit(*this, MAB, false);
 }
 
-static bool canReuseDataFragment(const MCFragment &F,
-                                 const MCAssembler &Assembler,
-                                 const MCSubtargetInfo *STI) {
-  if (!F.hasInstructions())
-    return true;
-  // Do not add data after a linker-relaxable instruction. The difference
-  // between a new label and a label at or before the linker-relaxable
-  // instruction cannot be resolved at assemble-time.
-  if (F.isLinkerRelaxable())
-    return false;
-  // If the subtarget is changed mid fragment we start a new fragment to record
-  // the new STI.
-  return !STI || F.getSubtargetInfo() == STI;
-}
-
-MCFragment *
-MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) {
+MCFragment *MCObjectStreamer::getOrCreateDataFragment() {
+  // TODO: Start a new fragment whenever finalizing the variable-size tail of a
+  // previous one, so that all getOrCreateDataFragment calls can be replaced
+  // with getCurrentFragment
   auto *F = getCurrentFragment();
-  if (F->getKind() != MCFragment::FT_Data ||
-      !canReuseDataFragment(*F, *Assembler, STI)) {
+  if (F->getKind() != MCFragment::FT_Data) {
     F = getContext().allocFragment<MCFragment>();
     insert(F);
   }
@@ -363,16 +349,23 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
   F->doneAppending();
   if (!Fixups.empty())
     F->appendFixups(Fixups);
+  F->setHasInstructions(STI);
+  bool MarkedLinkerRelaxable = false;
   for (auto &Fixup : MutableArrayRef(F->getFixups()).slice(FixupStartIndex)) {
     Fixup.setOffset(Fixup.getOffset() + CodeOffset);
-    if (Fixup.isLinkerRelaxable()) {
-      F->setLinkerRelaxable();
+    if (!Fixup.isLinkerRelaxable())
+      continue;
+    F->setLinkerRelaxable();
+    // Do not add data after a linker-relaxable instruction. The difference
+    // between a new label and a label at or before the linker-relaxable
+    // instruction cannot be resolved at assemble-time.
+    if (!MarkedLinkerRelaxable) {
+      MarkedLinkerRelaxable = true;
       getCurrentSectionOnly()->setLinkerRelaxable();
+      newFragment();
     }
   }
-
-  F->setHasInstructions(STI);
 }
 
 void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
@@ -568,8 +561,10 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment,
   // if the alignment is larger than the minimum NOP size.
   unsigned Size;
   if (getAssembler().getBackend().shouldInsertExtraNopBytesForCodeAlign(*F,
-                                                                        Size))
+                                                                        Size)) {
     getCurrentSectionOnly()->setLinkerRelaxable();
+    newFragment();
+  }
 }
 
 void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index ec8b402..c7c3df3 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -571,7 +571,7 @@ bool ELFAsmParser::parseSectionArguments(bool IsPush, SMLoc loc) {
       return TokError("expected end of directive");
   }
 
-  if (Mergeable)
+  if (Mergeable || TypeName == "llvm_cfi_jump_table")
     if (parseMergeSize(Size))
       return true;
   if (Flags & ELF::SHF_LINK_ORDER)
@@ -637,6 +637,8 @@ EndStmt:
      Type = ELF::SHT_LLVM_LTO;
    else if (TypeName == "llvm_jt_sizes")
      Type = ELF::SHT_LLVM_JT_SIZES;
+    else if (TypeName == "llvm_cfi_jump_table")
+      Type = ELF::SHT_LLVM_CFI_JUMP_TABLE;
    else if (TypeName.getAsInteger(0, Type))
      return TokError("unknown section type");
  }
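[Editor's note] Together with the printer support in MCSectionELF.cpp below, this lets assembly declare the new section type with an entry size, on the model of .section .foo,"a",@llvm_cfi_jump_table,8 (the trailing size is consumed by parseMergeSize even though the section is not SHF_MERGE). A sketch of obtaining such a section programmatically; the section name and the 8-byte entry size are invented for illustration:

    // Hypothetical helper, assuming an already-constructed MCContext.
    #include "llvm/BinaryFormat/ELF.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionELF.h"

    llvm::MCSectionELF *getCFIJumpTableSection(llvm::MCContext &Ctx) {
      // SHT_LLVM_CFI_JUMP_TABLE carries an entry size like a mergeable
      // section, but without SHF_MERGE (hence the relaxed assert below).
      return Ctx.getELFSection(".text.cfi.jt",
                               llvm::ELF::SHT_LLVM_CFI_JUMP_TABLE,
                               llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR,
                               /*EntrySize=*/8);
    }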
diff --git a/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp b/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
index 665d92e..7f09349 100644
--- a/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -9,6 +9,7 @@
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCStreamer.h"
 
 using namespace llvm;
 
@@ -22,6 +23,10 @@ MCTargetAsmParser::~MCTargetAsmParser() = default;
 MCSubtargetInfo &MCTargetAsmParser::copySTI() {
   MCSubtargetInfo &STICopy = getContext().getSubtargetCopy(getSTI());
   STI = &STICopy;
+  // The returned STI will likely be modified. Create a new fragment to prevent
+  // mixing STI values within a fragment.
+  if (getStreamer().getCurrentFragment())
+    getStreamer().newFragment();
   return STICopy;
 }
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index cc7cdf2..299fe40 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -176,11 +176,13 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
       OS << "llvm_lto";
     else if (Type == ELF::SHT_LLVM_JT_SIZES)
       OS << "llvm_jt_sizes";
+    else if (Type == ELF::SHT_LLVM_CFI_JUMP_TABLE)
+      OS << "llvm_cfi_jump_table";
     else
       OS << "0x" << Twine::utohexstr(Type);
 
     if (EntrySize) {
-      assert(Flags & ELF::SHF_MERGE);
+      assert((Flags & ELF::SHF_MERGE) || Type == ELF::SHT_LLVM_CFI_JUMP_TABLE);
       OS << "," << EntrySize;
     }
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index d814ab88..c3ecf8f 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -1404,6 +1404,19 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) {
   return Sym;
 }
 
+void MCStreamer::insert(MCFragment *F) {
+  auto *Sec = CurFrag->getParent();
+  F->setParent(Sec);
+  F->setLayoutOrder(CurFrag->getLayoutOrder() + 1);
+  CurFrag->Next = F;
+  CurFrag = F;
+  Sec->curFragList()->Tail = F;
+}
+
+void MCStreamer::newFragment() {
+  insert(getContext().allocFragment<MCFragment>());
+}
+
 static VersionTuple
 targetVersionOrMinimumSupportedOSVersion(const Triple &Target,
                                          VersionTuple TargetVersion) {
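[Editor's note] MCStreamer::insert above is a plain tail append on the section's singly linked fragment list, bumping the layout order as it goes; newFragment is just insert of a freshly allocated fragment. A standalone toy version of the same pattern (invented names, not LLVM API):

    #include <cassert>

    struct Fragment {
      Fragment *Next = nullptr;
      unsigned LayoutOrder = 0;
    };

    struct FragmentList {
      Fragment *Head = nullptr;
      Fragment *Tail = nullptr;

      // Mirrors the shape of MCStreamer::insert: link after the current
      // tail, give the node the next layout order, and retarget Tail.
      void append(Fragment *F) {
        if (Tail) {
          F->LayoutOrder = Tail->LayoutOrder + 1;
          Tail->Next = F;
        } else {
          Head = F;
        }
        Tail = F;
      }
    };

    int main() {
      FragmentList L;
      Fragment A, B;
      L.append(&A);
      L.append(&B);
      assert(L.Tail == &B && B.LayoutOrder == 1);
      return 0;
    }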
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index af073f6..788c602 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -321,6 +321,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_OFFLOADING);
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LTO);
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_JT_SIZES)
+    STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CFI_JUMP_TABLE)
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_SFRAME);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d04e6c4..f026726 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6439,7 +6439,9 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
     }
   }
 
-  return true;
+  EVT PreExtScalarVT = ExtVal->getOperand(0).getValueType().getScalarType();
+  return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
+         PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
 }
 
 unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index cbbb57c..bf2f37b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4558,6 +4558,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_cvt_pk_u16:
     case Intrinsic::amdgcn_cvt_pk_f16_fp8:
     case Intrinsic::amdgcn_cvt_pk_f16_bf8:
+    case Intrinsic::amdgcn_sat_pk4_i4_i8:
+    case Intrinsic::amdgcn_sat_pk4_u4_u8:
     case Intrinsic::amdgcn_fmed3:
     case Intrinsic::amdgcn_cubeid:
     case Intrinsic::amdgcn_cubema:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ab7d340..9e1951e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2850,6 +2850,7 @@ def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
 def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
 def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
 def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>;
+def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>;
 
 def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2a6fcad..991d9f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3427,30 +3427,32 @@ def : GCNPat <
   (S_LSHL_B32 SReg_32:$src1, (i16 16))
 >;
 
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
 def : GCNPat <
   (v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
   (v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
 >;
-
 def : GCNPat <
-  (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
-  (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+  (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src1), (i16 0))),
+  (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
 >;
 
 def : GCNPat <
-  (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src1), (i16 0))),
-  (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
+  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
+  (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
 >;
+}
 
 def : GCNPat <
-  (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+  (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
   (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
 >;
 
 def : GCNPat <
-  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
-  (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
+  (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+  (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
 >;
 
 foreach vecTy = [v2i16, v2f16, v2bf16] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1bbbb61..f621f85 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -803,6 +803,9 @@ let SubtargetPredicate = isGFX1250Plus in {
     def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>;
     def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>;
   }
+
+  defm V_SAT_PK4_I4_I8 : VOP1Inst_t16<"v_sat_pk4_i4_i8", VOP1_I16_I32, int_amdgcn_sat_pk4_i4_i8>;
+  defm V_SAT_PK4_U4_U8 : VOP1Inst_t16<"v_sat_pk4_u4_u8", VOP1_I16_I32, int_amdgcn_sat_pk4_u4_u8>;
 } // End SubtargetPredicate = isGFX1250Plus
 
 let SubtargetPredicate = isGFX10Plus in {
@@ -1080,6 +1083,13 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
     VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
 }
 
+multiclass VOP1_Real_OpSelIsDPP_gfx1250<bits<9> op> : VOP1_Real_e32<GFX1250Gen, op> {
+  defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
+  def _e64_gfx1250 :
+    VOP3_Real_Gen<ps, GFX1250Gen>,
+    VOP3OpSelIsDPP_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>;
+}
+
 defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Not12_50Gen, 0x06c,
                                               "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
 
 defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c,
                                               "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
@@ -1147,8 +1157,12 @@ defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
 defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
 defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
+defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
 defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
+defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
 defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
+defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
 defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
 defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
 defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 2b91ea7..a25ebdf 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -331,10 +331,19 @@ class VOP3OpSel_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
 
 // Special case for v_permlane16_swap_b32/v_permlane32_swap_b32
 // op_sel[0]/op_sel[1] are treated as bound_ctrl and fi dpp operands.
-class VOP3OpSelIsDPP_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
+class VOP3OpSelIsDPP_base {
   bits<1> fi;
   bits<1> bound_ctrl;
+}
+
+class VOP3OpSelIsDPP_gfx9 <bits<10> op, VOPProfile P> : VOP3OpSelIsDPP_base, VOP3e_vi <op, P> {
+  // OPSEL[0] specifies FI
+  let Inst{11} = fi;
+  // OPSEL[1] specifies BOUND_CTRL
+  let Inst{12} = bound_ctrl;
+}
 
+class VOP3OpSelIsDPP_gfx12 <bits<10> op, VOPProfile P> : VOP3OpSelIsDPP_base, VOP3e_gfx11_gfx12 <op, P> {
   // OPSEL[0] specifies FI
   let Inst{11} = fi;
   // OPSEL[1] specifies BOUND_CTRL
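[Editor's note] V_SAT_PK4_I4_I8 is exposed as int_amdgcn_sat_pk4_i4_i8 with an i16 result and an i32 source (the new VOP1_I16_I32 profile). Assuming from the name that it saturates four signed bytes into four signed nibbles (and the u4_u8 variant the unsigned analogue), a scalar reference model could look like the sketch below; the authoritative semantics are in the GFX1250 ISA documentation:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Toy model: clamp each of the four signed bytes in Src to the i4 range
    // [-8, 7] and pack the low 4 bits of each into a 16-bit result.
    uint16_t satPk4I4I8(uint32_t Src) {
      uint16_t Result = 0;
      for (int I = 0; I < 4; ++I) {
        int8_t Byte = static_cast<int8_t>(Src >> (8 * I));
        int Clamped = std::clamp<int>(Byte, -8, 7);
        Result |= static_cast<uint16_t>(Clamped & 0xF) << (4 * I);
      }
      return Result;
    }

    int main() {
      // 0x7F saturates to 7; 0x80 (-128) saturates to -8 (nibble 0x8).
      assert(satPk4I4I8(0x0000807F) == 0x0087);
      return 0;
    }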
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5bd3170..22cff7c 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -43,12 +43,12 @@ namespace {
 class HexagonDisassembler : public MCDisassembler {
 public:
   std::unique_ptr<MCInstrInfo const> const MCII;
-  std::unique_ptr<MCInst *> CurrentBundle;
+  mutable std::unique_ptr<MCInst> CurrentBundle;
   mutable MCInst const *CurrentExtender;
 
   HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
                       MCInstrInfo const *MCII)
-      : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *),
+      : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(nullptr),
         CurrentExtender(nullptr) {}
 
   DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
@@ -57,7 +57,23 @@ public:
   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &CStream) const override;
+
+  DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
+                                    ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                    raw_ostream &CStream) const override;
+
   void remapInstruction(MCInst &Instr) const;
+
+private:
+  bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
+                  uint64_t &BytesToSkip, raw_ostream &CS) const;
+
+  void resetBundle() const {
+    CurrentBundle.reset();
+    CurrentInstruction = nullptr;
+  }
+
+  mutable MCOperand *CurrentInstruction = nullptr;
 };
 
 static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI,
@@ -171,43 +187,88 @@ LLVMInitializeHexagonDisassembler() {
                                          createHexagonDisassembler);
 }
 
-DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                                 ArrayRef<uint8_t> Bytes,
-                                                 uint64_t Address,
-                                                 raw_ostream &CS) const {
-  CommentStream = &CS;
-
-  DecodeStatus Result = DecodeStatus::Success;
+bool HexagonDisassembler::makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                     uint64_t &BytesToSkip,
+                                     raw_ostream &CS) const {
   bool Complete = false;
-  Size = 0;
+  DecodeStatus Result = DecodeStatus::Success;
 
-  *CurrentBundle = &MI;
-  MI.setOpcode(Hexagon::BUNDLE);
-  MI.addOperand(MCOperand::createImm(0));
+  CurrentBundle.reset(new MCInst);
+  CurrentBundle->setOpcode(Hexagon::BUNDLE);
+  CurrentBundle->addOperand(MCOperand::createImm(0));
   while (Result == Success && !Complete) {
     if (Bytes.size() < HEXAGON_INSTR_SIZE)
-      return MCDisassembler::Fail;
+      return false;
     MCInst *Inst = getContext().createMCInst();
-    Result = getSingleInstruction(*Inst, MI, Bytes, Address, CS, Complete);
-    MI.addOperand(MCOperand::createInst(Inst));
-    Size += HEXAGON_INSTR_SIZE;
+    Result = getSingleInstruction(*Inst, *CurrentBundle, Bytes, Address, CS,
+                                  Complete);
+    CurrentBundle->addOperand(MCOperand::createInst(Inst));
+    BytesToSkip += HEXAGON_INSTR_SIZE;
     Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
   }
   if (Result == MCDisassembler::Fail)
-    return Result;
-  if (Size > HEXAGON_MAX_PACKET_SIZE)
-    return MCDisassembler::Fail;
+    return false;
+  if (BytesToSkip > HEXAGON_MAX_PACKET_SIZE)
+    return false;
 
   const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI);
   const auto STI_ = (ArchSTI != nullptr) ? *ArchSTI : STI;
-  HexagonMCChecker Checker(getContext(), *MCII, STI_, MI,
+  HexagonMCChecker Checker(getContext(), *MCII, STI_, *CurrentBundle,
                            *getContext().getRegisterInfo(), false);
   if (!Checker.check())
-    return MCDisassembler::Fail;
-  remapInstruction(MI);
+    return false;
+  remapInstruction(*CurrentBundle);
+  return true;
+}
+
+DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+                                                 ArrayRef<uint8_t> Bytes,
+                                                 uint64_t Address,
+                                                 raw_ostream &CS) const {
+  CommentStream = &CS;
+
+  Size = 0;
+  uint64_t BytesToSkip = 0;
+
+  if (!CurrentBundle) {
+    if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
+      Size = BytesToSkip;
+      resetBundle();
+      return MCDisassembler::Fail;
+    }
+    CurrentInstruction = (CurrentBundle->begin() + 1);
+  }
+
+  MI = *(CurrentInstruction->getInst());
+  Size = HEXAGON_INSTR_SIZE;
+  if (++CurrentInstruction == CurrentBundle->end())
+    resetBundle();
   return MCDisassembler::Success;
 }
 
+DecodeStatus HexagonDisassembler::getInstructionBundle(MCInst &MI,
+                                                       uint64_t &Size,
+                                                       ArrayRef<uint8_t> Bytes,
+                                                       uint64_t Address,
+                                                       raw_ostream &CS) const {
+  CommentStream = &CS;
+  Size = 0;
+  uint64_t BytesToSkip = 0;
+  assert(!CurrentBundle);
+
+  if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
+    Size = BytesToSkip;
+    resetBundle();
+    return MCDisassembler::Fail;
+  }
+
+  MI = *CurrentBundle;
+  Size = HEXAGON_INSTR_SIZE * HexagonMCInstrInfo::bundleSize(MI);
+  resetBundle();
+
+  return Success;
+}
+
 void HexagonDisassembler::remapInstruction(MCInst &Instr) const {
   for (auto I: HexagonMCInstrInfo::bundleInstructions(Instr)) {
     auto &MI = const_cast<MCInst &>(*I.getInst());
@@ -482,7 +543,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
   unsigned Offset = 1;
   bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
   bool PrevVector = false;
-  auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+  auto Instructions = HexagonMCInstrInfo::bundleInstructions(*CurrentBundle);
   auto i = Instructions.end() - 1;
   for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
     if (i == n)
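[Editor's note] After this rework, the first getInstruction call decodes and checks a whole packet, caches it in CurrentBundle, and each call (including the first) hands back one sub-instruction of HEXAGON_INSTR_SIZE bytes until the bundle is exhausted; getInstructionBundle still returns the packet as a single BUNDLE MCInst. A sketch of a driver loop over raw bytes (assumes an already-constructed disassembler; real callers also handle SoftFail and skip bytes on failure):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/MC/MCDisassembler/MCDisassembler.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // On Hexagon each successful call now yields one instruction of the
    // current packet and reports Size = 4, so the loop advances packet by
    // packet, four bytes at a time.
    void disassembleAll(const MCDisassembler &Dis, ArrayRef<uint8_t> Bytes,
                        uint64_t Addr) {
      while (!Bytes.empty()) {
        MCInst Inst;
        uint64_t Size = 0;
        if (Dis.getInstruction(Inst, Size, Bytes, Addr, nulls()) ==
            MCDisassembler::Fail)
          break;
        // ... consume Inst ...
        Bytes = Bytes.drop_front(Size);
        Addr += Size;
      }
    }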
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 9030e43..f83e06c 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -33,30 +33,18 @@ void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
 void HexagonInstPrinter::printInst(const MCInst *MI, uint64_t Address,
                                    StringRef Annot, const MCSubtargetInfo &STI,
                                    raw_ostream &OS) {
-  assert(HexagonMCInstrInfo::isBundle(*MI));
-  assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
-  assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
-  HasExtender = false;
-  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
-    MCInst const &MCI = *I.getInst();
-    if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
-      printInstruction(MCI.getOperand(1).getInst(), Address, OS);
-      OS << '\v';
-      HasExtender = false;
-      printInstruction(MCI.getOperand(0).getInst(), Address, OS);
-    } else
-      printInstruction(&MCI, Address, OS);
-    HasExtender = HexagonMCInstrInfo::isImmext(MCI);
-    OS << "\n";
-  }
-
-  bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(*MI);
-  bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(*MI);
-  if (IsLoop0) {
-    OS << (IsLoop1 ? " :endloop01" : " :endloop0");
-  } else if (IsLoop1) {
-    OS << " :endloop1";
+  if (HexagonMCInstrInfo::isDuplex(MII, *MI)) {
+    printInstruction(MI->getOperand(1).getInst(), Address, OS);
+    OS << '\v';
+    HasExtender = false;
+    printInstruction(MI->getOperand(0).getInst(), Address, OS);
+  } else {
+    printInstruction(MI, Address, OS);
   }
+  HasExtender = HexagonMCInstrInfo::isImmext(*MI);
+  if ((MI->getOpcode() & HexagonII::INST_PARSE_MASK) ==
+      HexagonII::INST_PARSE_PACKET_END)
+    HasExtender = false;
 }
 
 void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 980df81..bfea50e 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -252,8 +252,21 @@ public:
     std::string Buffer;
     {
       raw_string_ostream TempStream(Buffer);
-      InstPrinter.printInst(&Inst, Address, "", STI, TempStream);
+      for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+        InstPrinter.printInst(I.getInst(), Address, "", STI, TempStream);
+        TempStream << "\n";
+      }
+    }
+
+    std::string LoopString = "";
+    bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(Inst);
+    bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(Inst);
+    if (IsLoop0) {
+      LoopString += (IsLoop1 ? " :endloop01" : " :endloop0");
+    } else if (IsLoop1) {
+      LoopString += " :endloop1";
     }
+
     StringRef Contents(Buffer);
     auto PacketBundle = Contents.rsplit('\n');
     auto HeadTail = PacketBundle.first.split('\n');
@@ -275,9 +288,9 @@ public:
     }
 
     if (HexagonMCInstrInfo::isMemReorderDisabled(Inst))
-      OS << "\n\t} :mem_noshuf" << PacketBundle.second;
+      OS << "\n\t} :mem_noshuf" << LoopString;
     else
-      OS << "\t}" << PacketBundle.second;
+      OS << "\t}" << LoopString;
   }
 
   void finish() override { finishAttributeSection(); }
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 6c8e3da..23b4554 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -95,6 +95,11 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
     /* -21, -22, -23, -24 are reserved */
 };
 
+/// Returns true if DWARF CFI instructions ("frame moves") should be emitted.
+static bool needsDwarfCFI(const MachineFunction &MF) {
+  return MF.needsFrameMoves();
+}
+
 // For now we use x3, a.k.a gp, as pointer to shadow call stack.
 // User should not use x3 in their asm.
 static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -141,6 +146,9 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
       .addImm(-SlotSize)
       .setMIFlag(MachineInstr::FrameSetup);
 
+  if (!needsDwarfCFI(MF))
+    return;
+
   // Emit a CFI instruction that causes SlotSize to be subtracted from the value
   // of the shadow stack pointer when unwinding past this frame.
   char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
@@ -199,8 +207,10 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
       .addReg(SCSPReg)
      .addImm(-SlotSize)
      .setMIFlag(MachineInstr::FrameDestroy);
-  // Restore the SCS pointer
-  CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+  if (needsDwarfCFI(MF)) {
+    // Restore the SCS pointer
+    CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+  }
 }
 
 // Insert instruction to swap mscratchsw with sp
@@ -935,6 +945,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
                              getUnmanagedCSI(MF, CSI).size());
   CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+  bool NeedsDwarfCFI = needsDwarfCFI(MF);
 
   // If libcalls are used to spill and restore callee-saved registers, the frame
   // has two sections; the opaque section managed by the libcalls, and the
@@ -962,10 +973,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
         alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
     RVFI->setLibCallStackSize(LibCallFrameSize);
 
-    CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
-    for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
-      CFIBuilder.buildOffset(CS.getReg(),
-                             MFI.getObjectOffset(CS.getFrameIdx()));
+    if (NeedsDwarfCFI) {
+      CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
+      for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+        CFIBuilder.buildOffset(CS.getReg(),
+                               MFI.getObjectOffset(CS.getFrameIdx()));
+    }
   }
 
   // FIXME (note copied from Lanai): This appears to be overallocating. Needs
@@ -996,14 +1009,17 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
     // could only be the next instruction.
     ++PossiblePush;
 
-    // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
-    // could be. The PUSH will also get its own CFI metadata for its own
-    // modifications, which should come after the PUSH.
-    CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
-    PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
-    for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
-      PushCFIBuilder.buildOffset(CS.getReg(),
-                                 MFI.getObjectOffset(CS.getFrameIdx()));
+    if (NeedsDwarfCFI) {
+      // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
+      // could be. The PUSH will also get its own CFI metadata for its own
+      // modifications, which should come after the PUSH.
+      CFIInstBuilder PushCFIBuilder(MBB, PossiblePush,
+                                    MachineInstr::FrameSetup);
+      PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
+      for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
+        PushCFIBuilder.buildOffset(CS.getReg(),
+                                   MFI.getObjectOffset(CS.getFrameIdx()));
+    }
   }
 
   if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
@@ -1017,10 +1033,12 @@
     PossiblePush->getOperand(1).setImm(StackAdj);
     StackSize -= StackAdj;
 
-    CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
-    for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
-      CFIBuilder.buildOffset(CS.getReg(),
-                             MFI.getObjectOffset(CS.getFrameIdx()));
+    if (NeedsDwarfCFI) {
+      CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
+      for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+        CFIBuilder.buildOffset(CS.getReg(),
+                               MFI.getObjectOffset(CS.getFrameIdx()));
+    }
   }
 
   // Allocate space on the stack if necessary.
@@ -1031,7 +1049,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   bool DynAllocation =
       MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
   if (StackSize != 0)
-    allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
+    allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, NeedsDwarfCFI,
                   NeedProbe, ProbeSize, DynAllocation,
                   MachineInstr::FrameSetup);
 
@@ -1049,8 +1067,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
 
   // Iterate over list of callee-saved registers and emit .cfi_offset
   // directives.
-  for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
-    CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx()));
+  if (NeedsDwarfCFI)
+    for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+      CFIBuilder.buildOffset(CS.getReg(),
+                             MFI.getObjectOffset(CS.getFrameIdx()));
 
   // Generate new FP.
   if (hasFP(MF)) {
@@ -1069,7 +1089,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
                 MachineInstr::FrameSetup, getStackAlign());
     }
 
-    CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
+    if (NeedsDwarfCFI)
+      CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
   }
 
   uint64_t SecondSPAdjustAmount = 0;
@@ -1080,15 +1101,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
            "SecondSPAdjustAmount should be greater than zero");
 
     allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
-                  getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
-                  ProbeSize, DynAllocation, MachineInstr::FrameSetup);
+                  getStackSizeWithRVVPadding(MF), NeedsDwarfCFI && !hasFP(MF),
+                  NeedProbe, ProbeSize, DynAllocation,
+                  MachineInstr::FrameSetup);
   }
 
   if (RVVStackSize) {
     if (NeedProbe) {
       allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
-                                  MachineInstr::FrameSetup, !hasFP(MF),
-                                  DynAllocation);
+                                  MachineInstr::FrameSetup,
+                                  NeedsDwarfCFI && !hasFP(MF), DynAllocation);
     } else {
       // We must keep the stack pointer aligned through any intermediate
       // updates.
@@ -1097,14 +1119,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
                     MachineInstr::FrameSetup, getStackAlign());
     }
 
-    if (!hasFP(MF)) {
+    if (NeedsDwarfCFI && !hasFP(MF)) {
      // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
      CFIBuilder.insertCFIInst(createDefCFAExpression(
          *RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
     }
 
     std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
-    emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
+    if (NeedsDwarfCFI)
+      emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
   }
 
   if (hasFP(MF)) {
@@ -1171,8 +1194,9 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
                 MachineInstr::FrameDestroy, getStackAlign());
   StackSize = 0;
 
-  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
-      .buildDefCFAOffset(CFAOffset);
+  if (needsDwarfCFI(MF))
+    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
+        .buildDefCFAOffset(CFAOffset);
 }
 
 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -1212,6 +1236,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
       std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
   CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
                             MachineInstr::FrameDestroy);
+  bool NeedsDwarfCFI = needsDwarfCFI(MF);
 
   uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
   uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
@@ -1232,10 +1257,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                   StackOffset::getScalable(RVVStackSize),
                   MachineInstr::FrameDestroy, getStackAlign());
 
-    if (!hasFP(MF))
-      CFIBuilder.buildDefCFA(SPReg, RealStackSize);
-
-    emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+    if (NeedsDwarfCFI) {
+      if (!hasFP(MF))
+        CFIBuilder.buildDefCFA(SPReg, RealStackSize);
+      emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+    }
   }
 
   if (FirstSPAdjustAmount) {
@@ -1251,7 +1277,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                   StackOffset::getFixed(SecondSPAdjustAmount),
                   MachineInstr::FrameDestroy, getStackAlign());
 
-    if (!hasFP(MF))
+    if (NeedsDwarfCFI && !hasFP(MF))
       CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount);
   }
 
@@ -1272,7 +1298,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                   getStackAlign());
   }
 
-  if (hasFP(MF))
+  if (NeedsDwarfCFI && hasFP(MF))
     CFIBuilder.buildDefCFA(SPReg, RealStackSize);
 
   // Skip to after the restores of scalar callee-saved registers
@@ -1295,8 +1321,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   // Recover callee-saved registers.
-  for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
-    CFIBuilder.buildRestore(CS.getReg());
+  if (NeedsDwarfCFI)
+    for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+      CFIBuilder.buildRestore(CS.getReg());
 
   if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
     // Use available stack adjustment in pop instruction to deallocate stack
@@ -1315,15 +1342,17 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
     auto NextI = next_nodbg(MBBI, MBB.end());
     if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
       ++MBBI;
-      CFIBuilder.setInsertPoint(MBBI);
+      if (NeedsDwarfCFI) {
+        CFIBuilder.setInsertPoint(MBBI);
 
-      for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
-        CFIBuilder.buildRestore(CS.getReg());
+        for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+          CFIBuilder.buildRestore(CS.getReg());
 
-      // Update CFA Offset. If this is a QCI interrupt function, there will be a
-      // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
-      // getQCIInterruptStackSize() will be 0.
-      CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+        // Update CFA Offset. If this is a QCI interrupt function, there will
+        // be a leftover offset which is deallocated by `QC.C.MILEAVERET`,
+        // otherwise getQCIInterruptStackSize() will be 0.
+        CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+      }
     }
   }
 
@@ -1812,7 +1841,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
       // allocateStack.
       bool DynAllocation =
          MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
-      allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+      allocateStack(MBB, MI, MF, -Amount, -Amount,
+                    needsDwarfCFI(MF) && !hasFP(MF),
                     /*NeedProbe=*/true, ProbeSize, DynAllocation,
                     MachineInstr::NoFlags);
     } else {
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 38cc0ce..dd68a55 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -102,6 +102,56 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
   return false;
 }
 
+/// Do the common operand retrieval and validation required by the
+/// routines below.
+static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
+                           Instruction *I, Value *&Ptr, Value *&Mask,
+                           Value *&VL, Align &Alignment) {
+
+  IRBuilder<> Builder(I);
+  const DataLayout &DL = I->getDataLayout();
+  ElementCount EC = VTy->getElementCount();
+  if (auto *LI = dyn_cast<LoadInst>(I)) {
+    assert(LI->isSimple());
+    Ptr = LI->getPointerOperand();
+    Alignment = LI->getAlign();
+    assert(!Mask && "Unexpected mask on a load");
+    Mask = Builder.getAllOnesMask(EC);
+    VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+                                   : Constant::getAllOnesValue(XLenTy);
+    return true;
+  }
+  if (auto *SI = dyn_cast<StoreInst>(I)) {
+    assert(SI->isSimple());
+    Ptr = SI->getPointerOperand();
+    Alignment = SI->getAlign();
+    assert(!Mask && "Unexpected mask on a store");
+    Mask = Builder.getAllOnesMask(EC);
+    VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+                                   : Constant::getAllOnesValue(XLenTy);
+    return true;
+  }
+  auto *VPLdSt = cast<VPIntrinsic>(I);
+  assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
+          VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
+         "Unexpected intrinsic");
+  Ptr = VPLdSt->getMemoryPointerParam();
+  Alignment = VPLdSt->getPointerAlignment().value_or(
+      DL.getABITypeAlign(VTy->getElementType()));
+
+  assert(Mask && "vp.load and vp.store needs a mask!");
+
+  Value *WideEVL = VPLdSt->getVectorLengthParam();
+  // Conservatively check if EVL is a multiple of factor, otherwise some
+  // (trailing) elements might be lost after the transformation.
+  if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+    return false;
+
+  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+  VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+  return true;
+}
+
 /// Lower an interleaved load into a vlsegN intrinsic.
 ///
 /// E.g. Lower an interleaved load (Factor = 2):
@@ -127,32 +177,8 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
   Value *Ptr, *VL;
   Align Alignment;
-  if (auto *LI = dyn_cast<LoadInst>(Load)) {
-    assert(LI->isSimple());
-    Ptr = LI->getPointerOperand();
-    Alignment = LI->getAlign();
-    assert(!Mask && "Unexpected mask on a load\n");
-    Mask = Builder.getAllOnesMask(VTy->getElementCount());
-    VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  } else {
-    auto *VPLoad = cast<VPIntrinsic>(Load);
-    assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
-           "Unexpected intrinsic");
-    Ptr = VPLoad->getMemoryPointerParam();
-    Alignment = VPLoad->getPointerAlignment().value_or(
-        DL.getABITypeAlign(VTy->getElementType()));
-
-    assert(Mask && "vp.load needs a mask!");
-
-    Value *WideEVL = VPLoad->getVectorLengthParam();
-    // Conservatively check if EVL is a multiple of factor, otherwise some
-    // (trailing) elements might be lost after the transformation.
-    if (!isMultipleOfN(WideEVL, DL, Factor))
-      return false;
-
-    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-  }
+  if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+    return false;
 
   Type *PtrTy = Ptr->getType();
   unsigned AS = PtrTy->getPointerAddressSpace();
@@ -296,34 +322,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
   Value *Ptr, *VL;
   Align Alignment;
-  if (auto *LI = dyn_cast<LoadInst>(Load)) {
-    assert(LI->isSimple());
-    Ptr = LI->getPointerOperand();
-    Alignment = LI->getAlign();
-    assert(!Mask && "Unexpected mask on a load\n");
-    Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
-    VL = isa<FixedVectorType>(ResVTy)
-             ? Builder.CreateElementCount(XLenTy, ResVTy->getElementCount())
-             : Constant::getAllOnesValue(XLenTy);
-  } else {
-    auto *VPLoad = cast<VPIntrinsic>(Load);
-    assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
-           "Unexpected intrinsic");
-    Ptr = VPLoad->getMemoryPointerParam();
-    Alignment = VPLoad->getPointerAlignment().value_or(
-        DL.getABITypeAlign(ResVTy->getElementType()));
-
-    assert(Mask && "vp.load needs a mask!");
-
-    Value *WideEVL = VPLoad->getVectorLengthParam();
-    // Conservatively check if EVL is a multiple of factor, otherwise some
-    // (trailing) elements might be lost after the transformation.
-    if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
-      return false;
-
-    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-  }
+  if (!getMemOperands(Factor, ResVTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+    return false;
 
   Type *PtrTy = Ptr->getType();
   unsigned AS = PtrTy->getPointerAddressSpace();
@@ -385,34 +385,8 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   Value *Ptr, *VL;
   Align Alignment;
-  if (auto *SI = dyn_cast<StoreInst>(Store)) {
-    assert(SI->isSimple());
-    Ptr = SI->getPointerOperand();
-    Alignment = SI->getAlign();
-    assert(!Mask && "Unexpected mask on a store");
-    Mask = Builder.getAllOnesMask(InVTy->getElementCount());
-    VL = isa<FixedVectorType>(InVTy)
-             ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount())
-             : Constant::getAllOnesValue(XLenTy);
-  } else {
-    auto *VPStore = cast<VPIntrinsic>(Store);
-    assert(VPStore->getIntrinsicID() == Intrinsic::vp_store &&
-           "Unexpected intrinsic");
-    Ptr = VPStore->getMemoryPointerParam();
-    Alignment = VPStore->getPointerAlignment().value_or(
-        DL.getABITypeAlign(InVTy->getElementType()));
-
-    assert(Mask && "vp.store needs a mask!");
-
-    Value *WideEVL = VPStore->getVectorLengthParam();
-    // Conservatively check if EVL is a multiple of factor, otherwise some
-    // (trailing) elements might be lost after the transformation.
-    if (!isMultipleOfN(WideEVL, DL, Factor))
-      return false;
-
-    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-  }
+  if (!getMemOperands(Factor, InVTy, XLenTy, Store, Ptr, Mask, VL, Alignment))
+    return false;
 
   Type *PtrTy = Ptr->getType();
   unsigned AS = Ptr->getType()->getPointerAddressSpace();
   if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL))
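[Editor's note] The consolidated helper keeps the conservative rule from the three copies it replaces: a vp.load/vp.store is only transformed when its EVL is provably a multiple of the interleave factor, and the per-segment VL is the exact quotient. The check is done at compile time via isMultipleOfN; the rule it enforces, in scalar form (illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Returns the per-segment VL for an interleaved access, or nullopt when
    // the wide EVL is not a multiple of the factor (trailing elements would
    // otherwise be silently dropped).
    std::optional<uint64_t> segmentVL(uint64_t WideEVL, uint64_t Factor) {
      if (WideEVL % Factor != 0)
        return std::nullopt;
      return WideEVL / Factor;
    }

    int main() {
      assert(segmentVL(8, 4) == 2); // 8 elements, factor 4 -> 2 per segment
      assert(!segmentVL(9, 4));     // not provable -> bail out
      return 0;
    }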
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea..6281124 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1323,11 +1323,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
   }
 
-  if (Subtarget.hasGFNI()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasGFNI()) {
     setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
     setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
     setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+
+    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+      setOperationAction(ISD::BITREVERSE, VT, Custom);
+    }
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -32694,7 +32698,8 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
   if (Subtarget.hasXOP() && !VT.is512BitVector())
     return LowerBITREVERSE_XOP(Op, DAG);
 
-  assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
+  assert((Subtarget.hasSSSE3() || Subtarget.hasGFNI()) &&
+         "SSSE3 or GFNI required for BITREVERSE");
 
   SDValue In = Op.getOperand(0);
   SDLoc DL(Op);
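[Editor's note] With this change, targets that have GFNI but not SSSE3 (and are not soft-float) get custom BITREVERSE lowering for the 128-bit vector types as well as scalars, rather than potentially hitting the old SSSE3-only assert. A quick way to exercise the scalar path from C++ (clang-specific builtins; the -mgfni flag spelling is an assumption of this note):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Bit-reversal identities; on an x86 target with GFNI these can lower
      // to GF2P8AFFINEQB-based sequences.
      assert(__builtin_bitreverse32(0x00000001u) == 0x80000000u);
      assert(__builtin_bitreverse8(0x01) == 0x80);
      return 0;
    }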
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 636bd81..9e318b0 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3790,6 +3790,11 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
           continue;
         }
 
+        // Do not consider uses inside lifetime intrinsics. These are not
+        // actually materialized.
+        if (UserInst->isLifetimeStartOrEnd())
+          continue;
+
         std::pair<size_t, Immediate> P =
             getUse(S, LSRUse::Basic, MemAccessTy());
         size_t LUIdx = P.first;
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index 6226596..40dc02c 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -232,13 +232,7 @@ void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) {
   NewAI->setSwiftError(Info.AI->isSwiftError());
   NewAI->copyMetadata(*Info.AI);
 
-  Value *NewPtr = NewAI;
-
-  // TODO: Remove when typed pointers dropped
-  if (Info.AI->getType() != NewAI->getType())
-    NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI->getIterator());
-
-  Info.AI->replaceAllUsesWith(NewPtr);
+  Info.AI->replaceAllUsesWith(NewAI);
   Info.AI->eraseFromParent();
   Info.AI = NewAI;
 }