Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp                 |  22
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp                |   2
-rw-r--r--  llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp                  |  10
-rw-r--r--  llvm/lib/MC/MCObjectStreamer.cpp                              |  43
-rw-r--r--  llvm/lib/MC/MCParser/ELFAsmParser.cpp                         |   4
-rw-r--r--  llvm/lib/MC/MCParser/MCTargetAsmParser.cpp                    |   5
-rw-r--r--  llvm/lib/MC/MCSectionELF.cpp                                  |   4
-rw-r--r--  llvm/lib/MC/MCStreamer.cpp                                    |  13
-rw-r--r--  llvm/lib/Object/ELF.cpp                                       |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp               |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp             |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td                         |   1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td                      |  18
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td                    |  14
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPInstructions.td                     |  11
-rw-r--r--  llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 109
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp  |  34
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp |  19
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp                  | 122
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp              | 138
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                       |   9
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp             |   5
-rw-r--r--  llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp            |   8
23 files changed, 360 insertions(+), 238 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 40464e9..fed5e72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7641,7 +7641,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue(GN0, 0).hasOneUse() &&
isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
- TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ TLI.isVectorLoadExtDesirable(SDValue(N, 0))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
@@ -15745,7 +15745,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
- TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ TLI.isVectorLoadExtDesirable(SDValue(N, 0))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
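
The fix replaces SDValue(SDValue(GN0, 0)) — which merely copy-constructs the gather's own value — with SDValue(N, 0), so the target hook finally sees the extending node it is documented to take. A minimal mock (hypothetical types, not LLVM's) of why the old form compiled yet asked about the wrong node:

    #include <cstdio>

    struct Node { const char *Name; };
    struct SDValue { const Node *N; unsigned ResNo; };

    // Stand-in for TLI.isVectorLoadExtDesirable: it inspects the value it
    // is handed, so handing it the gather instead of the extend is a bug.
    static void query(SDValue V) { std::printf("queried %s\n", V.N->Name); }

    int main() {
      Node Gather{"masked_gather (GN0)"}, Ext{"extend (N)"};
      SDValue GatherVal{&Gather, 0};
      query(SDValue(GatherVal)); // old: just the copy constructor, still GN0
      query(SDValue{&Ext, 0});   // new: the extension node itself
    }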
@@ -16772,12 +16772,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
/*Depth*/ 1))
continue;
- bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
- bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
- if (IsNewMaybePoisonOperand)
+ if (MaybePoisonOperands.insert(Op).second)
MaybePoisonOperandNumbers.push_back(OpNo);
- if (!HadMaybePoisonOperands)
- continue;
}
// NOTE: the whole op may still not be guaranteed to be free of undef or
// poison, because it could create undef or poison due to its
// poison-generating flags.
@@ -18727,6 +18723,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
return FTrunc;
+ // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
+ if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
+ TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
+ N0.getOperand(0).getValueType()))
+ return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
+
return SDValue();
}
@@ -18764,6 +18766,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
return FTrunc;
+ // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
+ if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
+ TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
+ N0.getOperand(0).getValueType()))
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
+
return SDValue();
}
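
Both new folds lean on the truncate's wrap flags: nsw (or nuw) guarantees the narrow value equals the wide one, so the conversion may consume the wide source directly; the isTypeDesirableForOp check, omitted here, additionally keeps the wider operation profitable for the target. A plain-C++ analogue of the value-preservation argument:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Fits in i32, so a "trunc nsw" from i64 would be value-preserving.
      int64_t WideS = -123456;
      int32_t NarrowS = static_cast<int32_t>(WideS);
      assert(static_cast<float>(NarrowS) == static_cast<float>(WideS));

      // Fits in u32, the "trunc nuw" case.
      uint64_t WideU = 98765;
      uint32_t NarrowU = static_cast<uint32_t>(WideU);
      assert(static_cast<float>(NarrowU) == static_cast<float>(WideU));
    }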
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5453828..2458115 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5544,6 +5544,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::USUBSAT:
case ISD::MULHU:
case ISD::MULHS:
+ case ISD::ABDU:
+ case ISD::ABDS:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
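
Listing ISD::ABDU/ABDS here records that absolute difference may propagate poison from its operands but never creates it: the operation is a total function of its input bits, with no overflow UB anywhere in its range. A scalar sketch of the unsigned case:

    #include <cassert>
    #include <cstdint>

    // |a - b| computed without any undefined behavior, even at the extremes.
    static uint32_t abdu(uint32_t A, uint32_t B) {
      return A > B ? A - B : B - A;
    }

    int main() {
      assert(abdu(0u, UINT32_MAX) == UINT32_MAX);
      assert(abdu(7u, 3u) == 4u);
    }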
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
index 559d808..222dc88 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
@@ -43,12 +43,6 @@ namespace llvm {
using namespace dwarf_linker;
using namespace dwarf_linker::classic;
-enum InvalidStmtSeqOffset {
- MaxStmtSeqOffset = UINT64_MAX,
- OrigOffsetMissing = MaxStmtSeqOffset - 1,
- NewOffsetMissing = MaxStmtSeqOffset - 2,
-};
-
/// Hold the input and output of the debug info size in bytes.
struct DebugInfoSize {
uint64_t Input;
@@ -2321,7 +2315,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
// Some sequences are discarded by the DWARFLinker if they are invalid
// (empty).
if (OrigRowIter == SeqOffToOrigRow.end()) {
- StmtSeq.set(OrigOffsetMissing);
+ StmtSeq.set(UINT64_MAX);
continue;
}
size_t OrigRowIndex = OrigRowIter->second;
@@ -2331,7 +2325,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
if (NewRowIter == OrigRowToNewRow.end()) {
// If the original row index is not found in the map, update the
// stmt_sequence attribute to the 'invalid offset' magic value.
- StmtSeq.set(NewOffsetMissing);
+ StmtSeq.set(UINT64_MAX);
continue;
}
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 67433f2..d5b8f22 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -106,26 +106,12 @@ void MCObjectStreamer::emitFrames(MCAsmBackend *MAB) {
MCDwarfFrameEmitter::Emit(*this, MAB, false);
}
-static bool canReuseDataFragment(const MCFragment &F,
- const MCAssembler &Assembler,
- const MCSubtargetInfo *STI) {
- if (!F.hasInstructions())
- return true;
- // Do not add data after a linker-relaxable instruction. The difference
- // between a new label and a label at or before the linker-relaxable
- // instruction cannot be resolved at assemble-time.
- if (F.isLinkerRelaxable())
- return false;
- // If the subtarget is changed mid fragment we start a new fragment to record
- // the new STI.
- return !STI || F.getSubtargetInfo() == STI;
-}
-
-MCFragment *
-MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) {
+MCFragment *MCObjectStreamer::getOrCreateDataFragment() {
+ // TODO: Start a new fragment whenever finalizing the variable-size tail of a
+ // previous one, so that all getOrCreateDataFragment calls can be replaced
+ // with getCurrentFragment
auto *F = getCurrentFragment();
- if (F->getKind() != MCFragment::FT_Data ||
- !canReuseDataFragment(*F, *Assembler, STI)) {
+ if (F->getKind() != MCFragment::FT_Data) {
F = getContext().allocFragment<MCFragment>();
insert(F);
}
@@ -363,16 +349,23 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
F->doneAppending();
if (!Fixups.empty())
F->appendFixups(Fixups);
+ F->setHasInstructions(STI);
+ bool MarkedLinkerRelaxable = false;
for (auto &Fixup : MutableArrayRef(F->getFixups()).slice(FixupStartIndex)) {
Fixup.setOffset(Fixup.getOffset() + CodeOffset);
- if (Fixup.isLinkerRelaxable()) {
- F->setLinkerRelaxable();
+ if (!Fixup.isLinkerRelaxable())
+ continue;
+ F->setLinkerRelaxable();
+ // Do not add data after a linker-relaxable instruction. The difference
+ // between a new label and a label at or before the linker-relaxable
+ // instruction cannot be resolved at assemble-time.
+ if (!MarkedLinkerRelaxable) {
+ MarkedLinkerRelaxable = true;
getCurrentSectionOnly()->setLinkerRelaxable();
+ newFragment();
}
}
-
- F->setHasInstructions(STI);
}
void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
@@ -568,8 +561,10 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment,
// if the alignment is larger than the minimum NOP size.
unsigned Size;
if (getAssembler().getBackend().shouldInsertExtraNopBytesForCodeAlign(*F,
- Size))
+ Size)) {
getCurrentSectionOnly()->setLinkerRelaxable();
+ newFragment();
+ }
}
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index ec8b402..c7c3df3 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -571,7 +571,7 @@ bool ELFAsmParser::parseSectionArguments(bool IsPush, SMLoc loc) {
return TokError("expected end of directive");
}
- if (Mergeable)
+ if (Mergeable || TypeName == "llvm_cfi_jump_table")
if (parseMergeSize(Size))
return true;
if (Flags & ELF::SHF_LINK_ORDER)
@@ -637,6 +637,8 @@ EndStmt:
Type = ELF::SHT_LLVM_LTO;
else if (TypeName == "llvm_jt_sizes")
Type = ELF::SHT_LLVM_JT_SIZES;
+ else if (TypeName == "llvm_cfi_jump_table")
+ Type = ELF::SHT_LLVM_CFI_JUMP_TABLE;
else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
diff --git a/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp b/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
index 665d92e..7f09349 100644
--- a/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -22,6 +23,10 @@ MCTargetAsmParser::~MCTargetAsmParser() = default;
MCSubtargetInfo &MCTargetAsmParser::copySTI() {
MCSubtargetInfo &STICopy = getContext().getSubtargetCopy(getSTI());
STI = &STICopy;
+ // The returned STI will likely be modified. Create a new fragment to prevent
+ // mixing STI values within a fragment.
+ if (getStreamer().getCurrentFragment())
+ getStreamer().newFragment();
return STICopy;
}
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index cc7cdf2..299fe40 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -176,11 +176,13 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "llvm_lto";
else if (Type == ELF::SHT_LLVM_JT_SIZES)
OS << "llvm_jt_sizes";
+ else if (Type == ELF::SHT_LLVM_CFI_JUMP_TABLE)
+ OS << "llvm_cfi_jump_table";
else
OS << "0x" << Twine::utohexstr(Type);
if (EntrySize) {
- assert(Flags & ELF::SHF_MERGE);
+ assert((Flags & ELF::SHF_MERGE) || Type == ELF::SHT_LLVM_CFI_JUMP_TABLE);
OS << "," << EntrySize;
}
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index d814ab88..c3ecf8f 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -1404,6 +1404,19 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) {
return Sym;
}
+void MCStreamer::insert(MCFragment *F) {
+ auto *Sec = CurFrag->getParent();
+ F->setParent(Sec);
+ F->setLayoutOrder(CurFrag->getLayoutOrder() + 1);
+ CurFrag->Next = F;
+ CurFrag = F;
+ Sec->curFragList()->Tail = F;
+}
+
+void MCStreamer::newFragment() {
+ insert(getContext().allocFragment<MCFragment>());
+}
+
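
insert() splices the fragment after the current one and keeps the section's tail and layout order consistent; newFragment() is what the linker-relaxation, alignment, and subtarget-change paths above now call to seal off the current fragment. A toy model of that list discipline (illustrative names, not LLVM's):

    struct Fragment {
      Fragment *Next = nullptr;
      unsigned LayoutOrder = 0;
    };
    struct Section { Fragment *Tail = nullptr; };

    // Append F after the current fragment and make it both the new current
    // fragment and the section tail, mirroring MCStreamer::insert above.
    static void append(Section &Sec, Fragment *&Cur, Fragment *F) {
      F->LayoutOrder = Cur->LayoutOrder + 1;
      Cur->Next = F;
      Cur = F;
      Sec.Tail = F;
    }

    int main() {
      Section Sec;
      Fragment Head, F1;
      Fragment *Cur = &Head;
      Sec.Tail = Cur;
      append(Sec, Cur, &F1); // e.g. right after a linker-relaxable fixup
      return Sec.Tail == &F1 ? 0 : 1;
    }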
static VersionTuple
targetVersionOrMinimumSupportedOSVersion(const Triple &Target,
VersionTuple TargetVersion) {
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index af073f6..788c602 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -321,6 +321,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_OFFLOADING);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LTO);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_JT_SIZES)
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CFI_JUMP_TABLE)
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_SFRAME);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d04e6c4..f026726 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6439,7 +6439,9 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
}
}
- return true;
+ EVT PreExtScalarVT = ExtVal->getOperand(0).getValueType().getScalarType();
+ return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
+ PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
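
Previously the function fell through to return true for any element type; it now reports an extending load as desirable only for the byte-multiple element widths the extending-load patterns cover. A sketch of the predicate (i12 is a hypothetical odd width used only for illustration):

    #include <cassert>

    static bool isDesirableExtElt(unsigned Bits) {
      return Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64;
    }

    int main() {
      assert(isDesirableExtElt(16));
      assert(!isDesirableExtElt(12)); // previously fell through to "true"
    }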
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index cbbb57c..bf2f37b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4558,6 +4558,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_u16:
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
case Intrinsic::amdgcn_cvt_pk_f16_bf8:
+ case Intrinsic::amdgcn_sat_pk4_i4_i8:
+ case Intrinsic::amdgcn_sat_pk4_u4_u8:
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ab7d340..9e1951e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2850,6 +2850,7 @@ def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
def VOP_BF16_BF16 : VOPProfile<[bf16, bf16, untyped, untyped]>;
+def VOP1_I16_I32 : VOPProfile<[i16, i32, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2a6fcad..991d9f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3427,30 +3427,32 @@ def : GCNPat <
(S_LSHL_B32 SReg_32:$src1, (i16 16))
>;
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
>;
-
def : GCNPat <
- (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
- (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
+ (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src1), (i16 0))),
+ (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
>;
def : GCNPat <
- (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src1), (i16 0))),
- (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
+ (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
+ (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
>;
+}
def : GCNPat <
- (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src1), (i16 0))),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
def : GCNPat <
- (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
- (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
+ (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
>;
foreach vecTy = [v2i16, v2f16, v2bf16] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1bbbb61..f621f85 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -803,6 +803,9 @@ let SubtargetPredicate = isGFX1250Plus in {
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_fp8, V_CVT_F16_FP8_fake16_e64, 1>;
def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f16_bf8, V_CVT_F16_BF8_fake16_e64, 1>;
}
+
+ defm V_SAT_PK4_I4_I8 : VOP1Inst_t16<"v_sat_pk4_i4_i8", VOP1_I16_I32, int_amdgcn_sat_pk4_i4_i8>;
+ defm V_SAT_PK4_U4_U8 : VOP1Inst_t16<"v_sat_pk4_u4_u8", VOP1_I16_I32, int_amdgcn_sat_pk4_u4_u8>;
} // End SubtargetPredicate = isGFX1250Plus
let SubtargetPredicate = isGFX10Plus in {
@@ -1080,6 +1083,13 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
}
+multiclass VOP1_Real_OpSelIsDPP_gfx1250<bits<9> op> : VOP1_Real_e32<GFX1250Gen, op> {
+ defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
+ def _e64_gfx1250 :
+ VOP3_Real_Gen<ps, GFX1250Gen>,
+ VOP3OpSelIsDPP_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>;
+}
+
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Not12_50Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
@@ -1147,8 +1157,12 @@ defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
+defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
+defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
+defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 2b91ea7..a25ebdf 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -331,10 +331,19 @@ class VOP3OpSel_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
// Special case for v_permlane16_swap_b32/v_permlane32_swap_b32
// op_sel[0]/op_sel[1] are treated as bound_ctrl and fi dpp operands.
-class VOP3OpSelIsDPP_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
+class VOP3OpSelIsDPP_base {
bits<1> fi;
bits<1> bound_ctrl;
+}
+
+class VOP3OpSelIsDPP_gfx9 <bits<10> op, VOPProfile P> : VOP3OpSelIsDPP_base, VOP3e_vi <op, P> {
+ // OPSEL[0] specifies FI
+ let Inst{11} = fi;
+ // OPSEL[1] specifies BOUND_CTRL
+ let Inst{12} = bound_ctrl;
+}
+class VOP3OpSelIsDPP_gfx12 <bits<10> op, VOPProfile P> : VOP3OpSelIsDPP_base, VOP3e_gfx11_gfx12 <op, P> {
// OPSEL[0] specifies FI
let Inst{11} = fi;
// OPSEL[1] specifies BOUND_CTRL
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5bd3170..22cff7c 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -43,12 +43,12 @@ namespace {
class HexagonDisassembler : public MCDisassembler {
public:
std::unique_ptr<MCInstrInfo const> const MCII;
- std::unique_ptr<MCInst *> CurrentBundle;
+ mutable std::unique_ptr<MCInst> CurrentBundle;
mutable MCInst const *CurrentExtender;
HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
MCInstrInfo const *MCII)
- : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *),
+ : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(nullptr),
CurrentExtender(nullptr) {}
DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
@@ -57,7 +57,23 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
+
+ DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const override;
+
void remapInstruction(MCInst &Instr) const;
+
+private:
+ bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &BytesToSkip, raw_ostream &CS) const;
+
+ void resetBundle() const {
+ CurrentBundle.reset();
+ CurrentInstruction = nullptr;
+ }
+
+ mutable MCOperand *CurrentInstruction = nullptr;
};
static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI,
@@ -171,43 +187,88 @@ LLVMInitializeHexagonDisassembler() {
createHexagonDisassembler);
}
-DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- ArrayRef<uint8_t> Bytes,
- uint64_t Address,
- raw_ostream &CS) const {
- CommentStream = &CS;
-
- DecodeStatus Result = DecodeStatus::Success;
+bool HexagonDisassembler::makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &BytesToSkip,
+ raw_ostream &CS) const {
bool Complete = false;
- Size = 0;
+ DecodeStatus Result = DecodeStatus::Success;
- *CurrentBundle = &MI;
- MI.setOpcode(Hexagon::BUNDLE);
- MI.addOperand(MCOperand::createImm(0));
+ CurrentBundle.reset(new MCInst);
+ CurrentBundle->setOpcode(Hexagon::BUNDLE);
+ CurrentBundle->addOperand(MCOperand::createImm(0));
while (Result == Success && !Complete) {
if (Bytes.size() < HEXAGON_INSTR_SIZE)
- return MCDisassembler::Fail;
+ return false;
MCInst *Inst = getContext().createMCInst();
- Result = getSingleInstruction(*Inst, MI, Bytes, Address, CS, Complete);
- MI.addOperand(MCOperand::createInst(Inst));
- Size += HEXAGON_INSTR_SIZE;
+ Result = getSingleInstruction(*Inst, *CurrentBundle, Bytes, Address, CS,
+ Complete);
+ CurrentBundle->addOperand(MCOperand::createInst(Inst));
+ BytesToSkip += HEXAGON_INSTR_SIZE;
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
}
if (Result == MCDisassembler::Fail)
- return Result;
- if (Size > HEXAGON_MAX_PACKET_SIZE)
- return MCDisassembler::Fail;
+ return false;
+ if (BytesToSkip > HEXAGON_MAX_PACKET_SIZE)
+ return false;
const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI);
const auto STI_ = (ArchSTI != nullptr) ? *ArchSTI : STI;
- HexagonMCChecker Checker(getContext(), *MCII, STI_, MI,
+ HexagonMCChecker Checker(getContext(), *MCII, STI_, *CurrentBundle,
*getContext().getRegisterInfo(), false);
if (!Checker.check())
- return MCDisassembler::Fail;
- remapInstruction(MI);
+ return false;
+ remapInstruction(*CurrentBundle);
+ return true;
+}
+
+DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
+
+ Size = 0;
+ uint64_t BytesToSkip = 0;
+
+ if (!CurrentBundle) {
+ if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
+ Size = BytesToSkip;
+ resetBundle();
+ return MCDisassembler::Fail;
+ }
+ CurrentInstruction = (CurrentBundle->begin() + 1);
+ }
+
+ MI = *(CurrentInstruction->getInst());
+ Size = HEXAGON_INSTR_SIZE;
+ if (++CurrentInstruction == CurrentBundle->end())
+ resetBundle();
return MCDisassembler::Success;
}
+DecodeStatus HexagonDisassembler::getInstructionBundle(MCInst &MI,
+ uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
+ Size = 0;
+ uint64_t BytesToSkip = 0;
+ assert(!CurrentBundle);
+
+ if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
+ Size = BytesToSkip;
+ resetBundle();
+ return MCDisassembler::Fail;
+ }
+
+ MI = *CurrentBundle;
+ Size = HEXAGON_INSTR_SIZE * HexagonMCInstrInfo::bundleSize(MI);
+ resetBundle();
+
+ return Success;
+}
+
void HexagonDisassembler::remapInstruction(MCInst &Instr) const {
for (auto I: HexagonMCInstrInfo::bundleInstructions(Instr)) {
auto &MI = const_cast<MCInst &>(*I.getInst());
@@ -482,7 +543,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
unsigned Offset = 1;
bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
bool PrevVector = false;
- auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(*CurrentBundle);
auto i = Instructions.end() - 1;
for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
if (i == n)
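
With this restructuring, getInstruction() decodes a whole packet once via makeBundle(), caches it, and then returns one sub-instruction per call (Size = 4 bytes each) until resetBundle(); getInstructionBundle() hands back the cached packet whole. A toy model of that hand-out scheme (assumed shape, not the real MC API):

    #include <cstdio>
    #include <vector>

    struct Decoder {
      std::vector<int> Bundle; // stands in for the cached MCInst bundle
      size_t Pos = 0;

      int next() {
        if (Bundle.empty()) {       // makeBundle(): decode the full packet
          Bundle = {10, 20, 30};
          Pos = 0;
        }
        int Insn = Bundle[Pos++];
        if (Pos == Bundle.size()) { // resetBundle(): packet drained
          Bundle.clear();
          Pos = 0;
        }
        return Insn;
      }
    };

    int main() {
      Decoder D;
      for (int I = 0; I < 3; ++I) // three calls, one packet decode
        std::printf("insn %d (Size = 4)\n", D.next());
    }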
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 9030e43..f83e06c 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -33,30 +33,18 @@ void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
void HexagonInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &OS) {
- assert(HexagonMCInstrInfo::isBundle(*MI));
- assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
- assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
- HasExtender = false;
- for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
- MCInst const &MCI = *I.getInst();
- if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
- printInstruction(MCI.getOperand(1).getInst(), Address, OS);
- OS << '\v';
- HasExtender = false;
- printInstruction(MCI.getOperand(0).getInst(), Address, OS);
- } else
- printInstruction(&MCI, Address, OS);
- HasExtender = HexagonMCInstrInfo::isImmext(MCI);
- OS << "\n";
- }
-
- bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(*MI);
- bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(*MI);
- if (IsLoop0) {
- OS << (IsLoop1 ? " :endloop01" : " :endloop0");
- } else if (IsLoop1) {
- OS << " :endloop1";
+ if (HexagonMCInstrInfo::isDuplex(MII, *MI)) {
+ printInstruction(MI->getOperand(1).getInst(), Address, OS);
+ OS << '\v';
+ HasExtender = false;
+ printInstruction(MI->getOperand(0).getInst(), Address, OS);
+ } else {
+ printInstruction(MI, Address, OS);
}
+ HasExtender = HexagonMCInstrInfo::isImmext(*MI);
+ if ((MI->getOpcode() & HexagonII::INST_PARSE_MASK) ==
+ HexagonII::INST_PARSE_PACKET_END)
+ HasExtender = false;
}
void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 980df81..bfea50e 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -252,8 +252,21 @@ public:
std::string Buffer;
{
raw_string_ostream TempStream(Buffer);
- InstPrinter.printInst(&Inst, Address, "", STI, TempStream);
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+ InstPrinter.printInst(I.getInst(), Address, "", STI, TempStream);
+ TempStream << "\n";
+ }
+ }
+
+ std::string LoopString = "";
+ bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(Inst);
+ bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(Inst);
+ if (IsLoop0) {
+ LoopString += (IsLoop1 ? " :endloop01" : " :endloop0");
+ } else if (IsLoop1) {
+ LoopString += " :endloop1";
}
+
StringRef Contents(Buffer);
auto PacketBundle = Contents.rsplit('\n');
auto HeadTail = PacketBundle.first.split('\n');
@@ -275,9 +288,9 @@ public:
}
if (HexagonMCInstrInfo::isMemReorderDisabled(Inst))
- OS << "\n\t} :mem_noshuf" << PacketBundle.second;
+ OS << "\n\t} :mem_noshuf" << LoopString;
else
- OS << "\t}" << PacketBundle.second;
+ OS << "\t}" << LoopString;
}
void finish() override { finishAttributeSection(); }
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 6c8e3da..23b4554 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -95,6 +95,11 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
/* -21, -22, -23, -24 are reserved */
};
+/// Returns true if DWARF CFI instructions ("frame moves") should be emitted.
+static bool needsDwarfCFI(const MachineFunction &MF) {
+ return MF.needsFrameMoves();
+}
+
// For now we use x3, a.k.a gp, as pointer to shadow call stack.
// User should not use x3 in their asm.
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -141,6 +146,9 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameSetup);
+ if (!needsDwarfCFI(MF))
+ return;
+
// Emit a CFI instruction that causes SlotSize to be subtracted from the value
// of the shadow stack pointer when unwinding past this frame.
char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
@@ -199,8 +207,10 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(SCSPReg)
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameDestroy);
- // Restore the SCS pointer
- CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+ if (needsDwarfCFI(MF)) {
+ // Restore the SCS pointer
+ CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+ }
}
// Insert instruction to swap mscratchsw with sp
@@ -935,6 +945,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
getUnmanagedCSI(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+ bool NeedsDwarfCFI = needsDwarfCFI(MF);
// If libcalls are used to spill and restore callee-saved registers, the frame
// has two sections; the opaque section managed by the libcalls, and the
@@ -962,10 +973,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
RVFI->setLibCallStackSize(LibCallFrameSize);
- CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
@@ -996,14 +1009,17 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// could only be the next instruction.
++PossiblePush;
- // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
- // could be. The PUSH will also get its own CFI metadata for its own
- // modifications, which should come after the PUSH.
- CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
- PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
- for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
- PushCFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
+ // could be. The PUSH will also get its own CFI metadata for its own
+ // modifications, which should come after the PUSH.
+ CFIInstBuilder PushCFIBuilder(MBB, PossiblePush,
+ MachineInstr::FrameSetup);
+ PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
+ for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
+ PushCFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
@@ -1017,10 +1033,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
PossiblePush->getOperand(1).setImm(StackAdj);
StackSize -= StackAdj;
- CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
// Allocate space on the stack if necessary.
@@ -1031,7 +1049,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
if (StackSize != 0)
- allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, NeedsDwarfCFI,
NeedProbe, ProbeSize, DynAllocation,
MachineInstr::FrameSetup);
@@ -1049,8 +1067,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI)
+ for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
// Generate new FP.
if (hasFP(MF)) {
@@ -1069,7 +1089,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
+ if (NeedsDwarfCFI)
+ CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
}
uint64_t SecondSPAdjustAmount = 0;
@@ -1080,15 +1101,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
"SecondSPAdjustAmount should be greater than zero");
allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
- getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
- ProbeSize, DynAllocation, MachineInstr::FrameSetup);
+ getStackSizeWithRVVPadding(MF), NeedsDwarfCFI && !hasFP(MF),
+ NeedProbe, ProbeSize, DynAllocation,
+ MachineInstr::FrameSetup);
}
if (RVVStackSize) {
if (NeedProbe) {
allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
- MachineInstr::FrameSetup, !hasFP(MF),
- DynAllocation);
+ MachineInstr::FrameSetup,
+ NeedsDwarfCFI && !hasFP(MF), DynAllocation);
} else {
// We must keep the stack pointer aligned through any intermediate
// updates.
@@ -1097,14 +1119,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- if (!hasFP(MF)) {
+ if (NeedsDwarfCFI && !hasFP(MF)) {
// Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
CFIBuilder.insertCFIInst(createDefCFAExpression(
*RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
}
std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
- emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
+ if (NeedsDwarfCFI)
+ emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
}
if (hasFP(MF)) {
@@ -1171,8 +1194,9 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
MachineInstr::FrameDestroy, getStackAlign());
StackSize = 0;
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
- .buildDefCFAOffset(CFAOffset);
+ if (needsDwarfCFI(MF))
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
+ .buildDefCFAOffset(CFAOffset);
}
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -1212,6 +1236,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
MachineInstr::FrameDestroy);
+ bool NeedsDwarfCFI = needsDwarfCFI(MF);
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
@@ -1232,10 +1257,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getScalable(RVVStackSize),
MachineInstr::FrameDestroy, getStackAlign());
- if (!hasFP(MF))
- CFIBuilder.buildDefCFA(SPReg, RealStackSize);
-
- emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+ if (NeedsDwarfCFI) {
+ if (!hasFP(MF))
+ CFIBuilder.buildDefCFA(SPReg, RealStackSize);
+ emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+ }
}
if (FirstSPAdjustAmount) {
@@ -1251,7 +1277,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getFixed(SecondSPAdjustAmount),
MachineInstr::FrameDestroy, getStackAlign());
- if (!hasFP(MF))
+ if (NeedsDwarfCFI && !hasFP(MF))
CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount);
}
@@ -1272,7 +1298,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
getStackAlign());
}
- if (hasFP(MF))
+ if (NeedsDwarfCFI && hasFP(MF))
CFIBuilder.buildDefCFA(SPReg, RealStackSize);
// Skip to after the restores of scalar callee-saved registers
@@ -1295,8 +1321,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
}
// Recover callee-saved registers.
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
+ if (NeedsDwarfCFI)
+ for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+ CFIBuilder.buildRestore(CS.getReg());
if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
// Use available stack adjustment in pop instruction to deallocate stack
@@ -1315,15 +1342,17 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
auto NextI = next_nodbg(MBBI, MBB.end());
if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
++MBBI;
- CFIBuilder.setInsertPoint(MBBI);
+ if (NeedsDwarfCFI) {
+ CFIBuilder.setInsertPoint(MBBI);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildRestore(CS.getReg());
- // Update CFA Offset. If this is a QCI interrupt function, there will be a
- // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
- // getQCIInterruptStackSize() will be 0.
- CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+ // Update CFA Offset. If this is a QCI interrupt function, there will
+ // be a leftover offset which is deallocated by `QC.C.MILEAVERET`,
+ // otherwise getQCIInterruptStackSize() will be 0.
+ CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+ }
}
}
@@ -1812,7 +1841,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
// allocateStack.
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
- allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+ allocateStack(MBB, MI, MF, -Amount, -Amount,
+ needsDwarfCFI(MF) && !hasFP(MF),
/*NeedProbe=*/true, ProbeSize, DynAllocation,
MachineInstr::NoFlags);
} else {
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 38cc0ce..dd68a55 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -102,6 +102,56 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
return false;
}
+/// Do the common operand retrieval and validation required by the
+/// routines below.
+static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
+ Instruction *I, Value *&Ptr, Value *&Mask,
+ Value *&VL, Align &Alignment) {
+
+ IRBuilder<> Builder(I);
+ const DataLayout &DL = I->getDataLayout();
+ ElementCount EC = VTy->getElementCount();
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ assert(LI->isSimple());
+ Ptr = LI->getPointerOperand();
+ Alignment = LI->getAlign();
+ assert(!Mask && "Unexpected mask on a load");
+ Mask = Builder.getAllOnesMask(EC);
+ VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ assert(SI->isSimple());
+ Ptr = SI->getPointerOperand();
+ Alignment = SI->getAlign();
+ assert(!Mask && "Unexpected mask on a store");
+ Mask = Builder.getAllOnesMask(EC);
+ VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ auto *VPLdSt = cast<VPIntrinsic>(I);
+ assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
+ VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
+ "Unexpected intrinsic");
+ Ptr = VPLdSt->getMemoryPointerParam();
+ Alignment = VPLdSt->getPointerAlignment().value_or(
+ DL.getABITypeAlign(VTy->getElementType()));
+
+ assert(Mask && "vp.load and vp.store need a mask!");
+
+ Value *WideEVL = VPLdSt->getVectorLengthParam();
+ // Conservatively check if EVL is a multiple of factor, otherwise some
+ // (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+ return false;
+
+ auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+ VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+ return true;
+}
+
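
The EVL path is shared verbatim with the three blocks it replaces below: a vp.load/vp.store EVL counts interleaved elements, so each of the Factor fields receives EVL / Factor of them, and isMultipleOfN guards against silently dropping a trailing partial group. The arithmetic in miniature:

    #include <cassert>

    int main() {
      unsigned Factor = 4;
      unsigned WideEVL = 32;          // 32 interleaved elements in memory
      assert(WideEVL % Factor == 0);  // what isMultipleOfN conservatively checks
      unsigned VL = WideEVL / Factor; // per-field length for vlseg4/vsseg4
      assert(VL == 8);
    }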
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -127,32 +177,8 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
Value *Ptr, *VL;
Align Alignment;
- if (auto *LI = dyn_cast<LoadInst>(Load)) {
- assert(LI->isSimple());
- Ptr = LI->getPointerOperand();
- Alignment = LI->getAlign();
- assert(!Mask && "Unexpected mask on a load\n");
- Mask = Builder.getAllOnesMask(VTy->getElementCount());
- VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
- } else {
- auto *VPLoad = cast<VPIntrinsic>(Load);
- assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
- "Unexpected intrinsic");
- Ptr = VPLoad->getMemoryPointerParam();
- Alignment = VPLoad->getPointerAlignment().value_or(
- DL.getABITypeAlign(VTy->getElementType()));
-
- assert(Mask && "vp.load needs a mask!");
-
- Value *WideEVL = VPLoad->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, DL, Factor))
- return false;
-
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- }
+ if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+ return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
@@ -296,34 +322,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
Value *Ptr, *VL;
Align Alignment;
- if (auto *LI = dyn_cast<LoadInst>(Load)) {
- assert(LI->isSimple());
- Ptr = LI->getPointerOperand();
- Alignment = LI->getAlign();
- assert(!Mask && "Unexpected mask on a load\n");
- Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
- VL = isa<FixedVectorType>(ResVTy)
- ? Builder.CreateElementCount(XLenTy, ResVTy->getElementCount())
- : Constant::getAllOnesValue(XLenTy);
- } else {
- auto *VPLoad = cast<VPIntrinsic>(Load);
- assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
- "Unexpected intrinsic");
- Ptr = VPLoad->getMemoryPointerParam();
- Alignment = VPLoad->getPointerAlignment().value_or(
- DL.getABITypeAlign(ResVTy->getElementType()));
-
- assert(Mask && "vp.load needs a mask!");
-
- Value *WideEVL = VPLoad->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
- return false;
-
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- }
+ if (!getMemOperands(Factor, ResVTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
+ return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
@@ -385,34 +385,8 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
Value *Ptr, *VL;
Align Alignment;
- if (auto *SI = dyn_cast<StoreInst>(Store)) {
- assert(SI->isSimple());
- Ptr = SI->getPointerOperand();
- Alignment = SI->getAlign();
- assert(!Mask && "Unexpected mask on a store");
- Mask = Builder.getAllOnesMask(InVTy->getElementCount());
- VL = isa<FixedVectorType>(InVTy)
- ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount())
- : Constant::getAllOnesValue(XLenTy);
- } else {
- auto *VPStore = cast<VPIntrinsic>(Store);
- assert(VPStore->getIntrinsicID() == Intrinsic::vp_store &&
- "Unexpected intrinsic");
- Ptr = VPStore->getMemoryPointerParam();
- Alignment = VPStore->getPointerAlignment().value_or(
- DL.getABITypeAlign(InVTy->getElementType()));
-
- assert(Mask && "vp.store needs a mask!");
-
- Value *WideEVL = VPStore->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, DL, Factor))
- return false;
-
- auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
- VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- }
+ if (!getMemOperands(Factor, InVTy, XLenTy, Store, Ptr, Mask, VL, Alignment))
+ return false;
Type *PtrTy = Ptr->getType();
unsigned AS = Ptr->getType()->getPointerAddressSpace();
if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea..6281124 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1323,11 +1323,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
}
- if (Subtarget.hasGFNI()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasGFNI()) {
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i64, Custom);
+
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ }
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -32694,7 +32698,8 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasXOP() && !VT.is512BitVector())
return LowerBITREVERSE_XOP(Op, DAG);
- assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
+ assert((Subtarget.hasSSSE3() || Subtarget.hasGFNI()) &&
+ "SSSE3 or GFNI required for BITREVERSE");
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
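
On GFNI targets the byte-wise bit reversal lowers to GF2P8AFFINEQB with the bit-reversal matrix (0x8040201008040201), and the patch extends the custom lowering to the 128-bit vector types when soft-float is off. A scalar model of the per-byte semantics that instruction computes:

    #include <cassert>
    #include <cstdint>

    // Output bit i is input bit 7-i, for every byte independently.
    static uint8_t bitrev8(uint8_t B) {
      uint8_t R = 0;
      for (int I = 0; I < 8; ++I)
        R |= ((B >> I) & 1u) << (7 - I);
      return R;
    }

    int main() {
      assert(bitrev8(0x01) == 0x80);
      assert(bitrev8(0xB0) == 0x0D);
    }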
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 636bd81..9e318b0 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3790,6 +3790,11 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
continue;
}
+ // Do not consider uses inside lifetime intrinsics. These are not
+ // actually materialized.
+ if (UserInst->isLifetimeStartOrEnd())
+ continue;
+
std::pair<size_t, Immediate> P =
getUse(S, LSRUse::Basic, MemAccessTy());
size_t LUIdx = P.first;
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index 6226596..40dc02c 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -232,13 +232,7 @@ void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) {
NewAI->setSwiftError(Info.AI->isSwiftError());
NewAI->copyMetadata(*Info.AI);
- Value *NewPtr = NewAI;
-
- // TODO: Remove when typed pointers dropped
- if (Info.AI->getType() != NewAI->getType())
- NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI->getIterator());
-
- Info.AI->replaceAllUsesWith(NewPtr);
+ Info.AI->replaceAllUsesWith(NewAI);
Info.AI->eraseFromParent();
Info.AI = NewAI;
}