aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp11
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp122
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td20
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td10
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp69
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td5
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp61
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h3
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.cpp37
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp75
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td22
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td1
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp36
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td134
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp12
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp14
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp11
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h3
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp29
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp19
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h3
-rw-r--r--llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp15
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp14
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp49
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h3
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp13
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp15
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp10
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp380
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td6
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp12
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp14
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp24
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp49
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h3
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h21
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp5
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp157
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td22
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td4
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp3
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp23
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.td23
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp4
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp180
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrInfo.td3
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp7
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td2
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp11
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp13
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp11
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp11
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp12
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp11
-rw-r--r--llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp11
70 files changed, 1254 insertions, 706 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 509cbb0..e8d3161 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -813,8 +813,8 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
}
}
- if (!F.hasFnAttribute(Attribute::HybridPatchable) || F.isDeclaration() ||
- F.hasLocalLinkage() ||
+ if (!F.hasFnAttribute(Attribute::HybridPatchable) ||
+ F.isDeclarationForLinker() || F.hasLocalLinkage() ||
F.getName().ends_with(HybridPatchableTargetSuffix))
continue;
@@ -857,7 +857,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
SetVector<GlobalValue *> DirectCalledFns;
for (Function &F : Mod)
- if (!F.isDeclaration() &&
+ if (!F.isDeclarationForLinker() &&
F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native &&
F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64)
processFunction(F, DirectCalledFns, FnsMap);
@@ -869,7 +869,8 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
};
SmallVector<ThunkInfo> ThunkMapping;
for (Function &F : Mod) {
- if (!F.isDeclaration() && (!F.hasLocalLinkage() || F.hasAddressTaken()) &&
+ if (!F.isDeclarationForLinker() &&
+ (!F.hasLocalLinkage() || F.hasAddressTaken()) &&
F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native &&
F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) {
if (!F.hasComdat())
@@ -959,7 +960,7 @@ bool AArch64Arm64ECCallLowering::processFunction(
// unprototyped functions in C)
if (Function *F = CB->getCalledFunction()) {
if (!LowerDirectToIndirect || F->hasLocalLinkage() ||
- F->isIntrinsic() || !F->isDeclaration())
+ F->isIntrinsic() || !F->isDeclarationForLinker())
continue;
DirectCalledFns.insert(F);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f6e3dd..2b6ea86 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -162,10 +162,10 @@ static cl::opt<bool> UseFEATCPACodegen(
cl::init(false));
/// Value type used for condition codes.
-static const MVT MVT_CC = MVT::i32;
+constexpr MVT CondCodeVT = MVT::i32;
/// Value type used for NZCV flags.
-static constexpr MVT FlagsVT = MVT::i32;
+constexpr MVT FlagsVT = MVT::i32;
static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
@@ -3472,6 +3472,12 @@ static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
}
}
+/// Like SelectionDAG::getCondCode(), but for AArch64 condition codes.
+static SDValue getCondCode(SelectionDAG &DAG, AArch64CC::CondCode CC) {
+ // TODO: Should be TargetConstant (need to s/imm/timm in patterns).
+ return DAG.getConstant(CC, SDLoc(), CondCodeVT);
+}
+
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
@@ -3678,7 +3684,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
- SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
+ SDValue Condition = getCondCode(DAG, Predicate);
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
@@ -4075,7 +4081,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
- AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
+ AArch64cc = getCondCode(DAG, AArch64CC);
return Cmp;
}
@@ -4195,7 +4201,7 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
AArch64CC::CondCode CC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, getInvertedCondCode(CC));
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
@@ -4274,8 +4280,8 @@ static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG,
SDLoc DL(Glue);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
- unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
- SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
+ AArch64CC::CondCode Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
+ SDValue CC = getCondCode(DAG, Cond);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
}
@@ -4285,7 +4291,7 @@ static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG) {
SDLoc DL(Glue);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
- SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32);
+ SDValue CC = getCondCode(DAG, AArch64CC::VS);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
}
@@ -4334,7 +4340,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, getInvertedCondCode(CC));
Overflow =
DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
@@ -7124,8 +7130,7 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
- DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
- Cmp.getValue(1));
+ getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
}
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
@@ -7136,7 +7141,7 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
AArch64CC::CondCode CC;
if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
SDLoc DL(Op);
- SDValue CCVal = DAG.getConstant(CC, DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, CC);
return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
@@ -10575,7 +10580,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
- SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, OFCC);
return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal,
Overflow);
@@ -10648,7 +10653,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) &&
ProduceNonFlagSettingCondBr) {
SDValue Cond =
- DAG.getTargetConstant(changeIntCCToAArch64CC(CC), DL, MVT::i32);
+ DAG.getTargetConstant(changeIntCCToAArch64CC(CC), DL, CondCodeVT);
return DAG.getNode(AArch64ISD::CB, DL, MVT::Other, Chain, Cond, LHS, RHS,
Dest);
}
@@ -10667,11 +10672,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+ SDValue CC1Val = getCondCode(DAG, CC1);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32);
+ SDValue CC2Val = getCondCode(DAG, CC2);
return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
@@ -11160,7 +11165,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
- SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+ SDValue CC1Val = getCondCode(DAG, CC1);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
@@ -11173,11 +11178,11 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
- SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+ SDValue CC1Val = getCondCode(DAG, CC1);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp);
- SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32);
+ SDValue CC2Val = getCondCode(DAG, CC2);
Res = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp);
}
return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, DL) : Res;
@@ -11205,8 +11210,7 @@ SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op,
ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get();
ISD::CondCode CondInv = ISD::getSetCCInverse(Cond, VT);
- SDValue CCVal =
- DAG.getConstant(changeIntCCToAArch64CC(CondInv), DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, changeIntCCToAArch64CC(CondInv));
// Inputs are swapped because the condition is inverted. This will allow
// matching with a single CSINC instruction.
return DAG.getNode(AArch64ISD::CSEL, DL, OpVT, FVal, TVal, CCVal,
@@ -11360,18 +11364,6 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
- // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
- // supported types.
- if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
- CTVal->isOne() && CFVal->isAllOnes() &&
- LHS.getValueType() == TVal.getValueType()) {
- EVT VT = LHS.getValueType();
- SDValue Shift =
- DAG.getNode(ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
- return DAG.getNode(ISD::OR, DL, VT, Shift, DAG.getConstant(1, DL, VT));
- }
// Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
@@ -11577,13 +11569,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
}
// Emit first, and possibly only, CSEL.
- SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+ SDValue CC1Val = getCondCode(DAG, CC1);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32);
+ SDValue CC2Val = getCondCode(DAG, CC2);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp);
}
@@ -11685,7 +11677,7 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, OFCC);
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
@@ -12525,10 +12517,10 @@ static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint) {
/// WZR, invert(<cond>)'.
static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL,
SelectionDAG &DAG) {
- return DAG.getNode(
- AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32), NZCV);
+ return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
}
// Lower @cc flag output via getSETCC.
@@ -18699,7 +18691,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
Created.push_back(Cmp.getNode());
Created.push_back(And.getNode());
} else {
- SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
+ SDValue CCVal = getCondCode(DAG, AArch64CC::MI);
SDVTList VTs = DAG.getVTList(VT, FlagsVT);
SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
@@ -19571,11 +19563,11 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::AND) {
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
- Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
+ Condition = getCondCode(DAG, InvCC0);
NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
} else {
AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
- Condition = DAG.getConstant(CC0, DL, MVT_CC);
+ Condition = getCondCode(DAG, CC0);
NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
}
@@ -19596,8 +19588,7 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
}
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
- CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
- CCmp);
+ CSel0.getOperand(1), getCondCode(DAG, CC1), CCmp);
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
@@ -19802,7 +19793,7 @@ static SDValue performANDSETCCCombine(SDNode *N,
SDLoc DL(N);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, VT),
- DAG.getConstant(InvertedCC, DL, MVT::i32), Cmp);
+ getCondCode(DAG, InvertedCC), Cmp);
}
}
return SDValue();
@@ -20793,7 +20784,7 @@ static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
"Unexpected constant value");
SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
- SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
+ SDValue CCVal = getCondCode(DAG, AArch64CC);
SDValue Cmp = LHS.getOperand(3);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
@@ -20979,7 +20970,7 @@ static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// (CINC x cc cond) <=> (CSINC x x !cc cond)
- SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32);
+ SDValue CC = getCondCode(DAG, AArch64CC::LO);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond);
}
@@ -22052,7 +22043,7 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
- SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
+ SDValue CC = getCondCode(DAG, getInvertedCondCode(Cond));
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
@@ -25093,10 +25084,9 @@ static SDValue performBRCONDCombine(SDNode *N,
auto CSelCC = getCSETCondCode(CSel);
if (CSelCC) {
SDLoc DL(N);
- return DAG.getNode(
- N->getOpcode(), DL, N->getVTList(), Chain, Dest,
- DAG.getConstant(getInvertedCondCode(*CSelCC), DL, MVT::i32),
- CSel.getOperand(3));
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), Chain, Dest,
+ getCondCode(DAG, getInvertedCondCode(*CSelCC)),
+ CSel.getOperand(3));
}
}
@@ -25237,7 +25227,7 @@ static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op->getValueType(0);
- SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32);
+ SDValue CCValue = getCondCode(DAG, CC);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
}
@@ -25314,8 +25304,7 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
SDValue TValReassoc = Reassociate(TReassocOp, 0);
SDValue FValReassoc = Reassociate(FReassocOp, 1);
return DAG.getNode(AArch64ISD::CSEL, SDLoc(N), VT, TValReassoc, FValReassoc,
- DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC),
- NewCmp.getValue(1));
+ getCondCode(DAG, NewCC), NewCmp.getValue(1));
};
auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
@@ -25456,8 +25445,7 @@ static SDValue performCSELCombine(SDNode *N,
SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
Cond.getOperand(1), Cond.getOperand(0));
return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
- N->getOperand(1),
- DAG.getConstant(NewCond, DL, MVT::i32),
+ N->getOperand(1), getCondCode(DAG, NewCond),
Sub.getValue(1));
}
}
@@ -25557,10 +25545,9 @@ static SDValue performSETCCCombine(SDNode *N,
auto NewCond = getInvertedCondCode(OldCond);
// csel 0, 1, !cond, X
- SDValue CSEL =
- DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
- LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
- LHS.getOperand(3));
+ SDValue CSEL = DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(),
+ LHS.getOperand(0), LHS.getOperand(1),
+ getCondCode(DAG, NewCond), LHS.getOperand(3));
return DAG.getZExtOrTrunc(CSEL, DL, VT);
}
@@ -25630,8 +25617,7 @@ static SDValue performFlagSettingCombine(SDNode *N,
// If the flag result isn't used, convert back to a generic opcode.
if (!N->hasAnyUseOfValue(1)) {
SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
- return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
- DL);
+ return DCI.CombineTo(N, Res, SDValue(N, 1));
}
// Combine identical generic nodes into this node, re-using the result.
@@ -27013,10 +26999,10 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) {
SDValue A = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, FlagsVT, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32));
- SDValue B = DAG.getNode(
- AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
+ SDValue B = DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ getCondCode(DAG, AArch64CC::NE), A.getValue(1));
return DAG.getMergeValues(
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 251fd44..ac31236 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -448,8 +448,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisVT<1, FlagsVT>,
SDTCisVT<4, FlagsVT>]>;
+// Value type used for condition codes.
+// Should be kept in sync with its C++ counterpart.
+defvar CondCodeVT = i32;
+
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
- [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, CondCodeVT>,
SDTCisVT<2, FlagsVT>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
@@ -458,22 +463,22 @@ def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
def SDT_AArch64CSel : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
- SDTCisInt<3>,
+ SDTCisVT<3, CondCodeVT>,
SDTCisVT<4, FlagsVT>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
[SDTCisVT<0, FlagsVT>,
SDTCisInt<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
- SDTCisInt<4>,
- SDTCisVT<5, i32>]>;
+ SDTCisVT<4, CondCodeVT>,
+ SDTCisVT<5, FlagsVT>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
[SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
- SDTCisInt<4>,
- SDTCisVT<5, i32>]>;
+ SDTCisVT<4, CondCodeVT>,
+ SDTCisVT<5, FlagsVT>]>;
def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<2, 1>]>;
@@ -546,7 +551,8 @@ def SDT_AArch64TBL : SDTypeProfile<1, 2, [
]>;
def SDT_AArch64cb : SDTypeProfile<0, 4,
- [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisInt<2>,
+ [SDTCisVT<0, CondCodeVT>,
+ SDTCisInt<1>, SDTCisInt<2>,
SDTCisVT<3, OtherVT>]>;
// non-extending masked load fragment.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 18ca22f..e1adc0b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -270,6 +270,13 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
SMECallAttrs CallAttrs(*Caller, *Callee);
+ // Never inline a function explicitly marked as being streaming,
+ // into a non-streaming function. Assume it was marked as streaming
+ // for a reason.
+ if (CallAttrs.caller().hasNonStreamingInterfaceAndBody() &&
+ CallAttrs.callee().hasStreamingInterfaceOrBody())
+ return false;
+
// When inlining, we should consider the body of the function, not the
// interface.
if (CallAttrs.callee().hasStreamingBody()) {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index b9d3e1b..7a2b679 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -79,8 +79,7 @@ public:
}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value) const override;
@@ -421,9 +420,8 @@ static bool shouldForceRelocation(const MCFixup &Fixup) {
}
void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (shouldForceRelocation(Fixup))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -460,8 +458,8 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind());
@@ -471,15 +469,16 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
if (FulleSizeInBytes == 0) {
// Handle as little-endian
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
} else {
// Handle as big-endian
- assert((Offset + FulleSizeInBytes) <= Data.size() && "Invalid fixup size!");
+ assert(Fixup.getOffset() + FulleSizeInBytes <= F.getSize() &&
+ "Invalid fixup size!");
assert(NumBytes <= FulleSizeInBytes && "Invalid fixup size!");
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = FulleSizeInBytes - 1 - i;
- Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
@@ -492,9 +491,9 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// If the immediate is negative, generate MOVN else MOVZ.
// (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
if (SignedValue < 0)
- Data[Offset + 3] &= ~(1 << 6);
+ Data[3] &= ~(1 << 6);
else
- Data[Offset + 3] |= (1 << 6);
+ Data[3] |= (1 << 6);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 8a0c4ac..18f3c47 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1160,6 +1160,12 @@ def FeatureTanhInsts : SubtargetFeature<"tanh-insts",
"Has v_tanh_f32/f16 instructions"
>;
+def FeatureTensorCvtLutInsts : SubtargetFeature<"tensor-cvt-lut-insts",
+ "HasTensorCvtLutInsts",
+ "true",
+ "Has v_perm_pk16* instructions"
+>;
+
def FeatureTransposeLoadF4F6Insts : SubtargetFeature<"transpose-load-f4f6-insts",
"HasTransposeLoadF4F6Insts",
"true",
@@ -2030,6 +2036,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureDPPSrc1SGPR,
FeatureBitOp3Insts,
FeatureTanhInsts,
+ FeatureTensorCvtLutInsts,
FeatureTransposeLoadF4F6Insts,
FeatureBF16TransInsts,
FeatureBF16ConversionInsts,
@@ -2785,6 +2792,9 @@ def HasBitOp3Insts : Predicate<"Subtarget->hasBitOp3Insts()">,
def HasTanhInsts : Predicate<"Subtarget->hasTanhInsts()">,
AssemblerPredicate<(all_of FeatureTanhInsts)>;
+def HasTensorCvtLutInsts : Predicate<"Subtarget->hasTensorCvtLutInsts()">,
+ AssemblerPredicate<(all_of FeatureTensorCvtLutInsts)>;
+
def HasTransposeLoadF4F6Insts : Predicate<"Subtarget->hasTransposeLoadF4F6Insts()">,
AssemblerPredicate<(all_of FeatureTransposeLoadF4F6Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 31c4f62..d059480 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -367,6 +367,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i1, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i8, Expand);
+ setTruncStoreAction(MVT::v5i32, MVT::v5i16, Expand);
+
+ setTruncStoreAction(MVT::v6i32, MVT::v6i1, Expand);
+ setTruncStoreAction(MVT::v6i32, MVT::v6i8, Expand);
+ setTruncStoreAction(MVT::v6i32, MVT::v6i16, Expand);
+
+ setTruncStoreAction(MVT::v7i32, MVT::v7i1, Expand);
+ setTruncStoreAction(MVT::v7i32, MVT::v7i8, Expand);
+ setTruncStoreAction(MVT::v7i32, MVT::v7i16, Expand);
+
setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c8e45d4..d11e5a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3204,6 +3204,18 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(B, MI, 5);
return;
}
+ case Intrinsic::amdgcn_permlane_bcast:
+ case Intrinsic::amdgcn_permlane_up:
+ case Intrinsic::amdgcn_permlane_down:
+ case Intrinsic::amdgcn_permlane_xor:
+ // Doing a waterfall loop over these wouldn't make any sense.
+ constrainOpWithReadfirstlane(B, MI, 3);
+ constrainOpWithReadfirstlane(B, MI, 4);
+ return;
+ case Intrinsic::amdgcn_permlane_idx_gen: {
+ constrainOpWithReadfirstlane(B, MI, 3);
+ return;
+ }
case Intrinsic::amdgcn_sbfe:
applyMappingBFE(B, OpdMapper, true);
return;
@@ -4591,6 +4603,42 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
+ case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_fp6_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_pk16_bf6_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_bf8_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk8_fp4_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f32:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_f16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_fp6_bf16:
+ case Intrinsic::amdgcn_cvt_scalef32_sr_pk16_bf6_bf16:
case Intrinsic::amdgcn_sat_pk4_i4_i8:
case Intrinsic::amdgcn_sat_pk4_u4_u8:
case Intrinsic::amdgcn_fmed3:
@@ -4765,6 +4813,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8:
case Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8:
case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
+ case Intrinsic::amdgcn_perm_pk16_b4_u4:
+ case Intrinsic::amdgcn_perm_pk16_b6_u4:
+ case Intrinsic::amdgcn_perm_pk16_b8_u4:
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_log:
case Intrinsic::amdgcn_exp2:
@@ -4902,6 +4953,24 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_permlane_bcast:
+ case Intrinsic::amdgcn_permlane_up:
+ case Intrinsic::amdgcn_permlane_down:
+ case Intrinsic::amdgcn_permlane_xor: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[3] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_permlane_idx_gen: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[3] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_permlane16_var:
case Intrinsic::amdgcn_permlanex16_var: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index dfe0cbf..10b8606 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -321,6 +321,11 @@ def : SourceOfDivergence<int_amdgcn_permlane16>;
def : SourceOfDivergence<int_amdgcn_permlanex16>;
def : SourceOfDivergence<int_amdgcn_permlane16_var>;
def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
+def : SourceOfDivergence<int_amdgcn_permlane_bcast>;
+def : SourceOfDivergence<int_amdgcn_permlane_up>;
+def : SourceOfDivergence<int_amdgcn_permlane_down>;
+def : SourceOfDivergence<int_amdgcn_permlane_xor>;
+def : SourceOfDivergence<int_amdgcn_permlane_idx_gen>;
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
def : SourceOfDivergence<int_amdgcn_update_dpp>;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 94886b0..96cb5ae 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -152,7 +152,12 @@ static bool isPermlane(const MachineInstr &MI) {
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
- Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
+ Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE_UP_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64;
}
static bool isLdsDma(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 33b66a6..96d5668 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -592,10 +592,13 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -666,10 +669,13 @@ bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
// MaxMemoryClause-specific: We prioritize clustered instructions as we would
// get more benefit from clausing these memory instructions.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -936,11 +942,9 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
Pressure.resize(Regions.size());
RegionsWithHighRP.resize(Regions.size());
RegionsWithExcessRP.resize(Regions.size());
- RegionsWithMinOcc.resize(Regions.size());
RegionsWithIGLPInstrs.resize(Regions.size());
RegionsWithHighRP.reset();
RegionsWithExcessRP.reset();
- RegionsWithMinOcc.reset();
RegionsWithIGLPInstrs.reset();
runSchedStages();
@@ -1090,8 +1094,7 @@ bool PreRARematStage::initGCNSchedStage() {
// fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- if (!GCNSchedStage::initGCNSchedStage() || DAG.RegionsWithMinOcc.none() ||
- DAG.Regions.size() == 1)
+ if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1)
return false;
// Before performing any IR modification record the parent region of each MI
@@ -1133,11 +1136,6 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() {
SavedMutations.swap(DAG.Mutations);
S.SGPRLimitBias = S.VGPRLimitBias = 0;
if (DAG.MinOccupancy > InitialOccupancy) {
- for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
- DAG.RegionsWithMinOcc[IDX] =
- DAG.Pressure[IDX].getOccupancy(
- DAG.ST, DAG.MFI.getDynamicVGPRBlockSize()) == DAG.MinOccupancy;
-
LLVM_DEBUG(dbgs() << StageID
<< " stage successfully increased occupancy to "
<< DAG.MinOccupancy << '\n');
@@ -1209,11 +1207,15 @@ bool GCNSchedStage::initGCNRegion() {
}
bool UnclusteredHighRPStage::initGCNRegion() {
- // Only reschedule regions with the minimum occupancy or regions that may have
- // spilling (excess register pressure).
- if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
- DAG.MinOccupancy <= InitialOccupancy) &&
- !DAG.RegionsWithExcessRP[RegionIdx])
+ // Only reschedule regions that have excess register pressure (i.e. spilling)
+ // or had minimum occupancy at the beginning of the stage (as long as
+ // rescheduling of previous regions did not make occupancy drop back down to
+ // the initial minimum).
+ unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
+ if (!DAG.RegionsWithExcessRP[RegionIdx] &&
+ (DAG.MinOccupancy <= InitialOccupancy ||
+ DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
+ InitialOccupancy))
return false;
return GCNSchedStage::initGCNRegion();
@@ -1278,9 +1280,6 @@ void GCNSchedStage::checkScheduling() {
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
DAG.Pressure[RegionIdx] = PressureAfter;
- DAG.RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) ==
- DAG.MinOccupancy;
// Early out if we have achieved the occupancy target.
LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
@@ -1314,7 +1313,6 @@ void GCNSchedStage::checkScheduling() {
if (NewOccupancy < DAG.MinOccupancy) {
DAG.MinOccupancy = NewOccupancy;
MFI.limitOccupancy(DAG.MinOccupancy);
- DAG.RegionsWithMinOcc.reset();
LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
<< DAG.MinOccupancy << ".\n");
}
@@ -1336,14 +1334,10 @@ void GCNSchedStage::checkScheduling() {
// Revert if this region's schedule would cause a drop in occupancy or
// spilling.
- if (shouldRevertScheduling(WavesAfter)) {
+ if (shouldRevertScheduling(WavesAfter))
revertScheduling();
- } else {
+ else
DAG.Pressure[RegionIdx] = PressureAfter;
- DAG.RegionsWithMinOcc[RegionIdx] =
- PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) ==
- DAG.MinOccupancy;
- }
}
unsigned
@@ -1573,9 +1567,6 @@ bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
}
void GCNSchedStage::revertScheduling() {
- DAG.RegionsWithMinOcc[RegionIdx] =
- PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) ==
- DAG.MinOccupancy;
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
DAG.RegionEnd = DAG.RegionBegin;
int SkippedDebugInstr = 0;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 94cd795..32139a9 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -250,9 +250,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// limit. Register pressure in these regions usually will result in spilling.
BitVector RegionsWithExcessRP;
- // Regions that has the same occupancy as the latest MinOccupancy
- BitVector RegionsWithMinOcc;
-
// Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
BitVector RegionsWithIGLPInstrs;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 0a0a107..0237a60 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -340,6 +340,43 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
+void GCNSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
+ const SchedRegion &Region) const {
+ const Function &F = Region.RegionBegin->getMF()->getFunction();
+ Attribute PostRADirectionAttr = F.getFnAttribute("amdgpu-post-ra-direction");
+ if (!PostRADirectionAttr.isValid())
+ return;
+
+ StringRef PostRADirectionStr = PostRADirectionAttr.getValueAsString();
+ if (PostRADirectionStr == "topdown") {
+ Policy.OnlyTopDown = true;
+ Policy.OnlyBottomUp = false;
+ } else if (PostRADirectionStr == "bottomup") {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = true;
+ } else if (PostRADirectionStr == "bidirectional") {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ } else {
+ DiagnosticInfoOptimizationFailure Diag(
+ F, F.getSubprogram(), "invalid value for postRA direction attribute");
+ F.getContext().diagnose(Diag);
+ }
+
+ LLVM_DEBUG({
+ const char *DirStr = "default";
+ if (Policy.OnlyTopDown && !Policy.OnlyBottomUp)
+ DirStr = "topdown";
+ else if (!Policy.OnlyTopDown && Policy.OnlyBottomUp)
+ DirStr = "bottomup";
+ else if (!Policy.OnlyTopDown && !Policy.OnlyBottomUp)
+ DirStr = "bidirectional";
+
+ dbgs() << "Post-MI-sched direction (" << F.getName() << "): " << DirStr
+ << '\n';
+ });
+}
+
void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {
if (isWave32()) {
// Fix implicit $vcc operands after MIParser has verified that they match
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index bdd900d..c84ba1a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -236,6 +236,7 @@ protected:
bool Has64BitLiterals = false;
bool HasBitOp3Insts = false;
bool HasTanhInsts = false;
+ bool HasTensorCvtLutInsts = false;
bool HasTransposeLoadF4F6Insts = false;
bool HasPrngInst = false;
bool HasBVHDualAndBVH8Insts = false;
@@ -1041,6 +1042,9 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
const SchedRegion &Region) const override;
+ void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
+ const SchedRegion &Region) const override;
+
void mirFileLoaded(MachineFunction &MF) const override;
unsigned getMaxNumUserSGPRs() const {
@@ -1408,6 +1412,8 @@ public:
bool hasTanhInsts() const { return HasTanhInsts; }
+ bool hasTensorCvtLutInsts() const { return HasTensorCvtLutInsts; }
+
bool hasAddPC64Inst() const { return GFX1250Insts; }
bool hasMinimum3Maximum3PKF16() const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 2a920f6..4e4660c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -33,8 +33,7 @@ public:
AMDGPUAsmBackend(const Target &T) : MCAsmBackend(llvm::endianness::little) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value) const override;
@@ -129,9 +128,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
void AMDGPUAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (Target.getSpecifier())
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -148,13 +146,13 @@ void AMDGPUAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
Value <<= Info.TargetOffset;
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- uint32_t Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the bits from
// the fixup value.
for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
+ Data[i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
std::optional<MCFixupKind>
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 11552b3..9b348d4 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -983,6 +983,7 @@ void SIFrameLowering::emitCSRSpillStores(
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
// registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -1005,6 +1006,12 @@ void SIFrameLowering::emitCSRSpillStores(
}
};
+ for (const Register Reg : make_first_range(WWMScratchRegs)) {
+ if (!MRI.isReserved(Reg)) {
+ MRI.addLiveIn(Reg);
+ MBB.addLiveIn(Reg);
+ }
+ }
StoreWWMRegisters(WWMScratchRegs);
auto EnableAllLanes = [&]() {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ad26757..4d67e4a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16825,56 +16825,51 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
return std::pair(0U, RC);
}
- if (Constraint.starts_with("{") && Constraint.ends_with("}")) {
- StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
- if (RegName.consume_front("v")) {
+ auto [Kind, Idx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Constraint);
+ if (Kind != '\0') {
+ if (Kind == 'v') {
RC = &AMDGPU::VGPR_32RegClass;
- } else if (RegName.consume_front("s")) {
+ } else if (Kind == 's') {
RC = &AMDGPU::SGPR_32RegClass;
- } else if (RegName.consume_front("a")) {
+ } else if (Kind == 'a') {
RC = &AMDGPU::AGPR_32RegClass;
}
if (RC) {
- uint32_t Idx;
- if (RegName.consume_front("[")) {
- uint32_t End;
- bool Failed = RegName.consumeInteger(10, Idx);
- Failed |= !RegName.consume_front(":");
- Failed |= RegName.consumeInteger(10, End);
- Failed |= !RegName.consume_back("]");
- if (!Failed) {
- uint32_t Width = (End - Idx + 1) * 32;
- // Prohibit constraints for register ranges with a width that does not
- // match the required type.
- if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits())
+ if (NumRegs > 1) {
+ if (Idx >= RC->getNumRegs() || Idx + NumRegs - 1 > RC->getNumRegs())
+ return std::pair(0U, nullptr);
+
+ uint32_t Width = NumRegs * 32;
+ // Prohibit constraints for register ranges with a width that does not
+ // match the required type.
+ if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits())
+ return std::pair(0U, nullptr);
+
+ MCRegister Reg = RC->getRegister(Idx);
+ if (SIRegisterInfo::isVGPRClass(RC))
+ RC = TRI->getVGPRClassForBitWidth(Width);
+ else if (SIRegisterInfo::isSGPRClass(RC))
+ RC = TRI->getSGPRClassForBitWidth(Width);
+ else if (SIRegisterInfo::isAGPRClass(RC))
+ RC = TRI->getAGPRClassForBitWidth(Width);
+ if (RC) {
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
+ if (!Reg) {
+ // The register class does not contain the requested register,
+ // e.g., because it is an SGPR pair that would violate alignment
+ // requirements.
return std::pair(0U, nullptr);
- MCRegister Reg = RC->getRegister(Idx);
- if (SIRegisterInfo::isVGPRClass(RC))
- RC = TRI->getVGPRClassForBitWidth(Width);
- else if (SIRegisterInfo::isSGPRClass(RC))
- RC = TRI->getSGPRClassForBitWidth(Width);
- else if (SIRegisterInfo::isAGPRClass(RC))
- RC = TRI->getAGPRClassForBitWidth(Width);
- if (RC) {
- Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
- if (!Reg) {
- // The register class does not contain the requested register,
- // e.g., because it is an SGPR pair that would violate alignment
- // requirements.
- return std::pair(0U, nullptr);
- }
- return std::pair(Reg, RC);
}
+ return std::pair(Reg, RC);
}
- } else {
- // Check for lossy scalar/vector conversions.
- if (VT.isVector() && VT.getSizeInBits() != 32)
- return std::pair(0U, nullptr);
- bool Failed = RegName.getAsInteger(10, Idx);
- if (!Failed && Idx < RC->getNumRegs())
- return std::pair(RC->getRegister(Idx), RC);
}
+
+ // Check for lossy scalar/vector conversions.
+ if (VT.isVector() && VT.getSizeInBits() != 32)
+ return std::pair(0U, nullptr);
+ if (Idx < RC->getNumRegs())
+ return std::pair(RC->getRegister(Idx), RC);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 044a681..3f61bbd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6304,10 +6304,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
};
if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
- Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
+ Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
+ Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
+ Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
+ Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
+ Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
+ Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
// src1 and src2 must be scalar
MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]);
- MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
const DebugLoc &DL = MI.getDebugLoc();
if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -6315,11 +6319,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
.add(Src1);
Src1.ChangeToRegister(Reg, false);
}
- if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
- Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
- .add(Src2);
- Src2.ChangeToRegister(Reg, false);
+ if (VOP3Idx[2] != -1) {
+ MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
+ if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src2);
+ Src2.ChangeToRegister(Reg, false);
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a3e20ba..4698a58 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1774,6 +1774,7 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
!eq(VT.Size, 192) : VOPDstOperand<VReg_192>,
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
+ !eq(VT.Size, 96) : VOPDstOperand<VReg_96>,
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
!eq(VT.Size, 16) : op16,
@@ -1924,6 +1925,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
!eq(VT, v2f16) : VCSrc_v2f16,
!eq(VT, v2bf16) : VCSrc_v2bf16,
!eq(VT, f32) : VCSrc_f32,
+ !eq(VT, v2i32) : VCSrc_v2b32,
1 : VCSrc_b32);
}
@@ -2935,6 +2937,9 @@ def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>;
def VOP_V2F16_F32_F32_I32 : VOPProfile <[v2f16, f32, f32, i32]>;
def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>;
def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>;
+def VOP_V3I32_V16F16_F32 : VOPProfile<[v3i32, v16f16, f32, untyped]>;
+def VOP_V3I32_V16BF16_F32 : VOPProfile<[v3i32, v16bf16, f32, untyped]>;
+def VOP_V3I32_V16F32_F32 : VOPProfile<[v3i32, v16f32, f32, untyped]>;
def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>;
def VOP_V2F16_I32_F32 : VOPProfile<[v2f16, i32, f32, untyped]>;
def VOP_V2I16_F32_F32_F32 : VOPProfile<[v2i16, f32, f32, f32]>;
@@ -2948,6 +2953,8 @@ def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>;
def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>;
def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
+def VOP_V16F16_V3I32_I32 : VOPProfile<[v16f16, v3i32, i32, untyped]>;
+def VOP_V16BF16_V3I32_I32 : VOPProfile<[v16bf16, v3i32, i32, untyped]>;
def VOP_V8F16_V2I32_I32 : VOPProfile<[v8f16, v2i32, i32, untyped]>;
def VOP_V8BF16_V2I32_I32 : VOPProfile<[v8bf16, v2i32, i32, untyped]>;
def VOP_V8F16_I32_I32 : VOPProfile<[v8f16, i32, i32, untyped]>;
@@ -2955,11 +2962,26 @@ def VOP_V8BF16_I32_I32 : VOPProfile<[v8bf16, i32, i32, untyped]>;
def VOP_V16F32_V3I32_I32 : VOPProfile<[v16f32, v3i32, i32, untyped]>;
def VOP_V8F32_V2I32_I32 : VOPProfile<[v8f32, v2i32, i32, untyped]>;
def VOP_V8F32_I32_I32 : VOPProfile<[v8f32, i32, i32, untyped]>;
+def VOP_V2I32_V8BF16_F32 : VOPProfile<[v2i32, v8bf16, f32, untyped]>;
+def VOP_V2I32_V8F16_F32 : VOPProfile<[v2i32, v8f16, f32, untyped]>;
+def VOP_V2I32_V8F32_F32 : VOPProfile<[v2i32, v8f32, f32, untyped]>;
+def VOP_I32_V8F32_F32 : VOPProfile<[i32, v8f32, f32, untyped]>;
+def VOP_I32_V8F16_F32 : VOPProfile<[i32, v8f16, f32, untyped]>;
+def VOP_I32_V8BF16_F32 : VOPProfile<[i32, v8bf16, f32, untyped]>;
def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>;
def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>;
def VOP_V6I32_V32F16_I32_F32 : VOPProfile<[v6i32, v32f16, i32, f32]>;
def VOP_V6I32_V32F32_I32_F32 : VOPProfile<[v6i32, v32f32, i32, f32]>;
+def VOP_V3I32_V16F16_I32_F32 : VOPProfile<[v3i32, v16f16, i32, f32]>;
+def VOP_V3I32_V16BF16_I32_F32 : VOPProfile<[v3i32, v16bf16, i32, f32]>;
+def VOP_V3I32_V16F32_I32_F32 : VOPProfile<[v3i32, v16f32, i32, f32]>;
+def VOP_V2I32_V8BF16_I32_F32 : VOPProfile<[v2i32, v8bf16, i32, f32]>;
+def VOP_V2I32_V8F16_I32_F32 : VOPProfile<[v2i32, v8f16, i32, f32]>;
+def VOP_V2I32_V8F32_I32_F32 : VOPProfile<[v2i32, v8f32, i32, f32]>;
+def VOP_I32_V8F32_I32_F32 : VOPProfile<[i32, v8f32, i32, f32]>;
+def VOP_I32_V8F16_I32_F32 : VOPProfile<[i32, v8f16, i32, f32]>;
+def VOP_I32_V8BF16_I32_F32 : VOPProfile<[i32, v8bf16, i32, f32]>;
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 36d1a3b..08d07c9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1302,6 +1302,7 @@ def VCSrc_f64 : SrcRegOrImm9 <VS_64, "OPERAND_REG_INLINE_C_FP64">;
def VCSrc_v2b16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2INT16">;
def VCSrc_v2bf16: SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2BF16">;
def VCSrc_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_INLINE_C_V2FP16">;
+def VCSrc_v2b32 : SrcRegOrImm9 <VS_64, "OPERAND_REG_INLINE_C_V2INT32">;
// True 16 Operands
def VCSrcT_b16 : SrcRegOrImm9_t16 <"OPERAND_REG_INLINE_C_INT16">;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5827f18..65fa088 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1548,6 +1548,42 @@ bool shouldEmitConstantsToTextSection(const Triple &TT) {
return TT.getArch() == Triple::r600;
}
+static bool isValidRegPrefix(char C) {
+ return C == 'v' || C == 's' || C == 'a';
+}
+
+std::tuple<char, unsigned, unsigned>
+parseAsmConstraintPhysReg(StringRef Constraint) {
+ StringRef RegName = Constraint;
+ if (!RegName.consume_front("{") || !RegName.consume_back("}"))
+ return {};
+
+ char Kind = RegName.front();
+ if (!isValidRegPrefix(Kind))
+ return {};
+
+ RegName = RegName.drop_front();
+ if (RegName.consume_front("[")) {
+ unsigned Idx, End;
+ bool Failed = RegName.consumeInteger(10, Idx);
+ Failed |= !RegName.consume_front(":");
+ Failed |= RegName.consumeInteger(10, End);
+ Failed |= !RegName.consume_back("]");
+ if (!Failed) {
+ unsigned NumRegs = End - Idx + 1;
+ if (NumRegs > 1)
+ return {Kind, Idx, NumRegs};
+ }
+ } else {
+ unsigned Idx;
+ bool Failed = RegName.getAsInteger(10, Idx);
+ if (!Failed)
+ return {Kind, Idx, 1};
+ }
+
+ return {};
+}
+
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
std::pair<unsigned, unsigned> Default,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 74d59f4..1252e35 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1012,6 +1012,12 @@ bool isReadOnlySegment(const GlobalValue *GV);
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);
+/// Returns a valid charcode or 0 in the first entry if this is a valid physical
+/// register constraint. Followed by the start register number, and the register
+/// width. Does not validate the number of registers exists in the class.
+std::tuple<char, unsigned, unsigned>
+parseAsmConstraintPhysReg(StringRef Constraint);
+
/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 1ffe39d..f4b6af6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1053,6 +1053,14 @@ def VOP3_PERMLANE_VAR_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, untyped
let HasExtDPP = 0;
}
+class VOP3_PERMLANE_NOOPSEL_Profile<VOPProfile P> : VOP3_Profile<P> {
+ let Ins64 = !con((ins VRegSrc_32:$src0, SSrc_b32:$src1),
+ !if(P.HasSrc2, (ins SSrc_b32:$src2), (ins)));
+ let HasClamp = 0;
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
+}
+
def opsel_i1timm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(
N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
@@ -1136,6 +1144,18 @@ class PermlaneVarPat<SDPatternOperator permlane,
VGPR_32:$src1, VGPR_32:$vdst_in)
>;
+class PermlaneNoDppPat3Src<SDPatternOperator permlane,
+ Instruction inst> : GCNPat<
+ (permlane i32:$src0, i32:$src1, i32:$src2),
+ (inst VGPR_32:$src0, SCSrc_b32:$src1, SCSrc_b32:$src2)
+>;
+
+class PermlaneNoDppPat2Src<SDPatternOperator permlane,
+ Instruction inst> : GCNPat<
+ (permlane i32:$src0, i32:$src1),
+ (inst VGPR_32:$src0, SCSrc_b32:$src1)
+>;
+
class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f> {
let HasClamp = 0;
let HasOMod = 0;
@@ -1522,6 +1542,20 @@ let SubtargetPredicate = isGFX12Plus in {
} // End SubtargetPredicate = isGFX12Plus
+let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32 in {
+ defm V_PERMLANE_BCAST_B32 : VOP3Inst<"v_permlane_bcast_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_PERMLANE_UP_B32 : VOP3Inst<"v_permlane_up_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_PERMLANE_DOWN_B32 : VOP3Inst<"v_permlane_down_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_PERMLANE_XOR_B32 : VOP3Inst<"v_permlane_xor_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_PERMLANE_IDX_GEN_B32 : VOP3Inst<"v_permlane_idx_gen_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32>>;
+
+ def : PermlaneNoDppPat3Src<int_amdgcn_permlane_bcast, V_PERMLANE_BCAST_B32_e64>;
+ def : PermlaneNoDppPat3Src<int_amdgcn_permlane_up, V_PERMLANE_UP_B32_e64>;
+ def : PermlaneNoDppPat3Src<int_amdgcn_permlane_down, V_PERMLANE_DOWN_B32_e64>;
+ def : PermlaneNoDppPat3Src<int_amdgcn_permlane_xor, V_PERMLANE_XOR_B32_e64>;
+ def : PermlaneNoDppPat2Src<int_amdgcn_permlane_idx_gen, V_PERMLANE_IDX_GEN_B32_e64>;
+} // End SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32
+
let HasClamp = 0, HasModifiers = 1 in {
def BitOp3_B16_Profile : VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>;
def BitOp3_B16_t16_Profile : VOP3_Profile_True16<BitOp3_B16_Profile>;
@@ -1692,6 +1726,12 @@ multiclass VOP3CvtScaleSelInst<string OpName, VOPProfile P, SDPatternOperator no
}
}
+let HasExtVOP3DPP = 0, HasModifiers = 0 in {
+def VOP3_V2I32_I32_I32_V2I32 : VOP3_Profile<VOPProfile<[v2i32, i32, i32, v2i32]>>;
+def VOP3_V3I32_I32_I64_V2I32 : VOP3_Profile<VOPProfile<[v3i32, i32, i64, v2i32]>>;
+def VOP3_V4I32_I64_I64_V2I32 : VOP3_Profile<VOPProfile<[v4i32, i64, i64, v2i32]>>;
+}
+
let Src0RC64 = VSrc_NoInline_v2f16 in {
def VOP3_CVT_PK_F8_F16_Profile : VOP3_Profile<VOP_I16_V2F16>;
def VOP3_CVT_PK_F8_F16_True16_Profile : VOP3_Profile_True16<VOP3_CVT_PK_F8_F16_Profile>;
@@ -1737,6 +1777,12 @@ let SubtargetPredicate = isGFX1250Plus in {
defm V_CVT_SCALE_PK8_BF16_BF8 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_bf16_bf8", VOP_V8BF16_V2I32_I32, int_amdgcn_cvt_scale_pk8_bf16_bf8>;
defm V_CVT_SCALE_PK8_F32_FP8 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_fp8", VOP_V8F32_V2I32_I32, int_amdgcn_cvt_scale_pk8_f32_fp8>;
defm V_CVT_SCALE_PK8_F32_BF8 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_bf8", VOP_V8F32_V2I32_I32, int_amdgcn_cvt_scale_pk8_f32_bf8>;
+ defm V_CVT_SCALE_PK16_F16_FP6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f16_fp6", VOP_V16F16_V3I32_I32, int_amdgcn_cvt_scale_pk16_f16_fp6>;
+ defm V_CVT_SCALE_PK16_BF16_FP6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_bf16_fp6", VOP_V16BF16_V3I32_I32, int_amdgcn_cvt_scale_pk16_bf16_fp6>;
+ defm V_CVT_SCALE_PK16_F16_BF6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f16_bf6", VOP_V16F16_V3I32_I32, int_amdgcn_cvt_scale_pk16_f16_bf6>;
+ defm V_CVT_SCALE_PK16_BF16_BF6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_bf16_bf6", VOP_V16BF16_V3I32_I32, int_amdgcn_cvt_scale_pk16_bf16_bf6>;
+ defm V_CVT_SCALE_PK16_F32_FP6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f32_fp6", VOP_V16F32_V3I32_I32, int_amdgcn_cvt_scale_pk16_f32_fp6>;
+ defm V_CVT_SCALE_PK16_F32_BF6 : VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f32_bf6", VOP_V16F32_V3I32_I32, int_amdgcn_cvt_scale_pk16_f32_bf6>;
} // End Constraints = "@earlyclobber $vdst"
defm V_CVT_SCALE_PK8_F16_FP4 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f16_fp4", VOP_V8F16_I32_I32, int_amdgcn_cvt_scale_pk8_f16_fp4>;
@@ -1744,6 +1790,44 @@ let SubtargetPredicate = isGFX1250Plus in {
defm V_CVT_SCALE_PK8_F32_FP4 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_fp4", VOP_V8F32_I32_I32, int_amdgcn_cvt_scale_pk8_f32_fp4>;
} // End ReadsModeReg = 0
+ let Constraints = "@earlyclobber $vdst" in {
+ let WaveSizePredicate = isWave32 in {
+ defm V_CVT_SCALEF32_PK8_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_bf16>;
+ defm V_CVT_SCALEF32_PK8_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_bf16>;
+ defm V_CVT_SCALEF32_PK8_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_f16>;
+ defm V_CVT_SCALEF32_PK8_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_f16>;
+ defm V_CVT_SCALEF32_PK8_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_f32>;
+ defm V_CVT_SCALEF32_PK8_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_f32>;
+ defm V_CVT_SCALEF32_PK8_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_f32>;
+ defm V_CVT_SCALEF32_PK8_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_f16>;
+ defm V_CVT_SCALEF32_PK8_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_bf16>;
+ } // End WaveSizePredicate = isWave32
+ defm V_CVT_SCALEF32_PK16_FP6_F32 : VOP3Inst<"v_cvt_scalef32_pk16_fp6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_F32>, int_amdgcn_cvt_scalef32_pk16_fp6_f32>;
+ defm V_CVT_SCALEF32_PK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_pk16_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_F32>, int_amdgcn_cvt_scalef32_pk16_bf6_f32>;
+ defm V_CVT_SCALEF32_PK16_FP6_F16 : VOP3Inst<"v_cvt_scalef32_pk16_fp6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_F32>, int_amdgcn_cvt_scalef32_pk16_fp6_f16>;
+ defm V_CVT_SCALEF32_PK16_BF6_F16 : VOP3Inst<"v_cvt_scalef32_pk16_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_F32>, int_amdgcn_cvt_scalef32_pk16_bf6_f16>;
+ defm V_CVT_SCALEF32_PK16_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_pk16_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_F32>, int_amdgcn_cvt_scalef32_pk16_fp6_bf16>;
+ defm V_CVT_SCALEF32_PK16_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_pk16_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_F32>, int_amdgcn_cvt_scalef32_pk16_bf6_bf16>;
+
+ let WaveSizePredicate = isWave32 in {
+ defm V_CVT_SCALEF32_SR_PK8_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp8_bf16>;
+ defm V_CVT_SCALEF32_SR_PK8_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_bf8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_bf8_bf16>;
+ defm V_CVT_SCALEF32_SR_PK8_FP8_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp8_f16>;
+ defm V_CVT_SCALEF32_SR_PK8_BF8_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_bf8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_bf8_f16>;
+ defm V_CVT_SCALEF32_SR_PK8_FP8_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp8_f32>;
+ defm V_CVT_SCALEF32_SR_PK8_BF8_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk8_bf8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_bf8_f32>;
+ defm V_CVT_SCALEF32_SR_PK8_FP4_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp4_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp4_f32>;
+ defm V_CVT_SCALEF32_SR_PK8_FP4_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp4_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp4_f16>;
+ defm V_CVT_SCALEF32_SR_PK8_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk8_fp4_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16>;
+ } // End WaveSizePredicate = isWave32
+ defm V_CVT_SCALEF32_SR_PK16_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16>;
+ defm V_CVT_SCALEF32_SR_PK16_BF6_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_bf6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_bf6_f16>;
+ defm V_CVT_SCALEF32_SR_PK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk16_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_bf6_f32>;
+ defm V_CVT_SCALEF32_SR_PK16_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16>;
+ defm V_CVT_SCALEF32_SR_PK16_FP6_F16 : VOP3Inst<"v_cvt_scalef32_sr_pk16_fp6_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_fp6_f16>;
+ defm V_CVT_SCALEF32_SR_PK16_FP6_F32 : VOP3Inst<"v_cvt_scalef32_sr_pk16_fp6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V3I32_V16F32_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk16_fp6_f32>;
+ } // End Constraints = "@earlyclobber $vdst"
+
let True16Predicate = UseRealTrue16Insts in {
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_t16_e64, f16>;
def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_t16_e64, f16>;
@@ -1754,6 +1838,12 @@ let SubtargetPredicate = isGFX1250Plus in {
}
} // End SubtargetPredicate = isGFX1250Plus
+let SubtargetPredicate = HasTensorCvtLutInsts in {
+ defm V_PERM_PK16_B4_U4 : VOP3Inst<"v_perm_pk16_b4_u4", VOP3_V2I32_I32_I32_V2I32, int_amdgcn_perm_pk16_b4_u4>;
+ defm V_PERM_PK16_B6_U4 : VOP3Inst<"v_perm_pk16_b6_u4", VOP3_V3I32_I32_I64_V2I32, int_amdgcn_perm_pk16_b6_u4>;
+ defm V_PERM_PK16_B8_U4 : VOP3Inst<"v_perm_pk16_b8_u4", VOP3_V4I32_I64_I64_V2I32, int_amdgcn_perm_pk16_b8_u4>;
+} // End SubtargetPredicate = HasTensorCvtLutInsts
+
class Cvt_Scale_Sr_F32ToBF16F16_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
(DstTy (node DstTy:$vdst_in, f32:$src0, i32:$src1, timm:$word_sel)),
(inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in)
@@ -1973,6 +2063,11 @@ defm V_ADD_MAX_I32 : VOP3Only_Realtriple_gfx1250<0x25e>;
defm V_ADD_MAX_U32 : VOP3Only_Realtriple_gfx1250<0x25f>;
defm V_ADD_MIN_I32 : VOP3Only_Realtriple_gfx1250<0x260>;
defm V_ADD_MIN_U32 : VOP3Only_Realtriple_gfx1250<0x261>;
+defm V_PERMLANE_BCAST_B32 : VOP3Only_Real_Base_gfx12<0x270>;
+defm V_PERMLANE_UP_B32 : VOP3Only_Real_Base_gfx12<0x271>;
+defm V_PERMLANE_DOWN_B32 : VOP3Only_Real_Base_gfx12<0x272>;
+defm V_PERMLANE_XOR_B32 : VOP3Only_Real_Base_gfx12<0x273>;
+defm V_PERMLANE_IDX_GEN_B32 : VOP3Only_Real_Base_gfx12<0x314>;
//===----------------------------------------------------------------------===//
// GFX11, GFX12
@@ -2147,6 +2242,9 @@ let AssemblerPredicate = isGFX11Plus in {
}
// These instructions differ from GFX12 variant by supporting DPP:
+defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
+defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;
+defm V_PERM_PK16_B8_U4 : VOP3Only_Real_Base_gfx1250<0x243>;
defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
defm V_ASHR_PK_I8_I32 : VOP3Only_Realtriple_gfx1250<0x290>;
defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>;
@@ -2159,6 +2257,42 @@ defm V_CVT_SCALE_PK8_F32_FP8 : VOP3Only_ScaleSel_Real_gfx1250<0x2aa>;
defm V_CVT_SCALE_PK8_F16_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ab>;
defm V_CVT_SCALE_PK8_BF16_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ac>;
defm V_CVT_SCALE_PK8_F32_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ad>;
+defm V_CVT_SCALEF32_PK8_FP4_F32 : VOP3Only_Real_Base_gfx1250<0x2b0>;
+defm V_CVT_SCALEF32_PK8_FP4_F16 : VOP3Only_Real_Base_gfx1250<0x2b3>;
+defm V_CVT_SCALEF32_PK8_FP8_BF16 : VOP3Only_Real_Base_gfx1250<0x2b4>;
+defm V_CVT_SCALEF32_PK8_BF8_BF16 : VOP3Only_Real_Base_gfx1250<0x2b5>;
+defm V_CVT_SCALEF32_PK8_FP4_BF16 : VOP3Only_Real_Base_gfx1250<0x2b8>;
+defm V_CVT_SCALEF32_PK8_FP8_F32 : VOP3Only_Real_Base_gfx1250<0x2c3>;
+defm V_CVT_SCALEF32_PK8_FP8_F16 : VOP3Only_Real_Base_gfx1250<0x2c4>;
+defm V_CVT_SCALEF32_PK8_BF8_F32 : VOP3Only_Real_Base_gfx1250<0x2c5>;
+defm V_CVT_SCALEF32_PK8_BF8_F16 : VOP3Only_Real_Base_gfx1250<0x2c6>;
+defm V_CVT_SCALE_PK16_F16_FP6 : VOP3Only_ScaleSel_Real_gfx1250<0x2c7>;
+defm V_CVT_SCALE_PK16_BF16_FP6 : VOP3Only_ScaleSel_Real_gfx1250<0x2c8>;
+defm V_CVT_SCALE_PK16_F32_FP6 : VOP3Only_ScaleSel_Real_gfx1250<0x2c9>;
+defm V_CVT_SCALE_PK16_F16_BF6 : VOP3Only_ScaleSel_Real_gfx1250<0x2ca>;
+defm V_CVT_SCALE_PK16_BF16_BF6 : VOP3Only_ScaleSel_Real_gfx1250<0x2cb>;
+defm V_CVT_SCALE_PK16_F32_BF6 : VOP3Only_ScaleSel_Real_gfx1250<0x2cc>;
+defm V_CVT_SCALEF32_PK16_FP6_F32 : VOP3Only_Real_Base_gfx1250<0x2cd>;
+defm V_CVT_SCALEF32_PK16_BF6_F32 : VOP3Only_Real_Base_gfx1250<0x2ce>;
+defm V_CVT_SCALEF32_PK16_FP6_F16 : VOP3Only_Real_Base_gfx1250<0x2cf>;
+defm V_CVT_SCALEF32_PK16_BF6_F16 : VOP3Only_Real_Base_gfx1250<0x2d0>;
+defm V_CVT_SCALEF32_PK16_FP6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d1>;
+defm V_CVT_SCALEF32_PK16_BF6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d2>;
+defm V_CVT_SCALEF32_SR_PK16_FP6_F32 : VOP3Only_Real_Base_gfx1250<0x2d3>;
+defm V_CVT_SCALEF32_SR_PK16_BF6_F32 : VOP3Only_Real_Base_gfx1250<0x2d4>;
+defm V_CVT_SCALEF32_SR_PK16_FP6_F16 : VOP3Only_Real_Base_gfx1250<0x2d5>;
+defm V_CVT_SCALEF32_SR_PK16_BF6_F16 : VOP3Only_Real_Base_gfx1250<0x2d6>;
+defm V_CVT_SCALEF32_SR_PK16_FP6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d7>;
+defm V_CVT_SCALEF32_SR_PK16_BF6_BF16 : VOP3Only_Real_Base_gfx1250<0x2d8>;
+defm V_CVT_SCALEF32_SR_PK8_FP4_F32 : VOP3Only_Real_Base_gfx1250<0x297>;
+defm V_CVT_SCALEF32_SR_PK8_FP8_F32 : VOP3Only_Real_Base_gfx1250<0x298>;
+defm V_CVT_SCALEF32_SR_PK8_BF8_F32 : VOP3Only_Real_Base_gfx1250<0x299>;
+defm V_CVT_SCALEF32_SR_PK8_FP4_F16 : VOP3Only_Real_Base_gfx1250<0x2b9>;
+defm V_CVT_SCALEF32_SR_PK8_FP4_BF16 : VOP3Only_Real_Base_gfx1250<0x2bc>;
+defm V_CVT_SCALEF32_SR_PK8_FP8_F16 : VOP3Only_Real_Base_gfx1250<0x2bf>;
+defm V_CVT_SCALEF32_SR_PK8_FP8_BF16 : VOP3Only_Real_Base_gfx1250<0x2c0>;
+defm V_CVT_SCALEF32_SR_PK8_BF8_F16 : VOP3Only_Real_Base_gfx1250<0x2c1>;
+defm V_CVT_SCALEF32_SR_PK8_BF8_BF16 : VOP3Only_Real_Base_gfx1250<0x2c2>;
defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>;
defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>;
defm V_CVT_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x36f>;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bd4b75f..9366256 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -5521,18 +5521,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
if (Op.getValueType().isInteger()) {
- // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
- // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
- // supported types.
- if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
- CTVal->isOne() && CFVal->isAllOnes() &&
- LHS.getValueType() == TrueVal.getValueType()) {
- EVT VT = LHS.getValueType();
- SDValue Shift =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
- return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
- }
// Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 146fc67..c221d22 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1108,9 +1108,8 @@ std::optional<bool> ARMAsmBackend::evaluateFixup(const MCFragment &F,
}
void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup, Target))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -1124,14 +1123,15 @@ void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
return; // Doesn't change encoding.
const unsigned NumBytes = getFixupKindNumBytes(Kind);
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FullSizeBytes;
if (Endian == llvm::endianness::big) {
FullSizeBytes = getFixupKindContainerSizeBytes(Kind);
- assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
+ assert(Fixup.getOffset() + FullSizeBytes <= F.getSize() &&
+ "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
}
@@ -1141,7 +1141,7 @@ void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx =
Endian == llvm::endianness::little ? i : (FullSizeBytes - 1 - i);
- Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 07d2cf7..2844232 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -40,8 +40,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
unsigned getRelaxedOpcode(unsigned Op, const MCSubtargetInfo &STI) const;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 128cc0b..05a7d03 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -368,9 +368,8 @@ AVRAsmBackend::createObjectTargetWriter() const {
}
void AVRAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
// AVR sets the fixup value to bypass the assembly time overflow with a
// relocation.
if (IsResolved) {
@@ -397,14 +396,14 @@ void AVRAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i < NumBytes; ++i) {
uint8_t mask = (((Value >> (i * 8)) & 0xff));
- Data[Offset + i] |= mask;
+ Data[i] |= mask;
}
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 68c839e..9633669 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -38,8 +38,7 @@ public:
createObjectTargetWriter() const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index dda8753..53933f9 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -27,8 +27,7 @@ public:
~BPFAsmBackend() override = default;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
@@ -66,35 +65,32 @@ bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
}
void BPFAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
if (Fixup.getKind() == FK_SecRel_8) {
// The Value is 0 for global variables, and the in-section offset
// for static variables. Write to the immediate field of the inst.
assert(Value <= UINT32_MAX);
- support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4],
- static_cast<uint32_t>(Value),
+ support::endian::write<uint32_t>(Data + 4, static_cast<uint32_t>(Value),
Endian);
} else if (Fixup.getKind() == FK_Data_4 && !Fixup.isPCRel()) {
- support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
+ support::endian::write<uint32_t>(Data, Value, Endian);
} else if (Fixup.getKind() == FK_Data_8) {
- support::endian::write<uint64_t>(&Data[Fixup.getOffset()], Value, Endian);
+ support::endian::write<uint64_t>(Data, Value, Endian);
} else if (Fixup.getKind() == FK_Data_4 && Fixup.isPCRel()) {
Value = (uint32_t)((Value - 8) / 8);
if (Endian == llvm::endianness::little) {
- Data[Fixup.getOffset() + 1] = 0x10;
- support::endian::write32le(&Data[Fixup.getOffset() + 4], Value);
+ Data[1] = 0x10;
+ support::endian::write32le(Data + 4, Value);
} else {
- Data[Fixup.getOffset() + 1] = 0x1;
- support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
+ Data[1] = 0x1;
+ support::endian::write32be(Data + 4, Value);
}
} else if (Fixup.getKind() == BPF::FK_BPF_PCRel_4) {
// The input Value represents the number of bytes.
Value = (uint32_t)((Value - 8) / 8);
- support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4], Value,
- Endian);
+ support::endian::write<uint32_t>(Data + 4, Value, Endian);
} else {
assert(Fixup.getKind() == FK_Data_2 && Fixup.isPCRel());
@@ -103,8 +99,7 @@ void BPFAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
report_fatal_error("Branch target out of insn range");
Value = (uint16_t)((Value - 8) / 8);
- support::endian::write<uint16_t>(&Data[Fixup.getOffset() + 2], Value,
- Endian);
+ support::endian::write<uint16_t>(Data + 2, Value, Endian);
}
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index 694d9ea..6964998 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -197,9 +197,8 @@ std::optional<bool> CSKYAsmBackend::evaluateFixup(const MCFragment &F,
}
void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup, Target))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -217,10 +216,10 @@ void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
@@ -228,14 +227,14 @@ void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
bool IsInstFixup = (Kind >= FirstTargetFixupKind);
if (IsLittleEndian && IsInstFixup && (NumBytes == 4)) {
- Data[Offset + 0] |= uint8_t((Value >> 16) & 0xff);
- Data[Offset + 1] |= uint8_t((Value >> 24) & 0xff);
- Data[Offset + 2] |= uint8_t(Value & 0xff);
- Data[Offset + 3] |= uint8_t((Value >> 8) & 0xff);
+ Data[0] |= uint8_t((Value >> 16) & 0xff);
+ Data[1] |= uint8_t((Value >> 24) & 0xff);
+ Data[2] |= uint8_t(Value & 0xff);
+ Data[3] |= uint8_t((Value >> 8) & 0xff);
} else {
for (unsigned I = 0; I != NumBytes; I++) {
unsigned Idx = IsLittleEndian ? I : (NumBytes - 1 - I);
- Data[Offset + Idx] |= uint8_t((Value >> (I * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (I * 8)) & 0xff);
}
}
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index 1c8516f..5d8826a 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -25,8 +25,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
index 5323be6..9a14c01 100644
--- a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
@@ -78,8 +78,7 @@ public:
~DXILAsmBackend() override = default;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override {}
+ uint8_t *Data, uint64_t Value, bool IsResolved) override {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 7d3074b..1a0f1ab 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -402,8 +402,7 @@ public:
}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
- MutableArrayRef<char> Data, uint64_t FixupValue,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t FixupValue, bool IsResolved) override;
bool isInstRelaxable(MCInst const &HMI) const {
const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI);
@@ -649,8 +648,7 @@ public:
} // namespace
void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data,
+ const MCValue &Target, uint8_t *InstAddr,
uint64_t FixupValue, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup))
IsResolved = false;
@@ -667,10 +665,9 @@ void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// LLVM gives us an encoded value, we have to convert it back
// to a real offset before we can use it.
- uint32_t Offset = Fixup.getOffset();
unsigned NumBytes = getFixupKindNumBytes(Kind);
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
- char *InstAddr = Data.data() + Offset;
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
Value = adjustFixupValue(Kind, FixupValue);
if (!Value)
@@ -757,8 +754,8 @@ void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
uint32_t OldData = 0; for (unsigned i = 0; i < NumBytes; i++) OldData |=
(InstAddr[i] << (i * 8)) & (0xff << (i * 8));
dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << ": AValue=0x";
- dbgs().write_hex(FixupValue)
- << ": Offset=" << Offset << ": Size=" << Data.size() << ": OInst=0x";
+ dbgs().write_hex(FixupValue) << ": Offset=" << Fixup.getOffset()
+ << ": Size=" << F.getSize() << ": OInst=0x";
dbgs().write_hex(OldData) << ": Reloc=0x"; dbgs().write_hex(Reloc););
// For each byte of the fragment that the fixup touches, mask in the
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 83d1697..3112dea 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -48,8 +48,7 @@ public:
: MCAsmBackend(llvm::endianness::big), OSType(OST) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
@@ -72,9 +71,8 @@ bool LanaiAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
}
void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (!IsResolved)
Asm->getWriter().recordRelocation(F, Fixup, Target, Value);
@@ -85,7 +83,6 @@ void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Where in the object and where the number of bytes that need
// fixing up
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
unsigned FullSize = 4;
@@ -95,8 +92,7 @@ void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Load instruction and apply value
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = (FullSize - 1 - i);
- CurVal |= static_cast<uint64_t>(static_cast<uint8_t>(Data[Offset + Idx]))
- << (i * 8);
+ CurVal |= static_cast<uint64_t>(static_cast<uint8_t>(Data[Idx])) << (i * 8);
}
uint64_t Mask =
@@ -106,7 +102,7 @@ void LanaiAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Write out the fixed up bytes back to the code/data bits.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = (FullSize - 1 - i);
- Data[Offset + Idx] = static_cast<uint8_t>((CurVal >> (i * 8)) & 0xff);
+ Data[Idx] = static_cast<uint8_t>((CurVal >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index d9ea88c..fda9d97 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -131,19 +131,18 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
}
-static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup,
- MutableArrayRef<char> Data, uint64_t Value) {
+static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup, uint8_t *Data,
+ uint64_t Value) {
unsigned I;
- for (I = 0; I != Data.size() && Value; ++I, Value >>= 7)
+ for (I = 0; Value; ++I, Value >>= 7)
Data[I] |= uint8_t(Value & 0x7f);
if (Value)
Ctx.reportError(Fixup.getLoc(), "Invalid uleb128 value!");
}
void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (IsResolved && shouldForceRelocation(Fixup, Target))
IsResolved = false;
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
@@ -166,14 +165,14 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned I = 0; I != NumBytes; ++I) {
- Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
+ Data[I] |= uint8_t((Value >> (I * 8)) & 0xff);
}
}
@@ -274,15 +273,14 @@ bool LoongArchAsmBackend::relaxDwarfLineAddr(MCFragment &F,
int64_t LineDelta = F.getDwarfLineDelta();
const MCExpr &AddrDelta = F.getDwarfAddrDelta();
- SmallVector<MCFixup, 1> Fixups;
size_t OldSize = F.getVarSize();
int64_t Value;
if (AddrDelta.evaluateAsAbsolute(Value, *Asm))
return false;
- bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, *Asm);
- assert(IsAbsolute && "CFA with invalid expression");
- (void)IsAbsolute;
+ [[maybe_unused]] bool IsAbsolute =
+ AddrDelta.evaluateKnownAbsolute(Value, *Asm);
+ assert(IsAbsolute);
SmallVector<char> Data;
raw_svector_ostream OS(Data);
@@ -293,33 +291,23 @@ bool LoongArchAsmBackend::relaxDwarfLineAddr(MCFragment &F,
encodeSLEB128(LineDelta, OS);
}
- unsigned Offset;
- std::pair<MCFixupKind, MCFixupKind> FK;
-
// According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode
// takes a single unsigned half (unencoded) operand. The maximum encodable
// value is therefore 65535. Set a conservative upper bound for relaxation.
+ unsigned PCBytes;
if (Value > 60000) {
unsigned PtrSize = C.getAsmInfo()->getCodePointerSize();
-
- OS << uint8_t(dwarf::DW_LNS_extended_op);
- encodeULEB128(PtrSize + 1, OS);
-
- OS << uint8_t(dwarf::DW_LNE_set_address);
- Offset = OS.tell();
assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size");
- FK = getRelocPairForSize(PtrSize == 4 ? 32 : 64);
+ PCBytes = PtrSize;
+ OS << uint8_t(dwarf::DW_LNS_extended_op) << uint8_t(PtrSize + 1)
+ << uint8_t(dwarf::DW_LNE_set_address);
OS.write_zeros(PtrSize);
} else {
+ PCBytes = 2;
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
- Offset = OS.tell();
- FK = getRelocPairForSize(16);
support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
}
-
- const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(FK)));
- Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(FK)));
+ auto Offset = OS.tell() - PCBytes;
if (LineDelta == INT64_MAX) {
OS << uint8_t(dwarf::DW_LNS_extended_op);
@@ -330,7 +318,8 @@ bool LoongArchAsmBackend::relaxDwarfLineAddr(MCFragment &F,
}
F.setVarContents(Data);
- F.setVarFixups(Fixups);
+ F.setVarFixups({MCFixup::create(Offset, &AddrDelta,
+ MCFixup::getDataKindForSize(PCBytes))});
WasRelaxed = OldSize != Data.size();
return true;
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index 3d929fc..1f13601 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -42,8 +42,7 @@ public:
uint64_t &FixedValue, bool IsResolved);
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target);
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index 5e03903..fe83dc6 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -53,8 +53,7 @@ public:
.Default(false)) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
const MCSubtargetInfo &STI) const override;
@@ -78,14 +77,13 @@ public:
} // end anonymous namespace
void M68kAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (!IsResolved)
Asm->getWriter().recordRelocation(F, Fixup, Target, Value);
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + Size <= F.getSize() && "Invalid fixup offset!");
// Check that uppper bits are either all zeros or all ones.
// Specifically ignore overflow/underflow as long as the leakage is
// limited to the lower bits. This is to remain compatible with
@@ -95,8 +93,7 @@ void M68kAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Write in Big Endian
for (unsigned i = 0; i != Size; ++i)
- Data[Fixup.getOffset() + i] =
- uint8_t(static_cast<int64_t>(Value) >> ((Size - i - 1) * 8));
+ Data[i] = uint8_t(static_cast<int64_t>(Value) >> ((Size - i - 1) * 8));
}
/// cc—Carry clear GE—Greater than or equal
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 29e5bfa..d892b3a 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -36,8 +36,7 @@ public:
~MSP430AsmBackend() override = default;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
@@ -105,9 +104,8 @@ uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
}
void MSP430AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
Value = adjustFixupValue(Fixup, Value, getContext());
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
@@ -117,15 +115,14 @@ void MSP430AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
-
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index c2169be..33aab71 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -283,9 +283,8 @@ static bool shouldForceRelocation(const MCFixup &Fixup) {
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (shouldForceRelocation(Fixup))
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
@@ -297,7 +296,6 @@ void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
return; // Doesn't change encoding.
// Where do we start in the object
- unsigned Offset = Fixup.getOffset();
// Number of bytes we need to fixup
unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
// Used to point to big endian bytes
@@ -328,7 +326,7 @@ void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
unsigned Idx = Endian == llvm::endianness::little
? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i)
: (FullSize - 1 - i);
- CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ CurVal |= (uint64_t)((uint8_t)Data[Idx]) << (i * 8);
}
uint64_t Mask = ((uint64_t)(-1) >>
@@ -340,7 +338,7 @@ void MipsAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
unsigned Idx = Endian == llvm::endianness::little
? (microMipsLEByteOrder ? calculateMMLEIndex(i) : i)
: (FullSize - 1 - i);
- Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
+ Data[Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 816626d..40b5853 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -40,8 +40,7 @@ public:
createObjectTargetWriter() const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index d9680c7..7a8395a 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -1034,12 +1034,14 @@ MCELFStreamer &MipsTargetELFStreamer::getStreamer() {
void MipsTargetELFStreamer::emitGPRel32Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(4);
S.addFixup(Value, Mips::fixup_Mips_GPREL32);
S.appendContents(4, 0);
}
void MipsTargetELFStreamer::emitGPRel64Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(8);
// fixup_Mips_GPREL32 desginates R_MIPS_GPREL32+R_MIPS_64 on MIPS64.
S.addFixup(Value, Mips::fixup_Mips_GPREL32);
S.appendContents(8, 0);
@@ -1047,24 +1049,28 @@ void MipsTargetELFStreamer::emitGPRel64Value(const MCExpr *Value) {
void MipsTargetELFStreamer::emitDTPRel32Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(4);
S.addFixup(Value, Mips::fixup_Mips_DTPREL32);
S.appendContents(4, 0);
}
void MipsTargetELFStreamer::emitDTPRel64Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(8);
S.addFixup(Value, Mips::fixup_Mips_DTPREL64);
S.appendContents(8, 0);
}
void MipsTargetELFStreamer::emitTPRel32Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(4);
S.addFixup(Value, Mips::fixup_Mips_TPREL32);
S.appendContents(4, 0);
}
void MipsTargetELFStreamer::emitTPRel64Value(const MCExpr *Value) {
auto &S = getStreamer();
+ S.ensureHeadroom(8);
S.addFixup(Value, Mips::fixup_Mips_TPREL64);
S.appendContents(8, 0);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 65d1be3..15f45a1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -382,6 +382,54 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
}
}
+// We return an EVT that can hold N VTs
+// If the VT is a vector, the resulting EVT is a flat vector with the same
+// element type as VT's element type.
+static EVT getVectorizedVT(EVT VT, unsigned N, LLVMContext &C) {
+ if (N == 1)
+ return VT;
+
+ return VT.isVector() ? EVT::getVectorVT(C, VT.getScalarType(),
+ VT.getVectorNumElements() * N)
+ : EVT::getVectorVT(C, VT, N);
+}
+
+static SDValue getExtractVectorizedValue(SDValue V, unsigned I, EVT VT,
+ const SDLoc &dl, SelectionDAG &DAG) {
+ if (V.getValueType() == VT) {
+ assert(I == 0 && "Index must be 0 for scalar value");
+ return V;
+ }
+
+ if (!VT.isVector())
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, V,
+ DAG.getVectorIdxConstant(I, dl));
+
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, V,
+ DAG.getVectorIdxConstant(I * VT.getVectorNumElements(), dl));
+}
+
+template <typename T>
+static inline SDValue getBuildVectorizedValue(unsigned N, const SDLoc &dl,
+ SelectionDAG &DAG, T GetElement) {
+ if (N == 1)
+ return GetElement(0);
+
+ SmallVector<SDValue, 8> Values;
+ for (const unsigned I : llvm::seq(N)) {
+ SDValue Val = GetElement(I);
+ if (Val.getValueType().isVector())
+ DAG.ExtractVectorElements(Val, Values);
+ else
+ Values.push_back(Val);
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), Values[0].getValueType(),
+ Values.size());
+ return DAG.getBuildVector(VT, dl, Values);
+}
+
/// PromoteScalarIntegerPTX
/// Used to make sure the arguments/returns are suitable for passing
/// and promote them to a larger size if they're not.
@@ -420,9 +468,10 @@ static EVT promoteScalarIntegerPTX(const EVT VT) {
// parameter starting at index Idx using a single vectorized op of
// size AccessSize. If so, it returns the number of param pieces
// covered by the vector op. Otherwise, it returns 1.
-static unsigned CanMergeParamLoadStoresStartingAt(
+template <typename T>
+static unsigned canMergeParamLoadStoresStartingAt(
unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
- const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
+ const SmallVectorImpl<T> &Offsets, Align ParamAlignment) {
// Can't vectorize if param alignment is not sufficient.
if (ParamAlignment < AccessSize)
@@ -472,10 +521,11 @@ static unsigned CanMergeParamLoadStoresStartingAt(
// of the same size as ValueVTs indicating how each piece should be
// loaded/stored (i.e. as a scalar, or as part of a vector
// load/store).
+template <typename T>
static SmallVector<unsigned, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
- const SmallVectorImpl<uint64_t> &Offsets,
- Align ParamAlignment, bool IsVAArg = false) {
+ const SmallVectorImpl<T> &Offsets, Align ParamAlignment,
+ bool IsVAArg = false) {
// Set vector size to match ValueVTs and mark all elements as
// scalars by default.
@@ -486,7 +536,7 @@ VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
const auto GetNumElts = [&](unsigned I) -> unsigned {
for (const unsigned AccessSize : {16, 8, 4, 2}) {
- const unsigned NumElts = CanMergeParamLoadStoresStartingAt(
+ const unsigned NumElts = canMergeParamLoadStoresStartingAt(
I, AccessSize, ValueVTs, Offsets, ParamAlignment);
assert((NumElts == 1 || NumElts == 2 || NumElts == 4) &&
"Unexpected vectorization size");
@@ -1384,6 +1434,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Type *RetTy = CLI.RetTy;
const CallBase *CB = CLI.CB;
const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &Ctx = *DAG.getContext();
const auto GetI32 = [&](const unsigned I) {
return DAG.getConstant(I, dl, MVT::i32);
@@ -1476,15 +1527,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const SDValue ParamSymbol =
getCallParamSymbol(DAG, IsVAArg ? FirstVAArg : ArgI, MVT::i32);
- SmallVector<EVT, 16> VTs;
- SmallVector<uint64_t, 16> Offsets;
-
assert((!IsByVal || Arg.IndirectType) &&
"byval arg must have indirect type");
Type *ETy = (IsByVal ? Arg.IndirectType : Arg.Ty);
- ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
- assert(VTs.size() == Offsets.size() && "Size mismatch");
- assert((IsByVal || VTs.size() == ArgOuts.size()) && "Size mismatch");
const Align ArgAlign = [&]() {
if (IsByVal) {
@@ -1492,17 +1537,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// so we don't need to worry whether it's naturally aligned or not.
// See TargetLowering::LowerCallTo().
const Align InitialAlign = ArgOuts[0].Flags.getNonZeroByValAlign();
- const Align ByValAlign = getFunctionByValParamAlign(
- CB->getCalledFunction(), ETy, InitialAlign, DL);
- if (IsVAArg)
- VAOffset = alignTo(VAOffset, ByValAlign);
- return ByValAlign;
+ return getFunctionByValParamAlign(CB->getCalledFunction(), ETy,
+ InitialAlign, DL);
}
return getArgumentAlignment(CB, Arg.Ty, ArgI + 1, DL);
}();
- const unsigned TypeSize = DL.getTypeAllocSize(ETy);
- assert((!IsByVal || TypeSize == ArgOuts[0].Flags.getByValSize()) &&
+ const unsigned TySize = DL.getTypeAllocSize(ETy);
+ assert((!IsByVal || TySize == ArgOuts[0].Flags.getByValSize()) &&
"type size mismatch");
const SDValue ArgDeclare = [&]() {
@@ -1510,105 +1552,120 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return VADeclareParam;
if (IsByVal || shouldPassAsArray(Arg.Ty))
- return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TypeSize);
+ return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TySize);
assert(ArgOuts.size() == 1 && "We must pass only one value as non-array");
assert((ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint()) &&
"Only int and float types are supported as non-array arguments");
- return MakeDeclareScalarParam(ParamSymbol, TypeSize);
+ return MakeDeclareScalarParam(ParamSymbol, TySize);
}();
- // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
- // than 32-bits are sign extended or zero extended, depending on
- // whether they are signed or unsigned types. This case applies
- // only to scalar parameters and not to aggregate values.
- const bool ExtendIntegerParam =
- Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32;
+ if (IsByVal) {
+ assert(ArgOutVals.size() == 1 && "We must pass only one value as byval");
+ SDValue SrcPtr = ArgOutVals[0];
+ const auto PointerInfo = refinePtrAS(SrcPtr, DAG, DL, *this);
+ const Align BaseSrcAlign = ArgOuts[0].Flags.getNonZeroByValAlign();
- const auto GetStoredValue = [&](const unsigned I, EVT EltVT,
- const MaybeAlign PartAlign) {
- if (IsByVal) {
- SDValue Ptr = ArgOutVals[0];
- auto MPI = refinePtrAS(Ptr, DAG, DL, *this);
- SDValue SrcAddr =
- DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(Offsets[I]));
-
- return DAG.getLoad(EltVT, dl, CallChain, SrcAddr, MPI, PartAlign);
+ if (IsVAArg)
+ VAOffset = alignTo(VAOffset, ArgAlign);
+
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
+ SmallVector<TypeSize, 4> Offsets;
+ ComputeValueVTs(*this, DL, ETy, ValueVTs, &MemVTs, &Offsets);
+
+ unsigned J = 0;
+ const auto VI = VectorizePTXValueVTs(MemVTs, Offsets, ArgAlign, IsVAArg);
+ for (const unsigned NumElts : VI) {
+ EVT LoadVT = getVectorizedVT(MemVTs[J], NumElts, Ctx);
+ Align SrcAlign = commonAlignment(BaseSrcAlign, Offsets[J]);
+ SDValue SrcAddr = DAG.getObjectPtrOffset(dl, SrcPtr, Offsets[J]);
+ SDValue SrcLoad =
+ DAG.getLoad(LoadVT, dl, CallChain, SrcAddr, PointerInfo, SrcAlign);
+
+ TypeSize ParamOffset = Offsets[J].getWithIncrement(VAOffset);
+ Align ParamAlign = commonAlignment(ArgAlign, ParamOffset);
+ SDValue ParamAddr =
+ DAG.getObjectPtrOffset(dl, ParamSymbol, ParamOffset);
+ SDValue StoreParam =
+ DAG.getStore(ArgDeclare, dl, SrcLoad, ParamAddr,
+ MachinePointerInfo(ADDRESS_SPACE_PARAM), ParamAlign);
+ CallPrereqs.push_back(StoreParam);
+
+ J += NumElts;
}
- SDValue StVal = ArgOutVals[I];
- assert(promoteScalarIntegerPTX(StVal.getValueType()) ==
- StVal.getValueType() &&
- "OutVal type should always be legal");
-
- const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
- const EVT StoreVT =
- ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
-
- return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl);
- };
-
- const auto VectorInfo =
- VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
-
- unsigned J = 0;
- for (const unsigned NumElts : VectorInfo) {
- const int CurOffset = Offsets[J];
- const EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
-
- if (IsVAArg && !IsByVal)
- // Align each part of the variadic argument to their type.
- VAOffset = alignTo(VAOffset, DAG.getEVTAlign(EltVT));
-
- assert((IsVAArg || VAOffset == 0) &&
- "VAOffset must be 0 for non-VA args");
+ if (IsVAArg)
+ VAOffset += TySize;
+ } else {
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, Arg.Ty, VTs, &Offsets, VAOffset);
+ assert(VTs.size() == Offsets.size() && "Size mismatch");
+ assert(VTs.size() == ArgOuts.size() && "Size mismatch");
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
+ // than 32-bits are sign extended or zero extended, depending on
+ // whether they are signed or unsigned types. This case applies
+ // only to scalar parameters and not to aggregate values.
+ const bool ExtendIntegerParam =
+ Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32;
+
+ const auto GetStoredValue = [&](const unsigned I) {
+ SDValue StVal = ArgOutVals[I];
+ assert(promoteScalarIntegerPTX(StVal.getValueType()) ==
+ StVal.getValueType() &&
+ "OutVal type should always be legal");
+
+ const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
+ const EVT StoreVT =
+ ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
+
+ return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl);
+ };
+
+ unsigned J = 0;
+ const auto VI = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
+ for (const unsigned NumElts : VI) {
+ const EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
+
+ unsigned Offset;
+ if (IsVAArg) {
+ // TODO: We may need to support vector types that can be passed
+ // as scalars in variadic arguments.
+ assert(NumElts == 1 &&
+ "Vectorization should be disabled for vaargs.");
+
+ // Align each part of the variadic argument to their type.
+ VAOffset = alignTo(VAOffset, DAG.getEVTAlign(EltVT));
+ Offset = VAOffset;
+
+ const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
+ VAOffset += DL.getTypeAllocSize(TheStoreType.getTypeForEVT(Ctx));
+ } else {
+ assert(VAOffset == 0 && "VAOffset must be 0 for non-VA args");
+ Offset = Offsets[J];
+ }
- const unsigned Offset =
- (VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset));
- SDValue Ptr =
- DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset));
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset));
- const MaybeAlign CurrentAlign = ExtendIntegerParam
- ? MaybeAlign(std::nullopt)
- : commonAlignment(ArgAlign, Offset);
+ const MaybeAlign CurrentAlign = ExtendIntegerParam
+ ? MaybeAlign(std::nullopt)
+ : commonAlignment(ArgAlign, Offset);
- SDValue Val;
- if (NumElts == 1) {
- Val = GetStoredValue(J, EltVT, CurrentAlign);
- } else {
- SmallVector<SDValue, 8> StoreVals;
- for (const unsigned K : llvm::seq(NumElts)) {
- SDValue ValJ = GetStoredValue(J + K, EltVT, CurrentAlign);
- if (ValJ.getValueType().isVector())
- DAG.ExtractVectorElements(ValJ, StoreVals);
- else
- StoreVals.push_back(ValJ);
- }
+ SDValue Val =
+ getBuildVectorizedValue(NumElts, dl, DAG, [&](unsigned K) {
+ return GetStoredValue(J + K);
+ });
- EVT VT = EVT::getVectorVT(
- *DAG.getContext(), StoreVals[0].getValueType(), StoreVals.size());
- Val = DAG.getBuildVector(VT, dl, StoreVals);
- }
+ SDValue StoreParam =
+ DAG.getStore(ArgDeclare, dl, Val, Ptr,
+ MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
+ CallPrereqs.push_back(StoreParam);
- SDValue StoreParam =
- DAG.getStore(ArgDeclare, dl, Val, Ptr,
- MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
- CallPrereqs.push_back(StoreParam);
-
- // TODO: We may need to support vector types that can be passed
- // as scalars in variadic arguments.
- if (IsVAArg && !IsByVal) {
- assert(NumElts == 1 &&
- "Vectorization is expected to be disabled for variadics.");
- const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
- VAOffset +=
- DL.getTypeAllocSize(TheStoreType.getTypeForEVT(*DAG.getContext()));
+ J += NumElts;
}
-
- J += NumElts;
}
- if (IsVAArg && IsByVal)
- VAOffset += TypeSize;
}
// Handle Result
@@ -1676,17 +1733,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallPrereqs.push_back(PrototypeDeclare);
}
- if (ConvertToIndirectCall) {
- // Copy the function ptr to a ptx register and use the register to call the
- // function.
- const MVT DestVT = Callee.getValueType().getSimpleVT();
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- Register DestReg = MRI.createVirtualRegister(TLI.getRegClassFor(DestVT));
- auto RegCopy = DAG.getCopyToReg(DAG.getEntryNode(), dl, DestReg, Callee);
- Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
- }
-
const unsigned Proto = IsIndirectCall ? UniqueCallSite : 0;
const unsigned NumArgs =
std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
@@ -1703,10 +1749,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!Ins.empty()) {
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
+ const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
@@ -1714,9 +1761,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const bool ExtendIntegerRetVal =
RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
- const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
unsigned I = 0;
- for (const unsigned NumElts : VectorInfo) {
+ const auto VI = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
+ for (const unsigned NumElts : VI) {
const MaybeAlign CurrentAlign =
ExtendIntegerRetVal ? MaybeAlign(std::nullopt)
: commonAlignment(RetAlign, Offsets[I]);
@@ -1724,16 +1771,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
const EVT LoadVT =
ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
-
- const unsigned PackingAmt =
- LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
-
- const EVT VecVT = NumElts == 1 ? LoadVT
- : EVT::getVectorVT(*DAG.getContext(),
- LoadVT.getScalarType(),
- NumElts * PackingAmt);
-
- const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
+ const EVT VecVT = getVectorizedVT(LoadVT, NumElts, Ctx);
SDValue Ptr =
DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
@@ -1742,17 +1780,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
LoadChains.push_back(R.getValue(1));
-
- if (NumElts == 1)
- ProxyRegOps.push_back(R);
- else
- for (const unsigned J : llvm::seq(NumElts)) {
- SDValue Elt = DAG.getNode(
- LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
- : ISD::EXTRACT_VECTOR_ELT,
- dl, LoadVT, R, DAG.getVectorIdxConstant(J * PackingAmt, dl));
- ProxyRegOps.push_back(Elt);
- }
+ for (const unsigned J : llvm::seq(NumElts))
+ ProxyRegOps.push_back(getExtractVectorizedValue(R, J, LoadVT, dl, DAG));
I += NumElts;
}
}
@@ -3227,11 +3256,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
const DataLayout &DL = DAG.getDataLayout();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- const Function *F = &MF.getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
SDValue Root = DAG.getRoot();
SmallVector<SDValue, 16> OutChains;
@@ -3247,7 +3275,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// See similar issue in LowerCall.
auto AllIns = ArrayRef(Ins);
- for (const auto &Arg : F->args()) {
+ for (const auto &Arg : F.args()) {
const auto ArgIns = AllIns.take_while(
[&](auto I) { return I.OrigArgIndex == Arg.getArgNo(); });
AllIns = AllIns.drop_front(ArgIns.size());
@@ -3287,7 +3315,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
assert(ByvalIn.VT == PtrVT && "ByVal argument must be a pointer");
SDValue P;
- if (isKernelFunction(*F)) {
+ if (isKernelFunction(F)) {
P = ArgSymbol;
P.getNode()->setIROrder(Arg.getArgNo() + 1);
} else {
@@ -3305,43 +3333,27 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
assert(VTs.size() == Offsets.size() && "Size mismatch");
const Align ArgAlign = getFunctionArgumentAlignment(
- F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL);
+ &F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL);
- const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
unsigned I = 0;
- for (const unsigned NumElts : VectorInfo) {
+ const auto VI = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
+ for (const unsigned NumElts : VI) {
// i1 is loaded/stored as i8
const EVT LoadVT = VTs[I] == MVT::i1 ? MVT::i8 : VTs[I];
- // If the element is a packed type (ex. v2f16, v4i8, etc) holding
- // multiple elements.
- const unsigned PackingAmt =
- LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
-
- const EVT VecVT =
- NumElts == 1
- ? LoadVT
- : EVT::getVectorVT(F->getContext(), LoadVT.getScalarType(),
- NumElts * PackingAmt);
+ const EVT VecVT = getVectorizedVT(LoadVT, NumElts, *DAG.getContext());
SDValue VecAddr = DAG.getObjectPtrOffset(
dl, ArgSymbol, TypeSize::getFixed(Offsets[I]));
- const MaybeAlign PartAlign = commonAlignment(ArgAlign, Offsets[I]);
+ const Align PartAlign = commonAlignment(ArgAlign, Offsets[I]);
SDValue P =
DAG.getLoad(VecVT, dl, Root, VecAddr,
MachinePointerInfo(ADDRESS_SPACE_PARAM), PartAlign,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
- if (P.getNode())
- P.getNode()->setIROrder(Arg.getArgNo() + 1);
+ P.getNode()->setIROrder(Arg.getArgNo() + 1);
for (const unsigned J : llvm::seq(NumElts)) {
- SDValue Elt =
- NumElts == 1
- ? P
- : DAG.getNode(LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
- : ISD::EXTRACT_VECTOR_ELT,
- dl, LoadVT, P,
- DAG.getVectorIdxConstant(J * PackingAmt, dl));
+ SDValue Elt = getExtractVectorizedValue(P, J, LoadVT, dl, DAG);
Elt = correctParamType(Elt, ArgIns[I + J].VT, ArgIns[I + J].Flags,
DAG, dl);
@@ -3364,9 +3376,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
- const MachineFunction &MF = DAG.getMachineFunction();
- const Function &F = MF.getFunction();
- Type *RetTy = MF.getFunction().getReturnType();
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Type *RetTy = F.getReturnType();
if (RetTy->isVoidTy()) {
assert(OutVals.empty() && Outs.empty() && "Return value expected for void");
@@ -3374,10 +3385,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
const DataLayout &DL = DAG.getDataLayout();
- SmallVector<EVT, 16> VTs;
- SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
- assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
+
+ const SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32);
+ const auto RetAlign = getFunctionParamOptimizedAlign(&F, RetTy, DL);
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
@@ -3385,6 +3395,11 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const bool ExtendIntegerRetVal =
RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
+ assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
+
const auto GetRetVal = [&](unsigned I) -> SDValue {
SDValue RetVal = OutVals[I];
assert(promoteScalarIntegerPTX(RetVal.getValueType()) ==
@@ -3397,33 +3412,16 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return correctParamType(RetVal, StoreVT, Outs[I].Flags, DAG, dl);
};
- const auto RetAlign = getFunctionParamOptimizedAlign(&F, RetTy, DL);
- const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
unsigned I = 0;
- for (const unsigned NumElts : VectorInfo) {
+ const auto VI = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
+ for (const unsigned NumElts : VI) {
const MaybeAlign CurrentAlign = ExtendIntegerRetVal
? MaybeAlign(std::nullopt)
: commonAlignment(RetAlign, Offsets[I]);
- SDValue Val;
- if (NumElts == 1) {
- Val = GetRetVal(I);
- } else {
- SmallVector<SDValue, 4> StoreVals;
- for (const unsigned J : llvm::seq(NumElts)) {
- SDValue ValJ = GetRetVal(I + J);
- if (ValJ.getValueType().isVector())
- DAG.ExtractVectorElements(ValJ, StoreVals);
- else
- StoreVals.push_back(ValJ);
- }
-
- EVT VT = EVT::getVectorVT(F.getContext(), StoreVals[0].getValueType(),
- StoreVals.size());
- Val = DAG.getBuildVector(VT, dl, StoreVals);
- }
+ SDValue Val = getBuildVectorizedValue(
+ NumElts, dl, DAG, [&](unsigned K) { return GetRetVal(I + K); });
- const SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32);
SDValue Ptr =
DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index d8047d3..2ae7520 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1602,8 +1602,6 @@ foreach is_convergent = [0, 1] in {
}
defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
- def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto),
- (call_inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto),
(call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto),
@@ -1612,10 +1610,6 @@ foreach is_convergent = [0, 1] in {
defvar call_uni_inst = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0),
(call_uni_inst (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
- def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0),
- (call_uni_inst $addr, imm:$rets, imm:$params)>;
- def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0),
- (call_uni_inst $addr, imm:$rets, imm:$params)>;
}
def DECLARE_PARAM_array :
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 0e8828f..ec97e2e 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -93,8 +93,8 @@ public:
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsResolved) override;
+ const MCValue &Target, uint8_t *Data, uint64_t Value,
+ bool IsResolved) override;
bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target) {
// If there is a @ specifier, unless it is optimized out (e.g. constant @l),
@@ -185,9 +185,8 @@ MCFixupKindInfo PPCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
void PPCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &TargetVal,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &TargetVal, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
// In PPC64 ELFv1, .quad .TOC.@tocbase in the .opd section is expected to
// reference the null symbol.
auto Target = TargetVal;
@@ -205,7 +204,6 @@ void PPCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
if (!Value)
return; // Doesn't change encoding.
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = getFixupKindNumBytes(Kind);
// For each byte of the fragment that the fixup touches, mask in the bits
@@ -213,7 +211,7 @@ void PPCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = Endian == llvm::endianness::little ? i : (NumBytes - 1 - i);
- Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525e..f179873 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7296,9 +7296,17 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
ValVT.isInteger() &&
ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
- SDValue ArgValueTrunc = DAG.getNode(
- ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
- ArgValue);
+ // It is possible to have either real integer values
+ // or integers that were not originally integers.
+ // In the latter case, these could have came from structs,
+ // and these integers would not have an extend on the parameter.
+ // Since these types of integers do not have an extend specified
+ // in the first place, the type of extend that we do should not matter.
+ EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
+ ? MVT::i8
+ : ArgVT;
+ SDValue ArgValueTrunc =
+ DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
SDValue ArgValueExt =
ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
: DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 5eb1f01..b7e2263 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -100,10 +100,14 @@ bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -189,10 +193,14 @@ bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 82e3b5c..eb7460e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -327,19 +327,19 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
bool &WasRelaxed) const {
- MCContext &C = getContext();
-
int64_t LineDelta = F.getDwarfLineDelta();
const MCExpr &AddrDelta = F.getDwarfAddrDelta();
- SmallVector<MCFixup, 1> Fixups;
size_t OldSize = F.getVarSize();
int64_t Value;
+ // If the label difference can be resolved, use the default handling, which
+ // utilizes a shorter special opcode.
+ if (AddrDelta.evaluateAsAbsolute(Value, *Asm))
+ return false;
[[maybe_unused]] bool IsAbsolute =
AddrDelta.evaluateKnownAbsolute(Value, *Asm);
assert(IsAbsolute && "CFA with invalid expression");
- Fixups.clear();
SmallVector<char> Data;
raw_svector_ostream OS(Data);
@@ -349,33 +349,21 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
encodeSLEB128(LineDelta, OS);
}
- unsigned Offset;
- std::pair<MCFixupKind, MCFixupKind> Fixup;
-
// According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode
// takes a single unsigned half (unencoded) operand. The maximum encodable
// value is therefore 65535. Set a conservative upper bound for relaxation.
+ unsigned PCBytes;
if (Value > 60000) {
- unsigned PtrSize = C.getAsmInfo()->getCodePointerSize();
-
- OS << uint8_t(dwarf::DW_LNS_extended_op);
- encodeULEB128(PtrSize + 1, OS);
-
- OS << uint8_t(dwarf::DW_LNE_set_address);
- Offset = OS.tell();
- assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size");
- Fixup = RISCV::getRelocPairForSize(PtrSize);
- OS.write_zeros(PtrSize);
+ PCBytes = getContext().getAsmInfo()->getCodePointerSize();
+ OS << uint8_t(dwarf::DW_LNS_extended_op) << uint8_t(PCBytes + 1)
+ << uint8_t(dwarf::DW_LNE_set_address);
+ OS.write_zeros(PCBytes);
} else {
+ PCBytes = 2;
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
- Offset = OS.tell();
- Fixup = RISCV::getRelocPairForSize(2);
support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
}
-
- const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(Fixup)));
- Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(Fixup)));
+ auto Offset = OS.tell() - PCBytes;
if (LineDelta == INT64_MAX) {
OS << uint8_t(dwarf::DW_LNS_extended_op);
@@ -386,7 +374,8 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
}
F.setVarContents(Data);
- F.setVarFixups(Fixups);
+ F.setVarFixups({MCFixup::create(Offset, &AddrDelta,
+ MCFixup::getDataKindForSize(PCBytes))});
WasRelaxed = OldSize != Data.size();
return true;
}
@@ -881,9 +870,8 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
}
void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
MCFixupKind Kind = Fixup.getKind();
if (mc::isRelocation(Kind))
@@ -898,15 +886,14 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8;
-
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index d97d632..adec1ec 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -46,8 +46,7 @@ public:
void maybeAddVendorReloc(const MCFragment &, const MCFixup &);
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index f816561c..98c8738 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -68,27 +68,6 @@ enum Fixups {
fixup_riscv_invalid,
NumTargetFixupKinds = fixup_riscv_invalid - FirstTargetFixupKind
};
-
-static inline std::pair<MCFixupKind, MCFixupKind>
-getRelocPairForSize(unsigned Size) {
- switch (Size) {
- default:
- llvm_unreachable("unsupported fixup size");
- case 1:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD8,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB8);
- case 2:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD16,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB16);
- case 4:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD32,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB32);
- case 8:
- return std::make_pair(FirstLiteralRelocationKind + ELF::R_RISCV_ADD64,
- FirstLiteralRelocationKind + ELF::R_RISCV_SUB64);
- }
-}
-
} // end namespace llvm::RISCV
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f223fdbe..5998653 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2827,6 +2827,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
static bool isWorthFoldingAdd(SDValue Add) {
for (auto *User : Add->users()) {
if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
+ User->getOpcode() != RISCVISD::LD_RV32 &&
+ User->getOpcode() != RISCVISD::SD_RV32 &&
User->getOpcode() != ISD::ATOMIC_LOAD &&
User->getOpcode() != ISD::ATOMIC_STORE)
return false;
@@ -2841,6 +2843,9 @@ static bool isWorthFoldingAdd(SDValue Add) {
if (User->getOpcode() == ISD::ATOMIC_STORE &&
cast<AtomicSDNode>(User)->getVal() == Add)
return false;
+ if (User->getOpcode() == RISCVISD::SD_RV32 &&
+ (User->getOperand(0) == Add || User->getOperand(1) == Add))
+ return false;
if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
return false;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c0ada51..adbfbeb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1819,6 +1819,13 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::riscv_seg6_load_mask:
case Intrinsic::riscv_seg7_load_mask:
case Intrinsic::riscv_seg8_load_mask:
+ case Intrinsic::riscv_sseg2_load_mask:
+ case Intrinsic::riscv_sseg3_load_mask:
+ case Intrinsic::riscv_sseg4_load_mask:
+ case Intrinsic::riscv_sseg5_load_mask:
+ case Intrinsic::riscv_sseg6_load_mask:
+ case Intrinsic::riscv_sseg7_load_mask:
+ case Intrinsic::riscv_sseg8_load_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
case Intrinsic::riscv_seg2_store_mask:
@@ -10938,6 +10945,97 @@ static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
}
+static SDValue
+lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ bool IsStrided;
+ switch (IntNo) {
+ case Intrinsic::riscv_seg2_load_mask:
+ case Intrinsic::riscv_seg3_load_mask:
+ case Intrinsic::riscv_seg4_load_mask:
+ case Intrinsic::riscv_seg5_load_mask:
+ case Intrinsic::riscv_seg6_load_mask:
+ case Intrinsic::riscv_seg7_load_mask:
+ case Intrinsic::riscv_seg8_load_mask:
+ IsStrided = false;
+ break;
+ case Intrinsic::riscv_sseg2_load_mask:
+ case Intrinsic::riscv_sseg3_load_mask:
+ case Intrinsic::riscv_sseg4_load_mask:
+ case Intrinsic::riscv_sseg5_load_mask:
+ case Intrinsic::riscv_sseg6_load_mask:
+ case Intrinsic::riscv_sseg7_load_mask:
+ case Intrinsic::riscv_sseg8_load_mask:
+ IsStrided = true;
+ break;
+ default:
+ llvm_unreachable("unexpected intrinsic ID");
+ };
+
+ static const Intrinsic::ID VlsegInts[7] = {
+ Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
+ Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
+ Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
+ Intrinsic::riscv_vlseg8_mask};
+ static const Intrinsic::ID VlssegInts[7] = {
+ Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
+ Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
+ Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
+ Intrinsic::riscv_vlsseg8_mask};
+
+ SDLoc DL(Op);
+ unsigned NF = Op->getNumValues() - 1;
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
+ ContainerVT.getScalarSizeInBits();
+ EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
+
+ // Operands: (chain, int_id, pointer, mask, vl) or
+ // (chain, int_id, pointer, offset, mask, vl)
+ SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
+ MVT MaskVT = Mask.getSimpleValueType();
+ MVT MaskContainerVT =
+ ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+
+ SDValue IntID = DAG.getTargetConstant(
+ IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+
+ SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
+ SmallVector<SDValue, 9> Ops = {
+ Load->getChain(),
+ IntID,
+ DAG.getUNDEF(VecTupTy),
+ Op.getOperand(2),
+ Mask,
+ VL,
+ DAG.getTargetConstant(
+ RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
+ DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
+ // Insert the stride operand.
+ if (IsStrided)
+ Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
+
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
+ SmallVector<SDValue, 9> Results;
+ for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
+ SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
+ Result.getValue(0),
+ DAG.getTargetConstant(RetIdx, DL, MVT::i32));
+ Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
+ }
+ Results.push_back(Result.getValue(1));
+ return DAG.getMergeValues(Results, DL);
+}
+
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
@@ -10950,57 +11048,16 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::riscv_seg5_load_mask:
case Intrinsic::riscv_seg6_load_mask:
case Intrinsic::riscv_seg7_load_mask:
- case Intrinsic::riscv_seg8_load_mask: {
- SDLoc DL(Op);
- static const Intrinsic::ID VlsegInts[7] = {
- Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
- Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
- Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
- Intrinsic::riscv_vlseg8_mask};
- unsigned NF = Op->getNumValues() - 1;
- assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
- MVT XLenVT = Subtarget.getXLenVT();
- MVT VT = Op->getSimpleValueType(0);
- MVT ContainerVT = getContainerForFixedLengthVector(VT);
- unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
- ContainerVT.getScalarSizeInBits();
- EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
-
- // Operands: (chain, int_id, pointer, mask, vl)
- SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
- SDValue Mask = Op.getOperand(3);
- MVT MaskVT = Mask.getSimpleValueType();
- MVT MaskContainerVT =
- ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
- Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
-
- SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
- auto *Load = cast<MemIntrinsicSDNode>(Op);
+ case Intrinsic::riscv_seg8_load_mask:
+ case Intrinsic::riscv_sseg2_load_mask:
+ case Intrinsic::riscv_sseg3_load_mask:
+ case Intrinsic::riscv_sseg4_load_mask:
+ case Intrinsic::riscv_sseg5_load_mask:
+ case Intrinsic::riscv_sseg6_load_mask:
+ case Intrinsic::riscv_sseg7_load_mask:
+ case Intrinsic::riscv_sseg8_load_mask:
+ return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
- SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
- SDValue Ops[] = {
- Load->getChain(),
- IntID,
- DAG.getUNDEF(VecTupTy),
- Op.getOperand(2),
- Mask,
- VL,
- DAG.getTargetConstant(
- RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
- DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
- SDValue Result =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
- Load->getMemoryVT(), Load->getMemOperand());
- SmallVector<SDValue, 9> Results;
- for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
- SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
- Result.getValue(0),
- DAG.getTargetConstant(RetIdx, DL, MVT::i32));
- Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
- }
- Results.push_back(Result.getValue(1));
- return DAG.getMergeValues(Results, DL);
- }
case Intrinsic::riscv_sf_vc_v_x_se:
return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
case Intrinsic::riscv_sf_vc_v_i_se:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 31ea2de..cc2977c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -910,7 +910,7 @@ foreach vti = AllIntegerVectors in {
foreach vti = I64IntegerVectors in {
let Predicates = [HasVInstructionsI64] in {
def : Pat<(add (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat_imm64_neg i64:$rs2))),
+ (vti.Vector (SplatPat_imm64_neg (i64 GPR:$rs2)))),
(!cast<Instruction>("PseudoVSUB_VX_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
vti.RegClass:$rs1,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 695223b..acbccdd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2123,7 +2123,7 @@ foreach vti = AllIntegerVectors in {
foreach vti = I64IntegerVectors in {
let Predicates = [HasVInstructionsI64] in {
def : Pat<(riscv_add_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat_imm64_neg i64:$rs2)),
+ (vti.Vector (SplatPat_imm64_neg (i64 GPR:$rs2))),
vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag),
(!cast<Instruction>("PseudoVSUB_VX_"#vti.LMul.MX#"_MASK")
vti.RegClass:$passthru, vti.RegClass:$rs1,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index c0f7ab1..4c31ce4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -590,12 +590,12 @@ let Predicates = [HasVendorXTHeadBb, IsRV64] in {
def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>;
def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
(TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
-def : Pat<(sra (bswap i64:$rs1), (i64 32)),
- (TH_REVW i64:$rs1)>;
-def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)),
- (TH_REVW i64:$rs1)>;
-def : Pat<(riscv_clzw i64:$rs1),
- (TH_FF0 (i64 (SLLI (i64 (XORI i64:$rs1, -1)), 32)))>;
+def : Pat<(i64 (sra (bswap GPR:$rs1), (i64 32))),
+ (TH_REVW GPR:$rs1)>;
+def : Pat<(binop_allwusers<srl> (bswap GPR:$rs1), (i64 32)),
+ (TH_REVW GPR:$rs1)>;
+def : Pat<(riscv_clzw GPR:$rs1),
+ (TH_FF0 (i64 (SLLI (i64 (XORI GPR:$rs1, -1)), 32)))>;
} // Predicates = [HasVendorXTHeadBb, IsRV64]
let Predicates = [HasVendorXTHeadBs] in {
@@ -697,11 +697,13 @@ def uimm2_4 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
}], uimm2_4_XFORM>;
let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
-def : Pat<(th_lwud i64:$rs1, uimm2_3:$uimm2_3), (TH_LWUD i64:$rs1, uimm2_3:$uimm2_3, 3)>;
-def : Pat<(th_ldd i64:$rs1, uimm2_4:$uimm2_4), (TH_LDD i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+def : Pat<(th_lwud GPR:$rs1, (i64 uimm2_3:$uimm2_3)),
+ (TH_LWUD GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+def : Pat<(th_ldd GPR:$rs1, (i64 uimm2_4:$uimm2_4)),
+ (TH_LDD GPR:$rs1, uimm2_4:$uimm2_4, 4)>;
-def : Pat<(th_sdd i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4),
- (TH_SDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+def : Pat<(th_sdd (i64 GPR:$rd1), GPR:$rd2, GPR:$rs1, uimm2_4:$uimm2_4),
+ (TH_SDD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_4:$uimm2_4, 4)>;
}
let Predicates = [HasVendorXTHeadMemPair] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index a250ac8..1efe616 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -1128,13 +1128,13 @@ let Predicates = [HasStdExtZvkned] in {
let Predicates = [HasStdExtZvknha] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CL", I32IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknha]
let Predicates = [HasStdExtZvknhb] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CL", I32I64IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknhb]
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
index ef84d43..5710cf2 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
@@ -21,8 +21,7 @@ public:
SPIRVAsmBackend(llvm::endianness Endian) : MCAsmBackend(Endian) {}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override {}
+ uint8_t *Data, uint64_t Value, bool IsResolved) override {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 6ec7544..25cdf72 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -148,6 +148,7 @@ struct ConvertBuiltin {
bool IsSaturated;
bool IsRounded;
bool IsBfloat16;
+ bool IsTF32;
FPRoundingMode::FPRoundingMode RoundingMode;
};
@@ -230,6 +231,7 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall,
// - "__spirv_SubgroupImageMediaBlockReadINTEL"
// - "__spirv_SubgroupImageMediaBlockWriteINTEL"
// - "__spirv_Convert"
+ // - "__spirv_Round"
// - "__spirv_UConvert"
// - "__spirv_SConvert"
// - "__spirv_FConvert"
@@ -242,7 +244,7 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall,
"SDotKHR|SUDotKHR|SDotAccSatKHR|UDotAccSatKHR|SUDotAccSatKHR|"
"ReadClockKHR|SubgroupBlockReadINTEL|SubgroupImageBlockReadINTEL|"
"SubgroupImageMediaBlockReadINTEL|SubgroupImageMediaBlockWriteINTEL|"
- "Convert|"
+ "Convert|Round|"
"UConvert|SConvert|FConvert|SatConvert)[^_]*)(_R[^_]*_?(\\w+)?.*)?");
std::smatch Match;
if (std::regex_match(BuiltinName, Match, SpvWithR) && Match.size() > 1) {
@@ -697,7 +699,8 @@ static bool buildAtomicStoreInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
if (Call->isSpirvOp())
- return buildOpFromWrapper(MIRBuilder, SPIRV::OpAtomicStore, Call, Register(0));
+ return buildOpFromWrapper(MIRBuilder, SPIRV::OpAtomicStore, Call,
+ Register(0));
Register ScopeRegister =
buildConstantIntReg32(SPIRV::Scope::Device, MIRBuilder, GR);
@@ -2677,8 +2680,20 @@ static bool generateConvertInst(const StringRef DemangledCall,
}
} else if (GR->isScalarOrVectorOfType(Call->ReturnRegister,
SPIRV::OpTypeFloat)) {
- // Float -> Float
- Opcode = SPIRV::OpFConvert;
+ if (Builtin->IsTF32) {
+ const auto *ST = static_cast<const SPIRVSubtarget *>(
+ &MIRBuilder.getMF().getSubtarget());
+ if (!ST->canUseExtension(
+ SPIRV::Extension::SPV_INTEL_tensor_float32_conversion))
+ NeedExtMsg = "SPV_INTEL_tensor_float32_conversion";
+ IsRightComponentsNumber =
+ GR->getScalarOrVectorComponentCount(Call->Arguments[0]) ==
+ GR->getScalarOrVectorComponentCount(Call->ReturnRegister);
+ Opcode = SPIRV::OpRoundFToTF32INTEL;
+ } else {
+ // Float -> Float
+ Opcode = SPIRV::OpFConvert;
+ }
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index ea78dcd..d08560b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1461,6 +1461,8 @@ class ConvertBuiltin<string name, InstructionSet set> {
bit IsRounded = !not(!eq(!find(name, "_rt"), -1));
bit IsBfloat16 = !or(!not(!eq(!find(name, "BF16"), -1)),
!not(!eq(!find(name, "bfloat16"), -1)));
+ bit IsTF32 = !or(!not(!eq(!find(name, "TF32"), -1)),
+ !not(!eq(!find(name, "tensor_float32"), -1)));
FPRoundingMode RoundingMode = !cond(!not(!eq(!find(name, "_rte"), -1)) : RTE,
!not(!eq(!find(name, "_rtz"), -1)) : RTZ,
!not(!eq(!find(name, "_rtp"), -1)) : RTP,
@@ -1472,7 +1474,7 @@ class ConvertBuiltin<string name, InstructionSet set> {
def ConvertBuiltins : GenericTable {
let FilterClass = "ConvertBuiltin";
let Fields = ["Name", "Set", "IsDestinationSigned", "IsSaturated",
- "IsRounded", "IsBfloat16", "RoundingMode"];
+ "IsRounded", "IsBfloat16", "IsTF32", "RoundingMode"];
string TypeOf_Set = "InstructionSet";
string TypeOf_RoundingMode = "FPRoundingMode";
}
@@ -1556,6 +1558,25 @@ foreach conv = ["FToBF16INTEL", "BF16ToFINTEL"] in {
def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
}
+// cl_intel_tensor_float32_conversions / SPV_INTEL_tensor_float32_conversion
+// Multiclass used to define at the same time both a demangled builtin record
+// and a corresponding convert builtin record.
+multiclass DemangledTF32RoundBuiltin<string name1, string name2> {
+ // Create records for scalar and vector conversions.
+ foreach i = ["", "2", "3", "4", "8", "16"] in {
+ def : DemangledBuiltin<!strconcat("intel_round_", name1, i, name2, i), OpenCL_std, Convert, 1, 1>;
+ def : ConvertBuiltin<!strconcat("intel_round_", name1, i, name2, i), OpenCL_std>;
+ }
+}
+
+defm : DemangledTF32RoundBuiltin<"tensor_float32", "_as_float">;
+defm : DemangledTF32RoundBuiltin<"as_tensor_float32", "_float">;
+
+foreach conv = ["FToTF32INTEL"] in {
+ def : DemangledBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std, Convert, 1, 1>;
+ def : ConvertBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std>;
+}
+
//===----------------------------------------------------------------------===//
// Class defining a vector data load/store builtin record used for lowering
// into OpExtInst instruction.
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 2726203..d9265f4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -102,7 +102,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_INTEL_2d_block_io},
{"SPV_INTEL_int4", SPIRV::Extension::Extension::SPV_INTEL_int4},
{"SPV_KHR_float_controls2",
- SPIRV::Extension::Extension::SPV_KHR_float_controls2}};
+ SPIRV::Extension::Extension::SPV_KHR_float_controls2},
+ {"SPV_INTEL_tensor_float32_conversion",
+ SPIRV::Extension::Extension::SPV_INTEL_tensor_float32_conversion}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 3c631ce..947b574 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -194,6 +194,42 @@ class SPIRVEmitIntrinsics
void useRoundingMode(ConstrainedFPIntrinsic *FPI, IRBuilder<> &B);
+ // Tries to walk the type accessed by the given GEP instruction.
+ // For each nested type access, one of the 2 callbacks is called:
+ // - OnLiteralIndexing when the index is a known constant value.
+ // Parameters:
+ // PointedType: the pointed type resulting of this indexing.
+ // If the parent type is an array, this is the index in the array.
+ // If the parent type is a struct, this is the field index.
+ // Index: index of the element in the parent type.
+ // - OnDynamnicIndexing when the index is a non-constant value.
+ // This callback is only called when indexing into an array.
+ // Parameters:
+ // ElementType: the type of the elements stored in the parent array.
+ // Offset: the Value* containing the byte offset into the array.
+ // Return true if an error occured during the walk, false otherwise.
+ bool walkLogicalAccessChain(
+ GetElementPtrInst &GEP,
+ const std::function<void(Type *PointedType, uint64_t Index)>
+ &OnLiteralIndexing,
+ const std::function<void(Type *ElementType, Value *Offset)>
+ &OnDynamicIndexing);
+
+ // Returns the type accessed using the given GEP instruction by relying
+ // on the GEP type.
+ // FIXME: GEP types are not supposed to be used to retrieve the pointed
+ // type. This must be fixed.
+ Type *getGEPType(GetElementPtrInst *GEP);
+
+ // Returns the type accessed using the given GEP instruction by walking
+ // the source type using the GEP indices.
+ // FIXME: without help from the frontend, this method cannot reliably retrieve
+ // the stored type, nor can robustly determine the depth of the type
+ // we are accessing.
+ Type *getGEPTypeLogical(GetElementPtrInst *GEP);
+
+ Instruction *buildLogicalAccessChainFromGEP(GetElementPtrInst &GEP);
+
public:
static char ID;
SPIRVEmitIntrinsics(SPIRVTargetMachine *TM = nullptr)
@@ -246,6 +282,17 @@ bool expectIgnoredInIRTranslation(const Instruction *I) {
}
}
+// Returns the source pointer from `I` ignoring intermediate ptrcast.
+Value *getPointerRoot(Value *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::spv_ptrcast) {
+ Value *V = II->getArgOperand(0);
+ return getPointerRoot(V);
+ }
+ }
+ return I;
+}
+
} // namespace
char SPIRVEmitIntrinsics::ID = 0;
@@ -555,7 +602,112 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy,
Ty = RefTy;
}
-Type *getGEPType(GetElementPtrInst *Ref) {
+bool SPIRVEmitIntrinsics::walkLogicalAccessChain(
+ GetElementPtrInst &GEP,
+ const std::function<void(Type *, uint64_t)> &OnLiteralIndexing,
+ const std::function<void(Type *, Value *)> &OnDynamicIndexing) {
+ // We only rewrite i8* GEP. Other should be left as-is.
+ // Valid i8* GEP must always have a single index.
+ assert(GEP.getSourceElementType() ==
+ IntegerType::getInt8Ty(CurrF->getContext()));
+ assert(GEP.getNumIndices() == 1);
+
+ auto &DL = CurrF->getDataLayout();
+ Value *Src = getPointerRoot(GEP.getPointerOperand());
+ Type *CurType = deduceElementType(Src, true);
+
+ Value *Operand = *GEP.idx_begin();
+ ConstantInt *CI = dyn_cast<ConstantInt>(Operand);
+ if (!CI) {
+ ArrayType *AT = dyn_cast<ArrayType>(CurType);
+ // Operand is not constant. Either we have an array and accept it, or we
+ // give up.
+ if (AT)
+ OnDynamicIndexing(AT->getElementType(), Operand);
+ return AT == nullptr;
+ }
+
+ assert(CI);
+ uint64_t Offset = CI->getZExtValue();
+
+ do {
+ if (ArrayType *AT = dyn_cast<ArrayType>(CurType)) {
+ uint32_t EltTypeSize = DL.getTypeSizeInBits(AT->getElementType()) / 8;
+ assert(Offset < AT->getNumElements() * EltTypeSize);
+ uint64_t Index = Offset / EltTypeSize;
+ Offset = Offset - (Index * EltTypeSize);
+ CurType = AT->getElementType();
+ OnLiteralIndexing(CurType, Index);
+ } else if (StructType *ST = dyn_cast<StructType>(CurType)) {
+ uint32_t StructSize = DL.getTypeSizeInBits(ST) / 8;
+ assert(Offset < StructSize);
+ (void)StructSize;
+ const auto &STL = DL.getStructLayout(ST);
+ unsigned Element = STL->getElementContainingOffset(Offset);
+ Offset -= STL->getElementOffset(Element);
+ CurType = ST->getElementType(Element);
+ OnLiteralIndexing(CurType, Element);
+ } else {
+ // Vector type indexing should not use GEP.
+ // So if we have an index left, something is wrong. Giving up.
+ return true;
+ }
+ } while (Offset > 0);
+
+ return false;
+}
+
+Instruction *
+SPIRVEmitIntrinsics::buildLogicalAccessChainFromGEP(GetElementPtrInst &GEP) {
+ auto &DL = CurrF->getDataLayout();
+ IRBuilder<> B(GEP.getParent());
+ B.SetInsertPoint(&GEP);
+
+ std::vector<Value *> Indices;
+ Indices.push_back(ConstantInt::get(
+ IntegerType::getInt32Ty(CurrF->getContext()), 0, /* Signed= */ false));
+ walkLogicalAccessChain(
+ GEP,
+ [&Indices, &B](Type *EltType, uint64_t Index) {
+ Indices.push_back(
+ ConstantInt::get(B.getInt64Ty(), Index, /* Signed= */ false));
+ },
+ [&Indices, &B, &DL](Type *EltType, Value *Offset) {
+ uint32_t EltTypeSize = DL.getTypeSizeInBits(EltType) / 8;
+ Value *Index = B.CreateUDiv(
+ Offset, ConstantInt::get(Offset->getType(), EltTypeSize,
+ /* Signed= */ false));
+ Indices.push_back(Index);
+ });
+
+ SmallVector<Type *, 2> Types = {GEP.getType(), GEP.getOperand(0)->getType()};
+ SmallVector<Value *, 4> Args;
+ Args.push_back(B.getInt1(GEP.isInBounds()));
+ Args.push_back(GEP.getOperand(0));
+ llvm::append_range(Args, Indices);
+ auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args});
+ replaceAllUsesWithAndErase(B, &GEP, NewI);
+ return NewI;
+}
+
+Type *SPIRVEmitIntrinsics::getGEPTypeLogical(GetElementPtrInst *GEP) {
+
+ Type *CurType = GEP->getResultElementType();
+
+ bool Interrupted = walkLogicalAccessChain(
+ *GEP, [&CurType](Type *EltType, uint64_t Index) { CurType = EltType; },
+ [&CurType](Type *EltType, Value *Index) { CurType = EltType; });
+
+ return Interrupted ? GEP->getResultElementType() : CurType;
+}
+
+Type *SPIRVEmitIntrinsics::getGEPType(GetElementPtrInst *Ref) {
+ if (Ref->getSourceElementType() ==
+ IntegerType::getInt8Ty(CurrF->getContext()) &&
+ TM->getSubtargetImpl()->isLogicalSPIRV()) {
+ return getGEPTypeLogical(Ref);
+ }
+
Type *Ty = nullptr;
// TODO: not sure if GetElementPtrInst::getTypeAtIndex() does anything
// useful here
@@ -1395,6 +1547,13 @@ Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) {
}
Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (I.getSourceElementType() == IntegerType::getInt8Ty(CurrF->getContext()) &&
+ TM->getSubtargetImpl()->isLogicalSPIRV()) {
+ Instruction *Result = buildLogicalAccessChainFromGEP(I);
+ if (Result)
+ return Result;
+ }
+
IRBuilder<> B(I.getParent());
B.SetInsertPoint(&I);
SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()};
@@ -1588,7 +1747,24 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I,
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
Value *Pointer = GEPI->getPointerOperand();
- Type *OpTy = GEPI->getSourceElementType();
+ Type *OpTy = nullptr;
+
+ // Knowing the accessed type is mandatory for logical SPIR-V. Sadly,
+ // the GEP source element type should not be used for this purpose, and
+ // the alternative type-scavenging method is not working.
+ // Physical SPIR-V can work around this, but not logical, hence still
+ // try to rely on the broken type scavenging for logical.
+ bool IsRewrittenGEP =
+ GEPI->getSourceElementType() == IntegerType::getInt8Ty(I->getContext());
+ if (IsRewrittenGEP && TM->getSubtargetImpl()->isLogicalSPIRV()) {
+ Value *Src = getPointerRoot(Pointer);
+ OpTy = GR->findDeducedElementType(Src);
+ }
+
+ // In all cases, fall back to the GEP type if type scavenging failed.
+ if (!OpTy)
+ OpTy = GEPI->getSourceElementType();
+
replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B);
if (isNestedPointer(OpTy))
insertTodoType(Pointer);
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 049ba02..f0b938d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -445,6 +445,9 @@ def OpCrossWorkgroupCastToPtrINTEL : UnOp<"OpCrossWorkgroupCastToPtrINTEL", 5938
def OpConvertFToBF16INTEL : UnOp<"OpConvertFToBF16INTEL", 6116>;
def OpConvertBF16ToFINTEL : UnOp<"OpConvertBF16ToFINTEL", 6117>;
+// SPV_INTEL_tensor_float32_conversion
+def OpRoundFToTF32INTEL : UnOp<"OpRoundFToTF32INTEL", 6426>;
+
// 3.42.12 Composite Instructions
def OpVectorExtractDynamic: Op<77, (outs ID:$res), (ins TYPE:$type, vID:$vec, ID:$idx),
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index ad976e5..0cd9d78 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1564,6 +1564,13 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::BFloat16ConversionINTEL);
}
break;
+ case SPIRV::OpRoundFToTF32INTEL:
+ if (ST.canUseExtension(
+ SPIRV::Extension::SPV_INTEL_tensor_float32_conversion)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_tensor_float32_conversion);
+ Reqs.addCapability(SPIRV::Capability::TensorFloat32RoundingINTEL);
+ }
+ break;
case SPIRV::OpVariableLengthArrayINTEL:
case SPIRV::OpSaveMemoryINTEL:
case SPIRV::OpRestoreMemoryINTEL:
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 548e9b7..614e83a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -320,6 +320,7 @@ defm SPV_INTEL_subgroup_matrix_multiply_accumulate : ExtensionOperand<121>;
defm SPV_INTEL_2d_block_io : ExtensionOperand<122>;
defm SPV_INTEL_int4 : ExtensionOperand<123>;
defm SPV_KHR_float_controls2 : ExtensionOperand<124>;
+defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -529,6 +530,7 @@ defm Subgroup2DBlockTransformINTEL : CapabilityOperand<6229, 0, 0, [SPV_INTEL_2d
defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>;
defm Int4TypeINTEL : CapabilityOperand<5112, 0, 0, [SPV_INTEL_int4], []>;
defm Int4CooperativeMatrixINTEL : CapabilityOperand<5114, 0, 0, [SPV_INTEL_int4], [Int4TypeINTEL, CooperativeMatrixKHR]>;
+defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index ba023af..bc60842 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -127,8 +127,7 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override {
@@ -253,21 +252,19 @@ MCFixupKindInfo SparcAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
void SparcAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
if (!IsResolved)
return;
Value = adjustFixupValue(Fixup.getKind(), Value);
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- unsigned Offset = Fixup.getOffset();
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = Endian == llvm::endianness::little ? i : (NumBytes - 1) - i;
- Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index d5f8492..d692cbe 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -113,8 +113,7 @@ public:
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
};
@@ -152,20 +151,18 @@ MCFixupKindInfo SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
}
void SystemZMCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
if (Target.getSpecifier())
IsResolved = false;
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
MCFixupKind Kind = Fixup.getKind();
if (mc::isRelocation(Kind))
return;
- unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
unsigned Size = (BitSize + 7) / 8;
- assert(Offset + Size <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + Size <= F.getSize() && "Invalid fixup offset!");
// Big-endian insertion of Size bytes.
Value = extractBitsForFixup(Kind, Value, Fixup, getContext());
@@ -173,7 +170,7 @@ void SystemZMCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
Value &= ((uint64_t)1 << BitSize) - 1;
unsigned ShiftValue = (Size * 8) - 8;
for (unsigned I = 0; I != Size; ++I) {
- Data[Offset + I] |= uint8_t(Value >> ShiftValue);
+ Data[I] |= uint8_t(Value >> ShiftValue);
ShiftValue -= 8;
}
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
index f987621..c1b9d9f 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -112,8 +112,7 @@ public:
}
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &,
- MutableArrayRef<char>, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *, uint64_t Value, bool IsResolved) override;
bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
const MCSubtargetInfo &STI) const override {
@@ -152,7 +151,7 @@ public:
} // end anonymous namespace
void VEAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target, MutableArrayRef<char> Data,
+ const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
switch (Fixup.getKind()) {
case VE::fixup_ve_tls_gd_hi32:
@@ -173,14 +172,14 @@ void VEAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
Value <<= Info.TargetOffset;
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the
// appropriate bitfields above.
for (unsigned i = 0; i != NumBytes; ++i) {
unsigned Idx = Endian == llvm::endianness::little ? i : (NumBytes - 1) - i;
- Data[Offset + Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
+ Data[Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 837fd8e..eecef31 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -39,7 +39,7 @@ public:
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value, bool) override;
+ uint8_t *Data, uint64_t Value, bool) override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
@@ -80,8 +80,7 @@ bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
void WebAssemblyAsmBackend::applyFixup(const MCFragment &F,
const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data,
+ const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
if (!IsResolved)
Asm->getWriter().recordRelocation(F, Fixup, Target, Value);
@@ -96,13 +95,13 @@ void WebAssemblyAsmBackend::applyFixup(const MCFragment &F,
// Shift the value into position.
Value <<= Info.TargetOffset;
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + NumBytes <= F.getSize() &&
+ "Invalid fixup offset!");
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
for (unsigned I = 0; I != NumBytes; ++I)
- Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
+ Data[I] |= uint8_t((Value >> (I * 8)) & 0xff);
}
std::unique_ptr<MCObjectTargetWriter>
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 7f9d474..1f02e56 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -174,8 +174,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
const MCSubtargetInfo &STI) const override;
@@ -676,9 +675,8 @@ std::optional<bool> X86AsmBackend::evaluateFixup(const MCFragment &,
}
void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
// Force relocation when there is a specifier. This might be too conservative
// - GAS doesn't emit a relocation for call local@plt; local:.
if (Target.getSpecifier())
@@ -690,7 +688,7 @@ void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
return;
unsigned Size = getFixupKindSize(Kind);
- assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
+ assert(Fixup.getOffset() + Size <= F.getSize() && "Invalid fixup offset!");
int64_t SignedValue = static_cast<int64_t>(Value);
if (IsResolved && Fixup.isPCRel()) {
@@ -710,7 +708,7 @@ void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
}
for (unsigned i = 0; i != Size; ++i)
- Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ Data[i] = uint8_t(Value >> (i * 8));
}
bool X86AsmBackend::mayNeedRelaxation(unsigned Opcode,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bbbb1d9..ce4c061 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23486,7 +23486,6 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
}
// Try to shrink i64 compares if the input has enough zero bits.
- // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)?
if (CmpVT == MVT::i64 && !isX86CCSigned(X86CC) &&
Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
DAG.MaskedValueIsZero(Op1, APInt::getHighBitsSet(64, 32)) &&
@@ -23496,6 +23495,16 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
}
+ // Try to shrink all i64 compares if the inputs are representable as signed
+ // i32.
+ if (CmpVT == MVT::i64 &&
+ Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
+ DAG.ComputeNumSignBits(Op1) > 32 && DAG.ComputeNumSignBits(Op0) > 32) {
+ CmpVT = MVT::i32;
+ Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
+ Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
+ }
+
// 0-x == y --> x+y == 0
// 0-x != y --> x+y != 0
if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
index 9167794..08936ad 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaAsmBackend.cpp
@@ -37,8 +37,7 @@ public:
std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
uint64_t &) override;
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) override;
+ uint8_t *Data, uint64_t Value, bool IsResolved) override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
@@ -153,9 +152,8 @@ std::optional<bool> XtensaAsmBackend::evaluateFixup(const MCFragment &F,
}
void XtensaAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
- const MCValue &Target,
- MutableArrayRef<char> Data, uint64_t Value,
- bool IsResolved) {
+ const MCValue &Target, uint8_t *Data,
+ uint64_t Value, bool IsResolved) {
maybeAddReloc(F, Fixup, Target, Value, IsResolved);
MCContext &Ctx = getContext();
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
@@ -168,11 +166,10 @@ void XtensaAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
if (!Value)
return; // Doesn't change encoding.
- unsigned Offset = Fixup.getOffset();
unsigned FullSize = getSize(Fixup.getKind());
for (unsigned i = 0; i != FullSize; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
}