aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td28
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h66
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp205
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td16
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td22
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td9
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td143
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrPredicates.td36
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp21
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td68
16 files changed, 592 insertions, 54 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index dbe74b1..5700468 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2394,15 +2394,19 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
TII->isTRANS(MI)))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
- TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI))
- Result = true;
+ TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) {
+ // Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
+ // For our purposes, these shall not be classified as VALU as this results
+ // in unexpected behavior.
+ Result = !MI.mayLoadOrStore();
+ }
else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
TII->isSALU(MI))
- Result = true;
+ Result = !MI.mayLoadOrStore();
else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
TII->isMFMAorWMMA(MI))
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 64e34db..5f6d742 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -260,8 +260,12 @@ class NSAHelper {
}
class MIMGNSAHelper<int num_addrs,
- list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)>
- : NSAHelper<> {
+ list<RegisterOperand> addr_types_in=[]>
+ : NSAHelper<> {
+ list<RegisterOperand> addr_types =
+ !if(!empty(addr_types_in), !listsplat(VGPROp_32, num_addrs),
+ addr_types_in);
+
list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i);
let AddrIns = !dag(ins, addr_types, AddrAsmNames);
let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
@@ -358,7 +362,7 @@ class MIMG_gfx11<int op, dag outs, string dns = "">
// Base class for all NSA MIMG instructions.
// Note that 1-dword addresses always use non-NSA variants.
class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[],
+ list<RegisterOperand> addr_types=[],
RegisterOperand LastAddrRC = VGPROp_32>
: MIMG<outs, dns>, MIMGe_gfx11<op> {
let SubtargetPredicate = isGFX11Only;
@@ -378,7 +382,7 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
}
class VIMAGE_gfx12<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[]>
+ list<RegisterOperand> addr_types=[]>
: VIMAGE<outs, dns>, VIMAGEe<op> {
let SubtargetPredicate = isGFX12Plus;
let AssemblerPredicate = isGFX12Plus;
@@ -1521,12 +1525,12 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> {
int VAddrDwords = !srl(Size, 5);
int GFX11PlusNSAAddrs = !if(IsA16, 4, 5);
- RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
- list<RegisterClass> GFX11PlusAddrTypes =
- !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32],
- isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64],
- IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96],
- true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
+ RegisterOperand node_ptr_type = !if(Is64, VGPROp_64, VGPROp_32);
+ list<RegisterOperand> GFX11PlusAddrTypes =
+ !cond(isBVH8 : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_32],
+ isDual : [node_ptr_type, VGPROp_64, VGPROp_96, VGPROp_96, VGPROp_64],
+ IsA16 : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96],
+ true : [node_ptr_type, VGPROp_32, VGPROp_96, VGPROp_96, VGPROp_96]);
}
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterOperand AddrRC>
@@ -1552,7 +1556,7 @@ class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterOperand AddrRC>
}
class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
- list<RegisterClass> addr_types>
+ list<RegisterOperand> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11",
addr_types> {
let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
@@ -1561,7 +1565,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
bit isDual, bit isBVH8,
- list<RegisterClass> addr_types>
+ list<RegisterOperand> addr_types>
: VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8),
(outs VReg_320:$vdata, VReg_96:$ray_origin_out,
VReg_96:$ray_dir_out),
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ec5c5bb3..a44a247 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -899,7 +899,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (DestReg == AMDGPU::VCC) {
- if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 4b54231..8851a0f 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1659,6 +1659,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(
Operands, ErrorInfo, -1, (1 << 5) - 1,
"immediate must be non-zero in the range");
+ case Match_InvalidXSfmmVType: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return generateXSfmmVTypeError(ErrorLoc);
+ }
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return generateVTypeError(ErrorLoc);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 70b7c43..e75dfe3 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -142,6 +142,22 @@ enum {
ReadsPastVLShift = DestEEWShift + 2,
ReadsPastVLMask = 1ULL << ReadsPastVLShift,
+
+ // 0 -> Don't care about altfmt bit in VTYPE.
+ // 1 -> Is not altfmt.
+ // 2 -> Is altfmt(BF16).
+ AltFmtTypeShift = ReadsPastVLShift + 1,
+ AltFmtTypeMask = 3ULL << AltFmtTypeShift,
+
+ // XSfmmbase
+ HasTWidenOpShift = AltFmtTypeShift + 2,
+ HasTWidenOpMask = 1ULL << HasTWidenOpShift,
+
+ HasTMOpShift = HasTWidenOpShift + 1,
+ HasTMOpMask = 1ULL << HasTMOpShift,
+
+ HasTKOpShift = HasTMOpShift + 1,
+ HasTKOpMask = 1ULL << HasTKOpShift,
};
// Helper functions to read TSFlags.
@@ -183,6 +199,11 @@ static inline bool hasRoundModeOp(uint64_t TSFlags) {
return TSFlags & HasRoundModeOpMask;
}
+enum class AltFmtType { DontCare, NotAltFmt, AltFmt };
+static inline AltFmtType getAltFmtType(uint64_t TSFlags) {
+ return static_cast<AltFmtType>((TSFlags & AltFmtTypeMask) >> AltFmtTypeShift);
+}
+
/// \returns true if this instruction uses vxrm
static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; }
@@ -204,11 +225,47 @@ static inline bool readsPastVL(uint64_t TSFlags) {
return TSFlags & ReadsPastVLMask;
}
+// XSfmmbase
+static inline bool hasTWidenOp(uint64_t TSFlags) {
+ return TSFlags & HasTWidenOpMask;
+}
+
+static inline bool hasTMOp(uint64_t TSFlags) { return TSFlags & HasTMOpMask; }
+
+static inline bool hasTKOp(uint64_t TSFlags) { return TSFlags & HasTKOpMask; }
+
+static inline unsigned getTNOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasVLOp(TSFlags));
+ unsigned Offset = 3;
+ if (hasTKOp(TSFlags))
+ Offset = 4;
+ return Desc.getNumOperands() - Offset;
+}
+
+static inline unsigned getTMOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasTMOp(TSFlags));
+ if (hasTKOp(TSFlags))
+ return Desc.getNumOperands() - 5;
+ // vtzero.t
+ return Desc.getNumOperands() - 4;
+}
+
+static inline unsigned getTKOpNum(const MCInstrDesc &Desc) {
+ [[maybe_unused]] const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasTWidenOp(TSFlags) && hasTKOp(TSFlags));
+ return Desc.getNumOperands() - 3;
+}
+
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
// instructions with VL also have SEW.
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
+ // In Xsfmmbase, TN is an alias for VL, so here we use the same TSFlags bit.
+ if (hasTWidenOp(TSFlags))
+ return getTNOpNum(Desc);
unsigned Offset = 2;
if (hasVecPolicyOp(TSFlags))
Offset = 3;
@@ -226,7 +283,7 @@ static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
assert(hasSEWOp(TSFlags));
unsigned Offset = 1;
- if (hasVecPolicyOp(TSFlags))
+ if (hasVecPolicyOp(TSFlags) || hasTWidenOp(TSFlags))
Offset = 2;
return Desc.getNumOperands() - Offset;
}
@@ -243,6 +300,9 @@ static inline int getFRMOpNum(const MCInstrDesc &Desc) {
if (!hasRoundModeOp(TSFlags) || usesVXRM(TSFlags))
return -1;
+ if (hasTWidenOp(TSFlags) && hasTMOp(TSFlags))
+ return getTMOpNum(Desc) - 1;
+
// The operand order
// --------------------------------------
// | n-1 (if any) | n-2 | n-3 | n-4 |
@@ -385,7 +445,9 @@ enum OperandType : unsigned {
OPERAND_SEW_MASK,
// Vector rounding mode for VXRM or FRM.
OPERAND_VEC_RM,
- OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM,
+ // Vtype operand for XSfmm extension.
+ OPERAND_XSFMM_VTYPE,
+ OPERAND_LAST_RISCV_IMM = OPERAND_XSFMM_VTYPE,
// Operand is either a register or uimm5, this is used by V extension pseudo
// instructions to represent a value that be passed as AVL to either vsetvli
// or vsetivli.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index cf8d120..1b7cb9b 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -168,10 +168,13 @@ struct DemandedFields {
// If this is true, we demand that VTYPE is set to some legal state, i.e. that
// vill is unset.
bool VILL = false;
+ bool TWiden = false;
+ bool AltFmt = false;
// Return true if any part of VTYPE was used
bool usedVTYPE() const {
- return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
+ return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL ||
+ TWiden || AltFmt;
}
// Return true if any property of VL was used
@@ -187,6 +190,8 @@ struct DemandedFields {
TailPolicy = true;
MaskPolicy = true;
VILL = true;
+ TWiden = true;
+ AltFmt = true;
}
// Mark all VL properties as demanded
@@ -212,6 +217,8 @@ struct DemandedFields {
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
VILL |= B.VILL;
+ AltFmt |= B.AltFmt;
+ TWiden |= B.TWiden;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -258,7 +265,9 @@ struct DemandedFields {
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy << ", ";
- OS << "VILL=" << VILL;
+ OS << "VILL=" << VILL << ", ";
+ OS << "AltFmt=" << AltFmt << ", ";
+ OS << "TWiden=" << TWiden;
OS << "}";
}
#endif
@@ -328,6 +337,15 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
RISCVVType::isMaskAgnostic(NewVType))
return false;
+ if (Used.TWiden && (RISCVVType::hasXSfmmWiden(CurVType) !=
+ RISCVVType::hasXSfmmWiden(NewVType) ||
+ (RISCVVType::hasXSfmmWiden(CurVType) &&
+ RISCVVType::getXSfmmWiden(CurVType) !=
+ RISCVVType::getXSfmmWiden(NewVType))))
+ return false;
+ if (Used.AltFmt &&
+ RISCVVType::isAltFmt(CurVType) != RISCVVType::isAltFmt(NewVType))
+ return false;
return true;
}
@@ -479,6 +497,11 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.TailPolicy = false;
}
+ Res.AltFmt = RISCVII::getAltFmtType(MI.getDesc().TSFlags) !=
+ RISCVII::AltFmtType::DontCare;
+ Res.TWiden = RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI);
+
return Res;
}
@@ -510,6 +533,8 @@ class VSETVLIInfo {
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t SEWLMULRatioOnly : 1;
+ uint8_t AltFmt : 1;
+ uint8_t TWiden : 3;
public:
VSETVLIInfo()
@@ -586,6 +611,8 @@ public:
RISCVVType::VLMUL getVLMUL() const { return VLMul; }
bool getTailAgnostic() const { return TailAgnostic; }
bool getMaskAgnostic() const { return MaskAgnostic; }
+ bool getAltFmt() const { return AltFmt; }
+ unsigned getTWiden() const { return TWiden; }
bool hasNonZeroAVL(const LiveIntervals *LIS) const {
if (hasAVLImm())
@@ -647,21 +674,31 @@ public:
SEW = RISCVVType::getSEW(VType);
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+ AltFmt = RISCVVType::isAltFmt(VType);
+ TWiden =
+ RISCVVType::hasXSfmmWiden(VType) ? RISCVVType::getXSfmmWiden(VType) : 0;
}
- void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
+ void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA, bool Altfmt,
+ unsigned W) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
SEW = S;
TailAgnostic = TA;
MaskAgnostic = MA;
+ AltFmt = Altfmt;
+ TWiden = W;
}
+ void setAltFmt(bool AF) { AltFmt = AF; }
+
void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
+ if (TWiden != 0)
+ return RISCVVType::encodeXSfmmVType(SEW, TWiden, AltFmt);
return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
}
@@ -674,9 +711,9 @@ public:
"Can't compare VTYPE in unknown state");
assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
"Can't compare when only LMUL/SEW ratio is valid.");
- return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+ return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden) ==
std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
- Other.MaskAgnostic);
+ Other.MaskAgnostic, Other.AltFmt, Other.TWiden);
}
unsigned getSEWLMULRatio() const {
@@ -825,7 +862,9 @@ public:
<< "SEW=e" << (unsigned)SEW << ", "
<< "TailAgnostic=" << (bool)TailAgnostic << ", "
<< "MaskAgnostic=" << (bool)MaskAgnostic << ", "
- << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
+ << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << ", "
+ << "TWiden=" << (unsigned)TWiden << ", "
+ << "AltFmt=" << (bool)AltFmt << "}";
}
#endif
};
@@ -853,6 +892,11 @@ struct BlockData {
BlockData() = default;
};
+enum TKTMMode {
+ VSETTK = 0,
+ VSETTM = 1,
+};
+
class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
@@ -908,6 +952,7 @@ private:
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
+ bool insertVSETMTK(MachineBasicBlock &MBB, TKTMMode Mode) const;
};
} // end anonymous namespace
@@ -945,6 +990,18 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else if (RISCVInstrInfo::isXSfmmVectorConfigTNInstr(MI)) {
+ assert(MI.getOpcode() == RISCV::PseudoSF_VSETTNT ||
+ MI.getOpcode() == RISCV::PseudoSF_VSETTNTX0);
+ switch (MI.getOpcode()) {
+ case RISCV::PseudoSF_VSETTNTX0:
+ NewInfo.setAVLVLMAX();
+ break;
+ case RISCV::PseudoSF_VSETTNT:
+ Register ATNReg = MI.getOperand(1).getReg();
+ NewInfo.setAVLRegDef(getVNInfoFromReg(ATNReg, MI, LIS), ATNReg);
+ break;
+ }
} else {
assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLIX0);
@@ -1005,11 +1062,34 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+ bool AltFmt = RISCVII::getAltFmtType(TSFlags) == RISCVII::AltFmtType::AltFmt;
+ InstrInfo.setAltFmt(AltFmt);
+
unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
// A Log2SEW of 0 is an operation on mask registers only.
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+ if (RISCVII::hasTWidenOp(TSFlags)) {
+ const MachineOperand &TWidenOp =
+ MI.getOperand(MI.getNumExplicitOperands() - 1);
+ unsigned TWiden = TWidenOp.getImm();
+
+ InstrInfo.setAVLVLMAX();
+ if (RISCVII::hasVLOp(TSFlags)) {
+ const MachineOperand &TNOp =
+ MI.getOperand(RISCVII::getTNOpNum(MI.getDesc()));
+
+ if (TNOp.getReg().isVirtual())
+ InstrInfo.setAVLRegDef(getVNInfoFromReg(TNOp.getReg(), MI, LIS),
+ TNOp.getReg());
+ }
+
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt, TWiden);
+
+ return InstrInfo;
+ }
+
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isImm()) {
@@ -1045,7 +1125,9 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
}
#endif
- InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+ // TODO: Propagate the twiden from previous vtype for potential reuse.
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic, AltFmt,
+ /*TWiden*/ 0);
forwardVSETVLIAVL(InstrInfo);
@@ -1053,10 +1135,33 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
}
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertPt, DebugLoc DL,
- const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
-
+ MachineBasicBlock::iterator InsertPt,
+ DebugLoc DL, const VSETVLIInfo &Info,
+ const VSETVLIInfo &PrevInfo) {
++NumInsertedVSETVL;
+
+ if (Info.getTWiden()) {
+ if (Info.hasAVLVLMAX()) {
+ Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNTX0))
+ .addReg(DestReg, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE());
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->createAndComputeVirtRegInterval(DestReg);
+ }
+ } else {
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoSF_VSETTNT))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(Info.getAVLReg())
+ .addImm(Info.encodeVTYPE());
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MI);
+ }
+ return;
+ }
+
if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
@@ -1198,7 +1303,8 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
// be coalesced into another vsetvli since we won't demand any fields.
VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
NewInfo.setAVLImm(1);
- NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
+ NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true,
+ /*AltFmt*/ false, /*W*/ 0);
Info = NewInfo;
return;
}
@@ -1240,7 +1346,9 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
(Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
IncomingInfo.getTailAgnostic(),
(Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
- IncomingInfo.getMaskAgnostic());
+ IncomingInfo.getMaskAgnostic(),
+ (Demanded.AltFmt ? IncomingInfo : Info).getAltFmt(),
+ Demanded.TWiden ? IncomingInfo.getTWiden() : 0);
// If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
// the AVL.
@@ -1293,7 +1401,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
- isVectorCopy(ST->getRegisterInfo(), MI))
+ isVectorCopy(ST->getRegisterInfo(), MI) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI))
HadVectorOp = true;
transferAfter(Info, MI);
@@ -1675,6 +1784,12 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
};
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
+ // TODO: Support XSfmm.
+ if (RISCVII::hasTWidenOp(MI.getDesc().TSFlags) ||
+ RISCVInstrInfo::isXSfmmVectorConfigInstr(MI)) {
+ NextMI = nullptr;
+ continue;
+ }
if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
Used.doUnion(getDemanded(MI, ST));
@@ -1788,6 +1903,65 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
}
}
+bool RISCVInsertVSETVLI::insertVSETMTK(MachineBasicBlock &MBB,
+ TKTMMode Mode) const {
+
+ bool Changed = false;
+ for (auto &MI : MBB) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVInstrInfo::isXSfmmVectorConfigTMTKInstr(MI) ||
+ !RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasTWidenOp(TSFlags))
+ continue;
+
+ VSETVLIInfo CurrInfo = computeInfoForInstr(MI);
+
+ if (Mode == VSETTK && !RISCVII::hasTKOp(TSFlags))
+ continue;
+
+ if (Mode == VSETTM && !RISCVII::hasTMOp(TSFlags))
+ continue;
+
+ unsigned OpNum = 0;
+ unsigned Opcode = 0;
+ switch (Mode) {
+ case VSETTK:
+ OpNum = RISCVII::getTKOpNum(MI.getDesc());
+ Opcode = RISCV::PseudoSF_VSETTK;
+ break;
+ case VSETTM:
+ OpNum = RISCVII::getTMOpNum(MI.getDesc());
+ Opcode = RISCV::PseudoSF_VSETTM;
+ break;
+ }
+
+ assert(OpNum && Opcode && "Invalid OpNum or Opcode");
+
+ MachineOperand &Op = MI.getOperand(OpNum);
+
+ auto TmpMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(Op.getReg())
+ .addImm(Log2_32(CurrInfo.getSEW()))
+ .addImm(Log2_32(CurrInfo.getTWiden()) + 1);
+
+ Changed = true;
+ Register Reg = Op.getReg();
+ Op.setReg(Register());
+ Op.setIsKill(false);
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*TmpMI);
+ LiveInterval &LI = LIS->getInterval(Reg);
+
+ // Erase the AVL operand from the instruction.
+ LIS->shrinkToUses(&LI);
+ // TODO: Enable this once needVSETVLIPHI is supported.
+ // SmallVector<LiveInterval *> SplitLIs;
+ // LIS->splitSeparateComponents(LI, SplitLIs);
+ }
+ }
+ return Changed;
+}
+
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
@@ -1865,6 +2039,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
insertReadVL(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ insertVSETMTK(MBB, VSETTM);
+ insertVSETMTK(MBB, VSETTK);
+ }
+
BlockInfo.clear();
return HaveVectorOp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 2afd77a..5b06303 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -267,6 +267,22 @@ class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
// operands' VLs.
bit ReadsPastVL = 0;
let TSFlags{26} = ReadsPastVL;
+
+ // 0 -> Don't care about altfmt bit in VTYPE.
+ // 1 -> Is not altfmt.
+ // 2 -> Is altfmt(BF16).
+ bits<2> AltFmtType = 0;
+ let TSFlags{28-27} = AltFmtType;
+
+ // XSfmmbase
+ bit HasTWidenOp = 0;
+ let TSFlags{29} = HasTWidenOp;
+
+ bit HasTmOp = 0;
+ let TSFlags{30} = HasTmOp;
+
+ bit HasTkOp = 0;
+ let TSFlags{31} = HasTkOp;
}
class RVInst<dag outs, dag ins, string opcodestr, string argstr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 96e1078..ddb53a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3005,6 +3005,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
else
Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
break;
+ case RISCVOp::OPERAND_XSFMM_VTYPE:
+ Ok = RISCVVType::isValidXSfmmVType(Imm);
+ break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
@@ -3670,6 +3673,11 @@ std::string RISCVInstrInfo::createMIROperandComment(
RISCVVType::printVType(Imm, OS);
break;
}
+ case RISCVOp::OPERAND_XSFMM_VTYPE: {
+ unsigned Imm = Op.getImm();
+ RISCVVType::printXSfmmVType(Imm, OS);
+ break;
+ }
case RISCVOp::OPERAND_SEW:
case RISCVOp::OPERAND_SEW_MASK: {
unsigned Log2SEW = Op.getImm();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 298d35a..c1b23af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -128,6 +128,9 @@ defvar TAIL_AGNOSTIC = 1;
defvar TU_MU = 0;
defvar TA_MU = 1;
defvar TA_MA = 3;
+defvar DONT_CARE_ALTFMT = 0;
+defvar IS_NOT_ALTFMT = 1;
+defvar IS_ALTFMT = 2;
//===----------------------------------------------------------------------===//
// Utilities.
@@ -159,7 +162,8 @@ class PseudoToVInst<string PseudoInst> {
["_M4", ""],
["_M8", ""],
["_SE", ""],
- ["_RM", ""]
+ ["_RM", ""],
+ ["_ALT", ""]
];
string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst,
!subst(AffixSubst[0], AffixSubst[1], Acc));
@@ -6396,7 +6400,7 @@ let Defs = [VXSAT] in {
// 13. Vector Floating-Point Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -6565,7 +6569,7 @@ defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 14. Vector Reduction Operations
@@ -6593,7 +6597,7 @@ defm PseudoVWREDSUM : VPseudoVWRED_VS;
}
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
//===----------------------------------------------------------------------===//
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -6612,7 +6616,7 @@ defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM;
defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM;
}
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 15. Vector Mask Instructions
@@ -6703,7 +6707,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// 16.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach f = FPList in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
@@ -6718,7 +6722,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>;
}
}
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 16.3. Vector Slide Instructions
@@ -6730,10 +6734,10 @@ let Predicates = [HasVInstructions] in {
defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX;
} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
+let Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT in {
defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">;
defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF;
-} // Predicates = [HasVInstructionsAnyF]
+} // Predicates = [HasVInstructionsAnyF], AltFmtType = IS_NOT_ALTFMT
//===----------------------------------------------------------------------===//
// 16.4. Vector Register Gather Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 557d873..6a4119a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -438,8 +438,10 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
- defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ let AltFmtType = IS_NOT_ALTFMT in {
+ defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
+ defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ }
}
}
foreach m = MxListW in {
@@ -449,7 +451,8 @@ let Predicates = [HasVendorXSfvcp] in {
}
foreach f = FPListW in {
foreach m = f.MxList in
- defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
+ let AltFmtType = IS_NOT_ALTFMT in
+ defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
index a5ee701..5ad22e6b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSfmm.td
@@ -225,7 +225,7 @@ let Predicates = [HasVendorXSfmmbase] in {
def SF_VSETTM : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00001,
"sf.vsettm", "$rd, $rs1">;
def SF_VSETTK : SFInstSetSingle<(outs GPR:$rd), (ins GPR:$rs1), 0b00010,
- "sf.vsettk", "$rd, $rs1">;
+ "sf.vsettk", "$rd, $rs1">;
def SF_VTDISCARD : SFInstVtDiscard<"sf.vtdiscard">;
def SF_VTMV_V_T : SFInstTileMoveOp<0b010000, (outs VR:$vd), (ins GPR:$rs1),
@@ -277,3 +277,144 @@ let Uses = [FRM], mayRaiseFPException = true in {
} // Predicates = [HasVendorXSfmm32a8f]
} // DecoderNamespace = "XSfvector"
+
+class VPseudoSF_VTileLoad
+ : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileStore
+ : RISCVVPseudo<(outs), (ins GPR:$rs2, GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileMove_V_T
+ : RISCVVPseudo<(outs VRM8:$vd), (ins GPR:$rs1, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_VTileMove_T_V
+ : RISCVVPseudo<(outs), (ins GPR:$rs1, VRM8:$vs2, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1; // Tn
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_MatMul<RegisterClass mtd_class>
+ : RISCVVPseudo<(outs),
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, AVL:$atm, AVL:$atn,
+ AVL:$atk, ixlenimm:$sew, ixlenimm:$twiden)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasTmOp = 1;
+ let HasVLOp = 1; // Tn
+ let HasTkOp = 1;
+ let HasSEWOp = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+}
+
+class VPseudoSF_MatMul_FRM<RegisterClass mtd_class>
+ : RISCVVPseudo<(outs),
+ (ins mtd_class:$rd, VRM8:$vs2, VRM8:$vs1, ixlenimm:$frm,
+ AVL:$atm, AVL:$atn, AVL:$atk, ixlenimm:$sew,
+ ixlenimm:$twiden), []> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasTmOp = 1;
+ let HasVLOp = 1; // Tn
+ let HasTkOp = 1;
+ let HasSEWOp = 1;
+ let HasRoundModeOp = 1;
+ let hasPostISelHook = 1;
+ let HasTWidenOp = 1;
+ let hasSideEffects = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let Defs = [VL, VTYPE] in {
+ def PseudoSF_VSETTNT
+ : Pseudo<(outs GPR:$rd),
+ (ins GPRNoX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTNTX0
+ : Pseudo<(outs GPRNoX0:$rd),
+ (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTNTX0X0
+ : Pseudo<(outs GPRX0:$rd),
+ (ins GPRX0:$rs1, XSfmmVTypeOp:$vtypei), []>,
+ PseudoInstExpansion<(VSETVLI GPR:$rd, GPR:$rs1, VTypeIOp11:$vtypei)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+}
+
+let Defs = [VTYPE], Uses = [VTYPE], HasTWidenOp = 1, HasSEWOp = 1 in {
+ def PseudoSF_VSETTM
+ : Pseudo<(outs GPR:$rd),
+ (ins GPR:$rs1, ixlenimm:$log2sew, ixlenimm:$twiden), []>,
+ PseudoInstExpansion<(SF_VSETTM GPR:$rd, GPR:$rs1)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ def PseudoSF_VSETTK
+ : Pseudo<(outs GPR:$rd),
+ (ins GPR:$rs1, ixlenimm:$logwsew, ixlenimm:$twiden), []>,
+ PseudoInstExpansion<(SF_VSETTK GPR:$rd, GPR:$rs1)>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+}
+}
+
+foreach eew = [8, 16, 32, 64] in {
+ def PseudoSF_VLTE # eew : VPseudoSF_VTileLoad;
+ def PseudoSF_VSTE # eew : VPseudoSF_VTileStore;
+}
+
+def PseudoSF_VTMV_T_V : VPseudoSF_VTileMove_T_V;
+def PseudoSF_VTMV_V_T : VPseudoSF_VTileMove_V_T;
+
+foreach a = I8Encodes in
+ foreach b = I8Encodes in
+ def PseudoSF_MM_ # !toupper(a.Name) # _ # !toupper(b.Name)
+ : VPseudoSF_MatMul<TRM4>;
+
+let AltFmtType = IS_NOT_ALTFMT in
+ def PseudoSF_MM_F_F : VPseudoSF_MatMul_FRM<TRM2>;
+let AltFmtType = IS_ALTFMT in
+ def PseudoSF_MM_F_F_ALT : VPseudoSF_MatMul_FRM<TRM2>;
+
+foreach e1 = [5, 4] in
+ foreach e2 = [5, 4] in
+ def PseudoSF_MM_E # e1 # M # !sub(7, e1) # _E # e2 # M # !sub(7, e2)
+ : VPseudoSF_MatMul_FRM<TRM4>;
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+ let HasVLOp = 1, HasTmOp = 1, HasTWidenOp = 1, HasSEWOp = 1 in
+ def PseudoSF_VTZERO_T
+ : RISCVVPseudo<(outs),
+ (ins TR:$rd, AVL:$atm, AVL:$atn, ixlenimm:$sew,
+ ixlenimm:$twiden)>;
+ def PseudoSF_VTDISCARD : RISCVVPseudo<(outs), (ins), []>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 3658817..dcae977 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -78,7 +78,41 @@ def isVectorConfigInstr
PseudoVSETVLI,
PseudoVSETVLIX0,
PseudoVSETVLIX0X0,
- PseudoVSETIVLI
+ PseudoVSETIVLI,
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0
+ ]>>>;
+
+// Returns true if this is a PseudoSF_VSETTNT* instructions.
+def isXSfmmVectorConfigTNInstr
+ : TIIPredicate<"isXSfmmVectorConfigTNInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0
+ ]>>>;
+
+// Returns true if this is PseudoSF_VSETTM or PseudoSF_VSETTK.
+def isXSfmmVectorConfigTMTKInstr
+ : TIIPredicate<"isXSfmmVectorConfigTMTKInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTM,
+ PseudoSF_VSETTK
+ ]>>>;
+
+// Returns true if this is a XSfmm vector configuration instruction.
+def isXSfmmVectorConfigInstr
+ : TIIPredicate<"isXSfmmVectorConfigInstr",
+ MCReturnStatement<
+ CheckOpcode<[
+ PseudoSF_VSETTNT,
+ PseudoSF_VSETTNTX0,
+ PseudoSF_VSETTNTX0X0,
+ PseudoSF_VSETTM,
+ PseudoSF_VSETTK
]>>>;
// Return true if this is 'vsetvli x0, x0, vtype' which preserves
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 40b6416..e9f43b9 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -178,6 +178,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Shadow stack pointer.
markSuperRegs(Reserved, RISCV::SSP);
+ // XSfmmbase
+ for (MCPhysReg Reg = RISCV::T0; Reg <= RISCV::T15; Reg++)
+ markSuperRegs(Reserved, Reg);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6472334..47c24fc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMA, MVT::v8f16, Legal);
+ }
+
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
+ }
+
// Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
@@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
+ if (!Subtarget->hasFP16() || !VT.isVector())
+ return false;
+
+ EVT ScalarVT = VT.getScalarType();
+ if (!ScalarVT.isSimple())
+ return false;
+
+ return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
+}
+
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const {
// ISel process runs DAGCombiner after legalization; this step is called
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b33a853..472ec67 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -81,6 +81,8 @@ private:
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78b..0f6e1ca 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U :
defm EXTMUL_HIGH_U :
SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
+// Pattern for i32x4.dot_i16x8_s
+def : Pat<
+ (v4i32 (add
+ (wasm_shuffle
+ (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
+ (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
+ (i32 0), (i32 1), (i32 2), (i32 3),
+ (i32 8), (i32 9), (i32 10), (i32 11),
+ (i32 16), (i32 17), (i32 18), (i32 19),
+ (i32 24), (i32 25), (i32 26), (i32 27)),
+ (wasm_shuffle
+ (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
+ (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
+ (i32 4), (i32 5), (i32 6), (i32 7),
+ (i32 12), (i32 13), (i32 14), (i32 15),
+ (i32 20), (i32 21), (i32 22), (i32 23),
+ (i32 28), (i32 29), (i32 30), (i32 31)))
+ ),
+ (v4i32 (DOT v8i16:$lhs, v8i16:$rhs))
+>;
+
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//
@@ -1626,7 +1647,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
// Relaxed (Negative) Multiply-Add (madd/nmadd)
//===----------------------------------------------------------------------===//
-multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
+multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
defm MADD_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1640,16 +1662,46 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
}
-defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
+defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
+
+//===----------------------------------------------------------------------===//
+// FP16 (Negative) Multiply-Add (madd/nmadd)
+//===----------------------------------------------------------------------===//
+
+multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
+ defm MADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".madd\t$dst, $a, $b, $c",
+ vec.prefix#".madd", simdopA, reqs>;
+ defm NMADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".nmadd\t$dst, $a, $b, $c",
+ vec.prefix#".nmadd", simdopS, reqs>;
+}
+defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+
+// TODO: I think separate intrinsics should be introduced for these FP16 operations.
+def : Pat<(v8f16 (int_wasm_relaxed_madd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))),
+ (MADD_F16x8 V128:$a, V128:$b, V128:$c)>;
+def : Pat<(v8f16 (int_wasm_relaxed_nmadd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))),
+ (NMADD_F16x8 V128:$a, V128:$b, V128:$c)>;
//===----------------------------------------------------------------------===//
// Laneselect