Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td                            |  12
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp     |   7
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp        |  14
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td                  | 140
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp   |   8
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp                   |  29
6 files changed, 127 insertions, 83 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 54d94b1..4fe194c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2366,6 +2366,18 @@ def isGFX8GFX9NotGFX90A :
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
+// Pre-90A GFX9s allow the NV bit in FLAT instructions.
+def isNVAllowedInFlat :
+ Predicate<"!Subtarget->hasGFX90AInsts() &&"
+ " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
+ AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
+
+// Neither GFX8 nor GFX90A+ allows the NV bit in FLAT instructions.
+def isNVNotAllowedInFlat :
+ Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
+ " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
+ AssemblerPredicate<(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
+
def isGFX90AOnly :
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
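
Note (illustrative, not part of the patch): the intent of the two predicates above is that the nv
cache-policy bit on FLAT instructions assembles only for pre-90A GFX9 targets. A minimal sketch of
the expected assembler behaviour, assuming the usual FLAT syntax:

    flat_load_dword v0, v[0:1] nv     ; accepted on gfx900 (pre-90A GFX9)
    flat_store_dword v[0:1], v2 nv    ; rejected on gfx803 (GFX8) and gfx90a
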
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 0a59132..fdff21b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1565,8 +1565,11 @@ void SplitPtrStructs::processConditionals() {
} else if (isa<SelectInst>(I)) {
if (MaybeRsrc) {
if (auto *RsrcInst = dyn_cast<Instruction>(Rsrc)) {
- ConditionalTemps.push_back(RsrcInst);
- RsrcInst->replaceAllUsesWith(*MaybeRsrc);
+ // Guard against conditionals that were already folded away.
+ if (RsrcInst != *MaybeRsrc) {
+ ConditionalTemps.push_back(RsrcInst);
+ RsrcInst->replaceAllUsesWith(*MaybeRsrc);
+ }
}
for (Value *V : Seen)
FoundRsrcs[V] = *MaybeRsrc;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 09338c5..2808c44 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1602,6 +1602,11 @@ public:
bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
+ bool isFlatInstAndNVAllowed(const MCInst &Inst) const {
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A();
+ }
+
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -5370,7 +5375,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
Error(S, "scale_offset is not supported on this GPU");
}
- if (CPol & CPol::NV) {
+ if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
@@ -7145,6 +7150,13 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
unsigned Enabled = 0, Seen = 0;
for (;;) {
SMLoc S = getLoc();
+
+ if (isGFX9() && trySkipId("nv")) {
+ Enabled |= CPol::NV;
+ Seen |= CPol::NV;
+ continue;
+ }
+
bool Disabling;
unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
if (!CPol)
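
A minimal usage sketch for the parsing and validation changes above (hypothetical inputs, not taken
from the patch's tests): assembling the lines below with -mcpu=gfx900 should succeed, while
-mcpu=gfx803 or -mcpu=gfx90a should report an error on the nv operand.

    flat_load_dword v0, v[0:1] glc nv       ; nv parsed as a cache-policy token on pre-90A GFX9
    flat_atomic_swap v0, v[0:1], v2 glc nv  ; also accepted on FLAT atomics there
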
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 8ea64d1..6ef2241 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
bits<7> saddr;
bits<10> vdst;
- bits<5> cpol;
+ bits<6> cpol;
// Only valid on gfx9
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2693,29 +2693,52 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}
+class FLAT_Real_vi_ex_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
+ FLAT_Real_vi <op, ps, has_sccb> {
+ let AssemblerPredicate = isNVNotAllowedInFlat;
+}
+
+class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
+ FLAT_Real_vi <op, ps, has_sccb> {
+ let AssemblerPredicate = isNVAllowedInFlat;
+ let Subtarget = SIEncodingFamily.GFX9;
+ let DecoderNamespace = "GFX9";
+ let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
+}
+
+multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
+ def _vi : FLAT_Real_vi_ex_gfx9<op, ps, has_sccb>;
+ def _gfx9 : FLAT_Real_gfx9<op, ps, has_sccb>;
+}
+
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
- def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+ defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+}
+
+multiclass FLAT_Real_AllAddr_vi_ex_gfx9<bits<7> op,
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+ def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+ def _SADDR_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
let AssemblerPredicate = isGFX940Plus;
- let DecoderNamespace = "GFX9";
+ let DecoderNamespace = "GFX940";
let Inst{13} = ps.sve;
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
- def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
- let AssemblerPredicate = isGFX8GFX9NotGFX940;
- let OtherPredicates = [isGFX8GFX9NotGFX940];
- }
- def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
- let DecoderNamespace = "GFX9";
+ let OtherPredicates = [isGFX8GFX9NotGFX940] in {
+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
}
+
+ defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+
let AssemblerPredicate = isGFX940Plus in {
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2728,11 +2751,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
- def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
+ defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
}
- def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
+ defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
}
@@ -2748,47 +2771,66 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
-def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
-def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
-def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
-def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
-def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
-def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
-def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
-def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
-
-def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
-def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
-def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
-def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
-def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
-def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
-def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
-def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
-
-def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
-def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
-def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
-def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
-def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
-def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
+defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
+defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
+defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
+defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
+defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
+defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
+defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
+defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
+
+defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
+defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
+defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
+defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
+defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
+defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
+defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
+defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
+
+defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
+defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
+defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
+defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
+defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
+defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
multiclass FLAT_Real_Atomics_vi <bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
defvar ps = !cast<FLAT_Pseudo>(NAME);
- def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
- def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
- def _RTN_agpr_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
+ defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+ defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+ def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
+}
+
+multiclass FLAT_Real_Atomics_vi_ex_gfx9 <bits<7> op,
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+ defvar ps = !cast<FLAT_Pseudo>(NAME);
+ def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+ def _RTN_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+
+ def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
}
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
FLAT_Real_AllAddr_vi<op, has_sccb> {
- def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
- def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+ defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
+ defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+
+ def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+ def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
+}
+
+multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9<bits<7> op,
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
+ FLAT_Real_AllAddr_vi_ex_gfx9<op, has_sccb> {
+ def _RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
+ def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
- def _RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
- def _SADDR_RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
+ def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+ def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
}
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
@@ -2950,10 +2992,10 @@ let AssemblerPredicate = isGFX940Plus in {
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>;
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>;
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_ex_gfx9<0x52>;
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>;
} // End AssemblerPredicate = isGFX940Plus
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 703ec0a..3e6f35d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -186,8 +186,12 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
- if (Imm & ~CPol::ALL_pregfx12)
- O << " /* unexpected cache policy bit */";
+ if (Imm & ~CPol::ALL_pregfx12) {
+ if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
+ O << " nv";
+ else
+ O << " /* unexpected cache policy bit */";
+ }
}
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
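
Note (illustrative): with the printer change above, a pre-90A GFX9 FLAT encoding with the NV bit set
should now disassemble with an explicit nv operand rather than the generic comment, e.g.:

    flat_load_dword v0, v[0:1] nv    ; the NV bit previously surfaced only as "/* unexpected cache policy bit */"
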
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6616b30..84984a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1129,40 +1129,11 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
return false;
- MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
return true;
}
- // TODO: Verify the following code handles subregisters correctly.
- // TODO: Handle extract of global reference
- if (UseOp.getSubReg())
- return false;
-
- if (!OpToFold.isReg())
- return false;
-
- Register UseReg = OpToFold.getReg();
- if (!UseReg.isVirtual())
- return false;
-
- // Maybe it is just a COPY of an immediate itself.
-
- // FIXME: Remove this handling. There is already special case folding of
- // immediate into copy in foldOperand. This is looking for the def of the
- // value the folding started from in the first place.
- MachineInstr *Def = MRI->getVRegDef(UseReg);
- if (Def && TII->isFoldableCopy(*Def)) {
- MachineOperand &DefOp = Def->getOperand(1);
- if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
- FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
- OpToFold.DefSubReg);
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
- return true;
- }
- }
-
return false;
}