aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorSebastian Neubauer <sebastian.neubauer@amd.com>2020-01-29 10:04:36 +0100
committerSebastian Neubauer <sebastian.neubauer@amd.com>2020-02-10 09:04:23 +0100
commit8756869170e67019151bff0fc7657597f37fced2 (patch)
treed1cba76ee210a140535c953fe4a8a73ea7b910dc /llvm/lib
parentd2e434a46107b3f191c1dffddd52fc04a50b8460 (diff)
downloadllvm-8756869170e67019151bff0fc7657597f37fced2.zip
llvm-8756869170e67019151bff0fc7657597f37fced2.tar.gz
llvm-8756869170e67019151bff0fc7657597f37fced2.tar.bz2
[AMDGPU] Add a16 feature to gfx10
Based on D72931 This adds a new feature called A16 which is enabled for gfx10. gfx9 keeps the R128A16 feature so it can share all the instruction encodings with gfx7/8. Differential Revision: https://reviews.llvm.org/D73956
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td32
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrFormats.td2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td1
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h1
14 files changed, 102 insertions, 36 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 42b477e..1064803 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -360,7 +360,13 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
def FeatureR128A16 : SubtargetFeature<"r128-a16",
"HasR128A16",
"true",
- "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+ "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128"
+>;
+
+def FeatureGFX10A16 : SubtargetFeature<"a16",
+ "HasGFX10A16",
+ "true",
+ "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
>;
def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
@@ -682,7 +688,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
FeatureVOP3Literal, FeatureDPP8,
- FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+ FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
+ FeatureGFX10A16
]
>;
@@ -1094,6 +1101,9 @@ def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
AssemblerPredicate<"FeatureR128A16">;
+def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">,
+ AssemblerPredicate<"FeatureGFX10A16">;
+
def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 75c4fba..31c06ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -241,6 +241,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDPP(false),
HasDPP8(false),
HasR128A16(false),
+ HasGFX10A16(false),
HasNSAEncoding(false),
HasDLInsts(false),
HasDot1Insts(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6f3ca01..13fa915 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -342,6 +342,7 @@ protected:
bool HasDPP;
bool HasDPP8;
bool HasR128A16;
+ bool HasGFX10A16;
bool HasNSAEncoding;
bool HasDLInsts;
bool HasDot1Insts;
@@ -992,6 +993,10 @@ public:
return HasR128A16;
}
+ bool hasGFX10A16() const {
+ return HasGFX10A16;
+ }
+
bool hasOffset3fBug() const {
return HasOffset3fBug;
}
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ea70307..8c1f63e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -163,6 +163,7 @@ public:
ImmTyUNorm,
ImmTyDA,
ImmTyR128A16,
+ ImmTyA16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
@@ -315,6 +316,7 @@ public:
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
+ bool isGFX10A16() const { return isImmTy(ImmTyA16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -847,6 +849,7 @@ public:
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
case ImmTyR128A16: OS << "R128A16"; break;
+ case ImmTyA16: OS << "A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1157,6 +1160,10 @@ public:
return AMDGPU::hasPackedD16(getSTI());
}
+ bool hasGFX10A16() const {
+ return AMDGPU::hasGFX10A16(getSTI());
+ }
+
bool isSI() const {
return AMDGPU::isSI(getSTI());
}
@@ -4650,9 +4657,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
- if (Tok == "r128" && isGFX9())
+ if (Tok == "r128" && !hasMIMG_R128())
Error(S, "r128 modifier is not supported on this GPU");
- if (Tok == "a16" && !isGFX9() && !isGFX10())
+ if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
@@ -4672,6 +4679,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
return MatchOperand_ParseFail;
+ if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
+ ImmTy = AMDGPUOperand::ImmTyR128A16;
+
Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -5987,6 +5997,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
+ if (IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
if (!IsGFX10)
@@ -6096,7 +6108,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
{"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
- {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f65dc25..55146b5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -244,6 +244,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "r128");
}
+void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printNamedBit(MI, OpNo, O, "a16");
+}
+
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "lwe");
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index ba53003..83f99f1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,6 +86,8 @@ private:
raw_ostream &O);
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printGFX10A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printD16(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 4006a62..8a43942 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -238,9 +238,9 @@ class MIMG_NoSampler_gfx10<int op, string opcode,
: MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -251,9 +251,9 @@ class MIMG_NoSampler_nsa_gfx10<int op, string opcode,
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -331,9 +331,9 @@ class MIMG_Store_gfx10<int op, string opcode,
: MIMG_gfx10<op, (outs), dns> {
let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -345,9 +345,9 @@ class MIMG_Store_nsa_gfx10<int op, string opcode,
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -436,8 +436,8 @@ class MIMG_Atomic_gfx10<mimg op, string opcode,
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
- let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+ GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
}
class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
@@ -452,8 +452,8 @@ class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
}
multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
@@ -522,10 +522,10 @@ class MIMG_Sampler_gfx10<int op, string opcode,
: MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
- #"$dlc$glc$slc$r128$tfe$lwe"
+ #"$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -536,10 +536,10 @@ class MIMG_Sampler_nsa_gfx10<int op, string opcode,
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
- #"$dlc$glc$slc$r128$tfe$lwe"
+ #"$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 475a2b8..e60ad75 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5401,7 +5401,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const MVT VAddrScalarVT = VAddrVT.getScalarType();
if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
// Illegal to use a16 images
- if (!ST->hasFeature(AMDGPU::FeatureR128A16))
+ if (!ST->hasFeature(AMDGPU::FeatureR128A16) && !ST->hasFeature(AMDGPU::FeatureGFX10A16))
return Op;
IsA16 = true;
@@ -5546,10 +5546,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(DLC);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(IsA16 && // a16 or r128
+ Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
- Ops.push_back(TFE); // tfe
- Ops.push_back(LWE); // lwe
+ if (IsGFX10)
+ Ops.push_back(IsA16 ? True : False);
+ Ops.push_back(TFE);
+ Ops.push_back(LWE);
if (!IsGFX10)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 4dcbe92..a4d1178 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -303,7 +303,7 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
bits<3> dim;
bits<2> nsa;
bits<1> dlc;
- bits<1> a16 = 0; // TODO: this should be an operand
+ bits<1> a16;
let Inst{0} = op{7};
let Inst{2-1} = nsa;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a9800bb..73690ae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3742,11 +3742,34 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
+ bool IsA16 = false;
+ if (ST.hasR128A16()) {
+ const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128);
+ IsA16 = R128A16->getImm() != 0;
+ } else if (ST.hasGFX10A16()) {
+ const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16);
+ IsA16 = A16->getImm() != 0;
+ }
+
+ bool PackDerivatives = IsA16; // Either A16 or G16
bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
- unsigned AddrWords = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
- (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+ unsigned AddrWords = BaseOpcode->NumExtraArgs;
+ unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ if (IsA16)
+ AddrWords += (AddrComponents + 1) / 2;
+ else
+ AddrWords += AddrComponents;
+
+ if (BaseOpcode->Gradients) {
+ if (PackDerivatives)
+ // There are two gradients per coordinate, we pack them separately.
+ // For the 3d case, we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
+ AddrWords += (Dim->NumGradients / 2 + 1) / 2 * 2;
+ else
+ AddrWords += Dim->NumGradients;
+ }
unsigned VAddrWords;
if (IsNSA) {
@@ -3758,11 +3781,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
AddrWords = 16;
else if (AddrWords > 4)
AddrWords = 8;
- else if (AddrWords == 3 && VAddrWords == 4) {
- // CodeGen uses the V4 variant of instructions for three addresses,
- // because the selection DAG does not support non-power-of-two types.
+ else if (AddrWords == 4)
AddrWords = 4;
- }
+ else if (AddrWords == 3)
+ AddrWords = 3;
}
if (VAddrWords != AddrWords) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index bf6f029..559ede8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1090,6 +1090,7 @@ def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
+def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 806f8af..dd94a29 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -686,7 +686,8 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
// Check other optional immediate operands for equality.
unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
- AMDGPU::OpName::da, AMDGPU::OpName::r128};
+ AMDGPU::OpName::da, AMDGPU::OpName::r128,
+ AMDGPU::OpName::a16};
for (auto op : OperandsToMatch) {
int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 1188671..a5ed441 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -927,7 +927,11 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
}
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+ return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}
bool hasPackedD16(const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index da2f4dc..4cfb2ad 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -551,6 +551,7 @@ inline bool isKernel(CallingConv::ID CC) {
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool isSI(const MCSubtargetInfo &STI);