author | Sebastian Neubauer <sebastian.neubauer@amd.com> | 2020-01-29 10:04:36 +0100
---|---|---
committer | Sebastian Neubauer <sebastian.neubauer@amd.com> | 2020-02-10 09:04:23 +0100
commit | 8756869170e67019151bff0fc7657597f37fced2 | (patch)
tree | d1cba76ee210a140535c953fe4a8a73ea7b910dc | /llvm/lib
parent | d2e434a46107b3f191c1dffddd52fc04a50b8460 | (diff)
[AMDGPU] Add a16 feature to gfx10
Based on D72931
This adds a new subtarget feature called A16, which is enabled for gfx10.
gfx9 keeps the R128A16 feature so that it can share all the instruction
encodings with gfx7/8.
Differential Revision: https://reviews.llvm.org/D73956
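
As a rough illustration of the split described above (a sketch only, not code from this patch; `SubtargetView`, `A16Operand`, and `queryA16` are hypothetical names, while `hasR128A16()`/`hasGFX10A16()` mirror the subtarget accessors the change introduces), a consumer could decide where the a16 bit lives like this:

```cpp
// Minimal sketch (not LLVM API): decide which A16 flavor a target provides.
// SubtargetView and queryA16 are hypothetical; hasR128A16()/hasGFX10A16()
// mirror the subtarget accessors this patch adds.
#include <cstdio>

struct SubtargetView {
  bool HasR128A16 = false;   // gfx9: a16 is aliased with the r128 bit
  bool HasGFX10A16 = false;  // gfx10: dedicated a16 bit in the MIMG encoding
  bool hasR128A16() const { return HasR128A16; }
  bool hasGFX10A16() const { return HasGFX10A16; }
};

enum class A16Operand { None, R128Alias, Dedicated };

// Report where the a16 flag lives for this target, if anywhere.
A16Operand queryA16(const SubtargetView &ST) {
  if (ST.hasR128A16())
    return A16Operand::R128Alias;  // gfx9 path: reuse the r128 operand
  if (ST.hasGFX10A16())
    return A16Operand::Dedicated;  // gfx10 path: separate a16 operand
  return A16Operand::None;         // older targets: no 16-bit addressing
}

int main() {
  SubtargetView Gfx10;
  Gfx10.HasGFX10A16 = true;
  std::printf("gfx10 uses dedicated a16: %d\n",
              queryA16(Gfx10) == A16Operand::Dedicated);
}
```

The design point is that gfx9 keeps reporting A16 through the r128-aliased feature so its MIMG encodings stay identical to gfx7/8, while gfx10 reports it through the new dedicated feature and operand.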
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 14
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 18
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 32
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 38
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 3
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 6
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1
14 files changed, 102 insertions, 36 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 42b477e..1064803 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -360,7 +360,13 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
 def FeatureR128A16 : SubtargetFeature<"r128-a16",
   "HasR128A16",
   "true",
-  "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+  "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128"
+>;
+
+def FeatureGFX10A16 : SubtargetFeature<"a16",
+  "HasGFX10A16",
+  "true",
+  "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
 >;
 
 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
@@ -682,7 +688,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
   FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
   FeatureVOP3Literal, FeatureDPP8,
-  FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+  FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
+  FeatureGFX10A16
  ]
 >;
@@ -1094,6 +1101,9 @@ def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
   AssemblerPredicate<"FeatureR128A16">;
 
+def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">,
+  AssemblerPredicate<"FeatureGFX10A16">;
+
 def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 75c4fba..31c06ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -241,6 +241,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasDPP(false),
     HasDPP8(false),
     HasR128A16(false),
+    HasGFX10A16(false),
     HasNSAEncoding(false),
     HasDLInsts(false),
     HasDot1Insts(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6f3ca01..13fa915 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -342,6 +342,7 @@ protected:
   bool HasDPP;
   bool HasDPP8;
   bool HasR128A16;
+  bool HasGFX10A16;
   bool HasNSAEncoding;
   bool HasDLInsts;
   bool HasDot1Insts;
@@ -992,6 +993,10 @@ public:
     return HasR128A16;
   }
 
+  bool hasGFX10A16() const {
+    return HasGFX10A16;
+  }
+
   bool hasOffset3fBug() const {
     return HasOffset3fBug;
   }
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ea70307..8c1f63e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -163,6 +163,7 @@ public:
     ImmTyUNorm,
     ImmTyDA,
     ImmTyR128A16,
+    ImmTyA16,
     ImmTyLWE,
     ImmTyExpTgt,
     ImmTyExpCompr,
@@ -315,6 +316,7 @@ public:
   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
   bool isDA() const { return isImmTy(ImmTyDA); }
   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
+  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
   bool isLWE() const { return isImmTy(ImmTyLWE); }
   bool isOff() const { return isImmTy(ImmTyOff); }
   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -847,6 +849,7 @@ public:
     case ImmTyUNorm: OS << "UNorm"; break;
     case ImmTyDA: OS << "DA"; break;
     case ImmTyR128A16: OS << "R128A16"; break;
+    case ImmTyA16: OS << "A16"; break;
     case ImmTyLWE: OS << "LWE"; break;
     case ImmTyOff: OS << "Off"; break;
     case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1157,6 +1160,10 @@ public:
     return AMDGPU::hasPackedD16(getSTI());
   }
 
+  bool hasGFX10A16() const {
+    return AMDGPU::hasGFX10A16(getSTI());
+  }
+
   bool isSI() const {
     return AMDGPU::isSI(getSTI());
   }
@@ -4650,9 +4657,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
   case AsmToken::Identifier: {
     StringRef Tok = Parser.getTok().getString();
     if (Tok == Name) {
-      if (Tok == "r128" && isGFX9())
+      if (Tok == "r128" && !hasMIMG_R128())
         Error(S, "r128 modifier is not supported on this GPU");
-      if (Tok == "a16" && !isGFX9() && !isGFX10())
+      if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
         Error(S, "a16 modifier is not supported on this GPU");
       Bit = 1;
       Parser.Lex();
@@ -4672,6 +4679,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
     return MatchOperand_ParseFail;
 
+  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
+    ImmTy = AMDGPUOperand::ImmTyR128A16;
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -5987,6 +5997,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   if (!IsGFX10)
@@ -6096,7 +6108,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
   {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
   {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
-  {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+  {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
   {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
   {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
   {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f65dc25..55146b5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -244,6 +244,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
   printNamedBit(MI, OpNo, O, "r128");
 }
 
+void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
+                                      const MCSubtargetInfo &STI, raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "a16");
+}
+
 void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
   printNamedBit(MI, OpNo, O, "lwe");
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index ba53003..83f99f1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,6 +86,8 @@ private:
                raw_ostream &O);
   void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O);
+  void printGFX10A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                     raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
   void printD16(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 4006a62..8a43942 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -238,9 +238,9 @@ class MIMG_NoSampler_gfx10<int op, string opcode,
   : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
   let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -251,9 +251,9 @@ class MIMG_NoSampler_nsa_gfx10<int op, string opcode,
   let InOperandList = !con(AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -331,9 +331,9 @@ class MIMG_Store_gfx10<int op, string opcode,
   : MIMG_gfx10<op, (outs), dns> {
   let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
                                 DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                                GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -345,9 +345,9 @@ class MIMG_Store_nsa_gfx10<int op, string opcode,
                            AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -436,8 +436,8 @@ class MIMG_Atomic_gfx10<mimg op, string opcode,
   let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
                            DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                           GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
-  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+                           GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
+  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
 }
 
 class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
@@ -452,8 +452,8 @@ class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
                            AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
 }
 
 multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
@@ -522,10 +522,10 @@ class MIMG_Sampler_gfx10<int op, string opcode,
   : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
   let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
                                 DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                                GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
   let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
-                      #"$dlc$glc$slc$r128$tfe$lwe"
+                      #"$dlc$glc$slc$r128$a16$tfe$lwe"
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
@@ -536,10 +536,10 @@ class MIMG_Sampler_nsa_gfx10<int op, string opcode,
   let InOperandList = !con(AddrIns,
                            (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
   let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
-                      #"$dlc$glc$slc$r128$tfe$lwe"
+                      #"$dlc$glc$slc$r128$a16$tfe$lwe"
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 475a2b8..e60ad75 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5401,7 +5401,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
       const MVT VAddrScalarVT = VAddrVT.getScalarType();
       if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
         // Illegal to use a16 images
-        if (!ST->hasFeature(AMDGPU::FeatureR128A16))
+        if (!ST->hasFeature(AMDGPU::FeatureR128A16) && !ST->hasFeature(AMDGPU::FeatureGFX10A16))
           return Op;
 
         IsA16 = true;
@@ -5546,10 +5546,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     Ops.push_back(DLC);
   Ops.push_back(GLC);
   Ops.push_back(SLC);
-  Ops.push_back(IsA16 &&  // a16 or r128
+  Ops.push_back(IsA16 &&  // r128, a16 for gfx9
                 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
-  Ops.push_back(TFE); // tfe
-  Ops.push_back(LWE); // lwe
+  if (IsGFX10)
+    Ops.push_back(IsA16 ? True : False);
+  Ops.push_back(TFE);
+  Ops.push_back(LWE);
   if (!IsGFX10)
     Ops.push_back(DimInfo->DA ? True : False);
   if (BaseOpcode->HasD16)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 4dcbe92..a4d1178 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -303,7 +303,7 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
   bits<3> dim;
   bits<2> nsa;
   bits<1> dlc;
-  bits<1> a16 = 0; // TODO: this should be an operand
+  bits<1> a16;
 
   let Inst{0} = op{7};
   let Inst{2-1} = nsa;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a9800bb..73690ae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3742,11 +3742,34 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       return false;
     }
 
+    bool IsA16 = false;
+    if (ST.hasR128A16()) {
+      const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128);
+      IsA16 = R128A16->getImm() != 0;
+    } else if (ST.hasGFX10A16()) {
+      const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16);
+      IsA16 = A16->getImm() != 0;
+    }
+
+    bool PackDerivatives = IsA16; // Either A16 or G16
     bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
-    unsigned AddrWords = BaseOpcode->NumExtraArgs +
-                         (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
-                         (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
-                         (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+    unsigned AddrWords = BaseOpcode->NumExtraArgs;
+    unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+                              (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+    if (IsA16)
+      AddrWords += (AddrComponents + 1) / 2;
+    else
+      AddrWords += AddrComponents;
+
+    if (BaseOpcode->Gradients) {
+      if (PackDerivatives)
+        // There are two gradients per coordinate, we pack them separately.
+        // For the 3d case, we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
+        AddrWords += (Dim->NumGradients / 2 + 1) / 2 * 2;
+      else
+        AddrWords += Dim->NumGradients;
+    }
 
     unsigned VAddrWords;
     if (IsNSA) {
@@ -3758,11 +3781,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
         AddrWords = 16;
       else if (AddrWords > 4)
         AddrWords = 8;
-      else if (AddrWords == 3 && VAddrWords == 4) {
-        // CodeGen uses the V4 variant of instructions for three addresses,
-        // because the selection DAG does not support non-power-of-two types.
+      else if (AddrWords == 4)
         AddrWords = 4;
-      }
+      else if (AddrWords == 3)
+        AddrWords = 3;
     }
 
     if (VAddrWords != AddrWords) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index bf6f029..559ede8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1090,6 +1090,7 @@ def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
+def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 806f8af..dd94a29 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -686,7 +686,8 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
   // Check other optional immediate operands for equality.
   unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
                                 AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
-                                AMDGPU::OpName::da,  AMDGPU::OpName::r128};
+                                AMDGPU::OpName::da,  AMDGPU::OpName::r128,
+                                AMDGPU::OpName::a16};
 
   for (auto op : OperandsToMatch) {
     int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 1188671..a5ed441 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -927,7 +927,11 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
 }
 
 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
-  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
 }
 
 bool hasPackedD16(const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index da2f4dc..4cfb2ad 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -551,6 +551,7 @@ inline bool isKernel(CallingConv::ID CC) {
 bool hasXNACK(const MCSubtargetInfo &STI);
 bool hasSRAMECC(const MCSubtargetInfo &STI);
 bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasGFX10A16(const MCSubtargetInfo &STI);
 bool hasPackedD16(const MCSubtargetInfo &STI);
 bool isSI(const MCSubtargetInfo &STI);
```
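
The subtlest part of the diff is the address-word accounting added to `SIInstrInfo::verifyInstruction`: with A16, 16-bit address components are packed two per dword, but the two derivative directions are packed separately, so for a 3D image each of d/du and d/dv rounds up to two dwords on its own. The following standalone sketch reproduces that arithmetic for illustration only; `DimInfoSketch` and `addrWords` are made-up names, and the example values assume an `image_sample_d` on a 3D image with no extra arguments.

```cpp
// Standalone sketch of the AddrWords computation mirrored from the verifier
// change; the struct fields are illustrative stand-ins for MIMG dim info.
#include <cstdio>

struct DimInfoSketch {
  unsigned NumCoords;     // e.g. 3 for a 3D image
  unsigned NumGradients;  // two per coordinate, e.g. 6 for 3D
};

unsigned addrWords(const DimInfoSketch &Dim, bool HasLodOrClampOrMip,
                   unsigned NumExtraArgs, bool IsA16) {
  unsigned AddrWords = NumExtraArgs;
  unsigned AddrComponents = Dim.NumCoords + (HasLodOrClampOrMip ? 1 : 0);
  // With A16, two 16-bit components share one 32-bit VGPR.
  AddrWords += IsA16 ? (AddrComponents + 1) / 2 : AddrComponents;
  if (Dim.NumGradients) {
    // d/du and d/dv are packed separately, so each direction is rounded up
    // on its own: 3D gradients take 4 packed dwords rather than 3.
    AddrWords += IsA16 ? (Dim.NumGradients / 2 + 1) / 2 * 2
                       : Dim.NumGradients;
  }
  return AddrWords;
}

int main() {
  DimInfoSketch Dim3D{3, 6};
  // image_sample_d on a 3D image: 3 coordinates + 6 gradients.
  std::printf("32-bit addresses: %u dwords\n", addrWords(Dim3D, false, 0, false)); // 9
  std::printf("16-bit (a16):     %u dwords\n", addrWords(Dim3D, false, 0, true));  // 6
  return 0;
}
```

For the 3D gradient case this yields 4 packed dwords instead of 3, which is exactly the `(Dim->NumGradients / 2 + 1) / 2 * 2` rounding in the patch.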