aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2025-09-02 14:22:57 -0700
committerGitHub <noreply@github.com>2025-09-02 14:22:57 -0700
commitcc9acb9df7f7e598a6c93eaa1f2b1405a6b73bad (patch)
tree9036021cf58fbb314549c207261dc5913e537994
parent81131f37455e9960ed22fa48d95e69f8a0149347 (diff)
downloadllvm-cc9acb9df7f7e598a6c93eaa1f2b1405a6b73bad.zip
llvm-cc9acb9df7f7e598a6c93eaa1f2b1405a6b73bad.tar.gz
llvm-cc9acb9df7f7e598a6c93eaa1f2b1405a6b73bad.tar.bz2
[AMDGPU] Add s_set_vgpr_msb gfx1250 instruction (#156524)
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td10
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp20
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td8
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s8
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_err.s10
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt6
7 files changed, 65 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1038797..0e0b84f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1210,6 +1210,12 @@ def Feature64BitLiterals : SubtargetFeature<"64-bit-literals",
"Can use 64-bit literals with single DWORD instructions"
>;
+def Feature1024AddressableVGPRs : SubtargetFeature<"1024-addressable-vgprs",
+ "Has1024AddressableVGPRs",
+ "true",
+ "Has 1024 addressable VGPRs"
+>;
+
def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt",
"HasWaitXcnt",
"true",
@@ -2033,6 +2039,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureCUStores,
FeatureAddressableLocalMemorySize327680,
FeatureCuMode,
+ Feature1024AddressableVGPRs,
Feature64BitLiterals,
FeatureLDSBankCount32,
FeatureDLInsts,
@@ -2841,6 +2848,9 @@ def HasBVHDualAndBVH8Insts : Predicate<"Subtarget->hasBVHDualAndBVH8Insts()">,
def Has64BitLiterals : Predicate<"Subtarget->has64BitLiterals()">,
AssemblerPredicate<(all_of Feature64BitLiterals)>;
+def Has1024AddressableVGPRs : Predicate<"Subtarget->has1024AddressableVGPRs()">,
+ AssemblerPredicate<(all_of Feature1024AddressableVGPRs)>;
+
def HasWaitXcnt : Predicate<"Subtarget->hasWaitXcnt()">,
AssemblerPredicate<(all_of FeatureWaitXcnt)>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a23c2fc..93083f2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1886,6 +1886,7 @@ private:
bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
const unsigned CPol);
bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
+ bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
unsigned getConstantBusLimit(unsigned Opcode) const;
@@ -5542,6 +5543,22 @@ bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
return true;
}
+/// Validate the immediate operand of s_set_vgpr_msb.
+///
+/// Any opcode other than S_SET_VGPR_MSB_gfx12 is accepted unchanged. For
+/// s_set_vgpr_msb the simm16 operand must lie in [0..255]; otherwise an error
+/// is reported at the start location of Operands[1] and false is returned.
+///
+/// \returns true if \p Inst passes this check, false if an error was emitted.
+bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
+                                         const OperandVector &Operands) {
+  if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
+    return true;
+
+  int Simm16Pos =
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
+  // Range-check the full 64-bit immediate. Narrowing it to 'unsigned' first
+  // would drop the upper 32 bits, so a value such as 0x100000000 would wrap
+  // to 0 and be accepted.
+  const int64_t Imm = Inst.getOperand(Simm16Pos).getImm();
+  if (Imm < 0 || Imm > 255) {
+    // NOTE(review): Operands[1] is presumably the immediate (with Operands[0]
+    // being the mnemonic token) - confirm against the parser's operand layout.
+    SMLoc Loc = Operands[1]->getStartLoc();
+    Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
+    return false;
+  }
+
+  return true;
+}
+
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
const OperandVector &Operands) {
unsigned Opc = Inst.getOpcode();
@@ -5706,6 +5723,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateTFE(Inst, Operands)) {
return false;
}
+ if (!validateSetVgprMSB(Inst, Operands)) {
+ return false;
+ }
if (!validateWMMA(Inst, Operands)) {
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fb12da5..4475c8d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -236,6 +236,7 @@ protected:
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;
bool Has64BitLiterals = false;
+ bool Has1024AddressableVGPRs = false;
bool HasBitOp3Insts = false;
bool HasTanhInsts = false;
bool HasTensorCvtLutInsts = false;
@@ -1437,6 +1438,8 @@ public:
bool hasAddPC64Inst() const { return GFX1250Insts; }
+ bool has1024AddressableVGPRs() const { return Has1024AddressableVGPRs; }
+
bool hasMinimum3Maximum3PKF16() const {
return HasMinimum3Maximum3PKF16;
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 3097409..dbe0b8c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1844,6 +1844,13 @@ let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in {
SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">;
} // End SubtargetPredicate = hasWaitXcnt, hasSideEffects = 1
+let SubtargetPredicate = Has1024AddressableVGPRs in {
+ def S_SET_VGPR_MSB : SOPP_Pseudo<"s_set_vgpr_msb" , (ins i16imm:$simm16), "$simm16"> {
+ let hasSideEffects = 1;
+ let Defs = [MODE]; // MODE is listed as an implicit def of this instruction.
+ }
+} // End SubtargetPredicate = Has1024AddressableVGPRs
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -2691,6 +2698,7 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
//===----------------------------------------------------------------------===//
// SOPP - GFX1250 only.
//===----------------------------------------------------------------------===//
+defm S_SET_VGPR_MSB : SOPP_Real_32_gfx12<0x006>;
defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>;
defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>;
defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
index bfc3544..4f7ca47 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
@@ -37,6 +37,14 @@ s_setprio_inc_wg 100
// GFX1250: [0x64,0x00,0xbe,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+s_set_vgpr_msb 10
+// GFX1250: [0x0a,0x00,0x86,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_set_vgpr_msb 255
+// GFX1250: [0xff,0x00,0x86,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
s_monitor_sleep 1
// GFX1250: s_monitor_sleep 1 ; encoding: [0x01,0x00,0x84,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_err.s b/llvm/test/MC/AMDGPU/gfx1250_err.s
index 676eb48..9d1131e 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_err.s
@@ -1,5 +1,15 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX1250-ERR --implicit-check-not=error: -strict-whitespace %s
+s_set_vgpr_msb -1
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: s_set_vgpr_msb accepts values in range [0..255]
+// GFX1250-ERR: s_set_vgpr_msb -1
+// GFX1250-ERR: ^
+
+s_set_vgpr_msb 256
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: s_set_vgpr_msb accepts values in range [0..255]
+// GFX1250-ERR: s_set_vgpr_msb 256
+// GFX1250-ERR: ^
+
s_load_b32 s4, s[2:3], 10 th:TH_LOAD_NT th:TH_LOAD_NT
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR: s_load_b32 s4, s[2:3], 10 th:TH_LOAD_NT th:TH_LOAD_NT
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
index af94fbc..a8627d6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
@@ -27,6 +27,12 @@
# GFX1250: s_setprio_inc_wg 0x64 ; encoding: [0x64,0x00,0xbe,0xbf]
0x64,0x00,0xbe,0xbf
+# GFX1250: s_set_vgpr_msb 10 ; encoding: [0x0a,0x00,0x86,0xbf]
+0x0a,0x00,0x86,0xbf
+
+# GFX1250: s_set_vgpr_msb 0xff ; encoding: [0xff,0x00,0x86,0xbf]
+0xff,0x00,0x86,0xbf
+
# GFX1250: s_monitor_sleep 0 ; encoding: [0x00,0x00,0x84,0xbf]
0x00,0x00,0x84,0xbf