author     Matt Arsenault <Matthew.Arsenault@amd.com>   2025-07-28 16:11:28 +0900
committer  Matt Arsenault <arsenm2@gmail.com>           2025-07-30 00:26:45 +0900
commit     2cef45d9731db40cc9297c5922f2b69c4d09ee62 (patch)
tree       e42da03a6879baad7cd25c19a6e85a7b8a188732
parent     042ffe97be46e596ee2b0347b5710295ba9f0e64 (diff)
AMDGPU: Figure out required AGPR count for inline asm (branch: users/arsenm/amdgpu/parse-num-regs-asm-attributor)
For now, just try to compute the minimum number of AGPRs required
to allocate the asm. Leave the attributor changes that turn this
into an integer value for later.
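
As a rough, hypothetical illustration of the counting (the IR below is not part of the patch): each virtual "a" constraint contributes ceil(type size in bits / 32) registers to the def or use count, the minimum demand is the larger of the two plus the highest physical a-register bound referenced, capped at 256.

  ; Hypothetical example: the "=a" def of <8 x i32> needs 8 AGPRs and the "a"
  ; uses need 4 + 2 = 6, so the asm requires max(8, 6) = 8 AGPRs.
  %v = call <8 x i32> asm sideeffect "; def $0, use $1 $2", "=a,a,a"(<4 x i32> poison, i64 poison)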
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp            59
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll  199
2 files changed, 251 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 59cc1df..1f5634a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1200,16 +1200,61 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
 }
 
-static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
-  for (const auto &CI : IA->ParseConstraints()) {
+/// Compute the minimum number of AGPRs required to allocate the inline asm.
+static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
+                                             const CallBase &Call) {
+  unsigned ArgNo = 0;
+  unsigned ResNo = 0;
+  unsigned AGPRDefCount = 0;
+  unsigned AGPRUseCount = 0;
+  unsigned MaxPhysReg = 0;
+  const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+
+  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+    Type *Ty = nullptr;
+    switch (CI.Type) {
+    case InlineAsm::isOutput: {
+      Ty = Call.getType();
+      if (auto *STy = dyn_cast<StructType>(Ty))
+        Ty = STy->getElementType(ResNo);
+      ++ResNo;
+      break;
+    }
+    case InlineAsm::isInput: {
+      Ty = Call.getArgOperand(ArgNo++)->getType();
+      break;
+    }
+    case InlineAsm::isLabel:
+      continue;
+    case InlineAsm::isClobber:
+      // Parse the physical register reference.
+      break;
+    }
+
     for (StringRef Code : CI.Codes) {
-      Code.consume_front("{");
-      if (Code.starts_with("a"))
-        return true;
+      if (Code.starts_with("a")) {
+        // Virtual register, compute number of registers based on the type.
+        //
+        // We ought to be going through TargetLowering to get the number of
+        // registers, but we should avoid the dependence on CodeGen here.
+        unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
+        if (CI.Type == InlineAsm::isOutput) {
+          AGPRDefCount += RegCount;
+          if (CI.isEarlyClobber)
+            AGPRUseCount += RegCount;
+        } else
+          AGPRUseCount += RegCount;
+      } else {
+        // Physical register reference
+        auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
+        if (Kind == 'a')
+          MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+      }
     }
   }
-  return false;
+  unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
+  return std::min(MaxVirtReg + MaxPhysReg, 256u);
 }
 
 // TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1251,7 +1296,7 @@ struct AAAMDGPUNoAGPR
       const Function *Callee = dyn_cast<Function>(CalleeOp);
       if (!Callee) {
         if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
-          return !inlineAsmUsesAGPRs(IA);
+          return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
         return false;
       }
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 181dab8..e502995 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -251,6 +251,205 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
   ret void
 }
 
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; use $0", "a"(ptr poison)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call ptr asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a4}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a256}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a255}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+  ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+  ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+  ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+  %def = call { <8 x i32>, <16 x i32> } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+  ret void
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.