diff options
author | Ana Mihajlovic <Ana.Mihajlovic@amd.com> | 2025-09-02 10:42:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-02 10:42:11 +0200 |
commit | c4885849adf0addf8c154bfcaf143d959ffda961 (patch) | |
tree | ba65b6ca5fe55a0fc6b6e3b593dcd94fea4deba1 | |
parent | f8a53b0b01591ff3e29c390957bff42ab56a55e0 (diff) | |
download | llvm-c4885849adf0addf8c154bfcaf143d959ffda961.zip llvm-c4885849adf0addf8c154bfcaf143d959ffda961.tar.gz llvm-c4885849adf0addf8c154bfcaf143d959ffda961.tar.bz2 |
[AMDGPU] Fix hw stage metadata setting for unsigned values (#154502)
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll | 26 |
4 files changed, 42 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 9e2f16e..1fff188 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1444,9 +1444,10 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, MD->setComputeRegisters(".dynamic_vgpr_en", true); } - MD->setHwStage(CC, ".lds_size", - (unsigned)(CurrentProgramInfo.LdsSize * - getLdsDwGranularity(ST) * sizeof(uint32_t))); + MD->updateHwStageMaximum( + CC, ".lds_size", + (unsigned)(CurrentProgramInfo.LdsSize * getLdsDwGranularity(ST) * + sizeof(uint32_t))); } // This is the equivalent of EmitProgramInfoSI above, but for when the OS type diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index fd6253d..a7a0e33 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -1061,6 +1061,17 @@ VersionTuple AMDGPUPALMetadata::getPALVersion() { return VersionTuple(getPALVersion(0), getPALVersion(1)); } +// Set the field in a given .hardware_stages entry to a maximum value +void AMDGPUPALMetadata::updateHwStageMaximum(unsigned CC, StringRef field, + unsigned Val) { + msgpack::MapDocNode HwStageFieldMapNode = getHwStage(CC); + auto &Node = HwStageFieldMapNode[field]; + if (Node.isEmpty()) + Node = Val; + else + Node = std::max<unsigned>(Node.getUInt(), Val); +} + // Set the field in a given .hardware_stages entry void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) { getHwStage(CC)[field] = Val; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h index 4830db5..e50150c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -156,6 +156,7 @@ public: unsigned getPALMinorVersion(); VersionTuple getPALVersion(); + void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val); void setHwStage(unsigned CC, StringRef field, unsigned Val); void setHwStage(unsigned CC, StringRef field, bool Val); void setHwStage(unsigned CC, StringRef field, msgpack::Type Type, diff --git a/llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll b/llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll new file mode 100644 index 0000000..270c17f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=PAL %s + +;test if zero lds_size of f2 doesn't overwrite f1 +@x = addrspace(3) global i32 poison + +; PAL: .hardware_stages: +; PAL: .lds_size: 0x200 +; PAL: .shader_functions: +; PAL: f1: +; PAL: .lds_size: 0x4 +; PAL: f2: +; PAL: .lds_size: 0 + +define amdgpu_gfx void @f1(i32 %val) { + store i32 %val, ptr addrspace(3) @x + ret void +} + +define amdgpu_gfx void @f2(i32 %a, ptr addrspace(1) %ptr) { + store i32 %a, ptr addrspace(1) %ptr + ret void +} + +!amdgpu.pal.metadata.msgpack = !{!8} +!8 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C3\AA.tgid_y_en\C3\AA.tgid_z_en\C3\AF.tidig_comp_cnt\00\B0.hardware_stages\81\A3.cs\8D\AF.checksum_value\00\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93 \01\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\10\00\00\06\CE\FF\FF\FF\FF\00\01\02\03\04\05\06\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\10\00\00\02\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\10\AB.vgpr_limit\CC\80\AF.wavefront_size \AF.wg_round_robin\C2\B7.internal_pipeline_hash\92\CF\F6\B5\A6D\E3\BE\9D\D6\CFF\\=l\09\AB\F0#\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\00\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CD\FF\FF\A5.type\A2Cs\B0.user_data_limit\07\A9.uses_cps\C3\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF-ua\DD\EA7\19\94\CF\80\16\9A\FC\9B\A6\1Dk\AD.llpc_version\A477.4\AEamdpal.version\92\03\00"} |