aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MachinePipeliner.cpp
diff options
context:
space:
mode:
author: Valery Pykhtin <valery.pykhtin@amd.com> 2025-10-13 16:17:11 +0200
committer: GitHub <noreply@github.com> 2025-10-13 16:17:11 +0200
commit: 8823efe77dad40eaea63b539c4d3d1036587ceb0 (patch)
tree: a4282f10d23b679dbe43558f0406db5399935d5b /llvm/lib/CodeGen/MachinePipeliner.cpp
parent: a80a6b31a721c8d73445c458f1ea2312d9262750 (diff)
download: llvm-8823efe77dad40eaea63b539c4d3d1036587ceb0.zip
llvm-8823efe77dad40eaea63b539c4d3d1036587ceb0.tar.gz
llvm-8823efe77dad40eaea63b539c4d3d1036587ceb0.tar.bz2
[AMDGPU] Add debug printing of register usage at the point of maximum register pressure. (#161850)
This makes it possible to analyze "why are so many VGPRs used?". It prints all live registers at the point of maximum register pressure and, for each register, dumps its defs/uses. Currently it can be run before and after the scheduler, but it would be nice if it could be run in between any passes (not sure this is possible with the legacy pass manager). Requires a debug build or a compiler built with asserts. Running with debug info is highly recommended so that instructions have debug locations. Example output: ``` *** Register pressure info (VGPRs) for _ZN7ck_tile6ken.... *** Max pressure is 256 VGPRs at 41780e@BB.18 (LoopHdr BB.16, Depth 1): %9858:vreg_512_align2 = contract V_MFMA_F32_32X32X16_BF16_mac_vgprcd_e64 %10137:vreg_128_align2, %10141:vreg_128_align2, %9858:vreg_512_align2(tied-def 0), 0, 0, 0, implicit $mode, implicit $exec Live registers with single definition (123 VGPRs): %10126:VReg_128_Align2, LiveMask 00000000000000FF (4 VGPRs) def 41600r@BB.18 (LoopHdr BB.16, Depth 1): undef %10126.sub0_sub1:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 15232, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1314, !noalias !60, addrspace 3) def 41608r@BB.18 (LoopHdr BB.16, Depth 1): %10126.sub2_sub3:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 16320, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1315, !noalias !60, addrspace 3) use 41848r@BB.18 (LoopHdr BB.16, Depth 1): %9856:vreg_512_align2 = contract V_MFMA_F32_32X32X16_BF16_mac_vgprcd_e64 %10126:vreg_128_align2, %10138:vreg_128_align2, %9856:vreg_512_align2(tied-def 0), 0, 0, 0, implicit $mode, implicit $exec %10136:VReg_128_Align2, LiveMask 00000000000000FF (4 VGPRs) def 41264r@BB.18 (LoopHdr BB.16, Depth 1): undef %10136.sub0_sub1:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 2176, 0, implicit $exec :: (load (s64) from %ir.sunkaddr1294, !noalias !60, addrspace 3) def 41272r@BB.18 (LoopHdr BB.16, Depth 1): %10136.sub2_sub3:vreg_128_align2 = DS_READ_B64_TR_B16 %478:vgpr_32, 3264, 0, implicit $exec :: (load (s64) from 
%ir.sunkaddr1295, !noalias !60, addrspace 3) use 41788r@BB.18 (LoopHdr BB.16, Depth 1): %9858:vreg_512_align2 = contract V_MFMA_F32_32X32X16_BF16_mac_vgprcd_e64 %10136:vreg_128_align2, %10140:vreg_128_align2, %9858:vreg_512_align2(tied-def 0), 0, 0, 0, implicit $mode, implicit $exec %10129:VReg_128_Align2, LiveMask 00000000000000FF (4 VGPRs) ... Live registers with multiple definitions (133 VGPRs): %9856:VReg_512_Align2, LiveMask 00000000FFFFFFFF (16 VGPRs) def 16544r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def undef %9856.sub0_sub1:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16592r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub2_sub3:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16608r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub4_sub5:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16656r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub6_sub7:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16672r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub8_sub9:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16720r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub10_sub11:vreg_512_align2, $1:[reguse:VReg_64_Align2], 
%4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16736r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub12_sub13:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def 16784r@BB.8: INLINEASM &"v_pk_mul_f32 $0, $1, $2" [sideeffect] [isconvergent] [attdialect], $0:[regdef:VReg_64_Align2], def %9856.sub14_sub15:vreg_512_align2, $1:[reguse:VReg_64_Align2], %4069:vreg_64_align2, $2:[reguse:VReg_64_Align2], %10159:vreg_64_align2, !52 def use 41828r@BB.18 (LoopHdr BB.16, Depth 1): %9856:vreg_512_align2 = contract V_MFMA_F32_... ... ********** INTERVALS ********** ... ********** MACHINEINSTRS ********** # Machine code for function _ZN7ck_tile6kentr... ```
Diffstat (limited to 'llvm/lib/CodeGen/MachinePipeliner.cpp')
0 files changed, 0 insertions, 0 deletions