diff options
author | Jay Foad <jay.foad@amd.com> | 2021-09-20 14:20:28 +0100 |
---|---|---|
committer | Jay Foad <jay.foad@amd.com> | 2021-09-21 11:57:45 +0100 |
commit | 86dcb592069f2d18a183fa1daa611029ae80ef4c (patch) | |
tree | 9460ff1a28669e1c8d7142bf675747f5d468c370 /llvm/test/CodeGen/AMDGPU/udiv.ll | |
parent | e83629280f32102cd93a216490188922843af06c (diff) | |
download | llvm-86dcb592069f2d18a183fa1daa611029ae80ef4c.zip llvm-86dcb592069f2d18a183fa1daa611029ae80ef4c.tar.gz llvm-86dcb592069f2d18a183fa1daa611029ae80ef4c.tar.bz2 |
[AMDGPU] Prefer v_fmac over v_fma only when no source modifiers are used
When v_fmac is used with source modifiers it is forced into the VOP3
encoding anyway. In that case it is strictly better to use the VOP3-only
v_fma instead: its $dst and $src2 are not tied, which gives the register
allocator more freedom and avoids a copy in some cases.
This is the same strategy we already use for v_mad vs v_mac and
v_fma_legacy vs v_fmac_legacy.
Differential Revision: https://reviews.llvm.org/D110070
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/udiv.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/udiv.ll | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index 149d7b4..ca6190e 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -187,7 +187,7 @@ define amdgpu_kernel void @test_udiv_3_mulhu(i32 %p) {
 
 ; GCN-LABEL: {{^}}fdiv_test_denormals
 ; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GFX1030: v_fmac_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; GFX1030: v_fma_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readonly %arg) {
 bb:
   %tmp = load i8, i8 addrspace(1)* null, align 1