about | summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/AMDGPU/udiv.ll
diff options
context:
space:
mode:
author: Jay Foad <jay.foad@amd.com> 2021-09-20 14:20:28 +0100
committer: Jay Foad <jay.foad@amd.com> 2021-09-21 11:57:45 +0100
commit: 86dcb592069f2d18a183fa1daa611029ae80ef4c (patch)
tree: 9460ff1a28669e1c8d7142bf675747f5d468c370 /llvm/test/CodeGen/AMDGPU/udiv.ll
parent: e83629280f32102cd93a216490188922843af06c (diff)
download: llvm-86dcb592069f2d18a183fa1daa611029ae80ef4c.zip
llvm-86dcb592069f2d18a183fa1daa611029ae80ef4c.tar.gz
llvm-86dcb592069f2d18a183fa1daa611029ae80ef4c.tar.bz2
[AMDGPU] Prefer v_fmac over v_fma only when no source modifiers are used
v_fmac with source modifiers forces VOP3 encoding. In that case it is strictly better to use the VOP3-only v_fma instead: because $dst and $src2 are not tied, it gives the register allocator more freedom and avoids a copy in some cases. This is the same strategy we already use for v_mad vs v_mac and v_fma_legacy vs v_fmac_legacy.

Differential Revision: https://reviews.llvm.org/D110070
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/udiv.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/udiv.ll 2
1 file changed, 1 insertion, 1 deletion
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index 149d7b4..ca6190e 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -187,7 +187,7 @@ define amdgpu_kernel void @test_udiv_3_mulhu(i32 %p) {
; GCN-LABEL: {{^}}fdiv_test_denormals
; VI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GFX1030: v_fmac_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; GFX1030: v_fma_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @fdiv_test_denormals(i8 addrspace(1)* nocapture readonly %arg) {
bb:
%tmp = load i8, i8 addrspace(1)* null, align 1