aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/bf16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bf16.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/bf16.ll903
1 files changed, 190 insertions, 713 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index ebb77c1..9865883 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -16968,7 +16968,7 @@ define bfloat @v_fabs_bf16(bfloat %a) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -16977,7 +16977,7 @@ define bfloat @v_fabs_bf16(bfloat %a) {
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -17163,9 +17163,9 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -17174,9 +17174,9 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
+; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
@@ -17280,8 +17280,6 @@ define bfloat @v_minnum_bf16(bfloat %a, bfloat %b) {
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -17293,8 +17291,6 @@ define bfloat @v_minnum_bf16(bfloat %a, bfloat %b) {
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -17375,10 +17371,6 @@ define <2 x bfloat> @v_minnum_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v1, v1, v3
; GCN-NEXT: v_min_f32_e32 v0, v0, v2
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -17396,10 +17388,6 @@ define <2 x bfloat> @v_minnum_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -17522,12 +17510,6 @@ define <3 x bfloat> @v_minnum_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v2, v2, v5
; GCN-NEXT: v_min_f32_e32 v1, v1, v4
; GCN-NEXT: v_min_f32_e32 v0, v0, v3
@@ -17551,12 +17533,6 @@ define <3 x bfloat> @v_minnum_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
@@ -17688,14 +17664,6 @@ define <4 x bfloat> @v_minnum_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v3, v3, v7
; GCN-NEXT: v_min_f32_e32 v2, v2, v6
; GCN-NEXT: v_min_f32_e32 v1, v1, v5
@@ -17725,14 +17693,6 @@ define <4 x bfloat> @v_minnum_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
@@ -17951,22 +17911,6 @@ define <8 x bfloat> @v_minnum_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v7, v7, v15
; GCN-NEXT: v_min_f32_e32 v6, v6, v14
; GCN-NEXT: v_min_f32_e32 v5, v5, v13
@@ -18020,22 +17964,6 @@ define <8 x bfloat> @v_minnum_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v7, v7, v15
; GFX7-NEXT: v_min_f32_e32 v6, v6, v14
; GFX7-NEXT: v_min_f32_e32 v5, v5, v13
@@ -18382,71 +18310,51 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GCN-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
-; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GCN-NEXT: v_min_f32_e32 v14, v14, v30
; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GCN-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
-; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
-; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: v_min_f32_e32 v13, v13, v29
; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GCN-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
; GCN-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
-; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: v_min_f32_e32 v12, v12, v28
; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GCN-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
; GCN-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
-; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
-; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: v_min_f32_e32 v11, v11, v27
; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GCN-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
; GCN-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
-; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: v_min_f32_e32 v10, v10, v26
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GCN-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
; GCN-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: v_min_f32_e32 v9, v9, v25
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GCN-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
; GCN-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
-; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: v_min_f32_e32 v8, v8, v24
; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GCN-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
; GCN-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: v_min_f32_e32 v7, v7, v23
; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GCN-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
; GCN-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: v_min_f32_e32 v6, v6, v22
; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GCN-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
; GCN-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
-; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: v_min_f32_e32 v5, v5, v21
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
@@ -18461,8 +18369,6 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GCN-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
; GCN-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GCN-NEXT: v_mul_f32_e32 v20, 1.0, v20
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GCN-NEXT: v_min_f32_e32 v4, v4, v20
; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s32
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
@@ -18474,21 +18380,10 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GCN-NEXT: v_mul_f32_e32 v19, 1.0, v19
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v18, 1.0, v18
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_min_f32_e32 v3, v3, v19
; GCN-NEXT: v_min_f32_e32 v2, v2, v18
; GCN-NEXT: v_min_f32_e32 v1, v1, v17
; GCN-NEXT: v_min_f32_e32 v0, v0, v16
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v20
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
@@ -18503,8 +18398,9 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
; GCN-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v20
; GCN-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GCN-NEXT: v_min_f32_e32 v15, v15, v16
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
@@ -18513,14 +18409,12 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-LABEL: v_minnum_v16bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GFX7-NEXT: v_min_f32_e32 v9, v9, v25
-; GFX7-NEXT: buffer_load_dword v25, off, s[0:3], s32
+; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
+; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
+; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
+; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX7-NEXT: v_min_f32_e32 v6, v6, v22
+; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s32
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
@@ -18531,13 +18425,13 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
+; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
+; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
+; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
@@ -18560,13 +18454,13 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
+; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
+; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
-; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
@@ -18579,48 +18473,14 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
-; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
-; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
-; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
-; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
-; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
-; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
-; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
-; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
-; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
-; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v14, v14, v30
; GFX7-NEXT: v_min_f32_e32 v13, v13, v29
; GFX7-NEXT: v_min_f32_e32 v12, v12, v28
; GFX7-NEXT: v_min_f32_e32 v11, v11, v27
; GFX7-NEXT: v_min_f32_e32 v10, v10, v26
-; GFX7-NEXT: v_min_f32_e32 v15, v15, v25
+; GFX7-NEXT: v_min_f32_e32 v9, v9, v25
; GFX7-NEXT: v_min_f32_e32 v8, v8, v24
; GFX7-NEXT: v_min_f32_e32 v7, v7, v23
-; GFX7-NEXT: v_min_f32_e32 v6, v6, v22
; GFX7-NEXT: v_min_f32_e32 v5, v5, v21
; GFX7-NEXT: v_min_f32_e32 v4, v4, v20
; GFX7-NEXT: v_min_f32_e32 v3, v3, v19
@@ -18634,6 +18494,10 @@ define <16 x bfloat> @v_minnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
+; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
+; GFX7-NEXT: v_min_f32_e32 v15, v15, v22
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
@@ -19267,287 +19131,223 @@ define <32 x bfloat> @v_minnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) {
; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
; GCN-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
-; GCN-NEXT: v_mul_f32_e32 v31, 1.0, v31
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:124
; GCN-NEXT: v_min_f32_e32 v31, v31, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GCN-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
-; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:120
; GCN-NEXT: v_min_f32_e32 v30, v30, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GCN-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
-; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:116
; GCN-NEXT: v_min_f32_e32 v29, v29, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:116
; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GCN-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
-; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:112
; GCN-NEXT: v_min_f32_e32 v28, v28, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GCN-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
-; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:108
; GCN-NEXT: v_min_f32_e32 v27, v27, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:108
; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GCN-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
-; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:104
; GCN-NEXT: v_min_f32_e32 v26, v26, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GCN-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:100
; GCN-NEXT: v_min_f32_e32 v25, v25, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:100
; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GCN-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
-; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:96
; GCN-NEXT: v_min_f32_e32 v24, v24, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GCN-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
-; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
; GCN-NEXT: v_min_f32_e32 v23, v23, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:92
; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GCN-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
-; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:88
; GCN-NEXT: v_min_f32_e32 v22, v22, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GCN-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
-; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:84
; GCN-NEXT: v_min_f32_e32 v21, v21, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:84
; GCN-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GCN-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
-; GCN-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:80
; GCN-NEXT: v_min_f32_e32 v20, v20, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
; GCN-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GCN-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
-; GCN-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:76
; GCN-NEXT: v_min_f32_e32 v19, v19, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:76
; GCN-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GCN-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
-; GCN-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:72
; GCN-NEXT: v_min_f32_e32 v18, v18, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
; GCN-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GCN-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
-; GCN-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:68
; GCN-NEXT: v_min_f32_e32 v17, v17, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GCN-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64
; GCN-NEXT: v_min_f32_e32 v16, v16, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
; GCN-NEXT: v_min_f32_e32 v15, v15, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60
; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
; GCN-NEXT: v_min_f32_e32 v14, v14, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
-; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:52
; GCN-NEXT: v_min_f32_e32 v13, v13, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:48
; GCN-NEXT: v_min_f32_e32 v12, v12, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
-; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:44
; GCN-NEXT: v_min_f32_e32 v11, v11, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:44
; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:40
; GCN-NEXT: v_min_f32_e32 v10, v10, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:36
; GCN-NEXT: v_min_f32_e32 v9, v9, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:36
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:32
; GCN-NEXT: v_min_f32_e32 v8, v8, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
; GCN-NEXT: v_min_f32_e32 v7, v7, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28
; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:24
; GCN-NEXT: v_min_f32_e32 v6, v6, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
; GCN-NEXT: v_min_f32_e32 v5, v5, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20
; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GCN-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:16
; GCN-NEXT: v_min_f32_e32 v4, v4, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
; GCN-NEXT: v_min_f32_e32 v3, v3, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GCN-NEXT: v_min_f32_e32 v2, v2, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4
; GCN-NEXT: v_min_f32_e32 v1, v1, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GCN-NEXT: v_min_f32_e32 v0, v0, v32
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
@@ -19590,322 +19390,258 @@ define <32 x bfloat> @v_minnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) {
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
-; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GFX7-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
-; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GFX7-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
-; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
-; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
-; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
-; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
-; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
-; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
-; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
-; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GFX7-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
-; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GFX7-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
-; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
-; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
-; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
-; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
-; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
-; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
-; GFX7-NEXT: v_min_f32_e32 v31, v31, v32
-; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
+; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX7-NEXT: v_min_f32_e32 v31, v31, v32
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
+; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
; GFX7-NEXT: v_min_f32_e32 v30, v30, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
; GFX7-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v29, v29, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:116
; GFX7-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v28, v28, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
; GFX7-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v27, v27, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:108
; GFX7-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v26, v26, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v25, v25, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:100
; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v24, v24, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v23, v23, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:92
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v22, v22, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v21, v21, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:84
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v20, v20, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v19, v19, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:76
; GFX7-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v18, v18, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
; GFX7-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v17, v17, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v16, v16, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v15, v15, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60
; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v14, v14, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX7-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v13, v13, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
; GFX7-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v12, v12, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
; GFX7-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v11, v11, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:44
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v10, v10, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v9, v9, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:36
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v8, v8, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v7, v7, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v6, v6, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v5, v5, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v4, v4, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v3, v3, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v2, v2, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v1, v1, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v0, v0, v32
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -21097,8 +20833,6 @@ define bfloat @v_maxnum_bf16(bfloat %a, bfloat %b) {
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -21110,8 +20844,6 @@ define bfloat @v_maxnum_bf16(bfloat %a, bfloat %b) {
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -21192,10 +20924,6 @@ define <2 x bfloat> @v_maxnum_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v1, v1, v3
; GCN-NEXT: v_max_f32_e32 v0, v0, v2
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -21213,10 +20941,6 @@ define <2 x bfloat> @v_maxnum_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -21339,12 +21063,6 @@ define <3 x bfloat> @v_maxnum_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v2, v2, v5
; GCN-NEXT: v_max_f32_e32 v1, v1, v4
; GCN-NEXT: v_max_f32_e32 v0, v0, v3
@@ -21368,12 +21086,6 @@ define <3 x bfloat> @v_maxnum_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_max_f32_e32 v2, v2, v5
; GFX7-NEXT: v_max_f32_e32 v1, v1, v4
; GFX7-NEXT: v_max_f32_e32 v0, v0, v3
@@ -21505,14 +21217,6 @@ define <4 x bfloat> @v_maxnum_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v3, v3, v7
; GCN-NEXT: v_max_f32_e32 v2, v2, v6
; GCN-NEXT: v_max_f32_e32 v1, v1, v5
@@ -21542,14 +21246,6 @@ define <4 x bfloat> @v_maxnum_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
@@ -21768,22 +21464,6 @@ define <8 x bfloat> @v_maxnum_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v7, v7, v15
; GCN-NEXT: v_max_f32_e32 v6, v6, v14
; GCN-NEXT: v_max_f32_e32 v5, v5, v13
@@ -21837,22 +21517,6 @@ define <8 x bfloat> @v_maxnum_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_max_f32_e32 v7, v7, v15
; GFX7-NEXT: v_max_f32_e32 v6, v6, v14
; GFX7-NEXT: v_max_f32_e32 v5, v5, v13
@@ -22199,71 +21863,51 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GCN-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
-; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GCN-NEXT: v_max_f32_e32 v14, v14, v30
; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GCN-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
-; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
-; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: v_max_f32_e32 v13, v13, v29
; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GCN-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
; GCN-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
-; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: v_max_f32_e32 v12, v12, v28
; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GCN-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
; GCN-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
-; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
-; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: v_max_f32_e32 v11, v11, v27
; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GCN-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
; GCN-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
-; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: v_max_f32_e32 v10, v10, v26
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GCN-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
; GCN-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: v_max_f32_e32 v9, v9, v25
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GCN-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
; GCN-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
-; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: v_max_f32_e32 v8, v8, v24
; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GCN-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
; GCN-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: v_max_f32_e32 v7, v7, v23
; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GCN-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
; GCN-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: v_max_f32_e32 v6, v6, v22
; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GCN-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
; GCN-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
-; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: v_max_f32_e32 v5, v5, v21
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
@@ -22278,8 +21922,6 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GCN-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
; GCN-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GCN-NEXT: v_mul_f32_e32 v20, 1.0, v20
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GCN-NEXT: v_max_f32_e32 v4, v4, v20
; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s32
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
@@ -22291,21 +21933,10 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GCN-NEXT: v_mul_f32_e32 v19, 1.0, v19
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GCN-NEXT: v_mul_f32_e32 v18, 1.0, v18
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GCN-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_max_f32_e32 v3, v3, v19
; GCN-NEXT: v_max_f32_e32 v2, v2, v18
; GCN-NEXT: v_max_f32_e32 v1, v1, v17
; GCN-NEXT: v_max_f32_e32 v0, v0, v16
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v20
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
@@ -22320,8 +21951,9 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GCN-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
; GCN-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v20
; GCN-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GCN-NEXT: v_max_f32_e32 v15, v15, v16
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
@@ -22330,14 +21962,12 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-LABEL: v_maxnum_v16bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GFX7-NEXT: v_max_f32_e32 v9, v9, v25
-; GFX7-NEXT: buffer_load_dword v25, off, s[0:3], s32
+; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
+; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
+; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
+; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX7-NEXT: v_max_f32_e32 v6, v6, v22
+; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s32
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
@@ -22348,13 +21978,13 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
+; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
+; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
+; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
@@ -22377,13 +22007,13 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
+; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
+; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
-; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
@@ -22396,48 +22026,14 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
-; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
-; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
-; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
-; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
-; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
-; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
-; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
-; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
-; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
-; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
-; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
-; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
-; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
-; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
-; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
-; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_max_f32_e32 v14, v14, v30
; GFX7-NEXT: v_max_f32_e32 v13, v13, v29
; GFX7-NEXT: v_max_f32_e32 v12, v12, v28
; GFX7-NEXT: v_max_f32_e32 v11, v11, v27
; GFX7-NEXT: v_max_f32_e32 v10, v10, v26
-; GFX7-NEXT: v_max_f32_e32 v15, v15, v25
+; GFX7-NEXT: v_max_f32_e32 v9, v9, v25
; GFX7-NEXT: v_max_f32_e32 v8, v8, v24
; GFX7-NEXT: v_max_f32_e32 v7, v7, v23
-; GFX7-NEXT: v_max_f32_e32 v6, v6, v22
; GFX7-NEXT: v_max_f32_e32 v5, v5, v21
; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
; GFX7-NEXT: v_max_f32_e32 v3, v3, v19
@@ -22451,6 +22047,10 @@ define <16 x bfloat> @v_maxnum_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
+; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
+; GFX7-NEXT: v_max_f32_e32 v15, v15, v22
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
@@ -23084,287 +22684,223 @@ define <32 x bfloat> @v_maxnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) {
; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
; GCN-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
-; GCN-NEXT: v_mul_f32_e32 v31, 1.0, v31
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:124
; GCN-NEXT: v_max_f32_e32 v31, v31, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GCN-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
-; GCN-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:120
; GCN-NEXT: v_max_f32_e32 v30, v30, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GCN-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
-; GCN-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:116
; GCN-NEXT: v_max_f32_e32 v29, v29, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:116
; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GCN-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
-; GCN-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:112
; GCN-NEXT: v_max_f32_e32 v28, v28, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GCN-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
-; GCN-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:108
; GCN-NEXT: v_max_f32_e32 v27, v27, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:108
; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GCN-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
-; GCN-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:104
; GCN-NEXT: v_max_f32_e32 v26, v26, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GCN-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GCN-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:100
; GCN-NEXT: v_max_f32_e32 v25, v25, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:100
; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GCN-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
-; GCN-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:96
; GCN-NEXT: v_max_f32_e32 v24, v24, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GCN-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
-; GCN-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:92
; GCN-NEXT: v_max_f32_e32 v23, v23, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:92
; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GCN-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
-; GCN-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:88
; GCN-NEXT: v_max_f32_e32 v22, v22, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GCN-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
-; GCN-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:84
; GCN-NEXT: v_max_f32_e32 v21, v21, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:84
; GCN-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GCN-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
-; GCN-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:80
; GCN-NEXT: v_max_f32_e32 v20, v20, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
; GCN-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GCN-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
-; GCN-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:76
; GCN-NEXT: v_max_f32_e32 v19, v19, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:76
; GCN-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GCN-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
-; GCN-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:72
; GCN-NEXT: v_max_f32_e32 v18, v18, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
; GCN-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GCN-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
-; GCN-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:68
; GCN-NEXT: v_max_f32_e32 v17, v17, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GCN-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; GCN-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64
; GCN-NEXT: v_max_f32_e32 v16, v16, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GCN-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60
; GCN-NEXT: v_max_f32_e32 v15, v15, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60
; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GCN-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56
; GCN-NEXT: v_max_f32_e32 v14, v14, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
-; GCN-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:52
; GCN-NEXT: v_max_f32_e32 v13, v13, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GCN-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:48
; GCN-NEXT: v_max_f32_e32 v12, v12, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
-; GCN-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:44
; GCN-NEXT: v_max_f32_e32 v11, v11, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:44
; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GCN-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:40
; GCN-NEXT: v_max_f32_e32 v10, v10, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:36
; GCN-NEXT: v_max_f32_e32 v9, v9, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:36
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:32
; GCN-NEXT: v_max_f32_e32 v8, v8, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28
; GCN-NEXT: v_max_f32_e32 v7, v7, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28
; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GCN-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:24
; GCN-NEXT: v_max_f32_e32 v6, v6, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
-; GCN-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20
; GCN-NEXT: v_max_f32_e32 v5, v5, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20
; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GCN-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GCN-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:16
; GCN-NEXT: v_max_f32_e32 v4, v4, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
-; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
; GCN-NEXT: v_max_f32_e32 v3, v3, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
-; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GCN-NEXT: v_max_f32_e32 v2, v2, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4
; GCN-NEXT: v_max_f32_e32 v1, v1, v32
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v33
; GCN-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GCN-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GCN-NEXT: v_max_f32_e32 v0, v0, v32
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
@@ -23407,322 +22943,258 @@ define <32 x bfloat> @v_maxnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) {
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:128
; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
-; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GFX7-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
-; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GFX7-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
-; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
-; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
-; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
-; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
-; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
-; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
-; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
-; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
-; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GFX7-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
-; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GFX7-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
-; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
-; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
-; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
-; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
-; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
-; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
-; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
-; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
-; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
-; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
-; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
-; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
-; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
-; GFX7-NEXT: v_max_f32_e32 v31, v31, v32
-; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
-; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
-; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
+; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX7-NEXT: v_max_f32_e32 v31, v31, v32
+; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
+; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v31
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
+; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
; GFX7-NEXT: v_max_f32_e32 v30, v30, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:120
; GFX7-NEXT: v_and_b32_e32 v30, 0xffff0000, v30
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v29, v29, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:116
; GFX7-NEXT: v_and_b32_e32 v29, 0xffff0000, v29
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v28, v28, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:112
; GFX7-NEXT: v_and_b32_e32 v28, 0xffff0000, v28
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v27, v27, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:108
; GFX7-NEXT: v_and_b32_e32 v27, 0xffff0000, v27
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v26, v26, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:104
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v26
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v25, v25, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:100
; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v25
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v24, v24, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:96
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v24
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v23, v23, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:92
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v23
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v22, v22, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:88
; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v22
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v21, v21, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:84
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v21
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v20, v20, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v20
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v19, v19, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:76
; GFX7-NEXT: v_and_b32_e32 v19, 0xffff0000, v19
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v18, v18, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72
; GFX7-NEXT: v_and_b32_e32 v18, 0xffff0000, v18
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v17, v17, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:68
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v16, v16, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v15, v15, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60
; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v14, v14, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56
; GFX7-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v13, v13, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52
; GFX7-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v12, v12, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:48
; GFX7-NEXT: v_and_b32_e32 v12, 0xffff0000, v12
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v11, v11, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:44
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v11
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v10, v10, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v10
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v9, v9, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:36
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v9
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v8, v8, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v7, v7, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v6, v6, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v6
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v5, v5, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v4, v4, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v3, v3, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v2, v2, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v1, v1, v32
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v32
-; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_max_f32_e32 v0, v0, v32
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -25176,7 +24648,6 @@ define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
; GCN-NEXT: v_frexp_exp_i32_f32_e32 v2, v0
; GCN-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -26818,11 +26289,17 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
; GCN-LABEL: v_canonicalize_bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_canonicalize_bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_canonicalize_bf16: